コード例 #1
0
ファイル: runit.py プロジェクト: xiao-shen/keystroke
def eval_classifier(classifierToUse, featuresToUse, testOrTrain="train"):
    """Build a dataset for the requested split, classify it and evaluate.

    Parameters:
        classifierToUse: identifier forwarded to Classifier.classification.
        featuresToUse: identifier forwarded to FeatureExtractor and Evaluation.
        testOrTrain: which split getTestData() should yield ("train" default).
    """
    print("Chosen feature: {0}".format(featuresToUse))
    print("Chosen classifier: {0}".format(classifierToUse))

    fe = FeatureExtractor(featuresToUse)
    dataset = DataSet(fe)
    classifier = Classifier()
    evaluate = Evaluation()

    # The original mixed Python 2 print statements with print() calls;
    # the parenthesized single-argument form works under both versions.
    print("test or Train %s" % testOrTrain)
    for feature_class, files in getTestData(testOrTrain).items():
        print("%s" % testOrTrain)
        for f in files:
            dataset.addFile(feature_class, f)

    print("Dataset initialized")
    print_class_stats(dataset.classes)

    print("Test set created.")
    # NOTE(review): test_size=0.9 keeps only 10% of the data for training --
    # confirm this split is intentional.
    a_train, a_test, c_train, c_test = train_test_split(
        dataset.featureVector, dataset.classes, test_size=0.9)

    c_pred = classifier.classification(a_train, a_test, c_train, c_test,
                                       classifierToUse)

    evaluate.evaluate(c_pred, c_test, featuresToUse, classifierToUse)
コード例 #2
0
    def main(self):
        """
        Method executing the whole pipeline.

        Loads the dataset, builds the graph if no saved model exists,
        optionally trains and evaluates the network, and finally launches
        Tensorboard.  Returns -1 when the dataset cannot be found.
        """
        ##
        # get data
        ##
        if (self.dataset in DataLoader.default_datasets
                or os.path.exists(self.dataset)):
            # Known names are mapped through default_datasets; anything else
            # reached this branch via os.path.exists(), so hand the raw path
            # to the loader.  The original indexed default_datasets
            # unconditionally, which raised KeyError for plain paths.
            source = DataLoader.default_datasets.get(self.dataset,
                                                     self.dataset)
            dataLoader = DataLoader(source)
            data = dataLoader.load()
            self.model_params['img_size'] = data.get_dimensions()
            self.model_params['label_size'] = data.get_label_dimensions()
        else:
            print("Dataset " + self.dataset + " does not exist. Aborting...")
            return -1

        ###
        # Potential Graph creation
        ###
        # Only build a fresh graph when no checkpoint metadata exists yet.
        if not os.path.exists(os.path.join(self.model_folder, 'model.meta')):
            builder = GraphBuilder()
            builder.build_graph(self.model_name, self.model_params)

        ###
        # Network training
        ###
        if self.do_training:
            network = Network(self.model_name, self.model_folder, self.opt,
                              self.opt_params, self.num_epochs,
                              self.batch_size, data, self.summary_folder,
                              self.summary_intervals, self.complete_set,
                              self.keep_prob, self.l2_reg, self.clip_gradient,
                              self.clip_value)
            network.load_and_train()

        ###
        # Evaluation
        ###
        if self.do_eval:
            # NOTE(review): self.summary_folder is passed twice here --
            # verify against Evaluation's signature.
            evaluator = Evaluation(data, self.model_folder,
                                   self.summary_folder, self.model_name,
                                   self.summary_folder, self.batch_size,
                                   **self.eval_params)
            evaluator.evaluate()
            print('Finished Evaluation.')

        ###
        # Tensorboard
        ###
        if self.tensorboard and self.do_training:
            print("Opening Tensorboard")
            os.system("tensorboard --logdir=" + self.summary_folder)
コード例 #3
0
def identifyCorpus(corpus, x=-1):
    """Update *corpus* with MWE dictionaries (type, count, tokens), then
    train (unless external labels are used), predict and evaluate it.

    Returns the evaluation scores.
    """
    print(XPParams.use_extern_labels)
    if not XPParams.use_extern_labels:
        # Full pipeline: refresh the corpus, train an oracle, then parse
        # with the trained classifier.
        corpus.update()
        clf = EmbeddingOracle.train(corpus, x)
        Parser.parse(corpus, clf)
    else:
        # Externally supplied labels: skip training, parse without a model.
        Parser.parse(corpus, "")
    return Evaluation.evaluate(corpus)
コード例 #4
0
ファイル: updater.py プロジェクト: wean/coupon-windows
def run(configFile, name):
    """Entry point: redirect output to a logger thread, then run every
    update manager against the 'specials' database and evaluate.

    Parameters:
        configFile: configuration path handed to each manager/database.
        name: name for the writable logger thread.
    """
    OutputPath.init(configFile)
    OutputPath.clear()

    thread = ThreadWritableObject(configFile, name)
    thread.start()

    sys.stdout = thread
    # The original assigned sys.errout, which is not a real sys attribute
    # and silently did nothing (its own XXX comment said so); sys.stderr is
    # the attribute that actually redirects the error stream.
    sys.stderr = thread

    try:
        db = Database(configFile, 'specials')
        db.initialize()

        skuManager = SkuManager(configFile, db)

        couponManager = CouponManager(configFile, db)
        discountManager = DiscountManager(configFile, db)
        seckillManager = SeckillManager(db)

        priceHistoryManager = PriceHistoryManager(db)
        evaluation = Evaluation(configFile, db)

        # Refresh every data source before evaluating.
        couponManager.update()
        discountManager.update()
        seckillManager.update()

        skuManager.update()
        evaluation.update()

        priceHistoryManager.update()

        evaluation.evaluate()

    except KeyboardInterrupt:
        pass
    except Exception:
        # Version-neutral handler: the original used the Python 2 only
        # "except Exception, e" form and never used e.
        print('Error occurs at %s' %
              datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        traceback.print_exc(file=sys.stdout)
コード例 #5
0
    def eval_comp(config_name, trial, i, log_i):
        # Evaluate one competence trial: on the first call (i == 0) build
        # the experiment, otherwise fast-forward the shared agent through
        # the recorded log, then run an inverse-model evaluation on the
        # shared testcases.
        # NOTE(review): `trial` is unused here -- presumably kept to match
        # the caller's callback signature.
        global xp, testcases
        config = configs[config_name]
        # Debug dump: number of entries recorded per log key.
        for key in log_i._logs.keys():
            print key, len(log_i._logs[key])
        if i == 0:
            # First trial: propagate the GUI flag and create the experiment.
            config.gui = gui
            config.env_cfg["gui"] = gui
            xp = ToolsExperiment(config, log_dir=log_dir + config_name + "/")
        else:
            # Later trials reuse the global experiment, replaying the log.
            xp.ag.fast_forward(log_i)
        xp.ag.eval_mode()

        evaluation = Evaluation(xp.log, xp.ag, xp.env, testcases, modes=["inverse"])
        result = evaluation.evaluate()
        return result
コード例 #6
0
    def eval_comp(config_name, trial, i, log_i):
        global xp, testcases
        config = configs[config_name]
        for key in log_i._logs.keys():
            print key, len(log_i._logs[key])
        if i == 0:
            config.gui = gui
            config.env_cfg['gui'] = gui
            xp = ToolsExperiment(config, log_dir=log_dir + config_name + '/')
        else:
            xp.ag.fast_forward(log_i)
        xp.ag.eval_mode()

        evaluation = Evaluation(xp.log,
                                xp.ag,
                                xp.env,
                                testcases,
                                modes=["inverse"])
        result = evaluation.evaluate()
        return result
コード例 #7
0
ファイル: Linear_MNIST.py プロジェクト: din1881/DL-Framework
    lr_schedular.step()


# save_weights(model, path)

e = Evaluation(10)  # confusion-matrix evaluator for 10 classes


# Run the trained model over the test set, collecting predictions.
for image, label in dataloader_test:
    image = image/255  # scale pixel values into [0, 1]
    predicted = model(image)
    probs = softMax(predicted)
    # argmax over axis 0 -> class index per sample; assumes outputs are
    # (classes, batch) shaped -- TODO confirm against the model definition.
    pred = np.argmax(probs,axis=0)
    e.add_prediction(pred[np.newaxis],label)
print("the confusion Matrix:\n",e.get_confusion_Matrix())
print("the Mean F1 Score:\n",e.evaluate())

# A second MLP: 784 -> 90 -> 45 -> 10 with ReLU activations.
model1 = Model()
model1.add(Dense(784, 90))
model1.add(ReLU())
model1.add(Dense(90, 45))
model1.add(ReLU())
model1.add(Dense(45, 10))

model1.set_loss(CrossEntropyLoss())
optimizer1 = GradientDecent(model1.parameters(), learning_rate = 0.01)

epochs = 6
for epoch in range(epochs):
    i = 0
    for image, label in dataloader:
コード例 #8
0
def train(dataset, alpha, A_type, normalize_type, model_pretrained_params,
          model_type, batch_size, test_batch_size, negative_nums, item_emb_dim,
          hid_dim1, hid_dim2, hid_dim3, lr_emb, l2_emb, lr_gcn, l2_gcn, lr_cnn,
          l2_cnn, epochs, params_file_name):
    """Train an sgncf1_cnn / sgncf2_cnn session model and evaluate it each epoch.

    Builds the (optionally decay-weighted) adjacency, optionally loads
    pretrained parameters, trains with BCE-with-logits loss using three
    optimizers (embedding / GCN / CNN+FC parameter groups), and after every
    epoch reports recall@k and mrr@k for k in {5, 10, 15, 20}, plotting the
    metric and norm curves.

    NOTE(review): hid_dim3 is accepted but never used -- kept for signature
    compatibility with callers.
    """
    # init: pick the dataset wrapper
    if dataset == 'LastFM':
        data_obj = LastfmData()
    elif dataset == 'Diginetica':
        data_obj = DigineticaData()
    else:
        # default: a yoochoose1_64-style dataset
        data_obj = YoochooseData(dataset=dataset)

    # gpu device (falls back to cpu)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # init A (type=scipy.sparse): decay-weighted or plain GCN adjacency
    A = data_obj.get_decay_adj(
        data_obj.d, tail=None,
        alpha=alpha) if A_type == 'decay' else data_obj.get_gcn_adj(data_obj.d)
    # normalize the adj: 'random_walk' (rows sum to 1) or 'symmetric'
    if normalize_type == 'random_walk':
        print('----------------------------------')
        print('Normalize_type is random_walk:')
        A = spmx_1_normalize(A)
        print('----------------------------------')
    else:
        print('----------------------------------')
        print('Normalize_type is symmetric:')
        A = spmx_sym_normalize(A)
        print('----------------------------------')
    # scipy sparse -> torch sparse (cpu) tensor
    A = spmx2torch_sparse_tensor(A)

    # cpu tensor: labels
    labels = data_obj.get_labels(data_obj.d)

    # cpu sparse tensor: session adjacency
    SI = data_obj.get_session_adj(data_obj.d, alpha=alpha)

    # optionally load pretrained parameters for the chosen dataset
    if model_pretrained_params == 'True':
        print('----------------------------------')
        if dataset == 'LastFM':
            print('Use LastFM model pretraned params: ' + params_file_name +
                  '.pkl')
            pretrained_state_dict = torch.load('./lastfm_pretrained_params/' +
                                               params_file_name + '.pkl')
        elif dataset == 'Diginetica':
            print('Use Diginetica model pretraned params: ' +
                  params_file_name + '.pkl')
            pretrained_state_dict = torch.load('./dig_pretrained_params/' +
                                               params_file_name + '.pkl')
        else:
            print('Use yoochoose1_64 model pretraned params: ' +
                  params_file_name + '.pkl')
            pretrained_state_dict = torch.load('./yoo1_64_pretrained_params/' +
                                               params_file_name + '.pkl')
        print('----------------------------------')
    else:
        pretrained_state_dict = None

    # move tensors to the chosen device
    A = A.to(device)
    labels = labels.to(device)
    SI = SI.to(device)

    # one evaluation object per cutoff k
    evalution5 = Evaluation(k=5)
    evalution10 = Evaluation(k=10)
    evalution15 = Evaluation(k=15)
    evalution20 = Evaluation(k=20)

    # train/test session data loaders
    trainloader = SessionDataloader(train_size=data_obj.train_size,
                                    test_size=data_obj.test_size,
                                    item_size=data_obj.item_size,
                                    labels=labels,
                                    batch_size=batch_size,
                                    train=True,
                                    negative_nums=negative_nums,
                                    shuffle=True)
    testloader = SessionDataloader(train_size=data_obj.train_size,
                                   test_size=data_obj.test_size,
                                   item_size=data_obj.item_size,
                                   labels=labels,
                                   batch_size=test_batch_size *
                                   data_obj.item_size,
                                   train=False,
                                   negative_nums=negative_nums,
                                   shuffle=False)

    # define the model, then move it to the device
    if model_type == 'sgncf1_cnn':
        # one-layer graph convolution variant
        model = sgncf1_cnn(dataset_nums=data_obj.train_size +
                           data_obj.test_size,
                           item_nums=data_obj.item_size,
                           item_emb_dim=item_emb_dim,
                           hid_dim1=hid_dim1)
    else:
        # two-layer graph convolution variant
        model = sgncf2_cnn(dataset_nums=data_obj.train_size +
                           data_obj.test_size,
                           item_nums=data_obj.item_size,
                           item_emb_dim=item_emb_dim,
                           hid_dim1=hid_dim1,
                           hid_dim2=hid_dim2)
    model.to(device)

    # merge pretrained weights into the model state dict (only keys the
    # current model actually has)
    if pretrained_state_dict is not None:
        model_state_dict = model.state_dict()
        pretrained_state_dict = {
            k: v
            for k, v in pretrained_state_dict.items() if k in model_state_dict
        }
        model_state_dict.update(pretrained_state_dict)
        model.load_state_dict(model_state_dict)

    # loss and per-group optimizers: Adagrad for the embedding table,
    # Adam for the GCN and CNN/FC parameters
    criterion = nn.BCEWithLogitsLoss()
    optim_emb = optim.Adagrad([{'params': model.item_emb.parameters()}],
                              lr=lr_emb,
                              weight_decay=l2_emb)
    gcn_groups = [{'params': model.gconv1.parameters()}]
    if model_type != 'sgncf1_cnn':
        # the two-layer model also optimizes its second graph convolution
        gcn_groups.append({'params': model.gconv2.parameters()})
    optim_gcn = optim.Adam(gcn_groups, lr=lr_gcn, weight_decay=l2_gcn)
    optim_cnn = optim.Adam([{'params': model.cnn_1d.parameters()},
                            {'params': model.fc.parameters()}],
                           lr=lr_cnn,
                           weight_decay=l2_cnn)

    # curves for recall / mrr / parameter norms
    fig_recalls = []
    fig_mrrs = []
    fig_emb_norms = []
    fig_gcn_norms = []
    fig_cnn_norms = []
    fig_epochs = []

    # train epochs
    for epoch in range(epochs):
        start = time.time()

        # per-epoch test metric accumulators
        r = {'5': [], '10': [], '15': [], '20': []}
        m = {'5': [], '10': [], '15': [], '20': []}

        # per-epoch losses
        losses = []

        model.train()
        for i, data in enumerate(trainloader):
            # reset gradients on all three optimizers
            optim_emb.zero_grad()
            optim_gcn.zero_grad()
            optim_cnn.zero_grad()

            # batch columns: [session idx, item idx, label]
            batch_sidxes = data[:, 0].long().to(device)
            batch_iidxes = data[:, 1].long().to(device)
            batch_labels = data[:, 2].float().to(device)

            # forward / loss / backward
            outs = model(batch_sidxes, batch_iidxes, A, SI)
            loss = criterion(outs, batch_labels)
            loss.backward()

            # optimizer steps
            optim_emb.step()
            optim_gcn.step()
            optim_cnn.step()

            losses.append(loss.item())

            # periodic progress report
            if i % 20 == 19:
                print('[{0: 2d}, {1:5d}, {2: 7d}], loss:{3:.4f}'.format(
                    epoch + 1, int(i * (batch_size / (negative_nums + 1))),
                    data_obj.train_size, np.mean(losses)))

        # track parameter norms
        emb_norm = get_norm(model, 'emb')
        gcn_norm = get_norm(model, 'gcn')
        cnn_norm = get_norm(model, 'cnn')
        fig_emb_norms.append(emb_norm)
        fig_gcn_norms.append(gcn_norm)
        # BUGFIX: the original appended gcn_norm here, so the plotted CNN
        # norm curve silently duplicated the GCN curve.
        fig_cnn_norms.append(cnn_norm)
        print('[gcn_norm]:{0:.4f}  [emb_norm]:{1:.4f}  [cnn_norm]:{2:.4f}'.
              format(gcn_norm, emb_norm, cnn_norm))

        # epoch time
        print('[epoch time]:{0:.4f}'.format(time.time() - start))

        # model eval
        model.eval()
        with torch.no_grad():
            for j, d in enumerate(testloader):
                # test batch inputs: [session idx, item idx] plus labels
                b_sidxes = d[0][:, 0].long().to(device)
                b_iidxes = d[0][:, 1].long().to(device)
                b_labels = d[1].to(device)

                # predicting
                o = model(b_sidxes, b_iidxes, A, SI)
                o = o.view(-1, data_obj.item_size)

                # evaluate once per cutoff k; the original called every
                # evaluate() twice (once for [0], once for [1]), doubling
                # the metric computation per batch
                for k, evaluator in (('5', evalution5), ('10', evalution10),
                                     ('15', evalution15), ('20', evalution20)):
                    res = evaluator.evaluate(o, b_labels)
                    r[k].append(res[0])
                    m[k].append(res[1])

            # print test recall / mrr
            print('[{0: 2d}]'.format(epoch + 1))
            print('[recall@5 ]:{0:.4f}  [mrr@5 ]:{1:.4f}'.format(
                np.sum(r['5']) / data_obj.test_size,
                np.sum(m['5']) / data_obj.test_size))
            print('[recall@10]:{0:.4f}  [mrr@10]:{1:.4f}'.format(
                np.sum(r['10']) / data_obj.test_size,
                np.sum(m['10']) / data_obj.test_size))
            print('[recall@15]:{0:.4f}  [mrr@15]:{1:.4f}'.format(
                np.sum(r['15']) / data_obj.test_size,
                np.sum(m['15']) / data_obj.test_size))
            print('[recall@20]:{0:.4f}  [mrr@20]:{1:.4f}'.format(
                np.sum(r['20']) / data_obj.test_size,
                np.sum(m['20']) / data_obj.test_size))

            # plot recall/mrr (k=20) and norm curves
            fig_epochs.append(epoch)
            fig_recalls.append(np.sum(r['20']) / data_obj.test_size)
            fig_mrrs.append(np.sum(m['20']) / data_obj.test_size)
            plt_evalution(fig_epochs,
                          fig_recalls,
                          fig_mrrs,
                          k=20,
                          alpha=alpha,
                          lr_emb=lr_emb,
                          l2_emb=l2_emb,
                          lr_gcn=lr_gcn,
                          l2_gcn=l2_gcn,
                          model_type=model_type,
                          lr_cnn=lr_cnn,
                          l2_cnn=l2_cnn)
            plt_norm(fig_epochs,
                     fig_emb_norms,
                     fig_gcn_norms,
                     fig_cnn_norms,
                     alpha=alpha,
                     lr_emb=lr_emb,
                     l2_emb=l2_emb,
                     lr_gcn=lr_gcn,
                     l2_gcn=l2_gcn,
                     model_type=model_type,
                     lr_cnn=lr_cnn,
                     l2_cnn=l2_cnn)
コード例 #9
0
                        action='store_true',
                        help='Evaluate the model')
    parser.add_argument('--num_points',
                        type=int,
                        default=2048,
                        help='Num of points to use')
    parser.add_argument('--model_path',
                        type=str,
                        default='',
                        metavar='N',
                        help='Path to load model')
    parser.add_argument('--num_workers',
                        type=int,
                        default=0,
                        metavar='N',
                        help='Number of workers to load data')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = get_parser()
    # argparse store_true flags are plain bools; test truthiness instead
    # of comparing with == False.
    if not args.eval:
        # Training mode: fit the reconstruction model.
        reconstruction = Trainer(args)
        reconstruction.train()
    else:
        # Evaluation mode: extract features, then classify them with an SVM.
        inference = Evaluation(args)
        feature_dir = inference.evaluate()
        svm = SVM(feature_dir)
        svm.classify()
コード例 #10
0
def identifyCorpus(corpus):
    """Update *corpus*, train an oracle on it, parse, and return the
    evaluation scores."""
    corpus.update()
    classifier = EmbeddingOracle.train(corpus)
    Parser.parse(corpus, classifier)
    return Evaluation.evaluate(corpus)
コード例 #11
0
def model_test(dataset, alpha, A_type, normalize_type, model_type,
               negative_nums, item_emb_dim, hid_dim1, hid_dim2,
               model_pretrained_params, params_file_name):
    """Evaluate an (optionally pretrained) sgncf1 / sgncf2 model on the
    test split, printing recall@k and mrr@k for k in {5, 10, 15, 20}.

    Mirrors train() setup (adjacency construction/normalization, pretrained
    parameter loading) but runs inference only.
    """
    # init: pick the dataset wrapper
    if dataset == 'LastFM':
        data_obj = LastfmData()
    elif dataset == 'Diginetica':
        data_obj = DigineticaData()
    else:
        # default: a yoochoose1_64-style dataset
        data_obj = YoochooseData(dataset=dataset)

    # gpu device (falls back to cpu)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # init A (type=scipy.sparse): decay-weighted or plain GCN adjacency
    A = data_obj.get_decay_adj(
        data_obj.d, tail=None,
        alpha=alpha) if A_type == 'decay' else data_obj.get_gcn_adj(data_obj.d)
    # normalize the adj: 'random_walk' (rows sum to 1) or 'symmetric'
    if normalize_type == 'random_walk':
        print('----------------------------------')
        print('Normalize_type is random_walk:')
        A = spmx_1_normalize(A)
        print('----------------------------------')
    else:
        print('----------------------------------')
        print('Normalize_type is symmetric:')
        A = spmx_sym_normalize(A)
        print('----------------------------------')
    # scipy sparse -> torch sparse (cpu) tensor
    A = spmx2torch_sparse_tensor(A)

    # cpu tensor: labels
    labels = data_obj.get_labels(data_obj.d)

    # cpu sparse tensor: session adjacency
    SI = data_obj.get_session_adj(data_obj.d, alpha=alpha)

    # optionally load pretrained parameters for the chosen dataset
    if model_pretrained_params == 'True':
        print('----------------------------------')
        if dataset == 'LastFM':
            print('Use LastFM model pretraned params: ' + params_file_name +
                  '.pkl')
            pretrained_state_dict = torch.load('./lastfm_pretrained_params/' +
                                               params_file_name + '.pkl')
        elif dataset == 'Diginetica':
            print('Use Diginetica model pretraned params: ' +
                  params_file_name + '.pkl')
            pretrained_state_dict = torch.load('./dig_pretrained_params/' +
                                               params_file_name + '.pkl')
        else:
            print('Use yoochoose1_64 model pretraned params: ' +
                  params_file_name + '.pkl')
            pretrained_state_dict = torch.load('./yoo1_64_pretrained_params/' +
                                               params_file_name + '.pkl')
        print('----------------------------------')
    else:
        pretrained_state_dict = None

    # move tensors to the chosen device
    A = A.to(device)
    labels = labels.to(device)
    SI = SI.to(device)

    # one evaluation object per cutoff k
    evalution5 = Evaluation(k=5)
    evalution10 = Evaluation(k=10)
    evalution15 = Evaluation(k=15)
    evalution20 = Evaluation(k=20)

    # test data loader (fixed meta-batch of 50 sessions x item_size)
    testloader = SessionDataloader(train_size=data_obj.train_size,
                                   test_size=data_obj.test_size,
                                   item_size=data_obj.item_size,
                                   labels=labels,
                                   batch_size=50 * data_obj.item_size,
                                   train=False,
                                   negative_nums=negative_nums,
                                   shuffle=False)

    # define the model, then move it to the device
    if model_type == 'sgncf1':
        # one-layer graph convolution variant
        model = sgncf1(dataset_nums=data_obj.train_size + data_obj.test_size,
                       item_nums=data_obj.item_size,
                       item_emb_dim=item_emb_dim,
                       hid_dim1=hid_dim1,
                       pretrained_item_emb=None)
    else:
        # two-layer graph convolution variant
        model = sgncf2(dataset_nums=data_obj.train_size + data_obj.test_size,
                       item_nums=data_obj.item_size,
                       item_emb_dim=item_emb_dim,
                       hid_dim1=hid_dim1,
                       hid_dim2=hid_dim2,
                       pretrained_item_emb=None)
    model.to(device)

    # merge pretrained weights into the model state dict (only keys the
    # current model actually has)
    if pretrained_state_dict is not None:
        model_state_dict = model.state_dict()
        pretrained_state_dict = {
            k: v
            for k, v in pretrained_state_dict.items() if k in model_state_dict
        }
        model_state_dict.update(pretrained_state_dict)
        model.load_state_dict(model_state_dict)

    # test metric accumulators
    r = {'5': [], '10': [], '15': [], '20': []}
    m = {'5': [], '10': [], '15': [], '20': []}

    # model eval
    model.eval()
    with torch.no_grad():
        for j, d in enumerate(testloader):
            # test batch inputs: [session idx, item idx] plus labels
            b_sidxes = d[0][:, 0].long().to(device)
            b_iidxes = d[0][:, 1].long().to(device)
            b_labels = d[1].to(device)

            # predicting
            o = model(b_sidxes, b_iidxes, A, SI)
            o = o.view(-1, data_obj.item_size)

            # evaluate once per cutoff k; the original invoked every
            # evaluate() twice (once for [0], once for [1]), doubling the
            # metric computation per batch
            for k, evaluator in (('5', evalution5), ('10', evalution10),
                                 ('15', evalution15), ('20', evalution20)):
                res = evaluator.evaluate(o, b_labels)
                r[k].append(res[0])
                m[k].append(res[1])
            print('[{0: 2d}, {1: 4d}]'.format(j * 50, data_obj.test_size))

        # print test recall / mrr
        print('[recall@5 ]:{0:.4f}  [mrr@5 ]:{1:.4f}'.format(
            np.sum(r['5']) / data_obj.test_size,
            np.sum(m['5']) / data_obj.test_size))
        print('[recall@10]:{0:.4f}  [mrr@10]:{1:.4f}'.format(
            np.sum(r['10']) / data_obj.test_size,
            np.sum(m['10']) / data_obj.test_size))
        print('[recall@15]:{0:.4f}  [mrr@15]:{1:.4f}'.format(
            np.sum(r['15']) / data_obj.test_size,
            np.sum(m['15']) / data_obj.test_size))
        print('[recall@20]:{0:.4f}  [mrr@20]:{1:.4f}'.format(
            np.sum(r['20']) / data_obj.test_size,
            np.sum(m['20']) / data_obj.test_size))
コード例 #12
0
def main():
    """
        MAIN OHOHOHOHOHO
    """
    sentiment_lexicon_dict = get_sentiment_lexicon(SENTIMENT_LEXICON_DIR)

    evaluation = Evaluation(DATASET_DIR)

    # Baseline models.
    evaluation.evaluate(RandomModel())
    evaluation.evaluate(MajorityModel())

    # LexiconFeaturesModel: default configuration, then explicit sweeps.
    evaluation.evaluate(LexiconFeaturesModel(sentiment_lexicon_dict))
    lexicon_configs = [
        ([3, 5, 10], [3, 5, 10], True),
        ([3, 5, 10], [3], True),
        ([5], [3, 5, 10], True),
        ([3, 5, 10, 15], [3, 5, 10], False),
        ([3, 15], [10, 15], False),
    ]
    for pos, neg, normalize in lexicon_configs:
        evaluation.evaluate(
            LexiconFeaturesModel(sentiment_lexicon_dict,
                                 positive_around_num=pos,
                                 negative_around_num=neg,
                                 normalize_data=normalize))

    # BertModel sweeps: vary one hyper-parameter at a time around the
    # baseline (n_words_left_right=6, conv_filters=100, dense_units=256,
    # dropout_rate=0.2, batch_size=128, epochs=5).  The baseline appears
    # once per sweep, exactly as in the original unrolled sequence.
    def run_bert(n_words=6, conv=100, dense=256, dropout=0.2, batch=128,
                 n_epochs=5):
        evaluation.evaluate(
            BertModel(n_words_left_right=n_words,
                      conv_filters=conv,
                      dense_units=dense,
                      dropout_rate=dropout,
                      batch_size=batch,
                      epochs=n_epochs))

    for n_words in range(1, 8):
        run_bert(n_words=n_words)

    for conv in (50, 100, 150):
        run_bert(conv=conv)

    for dense in (64, 128, 256):
        run_bert(dense=dense)

    for dropout in (0.1, 0.2, 0.3):
        run_bert(dropout=dropout)

    for batch in (32, 64, 128):
        run_bert(batch=batch)

    for n_epochs in (5, 10, 15):
        run_bert(n_epochs=n_epochs)

    # Dependency-parse based model.
    evaluation.evaluate(
        DependencyModel(sentiment_lexicon_dict,
                        positive_around_num=[1, 2, 3, 4, 5],
                        negative_around_num=[1, 2, 3, 4, 5],
                        normalize_data=True))
コード例 #13
0
class App:
    """
    Tkinter login window for the course-evaluation tool.

    Lays out student-id / password / captcha entry rows, a login button and
    a captcha-refresh button, plus an info pane that displays status text
    published (via the shared ``self.error`` StringVar) by the Evaluation
    backend.

    author: huobaolajiao
    """

    def __init__(self, root):
        # Student-id row.
        self.labelusr = tk.Label(root, text='学号:')
        self.labelusr.grid(row=0, sticky=tk.W)
        self.username = tk.StringVar()
        tk.Entry(root, textvariable=self.username).grid(row=0, column=1)
        # Password row (input masked with '*').
        self.labelpw = tk.Label(root, text='密码:')
        self.labelpw.grid(row=1, sticky=tk.W)
        self.password = tk.StringVar()
        tk.Entry(root, textvariable=self.password, show='*').grid(row=1,
                                                                  column=1)
        # Captcha row.
        self.labelcode = tk.Label(root, text='验证码:')
        self.labelcode.grid(row=2, sticky=tk.W)
        self.code = tk.StringVar()
        tk.Entry(root, textvariable=self.code).grid(row=2, column=1)
        # Action buttons: login and captcha refresh.
        self.button1 = tk.Button(root, text="登陆", command=self.prelogin)
        self.button1.grid(row=3, column=0)
        self.button2 = tk.Button(root, text="更换验证码", command=self.prechange)
        self.button2.grid(row=3, column=2)
        # Info pane: the Evaluation backend writes progress/errors into
        # self.error, which this label displays.
        self.info = tk.LabelFrame(root, text='信息栏:                   ')
        self.error = tk.StringVar()
        self.info.grid(row=4, column=1)
        self.Labelerr = tk.Label(self.info,
                                 textvariable=self.error,
                                 wraplength=130,
                                 height=2)  # adjustable status-box height
        self.Labelerr.grid()
        self.eva = Evaluation(self.error)
        # Captcha image holder; prechange() below fetches the first image.
        self.labelimg = tk.Label(root)
        self.labelimg.grid(row=2, column=2)
        self.prechange()

    def _spawn(self, target):
        """Run *target* on a daemon worker thread.

        The evaluation calls are long-running; executing them on the Tk
        main loop would freeze the window, so work is pushed to a thread.
        """
        self.thread = threading.Thread(target=target)
        # Attribute assignment replaces setDaemon(), deprecated since 3.10.
        self.thread.daemon = True
        self.thread.start()

    def prelogin(self):
        """Start the login/evaluate workflow on a background thread."""
        self._spawn(self.login)

    def login(self):
        """Log in with the entered credentials; evaluate on success.

        Always refreshes the captcha and re-enables both buttons afterwards,
        whether or not the login succeeded.
        """
        self.button1.configure(state='disabled')
        self.button2.configure(state='disabled')
        user = self.username.get()
        password = self.password.get()
        code = self.code.get()
        # Forward the three field values; run the evaluation only after a
        # successful login.
        if self.eva.login(user, password, code):
            self.eva.evaluate()
        self.change()
        self.button1.configure(state='normal')
        self.button2.configure(state='normal')

    def prechange(self):
        """Refresh the captcha image on a background thread."""
        self._spawn(self.change)

    def change(self):
        """Fetch and display a new captcha image, disabling buttons meanwhile."""
        self.button1.configure(state='disabled')
        self.button2.configure(state='disabled')
        tk_image = self.eva.get_login_img()
        self.labelimg.configure(image=tk_image)
        # Keep a reference on the widget so Tk does not drop the image.
        self.labelimg.image = tk_image
        self.button1.configure(state='normal')
        self.button2.configure(state='normal')
コード例 #14
0
class Model(object):
    """
    Abstract base class for a supervised model over engineered features.

    Subclasses implement :meth:`fit` / :meth:`predict` / :meth:`save` /
    :meth:`load`; this base wires a Feature source and an Evaluation helper
    together and caches train/test splits, predictions and evaluation
    results on the instance.
    """

    # Model name.
    name = "abstract"  # type: str
    # Hyper-parameters; class-level defaults, merged per instance in __init__.
    param = {}
    # Short human-readable description.
    description = "模型的简单介绍"  # type: str
    # Names of the features used by the model.
    feature_names = []
    train_x = None  # type: pd.DataFrame
    train_y = None  # type: np.ndarray
    train_y_pred = None  # type: np.ndarray
    test_x = None  # type: pd.DataFrame
    test_y = None  # type: np.ndarray
    test_y_pred = None  # type: np.ndarray
    # Evaluation results on the train / test splits.
    train_ev = None  # type: dict
    test_ev = None  # type: dict

    def __init__(self,
                 feature: Feature,
                 evaluation: Evaluation = None,
                 param=None):
        """
        :param feature: Feature provider (train/test frames and labels)
        :param evaluation: optional Evaluation helper; a default one bound
            to this model is created when omitted
        :param param: optional dict of hyper-parameter overrides
        """
        self.feature = feature
        self.model = None
        self.y_pred = None
        self.y_true = None
        if evaluation is None:
            self.evaluation = Evaluation(model=self)
        else:
            self.evaluation = evaluation
        if isinstance(param, dict):
            # Merge into a fresh instance-level dict.  The original called
            # self.param.update(param), which mutated the class-level dict
            # and leaked overrides across all instances and subclasses.
            self.param = {**self.param, **param}

    def select_features(self,
                        df_x: pd.DataFrame,
                        feature_list=None) -> pd.DataFrame:
        """Return the feature columns: an explicit list if given, otherwise
        the uf_/if_/sf_/kf_ column groups."""
        if feature_list is None:
            # NOTE(review): only `uf_` is anchored; `if_`/`sf_`/`kf_` match
            # anywhere inside a column name — confirm this is intended.
            return df_x.filter(regex='^uf_|if_|sf_|kf_', axis=1).copy()
        else:
            return df_x[feature_list]

    def tf_sample(self, df_x: pd.DataFrame, df_y: pd.DataFrame):
        """
        Rebalance positive/negative samples by dropping a share of positives.

        :param df_x: feature frame
        :param df_y: label series (0/1)
        :return: (df_x, df_y) with the selected positive rows removed
        """
        se_1 = [i for i in range(len(df_y)) if int(df_y[i]) == 1]
        ratio = (1 - df_y.mean()) / df_y.mean()
        # NOTE(review): the trailing `* 0` disables the down-sampling
        # entirely (len_del is always 0) — presumably a deliberate toggle;
        # confirm before relying on rebalancing.
        len_del = len(se_1) * (1 - ratio) * 0
        random.shuffle(se_1)
        se_1_sub = se_1[:int(len_del)]
        df_x = df_x[~df_x.index.isin(se_1_sub)]
        df_y = [df_y[i] for i in range(len(df_y)) if i not in se_1_sub]
        return df_x, df_y

    def fit(self):
        """Train the model; must be provided by subclasses."""
        # The original raised NotImplemented, which is not an exception and
        # itself raises TypeError at the raise site.
        raise NotImplementedError

    def predict(self, x: pd.DataFrame) -> np.ndarray:
        """Predict scores/labels for *x*; must be provided by subclasses."""
        raise NotImplementedError

    def save(self):
        """Persist the model; must be provided by subclasses."""
        raise NotImplementedError

    def load(self):
        """Load a persisted model; must be provided by subclasses."""
        raise NotImplementedError

    def evaluate(self, threshold: float = 0.5):
        """Evaluate the cached y_true/y_pred at the given decision threshold."""
        return self.evaluation.evaluate(y_true=self.y_true,
                                        y_pred=self.y_pred,
                                        threshold=threshold)

    def test(self, **kwargs):
        """
        Evaluate the model on the test split.

        :key feature_list: optional explicit list of feature columns
        :return: None; results are cached in test_x/test_y/test_y_pred/test_ev
        """
        feature_list = kwargs.get('feature_list', None)
        self.test_x = self.select_features(self.feature.features_test,
                                           feature_list)
        self.test_y = self.feature.label_test.values
        self.test_y_pred = self.predict(self.test_x)
        self.test_ev = self.evaluation.evaluate(y_true=self.test_y,
                                                y_pred=self.test_y_pred,
                                                threshold=0.5)
コード例 #15
0
def train(dataset, alpha, A_type, normalize_type, session_type,
          pretrained_item_emb, model_type, batch_size, shuffle, item_emb_dim,
          hid_dim1, hid_dim2, hid_dim3, lr_emb, lr_gcn, l2_emb, l2_gcn,
          epochs):
    """
    Train and evaluate a session-based GCN recommender end to end.

    Loads the chosen dataset, builds the (optionally decay-weighted and
    normalized) item adjacency, optionally restores pretrained item
    embeddings, constructs one of the ngcf* model variants, then trains for
    `epochs` rounds with separate Adagrad (embedding) and Adam (GCN)
    optimizers.  Recall@k and MRR@k (k = 5/10/15/20) are tracked on both
    splits, a checkpoint is saved every 10 epochs, and evaluation/norm
    curves are plotted after every test pass.

    :param dataset: 'LastFM', 'Diginetica' or a yoochoose variant name
    :param alpha: decay factor for the adjacency / pretrained-embedding file
    :param A_type: 'decay' for the decay adjacency, anything else for GCN adj
    :param normalize_type: 'random_walk' (row) or symmetric normalization
    :param session_type: 'session_hot_items' or 'session_last_item'
    :param pretrained_item_emb: the string 'True' to load pretrained weights
    :param model_type: which ngcf* architecture to instantiate
    :param batch_size: mini-batch size for both splits
    :param shuffle: whether to shuffle the training DataLoader
    :param item_emb_dim: item embedding dimension
    :param hid_dim1: first GCN hidden dimension
    :param hid_dim2: second GCN hidden dimension (2/3-layer variants)
    :param hid_dim3: third GCN hidden dimension (3-layer variant)
    :param lr_emb: learning rate for the embedding optimizer
    :param lr_gcn: learning rate for the GCN optimizer
    :param l2_emb: weight decay for the embedding optimizer
    :param l2_gcn: weight decay for the GCN optimizer
    :param epochs: number of training epochs
    """
    # init
    if dataset == 'LastFM':
        # use LastFM dataset
        data_obj = LastfmData()
    elif dataset == 'Diginetica':
        # use Diginetica dataset
        data_obj = DigineticaData()
    else:
        # use yoochoose1_64 dataset
        data_obj = YoochooseData(dataset=dataset)

    # gpu device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # init A
    # A: type=scipy.sparse
    A = data_obj.get_decay_adj(
        data_obj.d, tail=None,
        alpha=alpha) if A_type == 'decay' else data_obj.get_gcn_adj(data_obj.d)
    # normalize the adj, type = 'ramdom_walk'(row 1) or type = 'symmetric'
    if normalize_type == 'random_walk':
        print('----------------------------------')
        print('Normalize_type is random_walk:')
        A = spmx_1_normalize(A)
        print('----------------------------------')
    else:
        print('----------------------------------')
        print('Normalize_type is symmetric:')
        A = spmx_sym_normalize(A)
        print('----------------------------------')
    # transform the adj to a sparse cpu tensor
    A = spmx2torch_sparse_tensor(A)

    # get cpu tensor: labels
    labels = data_obj.get_labels(data_obj.d)

    # get cpu tensor: item_idxes
    _, _, item_idxes = data_obj.get_indexes()

    if session_type == 'session_hot_items':
        # get cpu sparse tensor: session adj
        session_adj = data_obj.get_session_adj(data_obj.d, alpha=alpha)
    else:
        # if not use session adj, then session_adj = None
        session_adj = None

    if session_type == 'session_last_item':
        # get cpu LongTensor: session_last_item
        session_last_item = data_obj.get_session_last_item(data_obj.d).long()
    else:
        # if not use session_last_item, then session_last_item = None
        session_last_item = None

    # get pretrained_item_emb
    # NOTE(review): `pretrained_item_emb` is compared as the *string* 'True'
    # and then rebound to a tensor (or None) — presumably a CLI flag.
    if pretrained_item_emb == 'True' and alpha != 0.0:
        print('----------------------------------')
        if dataset == 'yoochoose1_64':
            print('Use yoochoose1_64 pretrained item embedding: ' +
                  'pretrained_emb' + str(alpha) + '.pkl')
            pretrained_item_emb = torch.load(
                './yoo1_64_pretrained_item_emb/pretrained_emb' + str(alpha) +
                '.pkl')['item_emb.weight']
        elif dataset == 'yoochoose1_8':
            print('Use yoochoose1_8 pretrained item embedding: ' +
                  'pretrained_emb' + str(alpha) + '.pkl')
            pretrained_item_emb = torch.load(
                './yoo1_8_pretrained_item_emb/pretrained_emb' + str(alpha) +
                '.pkl')['item_emb.weight']
        elif dataset == 'LastFM':
            print('Use LastFM pretrained item embedding: ' + 'pretrained_emb' +
                  str(alpha) + '.pkl')
            pretrained_item_emb = torch.load(
                './lastfm_pretrained_item_emb/pretrained_emb' + str(alpha) +
                '.pkl')['item_emb.weight']
        else:
            print('Use Diginetica pretrained item embedding: ' +
                  'pretrained_emb' + str(alpha) + '.pkl')
            pretrained_item_emb = torch.load(
                './dig_pretrained_item_emb/pretrained_emb' + str(alpha) +
                '.pkl')['item_emb.weight']
        print('----------------------------------')
    else:
        print('----------------------------------')
        print('Not use pretrained item embedding:')
        pretrained_item_emb = None
        print('----------------------------------')

    # get cpu LongTensor: item_emb_idxes
    item_emb_idxes = torch.arange(data_obj.item_size).long()

    # transform all tensor to cuda
    A = A.to(device)
    labels = labels.to(device)
    item_idxes = item_idxes.to(device)
    item_emb_idxes = item_emb_idxes.to(device)
    if session_last_item is not None:
        session_last_item = session_last_item.to(device)
    if session_adj is not None:
        session_adj = session_adj.to(device)

    # define the evaluation objects (top-k metrics, k = 5/10/15/20)
    evalution5 = Evaluation(k=5)
    evalution10 = Evaluation(k=10)
    evalution15 = Evaluation(k=15)
    evalution20 = Evaluation(k=20)

    # define yoochoose data object
    trainset = SessionDataset(train_size=data_obj.train_size,
                              test_size=data_obj.test_size,
                              train=True,
                              labels=labels)
    trainloader = DataLoader(dataset=trainset,
                             batch_size=batch_size,
                             shuffle=shuffle)
    testset = SessionDataset(train_size=data_obj.train_size,
                             test_size=data_obj.test_size,
                             train=False,
                             labels=labels)
    testloader = DataLoader(dataset=testset,
                            batch_size=batch_size,
                            shuffle=False)

    # define model, then transform to cuda
    if model_type == 'ngcf1_session_hot_items':
        # use ngcf1_session_hot_items model:
        model = ngcf1_session_hot_items(
            item_nums=data_obj.item_size,
            item_emb_dim=item_emb_dim,
            hid_dim1=hid_dim1,
            pretrained_item_emb=pretrained_item_emb)
    elif model_type == 'ngcf2_session_hot_items':
        # use ngcf2_session_hot_items model:
        model = ngcf2_session_hot_items(
            item_nums=data_obj.item_size,
            item_emb_dim=item_emb_dim,
            hid_dim1=hid_dim1,
            hid_dim2=hid_dim2,
            pretrained_item_emb=pretrained_item_emb)
    elif model_type == 'ngcf3_session_hot_items':
        # use ngcf3_session_hot_items model:
        model = ngcf3_session_hot_items(
            item_nums=data_obj.item_size,
            item_emb_dim=item_emb_dim,
            hid_dim1=hid_dim1,
            hid_dim2=hid_dim2,
            hid_dim3=hid_dim3,
            pretrained_item_emb=pretrained_item_emb)
    else:
        # use ngcf2_session_last_item model:
        model = ngcf2_session_last_item(
            item_nums=data_obj.item_size,
            item_emb_dim=item_emb_dim,
            hid_dim1=hid_dim1,
            hid_dim2=hid_dim2,
            pretrained_item_emb=pretrained_item_emb)
    model.to(device)

    # define loss and optim
    # Embeddings use Adagrad, GCN layers use Adam, each with its own lr/L2.
    criterion = nn.CrossEntropyLoss()
    if model_type == 'ngcf1_session_hot_items':
        # use ngcf1_session_hot_items model parameters:
        optim_emb = optim.Adagrad([{
            'params': model.item_emb.parameters()
        }],
                                  lr=lr_emb,
                                  weight_decay=l2_emb)
        optim_gcn = optim.Adam([{
            'params': model.gconv1.parameters()
        }],
                               lr=lr_gcn,
                               weight_decay=l2_gcn)

    elif model_type == 'ngcf2_session_hot_items':
        # use ngcf2_session_hot_items model parameters:
        optim_emb = optim.Adagrad([{
            'params': model.item_emb.parameters()
        }],
                                  lr=lr_emb,
                                  weight_decay=l2_emb)
        optim_gcn = optim.Adam([{
            'params': model.gconv1.parameters()
        }, {
            'params': model.gconv2.parameters()
        }],
                               lr=lr_gcn,
                               weight_decay=l2_gcn)

    elif model_type == 'ngcf3_session_hot_items':
        # use ngcf3_session_hot_items model parameters:
        optim_emb = optim.Adagrad([{
            'params': model.item_emb.parameters()
        }],
                                  lr=lr_emb,
                                  weight_decay=l2_emb)
        optim_gcn = optim.Adam([{
            'params': model.gconv1.parameters()
        }, {
            'params': model.gconv2.parameters()
        }, {
            'params': model.gconv3.parameters()
        }],
                               lr=lr_gcn,
                               weight_decay=l2_gcn)

    else:
        # use ngcf2_session_last_item model parameters:
        optim_emb = optim.Adagrad([{
            'params': model.item_emb.parameters()
        }],
                                  lr=lr_emb,
                                  weight_decay=l2_emb)
        optim_gcn = optim.Adam([{
            'params': model.gconv1.parameters()
        }, {
            'params': model.gconv2.parameters()
        }],
                               lr=lr_gcn,
                               weight_decay=l2_gcn)

    # figure recall mrr norm
    fig_recalls = []
    fig_mrrs = []
    fig_emb_norms = []
    fig_gcn_norms = []
    fig_epochs = []

    # train epochs
    for epoch in range(epochs):
        # model training
        start = time.time()

        # train evalution dict
        recall = {'5': [], '10': [], '15': [], '20': []}
        mrr = {'5': [], '10': [], '15': [], '20': []}

        # test evalution dict
        r = {'5': [], '10': [], '15': [], '20': []}
        m = {'5': [], '10': [], '15': [], '20': []}

        # loss list
        losses = []

        model.train()
        for i, data in enumerate(trainloader):
            # zero optim
            optim_emb.zero_grad()
            optim_gcn.zero_grad()

            # batch inputs
            batch_idxes, batch_labels = data[0].long().to(
                device), data[1].long().to(device)

            # predicting
            if model_type == 'ngcf1_session_hot_items':
                # use ngcf1_session_hot_items model to predict
                outs = model(batch_idxes, A, item_idxes, session_adj,
                             item_emb_idxes)
            elif model_type == 'ngcf2_session_hot_items':
                # use ngcf2_session_hot_items model to predict
                outs = model(batch_idxes, A, item_idxes, session_adj,
                             item_emb_idxes)
            elif model_type == 'ngcf3_session_hot_items':
                # use ngcf3_session_hot_items model to predict
                outs = model(batch_idxes, A, item_idxes, session_adj,
                             item_emb_idxes)
            else:
                # use ngcf2_session_last_item model to predict
                outs = model(batch_idxes, A, item_idxes, session_last_item,
                             item_emb_idxes)

            # loss
            loss = criterion(outs, batch_labels)

            # backward
            loss.backward()

            # optim step
            optim_emb.step()
            optim_gcn.step()

            # evalution, k=5, 10, 15, 20
            # NOTE(review): each evaluate() is called twice per k (index 0
            # for recall, index 1 for mrr); assumed pure/deterministic.
            recall['5'].append(evalution5.evaluate(outs, batch_labels)[0])
            recall['10'].append(evalution10.evaluate(outs, batch_labels)[0])
            recall['15'].append(evalution15.evaluate(outs, batch_labels)[0])
            recall['20'].append(evalution20.evaluate(outs, batch_labels)[0])
            mrr['5'].append(evalution5.evaluate(outs, batch_labels)[1])
            mrr['10'].append(evalution10.evaluate(outs, batch_labels)[1])
            mrr['15'].append(evalution15.evaluate(outs, batch_labels)[1])
            mrr['20'].append(evalution20.evaluate(outs, batch_labels)[1])

            # losses
            losses.append(loss.item())

            # print loss, recall, mrr
            if i % 50 == 49:
                print('[{0: 2d}, {1:5d}]  loss:{2:.4f}'.format(
                    epoch + 1, i + 1, np.mean(losses)))
                print('[recall@5 ]:{0:.4f}  [mrr@5 ]:{1:.4f}'.format(
                    np.mean(recall['5']), np.mean(mrr['5'])))
                print('[recall@10]:{0:.4f}  [mrr@10]:{1:.4f}'.format(
                    np.mean(recall['10']), np.mean(mrr['10'])))
                print('[recall@15]:{0:.4f}  [mrr@15]:{1:.4f}'.format(
                    np.mean(recall['15']), np.mean(mrr['15'])))
                print('[recall@20]:{0:.4f}  [mrr@20]:{1:.4f}'.format(
                    np.mean(recall['20']), np.mean(mrr['20'])))

        # print gcn_norm, emb_norm
        emb_norm = get_norm(model, 'emb')
        gcn_norm = get_norm(model, 'gcn')
        fig_emb_norms.append(emb_norm)
        fig_gcn_norms.append(gcn_norm)
        print('[gcn_norm]:{0:.4f}  [emb_norm]:{1:.4f}'.format(
            gcn_norm, emb_norm))

        # epoch time
        print('[epoch time]:{0:.4f}'.format(time.time() - start))

        # save model (checkpoint every 10 epochs)
        if epoch % 10 == 9:
            torch.save(
                model.state_dict(),
                'params' + model_type + '-Alpha' + str(alpha) + '_' +
                '_lr_emb' + str(lr_emb) + '_l2_emb' + str(l2_emb) + '_lr_gcn' +
                str(lr_gcn) + '_l2_gcn' + str(l2_gcn) + '.pkl')

        # model eval
        model.eval()
        with torch.no_grad():
            for j, d in enumerate(testloader):
                # test batch inputs
                b_idxes, b_labels = d[0].long().to(device), d[1].long().to(
                    device)

                # predicting
                if model_type == 'ngcf1_session_hot_items':
                    # use ngcf1_session_hot_items model to predict
                    o = model(b_idxes, A, item_idxes, session_adj,
                              item_emb_idxes)
                elif model_type == 'ngcf2_session_hot_items':
                    # use ngcf2_session_hot_items model to predict
                    o = model(b_idxes, A, item_idxes, session_adj,
                              item_emb_idxes)
                elif model_type == 'ngcf3_session_hot_items':
                    # use ngcf3_session_hot_items model to predict
                    o = model(b_idxes, A, item_idxes, session_adj,
                              item_emb_idxes)
                else:
                    # use ngcf2_session_last_item model to predict
                    o = model(b_idxes, A, item_idxes, session_last_item,
                              item_emb_idxes)

                # evalution, k=5, 10, 15, 20
                r['5'].append(evalution5.evaluate(o, b_labels)[0])
                r['10'].append(evalution10.evaluate(o, b_labels)[0])
                r['15'].append(evalution15.evaluate(o, b_labels)[0])
                r['20'].append(evalution20.evaluate(o, b_labels)[0])
                m['5'].append(evalution5.evaluate(o, b_labels)[1])
                m['10'].append(evalution10.evaluate(o, b_labels)[1])
                m['15'].append(evalution15.evaluate(o, b_labels)[1])
                m['20'].append(evalution20.evaluate(o, b_labels)[1])

            # print test recall mrr
            print('[{0: 2d}]'.format(epoch + 1))
            print('[recall@5 ]:{0:.4f}  [mrr@5 ]:{1:.4f}'.format(
                np.mean(r['5']), np.mean(m['5'])))
            print('[recall@10]:{0:.4f}  [mrr@10]:{1:.4f}'.format(
                np.mean(r['10']), np.mean(m['10'])))
            print('[recall@15]:{0:.4f}  [mrr@15]:{1:.4f}'.format(
                np.mean(r['15']), np.mean(m['15'])))
            print('[recall@20]:{0:.4f}  [mrr@20]:{1:.4f}'.format(
                np.mean(r['20']), np.mean(m['20'])))

            # plt recall and mrr and norm
            fig_epochs.append(epoch)
            fig_recalls.append(np.mean(r['20']))
            fig_mrrs.append(np.mean(m['20']))
            plt_evalution(fig_epochs,
                          fig_recalls,
                          fig_mrrs,
                          k=20,
                          alpha=alpha,
                          lr_emb=lr_emb,
                          l2_emb=l2_emb,
                          lr_gcn=lr_gcn,
                          l2_gcn=l2_gcn,
                          model_type=model_type)
            plt_norm(fig_epochs,
                     fig_emb_norms,
                     fig_gcn_norms,
                     alpha=alpha,
                     lr_emb=lr_emb,
                     l2_emb=l2_emb,
                     lr_gcn=lr_gcn,
                     l2_gcn=l2_gcn,
                     model_type=model_type)
コード例 #16
0
class Algoritm:
    """
    Steady-state genetic algorithm over Chromozome tours of the map.

    Keeps a population of four chromosomes, evolves them with roulette-wheel
    selection, crossover and mutation, and stops once the best (minimum)
    fitness has been unchanged for 300 consecutive replacements.  The final
    result and the fitness history are written to "<input>_solution.txt".
    """

    def __init__(self, repository):
        self.__repository = repository
        # The map is stored flattened; recover the NxN side length.
        self.__nrOfNodes = int(sqrt(len(repository.getHarta())))
        self.__pop = []  # current population
        self.__fit = []  # fitness values, kept parallel to __pop
        self.__evalFunc = Evaluation(repository.getHarta(), self.__nrOfNodes)
        self.__minFit = 0
        self.__nrConsMinFit = 0  # consecutive steps at the same best fitness
        self.__lastFit = 0
        self.__fitEvol = []  # best-fitness history, written to the output file

    def __initialize(self):
        """Seed the population with four random chromosomes."""
        for _ in range(4):
            self.__pop.append(Chromozome(self.__nrOfNodes))

    def __evaluate(self):
        """Score every chromosome and record the initial best fitness."""
        for chromozome in self.__pop:
            # Evaluate once and reuse the value (the original called
            # evaluate() twice per chromosome).
            fitness = self.__evalFunc.evaluate(chromozome.getRepres())
            self.__fit.append(fitness)
            chromozome.setFitness(fitness)
        self.__minFit = min(self.__fit)
        self.__nrConsMinFit = 1
        self.__lastFit = self.__fit[3]

    def __selection(self):
        """Pick two distinct parents by roulette-wheel selection."""
        fitSum = sum(self.__fit)
        roulette = []
        aux = 0
        for fit in self.__fit:
            aux += fit / fitSum
            roulette.append(aux)
        # Fallback guards against float round-off leaving p1 >= roulette[-1],
        # which left `parent1` unbound in the original.
        parent1 = self.__pop[-1]
        p1 = random.random()
        for i in range(4):
            if p1 < roulette[i]:
                parent1 = self.__pop[i]
                break
        parent2 = parent1
        while parent2 == parent1:
            p2 = random.random()
            for i in range(4):
                if p2 < roulette[i]:
                    parent2 = self.__pop[i]
                    break
        return parent1, parent2

    def __crossover(self, parent1, parent2):
        """Produce two offspring; each parent crosses with the other."""
        return parent1.crossover(parent2), parent2.crossover(parent1)

    def _mutation(self, o1, o2):
        """Mutate both offspring (delegates to Chromozome.mutation)."""
        return o1.mutation(), o2.mutation()

    def __best(self, o1, o2):
        """Return the offspring with the lower (better) fitness."""
        return o1 if o1.getFitness() < o2.getFitness() else o2

    def __worst(self):
        """Return a population member with the highest (worst) fitness."""
        maxFit = max(self.__fit)
        for chromo in self.__pop:
            if chromo.getFitness() == maxFit:
                return chromo

    def __getFinalResult(self):
        """Return a population member achieving the best fitness seen."""
        for i in range(len(self.__fit)):
            if self.__fit[i] == self.__minFit:
                return self.__pop[i]

    def __writeToFile(self, result):
        """Write node count, tour, fitness history and final fitness next to
        the input file."""
        reprResult = result.getRepres()
        base = (self.__repository.getFileNameIn().split(".txt"))[0]
        # `with` guarantees the file is flushed and closed (the original
        # leaked the handle and relied on interpreter shutdown).
        with open(base + "_solution.txt", "w") as f:
            f.write(str(len(reprResult)))
            f.write('\n')
            f.write(",".join(str(x) for x in reprResult))
            f.write('\n')
            f.write(",".join(str(x) for x in self.__fitEvol))
            f.write('\n')
            f.write(str(result.getFitness()))

    def execute(self):
        """Run the GA until the best fitness stagnates for 300 steps, then
        persist the best chromosome found."""
        # initialize and score the starting population
        self.__initialize()
        self.__evaluate()
        self.__fitEvol.append(self.__minFit)

        while self.__nrConsMinFit < 300:
            for i in range(4):
                # selection
                parent1, parent2 = self.__selection()

                # crossover
                o1, o2 = self.__crossover(parent1, parent2)

                # mutation
                o1, o2 = self._mutation(o1, o2)

                # evaluation
                o1.setFitness(self.__evalFunc.evaluate(o1.getRepres()))
                o2.setFitness(self.__evalFunc.evaluate(o2.getRepres()))

                # modify generation: the better offspring replaces the
                # current worst member
                b = self.__best(o1, o2)
                w = self.__worst()
                self.__pop.remove(w)
                self.__fit.remove(w.getFitness())
                self.__pop.append(b)
                self.__fit.append(b.getFitness())
                self.__minFit = min(self.__fit)
                if self.__fitEvol[-1] == self.__minFit:
                    self.__nrConsMinFit += 1
                else:
                    self.__nrConsMinFit = 1
                self.__fitEvol.append(self.__minFit)
        self.__writeToFile(self.__getFinalResult())
コード例 #17
0
ファイル: train.py プロジェクト: nidohsp/Flashback_code
                    h[1][0, j] = hc[1]
                else:
                    h[0, j] = h0_strategy.on_reset(active_users[0][j])
        
        x = x.squeeze().to(setting.device)
        t = t.squeeze().to(setting.device)
        s = s.squeeze().to(setting.device)
        y = y.squeeze().to(setting.device)
        y_t = y_t.squeeze().to(setting.device)
        y_s = y_s.squeeze().to(setting.device)                
        active_users = active_users.to(setting.device)
        
        optimizer.zero_grad()
        loss, h = trainer.loss(x, t, s, y, y_t, y_s, h, active_users)
        loss.backward(retain_graph=True)
        losses.append(loss.item())
        optimizer.step()
    
    # schedule learning rate:
    scheduler.step()
    
    # statistics:
    if (e+1) % 1 == 0:
        epoch_loss = np.mean(losses)
        print(f'Epoch: {e+1}/{setting.epochs}')
        print(f'Used learning rate: {scheduler.get_lr()[0]}')
        print(f'Avg Loss: {epoch_loss}')
    if (e+1) % setting.validate_epoch == 0:        
        print(f'~~~ Test Set Evaluation (Epoch: {e+1}) ~~~')
        evaluation_test.evaluate()
コード例 #18
0
class L(threading.Thread):
    """A "Liza" tutoring chatbot session, run on its own thread.

    Each instance wires together the project collaborators (Domain,
    Phrasebase, Parsing, Evaluation, UI) and drives one interactive
    quiz dialogue via run().
    """

    # Class-wide count of L instances created since program start.
    totalCount = 0

    #Initialize all the components#
    def __init__(self, id, nr):
        """Create all collaborator components for one chat session.

        id -- numeric identifier for this session (NOTE(review): the
              parameter shadows the builtin id()).
        nr -- forwarded to the UI constructor; presumably a window/slot
              number — TODO confirm against the UI class.
        """
        self.domain = Domain()
        self.phrasebase = Phrasebase()
        self.parsing = Parsing()
        # Evaluation needs the domain's topic list and feedback phrases.
        self.evaluation = Evaluation(self.domain.getTopics(),
                                     self.domain.getPraise(),
                                     self.domain.getCriticism())
        super(L, self).__init__()
        self.ui = UI(nr)
        self.id = id
        L.totalCount += 1
        self.displayCount()
        self.running = True

    def getUI(self):
        return self.ui

    #main method, organize the whole structure of the dialogue#
    def run(self):

        #setup
        self.parsing.setui(self.ui)
        warnings.filterwarnings(module='sklearn*',
                                action='ignore',
                                category=DeprecationWarning)

        #introductory greeting of the user, general explanations and so on
        self.intro()

        #getting stories to ask
        total_number = 6
        stories = self.organizeStories(total_number)

        #main loop for asking all stories
        for story in stories:
            self.askStory(story)

        #get the evaluation and tell it to the user
        eval = self.evaluation.evaluate()
        for e in eval:
            self.ui.tell(e)

        #say goodbye
        self.goodbye()

    #just for keeping track of how many lizas there are
    def displayCount(self):
        print(
            "I am the %d. Liza started here since the last restart of the program."
            % L.totalCount)

    #display a final goodbye message, and then a "system message" stating the liza has disconnected.
    def goodbye(self):
        self.ui.tell("Goodbye!")
        self.ui.info("Liza has disconnected.")
        sys.exit()

    #For asking if the user wants to take a break
    def askbreak(self):
        self.ui.tell("Do you want to continue with this?")
        answer = self.ui.listen()
        if (answer == ""):
            self.ui.tell(
                "Do you want to continue? If you don't reply, I have to assume that you are gone..."
            )
            answer = self.ui.listen()
            if (answer == ""):
                self.ui.tell("Well, okay... I guess we can't continue then.")
                self.goodbye()
        meaning = self.parsing.parse(answer)
        if meaning == "yes":
            self.ui.tell("Okay, great! So let's return to the questions.")
            return True
        self.ui.tell("Oh. I am sorry.")
        self.goodbye()

    #for asking a question and continuing until we finally got an answer
    def askForever(self, question):
        answer = ""
        time = 0
        self.ui.tell(question)
        self.ui.prompt()
        while True:
            time = time + 1
            answer = self.ui.listen()
            if answer == "" and time < 4:
                self.ui.tell(self.phrasebase.getReply("waiting"))
                self.ui.prompt()
            if answer == "" and time >= 4 and time < 7:
                self.ui.tell(self.phrasebase.getReply("worried_waiting"))
                self.ui.prompt()
            if time >= 7 and time < 10:
                continue
            if time >= 10:
                self.ui.tell(
                    "It seems that you are a bit distracted right now. Or maybe dead. ... ...I hope you are not dead. But because it's been a while since your last answer..."
                )
                self.ui.prompt()
                self.askbreak()
                time = 0
                self.ui.tell("So my question was: " + question)

            if answer != "":
                return answer

    def askStory(self, story):
        #if story type is already explained, go on
        #otherwise, explain
        if not self.domain.getExplained(story.group):
            self.ui.tell(self.phrasebase.getReply("new_topic"))
            self.ui.tell(self.domain.groups[story.group])
            answer = self.askForever(
                self.phrasebase.getReply("want_topic_explanation"))
            meaning = self.parsing.parse(answer)
            if meaning == "yes":
                self.ui.tell(self.domain.getExplanation(story.group))
                self.domain.setExplained(story.group)
                self.ui.tell("Now let's get to the question!")
            if meaning == "no":
                self.ui.tell("Ok, then let's get right to the question.")
            else:
                self.ui.tell("Oh, I really like to explain things.")
                self.ui.tell(self.domain.getExplanation(story.group))
                self.domain.setExplained(story.group)
                self.ui.tell("Now let's get to the question!")

        if (len(story.intro) > 0):
            answer = self.askForever(story.intro)
            meaning = self.parsing.parse(answer)
            if "yes" in meaning:
                self.ui.tell(story.introyes)
            elif "no" in meaning:
                self.ui.tell(story.introno)
            else:
                self.ui.tell(self.phrasebase.getReply("meaninglesses"))

        self.ui.tell(story.text)
        self.ui.tell(story.question)
        answer = self.ui.listenLong()

        if answer == "" or answer == None:
            print("seems like they don't know the answer...")
            self.ui.tell(self.phrasebase.getReply("offer_hint"))
            self.ui.prompt()
            answer = self.ui.listen()
            meaning = self.parsing.parse(answer)
            #print(answer)
            #print(meaning)
            if meaning == "yes":
                self.ui.tell(story.hint)
                self.ui.prompt()
            if meaning == "no":
                self.ui.tell("ok!")

            meaning = self.parsing.parse(answer)
            if not meaning == "correct" or meaning == "incorrect":
                answer = self.askForever(
                    self.phrasebase.getReply("introbla") + story.question)
                #print(answer)

        meaning = self.parsing.parseQuiz(answer, story)
        #print(meaning)

        if meaning == "whatquestion":
            self.ui.tell("The question was: " + story.question)

    #  if meaning == "hint":
    #    self.ui.tell(story.hint)
    #    answer = askForever(self.phrasebase.getReply("introbla") + story.question)

        if meaning == "explain":
            self.ui.tell(self.phrasebase.getReply("offer_explanation"))
            self.ui.prompt()
            answer = self.ui.listen()
            meaning = self.parsing.parse(answer)
            if meaning == "yes":
                self.ui.tell(story.explain)
                #explained the answer
                #student model: didn't know the answer
                return

            if meaning == "no":
                self.ui.tell("ok!")
            answer = self.askForever(
                self.phrasebase.getReply("introbla") + story.question)
            meaning = self.parsing.parse(answer)

        if meaning == "correct":
            if (randint(0, 1) > 0):
                self.askCalibration(True)
            self.ui.tell(
                story.answercorrect
            )  #you could also use self.phrasebase.getReply("correct")
            self.evaluation.answer(story, True)

            #student model: knew the answer

        elif meaning == "incorrect":
            if (randint(0, 1) > 0):
                self.askCalibration(False)

            self.evaluation.answer(story, False)
            self.ui.tell(
                story.answerincorrect
            )  #you could also use self.phrasebase.getReply("incorrect")
            self.ui.tell(self.phrasebase.getReply("offer_explanation"))
            self.ui.prompt()
            answer = self.ui.listen()
            meaning = self.parsing.parse(answer)
            if meaning == "yes":
                self.ui.tell(story.explain)

            return

    def askCalibration(self, correctness):
        self.ui.tell(self.phrasebase.getReply("howsure"))
        answer = self.ui.listen()
        percent = self.parsing.parsePercent(answer)
        self.evaluation.calibrate(correctness, percent)

    def organizeStories(self, total):
        storylist = []
        topic = 0
        for i in range(0, total):
            topic = topic % len(self.domain.getTopics())
            story = self.domain.getStory(topic)
            storylist.append(story)
            topic = topic + 1
        return storylist

    def intro(self):
        """Opening dialogue: greet the user, run a playful "are you human"
        check, motivate the rationality training, and ask one warm-up test
        question before the real quiz starts.
        """
        #the bot introduces itself
        answer = self.askForever(self.phrasebase.getReply("greetings"))
        meaning = self.parsing.parse(answer)
        #print(meaning)
        if "greet" in meaning or "yes" in meaning:
            self.ui.tell("Nice to meet you!")
        answer = self.askForever("You are a human, aren't you?")
        meaning = self.parsing.parse(answer)
        # "yes" and "no" are mutually exclusive, so the paired plain `if`
        # statements below behave like if/elif here.
        if meaning == "yes":
            self.ui.tell("Great! This means you can help me.")
        if meaning == "no":
            # Tongue-in-cheek captcha: any reply passes, only the phrasing differs.
            answer = self.askForever(
                "Are you sure? What is the sum of two and three?")
            meaning = self.parsing.parse(answer)
            if meaning == "five":
                self.ui.tell(
                    "See, you solved the captcha. You are sufficiently human for my purposes."
                )
            else:
                self.ui.tell("Very funny. You are definitely human.")

        self.ui.tell(
            "My programmers want me to teach you how to be rational, make good decisions and judge situations correctly."
        )
        answer = self.askForever("Do you want to be more rational?")

        meaning = self.parsing.parse(answer)

        if meaning == "yes":
            self.ui.tell("Yeah, that's the spirit!")

        if meaning == "no":
            answer = self.askForever(
                "Why would you think that? Rationality is just the ability to make good decisions. Do you want to be able to make good decisions?"
            )
            meaning = self.parsing.parse(answer)
            if meaning == "no":
                # A persistent "no" hands control to askbreak(), which may
                # end the session entirely.
                self.askbreak()

        self.ui.tell(
            "I will just try to ask you some questions, and try to explain to you what you could do better. If I do a bad job at explaning, just ask me, ok? I never taught humans before."
        )
        self.ui.tell(
            "So, let's see... the first thing I want you to know is that you don't have to be extremely intelligent to be rational. There are very intelligent people who do things that are not at all reasonable. The key to rational decisions is to know when not to follow your gut feelings, but to stop and actually think about the problem."
        )
        answer = self.askForever(
            "To get used to the whole situation - how about I ask you a test question? Just to make sure I am doing this teaching thing right. "
        )
        meaning = self.parsing.parse(answer)
        if meaning == "yes":
            self.ui.tell("Okay, thank you!")
        if meaning == "no":
            self.ui.tell("I would nevertheless like to ask the test question.")

        # Warm-up question; expected answer is "no".
        answer = self.askForever(
            "This is my first question: Do people need to follow their gut feelings to make rational decisions?"
        )
        meaning = self.parsing.parse(answer)
        if meaning == "no":
            self.ui.tell(
                "Amazing! I mean, it was easy, I know, but you did it. Very reasonable of you to say this! Now we can start with the actual teaching."
            )

        if meaning == "yes":
            self.ui.tell(
                "Uhm... no. This is a bit awkward. Following you gut feelings means not to think about something, but just go with what feels right. A lot of psychologists have shown that people tend to make a lot of mistakes when they make decisions that way."
            )
            answer = self.askForever("Do you still want to continue?")
            meaning = self.parsing.parse(answer)
            if meaning == "yes":
                self.ui.tell("Okay! Let's start with the actual teaching!")
            else:
                self.askbreak()