def train(self, x_set, y_set):
        """
        Train function, training the model by splitting first the
        train dataset to train and validation, for each epoch we use
        shuffle for the original dataset and split it again. at the end
        of each epoch we use validation function to check accuracy and
        average loss for the specific epoch.
        :param x_set: the complete training dataset.
        :param y_set: the correlated classes.
        """
        loss_sum = 0
        for i in range(EPOCHS):
            x_set, y_set = utils.shuffle(x_set, y_set)
            train_x, train_y, val_x, val_y = utils.split_validation(
                x_set, y_set, VALIDATION_SIZE)
            train_x, train_y = utils.shuffle(train_x, train_y)

            # run a forward and backward pass over each training example.
            for x, y in zip(train_x, train_y):
                x = np.reshape(x, (1, x.shape[0]))
                z1, h1, z2 = self.feedforward(x)
                probs = utils.softmax(self.weights2, h1, self.bias2, CLASSES)
                loss = utils.loss(probs[int(y)])
                loss_sum += loss
                self.backprop(x, y, z1, h1, z2, probs)
            val_loss, acc = self.validation(val_x, val_y)
            print('epoch %d: validation loss %.4f, accuracy %.4f' % (i, val_loss, acc))
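
Note: the utils.split_validation helper this snippet relies on is not shown; a minimal sketch under the assumed signature (a plain holdout where VALIDATION_SIZE is the fraction held out, applied after the shuffle above) could be:

def split_validation(x_set, y_set, validation_size):
    # Hold out the trailing `validation_size` fraction of the (already
    # shuffled) data as the validation set; the rest is the train set.
    n_train = int(len(x_set) * (1.0 - validation_size))
    return x_set[:n_train], y_set[:n_train], x_set[n_train:], y_set[n_train:]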
Example n. 2
def main():
    if opt.truncate:
        train_data = pickle.load(
            open('../datasets/' + opt.dataset + '/train_shortonly.txt', 'rb'))
    else:
        train_data = pickle.load(
            open('../datasets/' + opt.dataset + '/train.txt', 'rb'))
    if opt.validation:
        train_data, valid_data = split_validation(train_data,
                                                  opt.valid_portion)
        test_data = valid_data
    else:
        if opt.truncate:
            test_data = pickle.load(
                open('../datasets/' + opt.dataset + '/test_shortonly.txt',
                     'rb'))
        else:
            test_data = pickle.load(
                open('../datasets/' + opt.dataset + '/test.txt', 'rb'))
    # all_train_seq = pickle.load(open('../datasets/' + opt.dataset + '/all_train_seq.txt', 'rb'))
    # g = build_graph(all_train_seq)
    train_data = Data(opt, train_data, shuffle=True)
    test_data = Data(opt, test_data, shuffle=False)
    # del all_train_seq, g
    if opt.dataset == 'diginetica':
        n_node = 43098
    elif opt.dataset == 'yoochoose1_64' or opt.dataset == 'yoochoose1_4':
        n_node = 37484
    else:
        n_node = 310

    model = trans_to_cuda(SessionGraph(opt, n_node))
    model = torch.nn.DataParallel(model, device_ids=[0, 1])  # assumes two GPUs with ids 0 and 1

    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0
    for epoch in range(opt.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        hit, mrr = train_test(model, train_data, test_data)
        flag = 0
        if (hit - best_result[0]) > 0.0001:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if (mrr - best_result[1]) > 0.0001:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        print('Best Result:')
        print('\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tEpoch:\t%d,\t%d' %
              (best_result[0], best_result[1], best_epoch[0], best_epoch[1]))
        bad_counter += 1 - flag
        if bad_counter >= opt.patience:
            break
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))
Example n. 3
def main():
    doc_content_list, doc_train_list, doc_test_list, vocab_dic, labels_dic, max_num_sentence, keywords_dic, class_weights = read_file(
        args.dataset, args.use_LDA)

    pre_trained_weight = []
    if args.dataset == 'mr':
        gloveFile = 'data/glove.6B.300d.txt'
        if not os.path.exists(gloveFile):
            print(
                'Please download the pretrained GloVe embedding from https://nlp.stanford.edu/projects/glove/'
            )
            return
        pre_trained_weight = loadGloveModel(gloveFile, vocab_dic,
                                            len(vocab_dic) + 1)

    train_data, valid_data = split_validation(doc_train_list,
                                              args.valid_portion, SEED)
    test_data = split_validation(doc_test_list, 0.0, SEED)

    num_categories = len(labels_dic)

    train_data = Data(train_data, max_num_sentence, keywords_dic,
                      num_categories, args.use_LDA)
    valid_data = Data(valid_data, max_num_sentence, keywords_dic,
                      num_categories, args.use_LDA)
    test_data = Data(test_data, max_num_sentence, keywords_dic, num_categories,
                     args.use_LDA)

    model = trans_to_cuda(
        DocumentGraph(args, pre_trained_weight, class_weights,
                      len(vocab_dic) + 1, len(labels_dic)))

    for epoch in range(args.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)

        train_model(model, train_data, args)

        valid_detail, valid_acc = test_model(model, valid_data, args, False)
        detail, acc = test_model(model, test_data, args, False)
        print('Validation Accuracy:\t%.4f, Test Accuracy:\t%.4f' %
              (valid_acc, acc))
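
loadGloveModel is expected to return an embedding matrix aligned with vocab_dic; a hedged sketch (the parameter names here are assumptions, and only 300-d vectors from glove.6B.300d.txt are handled):

import numpy as np

def loadGloveModel(glove_file, vocab_dic, num_rows, dim=300):
    # Row i of the matrix holds the GloVe vector of the word mapped to
    # index i in vocab_dic; words without a GloVe entry stay all-zero.
    weights = np.zeros((num_rows, dim), dtype=np.float32)
    with open(glove_file, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            if parts[0] in vocab_dic:
                weights[vocab_dic[parts[0]]] = np.asarray(parts[1:], dtype=np.float32)
    return weights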
Example n. 4
def main():
    train_data = pickle.load(
        open('../datasets/' + opt.dataset + '/train.txt', 'rb'))
    if opt.validation:
        train_data, valid_data = split_validation(train_data,
                                                  opt.valid_portion)
        test_data = valid_data
    else:
        test_data = pickle.load(
            open('../datasets/' + opt.dataset + '/test.txt', 'rb'))
    # all_train_seq = pickle.load(open('../datasets/' + opt.dataset + '/all_train_seq.txt', 'rb'))
    # g = build_graph(all_train_seq)
    train_data = Data(train_data, shuffle=True, opt=opt)
    test_data = Data(test_data, shuffle=False, opt=opt)
    # del all_train_seq, g
    if opt.dataset == 'diginetica':
        n_node = 43098
    elif opt.dataset == 'yoochoose1_64' or opt.dataset == 'yoochoose1_4':
        n_node = 37484
    elif opt.dataset == 'diginetica_users':
        n_node = 57070
    else:
        n_node = 310

    model = trans_to_cuda(
        SessionGraph(opt, n_node, max(train_data.len_max, test_data.len_max)))

    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0
    for epoch in range(opt.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        hit, mrr = train_test(model, train_data, test_data)
        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        print('Best Result:')
        print('\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tEpoch:\t%d,\t%d' %
              (best_result[0], best_result[1], best_epoch[0], best_epoch[1]))
        bad_counter += 1 - flag
        if bad_counter >= opt.patience:
            break
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))
Example n. 5
def main():
    train_data = pickle.load(open('./datasets/' + opt.dataset + '/train.txt', 'rb'))
    if opt.validation:
        train_data, valid_data = split_validation(train_data, opt.valid_portion)
        test_data = valid_data
    else:
        test_data = pickle.load(open('./datasets/' + opt.dataset + '/test.txt', 'rb'))

    train_data = Data(train_data, shuffle=True)
    test_data = Data(test_data, shuffle=False)
    if opt.dataset == 'diginetica':
        n_node = 43098
    else:
        n_node = 37484


    model = trans_to_cuda(SelfAttentionNetwork(opt, n_node))

    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0
    for epoch in range(opt.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        hit, mrr = train_test(model, train_data, test_data)
        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        print('Best Result:')
        print('\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tEpoch:\t%d,\t%d' % (best_result[0], best_result[1], best_epoch[0], best_epoch[1]))
        bad_counter += 1 - flag
        if bad_counter >= opt.patience:
            break
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))
Example n. 6
def main():
    train_data = pickle.load(open('../datasets/' + opt.dataset + '/train.txt', 'rb'))
    if opt.validation:
        train_data, valid_data = split_validation(train_data, opt.valid_portion)
        test_data = valid_data
    else:
        test_data = pickle.load(open('../datasets/' + opt.dataset + '/test.txt', 'rb'))
    # all_train_seq = pickle.load(open('../datasets/' + opt.dataset + '/all_train_seq.txt', 'rb'))
    # g = build_graph(all_train_seq)
    train_data = Data(train_data, shuffle=True)
    # train_data is a (sessions, labels) tuple of item-id prefix lists, e.g. ([[282], [281, 308], [281], [58, 58, 58, 230, 230, 230, 246, 230], ...
    test_data = Data(test_data, shuffle=False)
    # del all_train_seq, g
    if opt.dataset == 'diginetica':
        n_node = 43098
    elif opt.dataset == 'yoochoose1_64' or opt.dataset == 'yoochoose1_4':
        n_node = 37484
    else:
        n_node = 310

    '''
        SessionGraph(
      (embedding): Embedding(310, 100)
      (gnn): GNN(
        (linear_edge_in): Linear(in_features=100, out_features=100, bias=True)
        (linear_edge_out): Linear(in_features=100, out_features=100, bias=True)
        (linear_edge_f): Linear(in_features=100, out_features=100, bias=True)
      )
      (linear_one): Linear(in_features=100, out_features=100, bias=True)
      (linear_two): Linear(in_features=100, out_features=100, bias=True)
      (linear_three): Linear(in_features=100, out_features=1, bias=False)
      (linear_transform): Linear(in_features=200, out_features=100, bias=True)
      (loss_function): CrossEntropyLoss()
    )
    '''
    model = trans_to_cuda(SessionGraph(opt, n_node))  # n_node is the number of distinct items (nodes) across the sessions; 310 for the sample dataset

    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0
    for epoch in range(opt.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        hit, mrr = train_test(model, train_data, test_data)
        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        print('Best Result:')
        print('\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tEpoch:\t%d,\t%d' % (
            best_result[0], best_result[1], best_epoch[0], best_epoch[1]))
        bad_counter += 1 - flag
        if bad_counter >= opt.patience:
            break
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))
Example n. 7
def main(run):
    train_data = pickle.load(
        open(os.path.join(opt.dataset_folder, 'train.dat'), 'rb'))
    if opt.validation:
        train_data, valid_data = split_validation(train_data,
                                                  opt.valid_portion)
        test_data = valid_data
    else:
        test_data = pickle.load(
            open(os.path.join(opt.dataset_folder, 'test.dat'), 'rb'))

    print(test_data[0][0], test_data[1][0])  # sanity check: first test session and its target item

    cars = pickle.load(
        open(os.path.join(opt.dataset_folder, 'reg_no_item_id.dat'), 'rb'))
    item_features = pickle.load(
        open(os.path.join(opt.dataset_folder, 'itemid_features.dat'), 'rb'))

    train_data = Data(train_data, shuffle=True, features=item_features)
    test_data = Data(test_data, shuffle=False, features=item_features)

    n_node = len(cars) + 1  # number of unique cars + 1 (e.g. 1149, 6176, or 5933 depending on the dataset)
    n_feature_columns = len(item_features[1])
    features_vector = get_feature_vectors(n_node, item_features)

    run.log("Unique No. of Cars", n_node)

    model = trans_to_cuda(
        SessionGraph(opt,
                     n_node,
                     n_feature_columns=n_feature_columns,
                     features=features_vector))

    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0

    # Predict once before training to record a baseline score

    hit, mrr = predict_scores(model, test_data)
    run.log(f'Recall@{opt.top_k}', hit)
    run.log(f'MRR@{opt.top_k}', mrr)

    for epoch in range(opt.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)

        hit, mrr, mean_loss = train_test(model, train_data, test_data)

        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1

        # Log metrics for this epoch
        run.log(f'Recall@{opt.top_k}', hit)
        run.log(f'MRR@{opt.top_k}', mrr)
        run.log('Mean Loss', mean_loss)

        print('Current Result:')
        print(
            '\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tMean Loss:\t%.4f\tEpoch:\t%d'
            % (hit, mrr, mean_loss, epoch))

        print('Best Result:')
        print('\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tEpoch:\t%d,\t%d' %
              (best_result[0], best_result[1], best_epoch[0], best_epoch[1]))
        bad_counter += 1 - flag
        if bad_counter >= opt.patience:
            break
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))

    run.log('Training Time (s)', (end - start))

    # Save the model together with its item/vehicle mapping files
    output_folder = opt.output_folder
    os.makedirs(output_folder, exist_ok=True)
    torch.save(model, f'{output_folder}/{opt.model_name}_full.pth')
    torch.save(model.state_dict(), f'{output_folder}/{opt.model_name}.pt')
    shutil.copy(
        os.path.join(opt.dataset_folder, 'itemid_to_vehicle_mapping.dat'),
        f'{output_folder}/{opt.model_name}_item_veh_mapping.dat')
    shutil.copy(os.path.join(opt.dataset_folder, 'reg_no_item_id.dat'),
                f'{output_folder}/{opt.model_name}_veh_item_mapping.dat')
    shutil.copy(os.path.join(opt.dataset_folder, 'itemid_features.dat'),
                f'{output_folder}/itemid_features.dat')

    run.log("Model Saved in Outputs", True)
Example n. 8
def main():
    if args.wandb_on:
        wandb.init(project=args.wandb_project,
                   name=args.model_name + '-' + args.dataset)
        wandb.config.update(
            {'hostname': os.popen('hostname').read().split('.')[0]})
        wandb.config.update(args)

    train_data = pickle.load(open(args.data_folder + args.train_data, 'rb'))

    if args.validation:
        train_data, valid_data = split_validation(train_data,
                                                  args.valid_portion)
        test_data = valid_data
    else:
        test_data = pickle.load(open(args.data_folder + args.valid_data, 'rb'))

    # all_train_seq = pickle.load(open('../../_data/' + args.dataset + '/all_train_seq.txt', 'rb'))
    # g = build_graph(all_train_seq)
    train_data = Data(train_data, shuffle=True)
    test_data = Data(test_data, shuffle=False)
    # del all_train_seq, g
    if args.dataset == 'diginetica':
        n_node = 43098
    elif args.dataset == 'yoochoose1_64' or args.dataset == 'yoochoose1_4':
        n_node = 37484
    else:
        n_node = 310

    model = trans_to_cuda(SessionGraph(args, n_node))
    if args.wandb_on:
        wandb.watch(model, log="all")

    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0
    for epoch in range(args.n_epochs):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        hit, mrr = train_test(epoch, model, train_data, test_data, args)
        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        print('Best Result:')
        print(f'\tRecall@{args.top_k}:\t{best_result[0]:.4f}'
              f'\tMRR@{args.top_k}:\t{best_result[1]:.4f}'
              f'\tEpoch:\t{best_epoch[0]},\t{best_epoch[1]}')
        if args.wandb_on:
            wandb.log({
                "best_recall": best_result[0],
                "best_mrr": best_result[1],
                "best_recall_epoch": best_epoch[0],
                "best_mrr_epoch": best_epoch[1]
            })
        bad_counter += 1 - flag
        if bad_counter >= args.patience:
            break
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))
Example n. 9
File: main.py Project: kiminh/GIISR
def main():
    train_data = pickle.load(
        open('../datasets/' + opt.dataset + '/train.txt', 'rb'))
    if opt.validation:
        train_data, valid_data = split_validation(train_data,
                                                  opt.valid_portion)
        test_data = valid_data
    else:
        test_data = pickle.load(
            open('../datasets/' + opt.dataset + '/test.txt', 'rb'))
    # all_train_seq = pickle.load(open('../datasets/' + opt.dataset + '/all_train_seq.txt', 'rb'))
    # g = build_graph(all_train_seq)
    train_data = Data(train_data, shuffle=True)
    test_data = Data(test_data, shuffle=False)
    # del all_train_seq, g
    if opt.dataset == 'diginetica':
        n_node = 43098
    elif opt.dataset == 'yoochoose1_64' or opt.dataset == 'yoochoose1_4':
        n_node = 37484
    else:
        n_node = 310

    model = trans_to_cuda(SessionGraph(opt, n_node))
    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0
    for epoch in range(opt.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        start_paper = time.time()
        hit, mrr = train_test(model, train_data, test_data)
        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        print('Best Result:')
        result = '\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tEpoch:\t%d,\t%d' % (
            best_result[0], best_result[1], best_epoch[0], best_epoch[1])
        print(result)
        # file_path = "../logs/"+opt.dataset+'batch'+str(opt.batchSize)+opt.method+str(opt.k)+str(opt.nonhybrid)+opt.method_net_last+opt.method_net_last_n1+".txt"
        file_path = "../logs/" + opt.dataset + 'batch' + str(
            opt.batchSize) + opt.method + str(opt.k) + opt.distance + ".txt"
        with open(file_path, "a") as f:
            f.write(str(result))
            f.write('\n')

        bad_counter += 1 - flag
        if bad_counter >= opt.patience:
            break
        end_paper = time.time()
        time_paper = end_paper - start_paper
        print("each epoch all time->", end_paper - start_paper)
        file_path_time = "../logs_time/" + opt.dataset + 'batch' + str(
            opt.batchSize) + opt.method + str(opt.k) + opt.distance + ".txt"
        with open(file_path_time, "a") as f:
            f.write(str(time_paper))
            f.write('\n')
    file_path_two = "../logs_loss/" + opt.dataset + 'batch' + str(
        opt.batchSize) + opt.method + str(opt.k) + opt.distance + ".txt"
    with open(file_path_two, "a") as f:
        f.write('\n')
    file_path_three = "../logs_loss/" + opt.dataset + 'batch' + str(
        opt.batchSize) + opt.method + str(
            opt.k) + opt.distance + "all" + ".txt"
    with open(file_path_three, "a") as f:
        f.write('\n')
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))