Code Example #1
def test(opt):

    # load the best model checkpoint
    best_model = torch.load(opt.best_model, map_location='cpu')
    model = best_model['model']

    # move the model to the configured device
    model = model.to(opt.device)

    # loss function
    criterion = nn.CrossEntropyLoss().to(opt.device)

    # dataloader
    test_loader = torch.utils.data.DataLoader(
        SSTreebankDataset(opt.data_name, opt.output_folder, 'test'),
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.workers if opt.is_Linux else 0,
        pin_memory=True)

    # test
    testing(test_loader, model, criterion, opt.print_freq, opt.device)
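
The `testing` helper called at the end of this example is defined elsewhere in that project and is not shown here. As a rough guide, below is a minimal sketch of the kind of evaluation loop such a helper might run, assuming the loader yields (inputs, labels) batches and the model returns class logits; the body is an illustration, not the project's actual code.

import torch

def testing(test_loader, model, criterion, print_freq, device):
    """Minimal evaluation loop (assumed sketch, not the original helper)."""
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(test_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)
            loss = criterion(logits, labels)
            total_loss += loss.item() * labels.size(0)
            correct += (logits.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
            if i % print_freq == 0:
                print('batch {}: loss {:.4f}'.format(i, loss.item()))
    print('test loss {:.4f}, accuracy {:.4f}'.format(
        total_loss / total, correct / total))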
Code Example #2
def main():
    args = parse_args()
    arch = args.arch
    where = args.data_dir
    learning_rate = args.learning_rate
    epochs = args.epochs
    save_dir = args.save_dir
    hidden_units = int(args.hidden_units)
    devices = args.devices
    if devices == 'gpu':
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device('cpu')

    train_data, valid_data, test_data, trainloader, validloader, testloader = load_data()
    model, optimizer, classifier = train_model(args, device)
    print ("Training complete.")
    testing(model, testloader, device)
    print ("Testing complete.")
    model.class_to_idx = train_data.class_to_idx
    path = args.save_dir
    save_checkpoint(path, model, optimizer, args, classifier)
    print ("checkpoint created.")
Code Example #3
    net = net.eval()  # set eval mode
    acc_val, val_RMSE, _ = accuracy(net,
                                    validation_loader,
                                    tolerance,
                                    criterion,
                                    device,
                                    network,
                                    eval=True)
    print('validation accuracy with {} tolerance = {:.2f} and RMSE = {:.6f}\n'.
          format(tolerance, acc_val, val_RMSE))

    ###################################################################################################################
    ###################      5. Using the model (testing)     #######################

    test_loss, test_RMSE = testing(model_path, data_path, counter, net,
                                   criterion, device, network, load_model)

    ###################################################################################################################
    ###################      6. Plotting the results      #######################

    # Set the font dictionaries (for plot title and axis titles)
    title_font = {
        'fontname': 'Arial',
        'size': '16',
        'color': 'black',
        'weight': 'normal',
        'verticalalignment': 'bottom'
    }  # Bottom vertical alignment for more space
    axis_font = {'fontname': 'Arial', 'size': '16'}

    # uncomment below to plot the losses along with different learning rates
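
    # The plotting code referred to above is omitted from this excerpt. A short
    # sketch of how the title_font and axis_font dictionaries defined above could
    # be applied to a loss curve is shown below; the loss lists are placeholders,
    # not values produced by this script.
    import matplotlib.pyplot as plt

    train_losses = [0.9, 0.6, 0.4, 0.3]   # placeholder training-loss history
    val_losses = [1.0, 0.7, 0.55, 0.5]    # placeholder validation-loss history

    plt.plot(train_losses, label='training loss')
    plt.plot(val_losses, label='validation loss')
    plt.title('Loss per epoch', **title_font)
    plt.xlabel('epoch', **axis_font)
    plt.ylabel('loss', **axis_font)
    plt.legend()
    plt.show()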
Code Example #4
def problem_solving(nb, problem, problem_languages, args, time_start):

    if problem_languages[nb] == "pl":
        pass

    print(problem)
    local_path = get_problem_truth(args.c, problem)
    print(local_path)
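    # tag the problem's texts for the detected language and build the working collection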
    problem_collection, number_of_texts = tagging_problem(
        local_path, problem_languages[nb])

    print('tagged')

    authors = make_authors_list(problem_collection)
    print('authors defined')

    freq1 = args.freq1
    freq2 = args.freq2

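    # decide how many texts go to the training and test splits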
    training_set_size, test_set_size = set_sizes(problem_collection)

    random.seed(time.time())

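    # gather character n-gram statistics for the corpus (thresholded with freq2)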
    trunc_words1, trunc_words2 = create_char_ngrams_stat(
        problem_collection, freq2, problem_languages[nb])

    problem_collection = filter_problem_corpus(problem_collection,
                                               trunc_words1, trunc_words2,
                                               problem_languages[nb])

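    # build n-gram and split-gram representations of each text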
    problem_collection, nb_categories = create_ngrams_and_splitgrams(
        problem_collection)

    words_encoder, words_num = stats_for_ngrams_and_skipgrams(
        problem_collection, nb_categories, freq1)

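    # vectorise the corpus into frequency features (note: 'frequency' is not defined in this snippet)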
    freq_feature, words_num = vectorise_problem_corpus(problem_collection,
                                                       words_encoder,
                                                       words_num, frequency,
                                                       number_of_texts)

    freq_feature_form_norm, network_sizes = compute_mean_and_std(
        freq_feature, problem_collection, words_num)

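    # set up the classifier network, its optimiser and the loss functions (bceloss is actually an NLLLoss)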
    model_test = define_model(network_sizes, len(authors), len(words_encoder))
    optimiser_test = define_optimiser(model_test)
    bceloss = torch.nn.NLLLoss()
    if use_cuda:
        bceloss = bceloss.cuda()

    mseloss = torch.nn.MSELoss()
    if use_cuda:
        mseloss = mseloss.cuda()

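    # train the model on the training portion of the collection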
    model = training(model_test, training_set_size, problem_collection,
                     authors, bceloss, optimiser_test, freq_feature_form_norm)

    print('after training')

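    # evaluate the trained model to produce the authorship predictions for this problem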
    result = testing(problem_collection, model, authors,
                     freq_feature_form_norm)

    print('after testing')

    with open(os.path.join(args.o, 'answers-{}.json'.format(problem)),
              'w') as outfile:
        json.dump(result, outfile)

    time_now = time.time()

    timing = time_now - time_start
    print(as_minutes(timing))

    print('sdadkashdksadfksahfksafhksadhf')  # leftover debug print
    return
Code Example #5
                      dim=1).squeeze()  # size: batch_size * 600
        p = self.drop(p)
        out = self.linear(p).squeeze()

        return out


### load the SST dataset ###
train_iter, val_iter, test_iter, TEXT, LABEL = loadSST()

### train the model ###

# CNN-non-static
model = CNN(len(TEXT.vocab), TEXT.vocab.vectors.size(1), TEXT.vocab.vectors)
optimizer = optim.Adadelta(model.parameters(), lr=0.1)

# CNN-static
# model = CNN(len(TEXT.vocab),TEXT.vocab.vectors.size(1),TEXT.vocab.vectors,static=True)
# optimizer = optim.Adadelta(filter(lambda p: p.requires_grad, model.parameters()),lr=0.1)

# CNN-multichannel
# model = CNN_2channel(len(TEXT.vocab),TEXT.vocab.vectors.size(1),TEXT.vocab.vectors)
# optimizer = optim.Adadelta(filter(lambda p: p.requires_grad, model.parameters()),lr=0.1)

num_epoch = 10
model = training(train_iter, model, num_epoch, optimizer)

### test the model ###

test_loss, accuracy_test = testing(test_iter, model)
Code Example #6
def problem_solving(nb, problem, problem_languages, args, time_start):

    if True:

        if problem_languages[nb] == "pl":
            pass

        print(problem)
        local_path = get_problem_truth(args.c, problem)
        print(local_path)
        problem_collection, number_of_texts = tagging_problem(
            local_path, problem_languages[nb])

        print('tagged')

        authors = make_authors_list(problem_collection)
        print('authors defined')

        results = []

        if True:

            frequency = 8000
            freq1 = 400
            freq2 = 1000

            training_set_size, test_set_size = set_sizes(problem_collection)

            random.seed(time.time())

            print(frequency, freq1, freq2)
            trunc_words1, trunc_words2 = create_char_ngrams_stat(
                problem_collection, freq1, freq2, problem_languages[nb])

            problem_collection = filter_problem_corpus(problem_collection,
                                                       trunc_words1,
                                                       trunc_words2,
                                                       problem_languages[nb])

            problem_collection, nb_categories = create_ngrams_and_splitgrams(
                problem_collection)

            words_encoder, words_num = stats_for_ngrams_and_skipgrams(
                problem_collection, nb_categories, frequency)

            freq_feature, words_num = vectorise_problem_corpus(
                problem_collection, words_encoder, words_num, frequency,
                number_of_texts)

            freq_feature_form_norm, pca, network_sizes = compute_mean_and_std(
                freq_feature, problem_collection, words_num)

            model_test = define_model(network_sizes,
                                      len(authors), freq_feature_form_norm,
                                      len(words_encoder))
            optimiser_test = define_optimiser(model_test)
            bceloss = torch.nn.NLLLoss()
            if use_cuda:
                bceloss = bceloss.cuda()

            mseloss = torch.nn.MSELoss()
            if use_cuda:
                mseloss = mseloss.cuda()

            model = training([None, model_test], training_set_size,
                             problem_collection, authors, bceloss, mseloss,
                             (None, optimiser_test), freq_feature_form_norm,
                             None)

            print('after training')

            result = testing(problem_collection, model, authors,
                             freq_feature_form_norm, None)

            print('after testing')

            with open(os.path.join(args.o, 'answers-{}.json'.format(problem)),
                      'w') as outfile:
                json.dump(result, outfile)

            # free the per-problem objects before moving on
            del model_test, optimiser_test, bceloss, mseloss, outfile
            del freq_feature_form_norm, pca, network_sizes, result, freq_feature, words_num
            del trunc_words1, trunc_words2, nb_categories, words_encoder, training_set_size, test_set_size
            del problem_collection, model

            time_now = time.time()

            timing = time_now - time_start
            print(as_minutes(timing))

        del number_of_texts, authors
        gc.collect()

    print('sdadkashdksadfksahfksafhksadhf')  # leftover debug print
    return