def train(args):
    """Train an LSTM character classifier and evaluate it on the test set.

    Loads train/dev/test splits via ``TrainLoader``, builds the model and an
    SGD optimizer from ``args``, trains, saves the model together with the
    character and tag vocabularies to ``args.save_to``, then reports test-set
    metrics.

    Args:
        args: parsed command-line namespace; reads ``gpu``, ``seed``,
            ``data_dir``, ``char_dim``, ``hidden_dim``, ``learning_rate``,
            ``weight_decay``, ``batch_size``, ``num_epochs``, ``save_to``.

    Raises:
        RuntimeError: if ``args.gpu`` is set but CUDA is unavailable.
    """
    print(args)
    # Validate with a real exception: ``assert`` is stripped under ``python -O``
    # and must not guard runtime configuration. (Original condition also
    # redundantly re-tested ``args.gpu`` on the right-hand side.)
    if args.gpu and not torch.cuda.is_available():
        raise RuntimeError('--gpu requested but CUDA is not available')
    random.seed(args.seed)
    data_loader = TrainLoader(args.data_dir)

    train_data = data_loader.train_data
    dev_data = data_loader.dev_data
    test_data = data_loader.test_data

    char_vocab = data_loader.token2id
    tag_vocab = data_loader.tag2id
    char_vocab_size = len(char_vocab)

    print('Training samples:', len(train_data))
    print('Valid samples:', len(dev_data))
    print('Test samples:', len(test_data))

    print(char_vocab)
    print(tag_vocab)

    model = LSTMClassifier(char_vocab_size, args.char_dim, args.hidden_dim,
                           len(tag_vocab), args.gpu)
    if args.gpu:
        model = model.cuda()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.learning_rate,
                          weight_decay=args.weight_decay)

    model = train_model(model, optimizer, train_data, dev_data, char_vocab,
                        tag_vocab, args.batch_size, args.num_epochs, args.gpu)

    # Persist vocabularies with the weights so inputs can be decoded at load time.
    save_model(model, {'chars': char_vocab, 'tags': tag_vocab}, args.save_to)
    evaluate_test_set(model, test_data, char_vocab, tag_vocab, args.gpu)
# Example #2 (snippet separator; vote count: 0)
def _load_and_evaluate(args, input_dim, loader, loadid):
    """Load checkpoint ``model_<loadid>.tar`` from ``args.save_path``,
    rebuild the classifier, and print its test accuracy and F1 score."""
    saved_model = os.path.join(args.save_path, 'model_' + str(loadid) + '.tar')
    checkpoint = torch.load(saved_model)
    model = LSTMClassifier(input_dim, args.hidden_dim, output_size=3)
    model.cuda()  # NOTE(review): assumes CUDA is available — no CPU fallback
    model.load_state_dict(checkpoint['model_state_dict'])
    acc, f1, _ = evaluate_test_set(model, loader)
    print('model {} test_accuracy:{:5.4f}, f1_score:{:5.4f}'.format(loadid, acc, f1))


def test(args):
    """Evaluate saved checkpoint(s) on the test split.

    When ``args.test_all`` is set, every per-epoch checkpoint under
    ``args.save_path`` is evaluated; otherwise only ``args.test_id``.
    The window size is hard-coded to 30 here — presumably matching the
    training configuration; verify against the training script.
    """
    dataset_test = FirmaData_select_subjects(
        args.data_dir, 30, args.subset_par[0], args.subset_par[1],
        args.subset_par[2], args.subjects_list, subset='test',
        pre_process=False)
    dat_loader_test = DataLoader(dataset_test, batch_size=args.batch_size,
                                 shuffle=True)
    input_dim = dataset_test[0][0].shape[1]
    # Both branches previously duplicated the whole load/build/evaluate
    # sequence inline; deduplicated into _load_and_evaluate.
    if args.test_all:
        for loadid in range(args.num_epochs):
            _load_and_evaluate(args, input_dim, dat_loader_test, loadid)
    else:
        _load_and_evaluate(args, input_dim, dat_loader_test, args.test_id)
# Example #3 (snippet separator; vote count: 0)
def train(args):
    """Run a training grid over scenario, week span, window size and subjects.

    For every (scenario, weeks, window_size, subject_list) combination this
    trains an ``LSTMClassifier``, reloads the checkpoint that ``train_model``
    selected on the validation split, evaluates it on the test split, and
    appends the metrics to ``log.txt``.

    Args:
        args: parsed command-line namespace; reads ``seed``, ``data_dir``,
            ``subset_par``, ``batch_size``, ``hidden_dim``, ``learning_rate``,
            ``num_epochs``, ``save_path``.
    """
    # Earlier experiment groupings kept for reference:
    #subject_lists=[[5,7,9],[12,9,16],[2,11,5],[17,9,6],[1,13,6]]
    #subject_lists = [[1,14,15,2,6,16,7],[3,12,4,15,9,10,2],[10,14,7,11,15,8,17],[16,10,6,5,13,8,12],[17,2,13,4,7,8,16]]
    #subject_lists=[[1,7,2,8,6,11,5,15,9,3],[4,3,10,11,15,7,16,6,14,17],[5,4,12,6,10,8,15,13,2,11],[13,4,6,3,7,12,2,10,16,5],[12,15,17,13,3,9,5,14,8,2]]
    subject_lists = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]]
    window_sizes = [1, 5, 15, 30, 60]
    scns = ['shared_data_1', 'shared_data_2']
    n_weeks = ['1_weeks', '2_weeks', '3_weeks']
    random.seed(args.seed)
    # ``with`` guarantees the log file is closed even if a run raises
    # (the original left the handle open on any exception).
    with open('log.txt', 'w+') as logfile:
        for scn in scns:
            for n_week in n_weeks:
                data_dir = os.path.join(args.data_dir, scn, n_week)
                for window_size in window_sizes:
                    for subjects_list in subject_lists:
                        dataset_train = FirmaData_select_subjects(
                            data_dir, window_size, args.subset_par[0],
                            args.subset_par[1], args.subset_par[2],
                            subjects_list, subset='train', pre_process=False)
                        dataset_val = FirmaData_select_subjects(
                            data_dir, window_size, args.subset_par[0],
                            args.subset_par[1], args.subset_par[2],
                            subjects_list, subset='val', pre_process=False)
                        dataset_test = FirmaData_select_subjects(
                            data_dir, window_size, args.subset_par[0],
                            args.subset_par[1], args.subset_par[2],
                            subjects_list, subset='test', pre_process=False)
                        dat_loader_train = DataLoader(dataset_train, batch_size=args.batch_size, shuffle=True)
                        dat_loader_val = DataLoader(dataset_val, batch_size=args.batch_size, shuffle=True)
                        dat_loader_test = DataLoader(dataset_test, batch_size=args.batch_size, shuffle=True)
                        # Feature dimension is taken from the first sample's
                        # second axis; one output unit per selected subject.
                        model = LSTMClassifier(dataset_train[0][0].shape[1],
                                               args.hidden_dim,
                                               output_size=len(subjects_list))
                        model.cuda()  # NOTE(review): assumes CUDA — no CPU fallback
                        optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
                        save_pa = os.path.join(args.save_path, scn, n_week,
                                               str(subjects_list), str(window_size))
                        # train_model returns the epoch id of the best
                        # validation checkpoint; reload it for testing.
                        _, test_id = train_model(model, optimizer, dat_loader_train,
                                                 dat_loader_val, args.num_epochs, save_pa)
                        saved_model = os.path.join(save_pa, 'model_' + str(test_id) + '.tar')
                        checkpoint = torch.load(saved_model)
                        model.load_state_dict(checkpoint['model_state_dict'])
                        acc, f1, _ = evaluate_test_set(model, dat_loader_test)
                        logfile.write(scn+' '+n_week+' ' + 'subjects:  '+ str(subjects_list)+ 'window_size {} model {} test_accuracy:{:5.4f}, f1_score:{:5.4f}'.format(window_size,test_id,acc,f1) +"\n")
                        # Flush so partial results survive a crash mid-grid.
                        logfile.flush()
# Example #4 (snippet separator; vote count: 0) — the following snippet is
# truncated: its enclosing function's "def" line was lost in extraction.
    print('Done !')

    ### Load data
    print('Loading Data ... ', end='')

    d_test = TextDataset(word2idx, fp_test, train=False)
    test_loader = DataLoader(d_test, batch_size=batch_size, shuffle=False)

    print('Done !')

    ### Load model
    print('Loading Model ... ', end='')

    model = LSTMClassifier(embedding_dim, hidden_dim, num_layers, batch_size)
    model.cuda()
    model.load_state_dict(torch.load(fp_model))

    print('Done !')

    ### Predict
    print('Predict ... ', end='')

    pred = predict(model, test_loader)

    print('Done !')

    ### Write
    print('Write ... ', end='')

    df_pred = pd.DataFrame()