Example #1
def main(args):
    if args.model == 'LSTM':
        model = LSTM(input_dim=args.input_dim, lstm_hidden_dim=args.lstm_hidden_dim, time_step=args.time_step)
    else:
        raise ValueError('unsupported model: {}'.format(args.model))
    model.cuda()

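    # Build the optimizer named by args.optim; every variant shares args.lr and args.weight_decay.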
    if args.optim == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optim == 'SGD_momentum':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.weight_decay)
    elif args.optim == 'Adagrad':
        optimizer = torch.optim.Adagrad(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optim == 'RMSprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr, alpha=0.999, eps=1e-8, weight_decay=args.weight_decay)
    elif args.optim == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999), eps=1e-8, weight_decay=args.weight_decay)
    else:
        raise ValueError('unsupported optimizer: {}'.format(args.optim))

    lr_scheduler = None
    if args.load_path and args.recover:
        load_model(model, args.load_path, strict=True)
        print('loaded model state dict from {}'.format(args.load_path))

    map_file_path = 'divide.csv'
    data_file_path = 'processed_data.txt'
    social_economical_path = '2010-2016.csv'
    if args.dataset == 'NaiveDataset':
        train_set = NaiveDataset(data_file_path, map_file_path)
    elif args.dataset == 'AdvancedDataset':
        train_set = AdvancedDataset(data_file_path, map_file_path, social_economical_path)
    else:
        raise ValueError('unsupported dataset: {}'.format(args.dataset))
    train_dataloader = DataLoader(train_set, batch_size=args.batch_size, 
                                  shuffle=True, num_workers=args.num_workers,
                                  pin_memory=True)

    if args.evaluate:
        validate(train_dataloader, model)
        return

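    # Note: the training loader is passed below as both the train and val loader; no separate validation split is built here.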
    train(train_dataloader, train_dataloader, model, optimizer, lr_scheduler, args)
Example #2
def fit(args):
    exp_name = get_experiment_name(args)
    logging_path = os.path.join(args.save_path, exp_name) + ".log"
    logging.basicConfig(filename=logging_path,
                        level=logging.INFO,
                        format="%(message)s")
    seed_everything(args.seed)
    label_map = load_label_map(args.dataset)

    if args.use_cnn:
        train_dataset = FeaturesDatset(
            features_dir=os.path.join(args.data_dir,
                                      f"{args.dataset}_train_features"),
            label_map=label_map,
            mode="train",
        )
        val_dataset = FeaturesDatset(
            features_dir=os.path.join(args.data_dir,
                                      f"{args.dataset}_val_features"),
            label_map=label_map,
            mode="val",
        )

    else:
        train_dataset = KeypointsDataset(
            keypoints_dir=os.path.join(args.data_dir,
                                       f"{args.dataset}_train_keypoints"),
            use_augs=args.use_augs,
            label_map=label_map,
            mode="train",
            max_frame_len=169,
        )
        val_dataset = KeypointsDataset(
            keypoints_dir=os.path.join(args.data_dir,
                                       f"{args.dataset}_val_keypoints"),
            use_augs=False,
            label_map=label_map,
            mode="val",
            max_frame_len=169,
        )

    train_dataloader = data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
    )
    val_dataloader = data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )

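    # Number of target classes: 263 for the full INCLUDE dataset, 50 otherwise.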
    n_classes = 50
    if args.dataset == "include":
        n_classes = 263

    if args.model == "lstm":
        config = LstmConfig()
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = LSTM(config=config, n_classes=n_classes)
    else:
        config = TransformerConfig(size=args.transformer_size)
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = Transformer(config=config, n_classes=n_classes)

    model = model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(),
                                  lr=args.learning_rate,
                                  weight_decay=0.01)
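    # mode="max": the scheduler monitors validation accuracy, scaling the LR by 0.2 when it plateaus.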
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.2)

    if args.use_pretrained == "resume_training":
        model, optimizer, scheduler = load_pretrained(args, n_classes, model,
                                                      optimizer, scheduler)

    model_path = os.path.join(args.save_path, exp_name) + ".pth"
    es = EarlyStopping(patience=15, mode="max")
    for epoch in range(args.epochs):
        print(f"Epoch: {epoch+1}/{args.epochs}")
        train_loss, train_acc = train(train_dataloader, model, optimizer,
                                      device)
        val_loss, val_acc = validate(val_dataloader, model, device)
        logging.info(
            "Epoch: {}, train loss: {}, train acc: {}, val loss: {}, val acc: {}"
            .format(epoch + 1, train_loss, train_acc, val_loss, val_acc))
        scheduler.step(val_acc)
        es(
            model_path=model_path,
            epoch_score=val_acc,
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
        )
        if es.early_stop:
            print("Early stopping")
            break

    print("### Training Complete ###")
Example #3
if __name__ == "__main__":
    train_dataset_dict, test_dataset_dict = load_ECG_dataset(root_dir)
    train_dataset, validation_dataset = split_dataset(train_dataset_dict,
                                                      val_num=100,
                                                      seed=0)

    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(validation_dataset,
                                                 batch_size=16,
                                                 shuffle=False)

    dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}

    model = LSTM(num_classes, input_size, hidden_size, num_layers, device)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss().to(device)
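    # Adam with the LR scaled down by 1e-3; StepLR then decays it by 10% every 5 epochs.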
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate * 1e-3)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)

    trained_model = train_model(model,
                                dataloaders_dict,
                                criterion,
                                optimizer,
                                exp_lr_scheduler,
                                device,
                                num_epochs,
                                stopping_epoch,
                                savedirpath=out)
Example #4
                    # Model and optimizer
                    stop = False
                    while not stop:
                        if args.model == "LSTM":
                            model = LSTM(nfeat=lstm_features, nhid=args.hidden, n_nodes=n_nodes, window=args.window, dropout=args.dropout, batch_size=args.batch_size, recur=args.recur).to(device)
                        elif args.model == "MPNN_LSTM":
                            model = MPNN_LSTM(nfeat=nfeat, nhid=args.hidden, nout=1, n_nodes=n_nodes, window=args.graph_window, dropout=args.dropout).to(device)
                        elif args.model == "MPNN":
                            model = MPNN(nfeat=nfeat, nhid=args.hidden, nout=1, dropout=args.dropout).to(device)
                        else:
                            raise ValueError('unknown model: {}'.format(args.model))

                        optimizer = optim.Adam(model.parameters(), lr=args.lr)
                        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10)

                        # ------------------- Train
                        best_val_acc = 1e8  # sentinel: tracks the best (lowest) validation loss despite the name
                        val_among_epochs = []
                        train_among_epochs = []
                        stop = False

                        for epoch in range(args.epochs):    
                            start = time.time()

                            model.train()
                            train_loss = AverageMeter()

                            # Train for one epoch
Example #5
# TODO: n_vocab === train set
model = LSTM(input_size=flags.embedding_size,
             hidden_size=flags.hidden_size,
             output_size=dataset.vocabulary.vocab_size)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# hidden state and cell state (needed for LSTM)
state_h, state_c = model.zero_state(flags.batch_size)
state_h = state_h.to(device)
state_c = state_c.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=flags.learning_rate)

epoch = 1
iterator = 0
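# Run epochs * max_batch optimization steps, re-initializing the LSTM hidden/cell state at each epoch boundary.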
for i in range(flags.epochs * flags.max_batch):

    if iterator >= epoch * flags.max_batch:
        epoch += 1
        state_h, state_c = model.zero_state(flags.batch_size)
        state_h = state_h.to(device)
        state_c = state_c.to(device)

    iterator += 1
    inputs, targets = dataset()

    model.train()
Example #6
RESULTS = []
MARGINS = [0.2]
MAX_EPOCHS = 50
BATCH_SIZE = 32
FILTER_WIDTHS = [3]
POOL_METHOD = "average"
FEATURE_DIMS = [667]
DROPOUT_PS = [0.1]
NUM_HIDDEN_UNITS = [240]
LEARNING_RATES = [1E-3]
MODELS = []
LSTM_HYPERPARAMETERS = itertools.product(MARGINS, NUM_HIDDEN_UNITS, LEARNING_RATES)
for margin, num_hidden_units, learning_rate in LSTM_HYPERPARAMETERS:
    model = LSTM(EMBEDDINGS, num_hidden_units, POOL_METHOD, CUDA)
    criterion = helpers.MaxMarginLoss(margin)
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=learning_rate)
    model, mrr = train_utils.train_model(model, optimizer, criterion, ASK_UBUNTU_DATA,
                                         MAX_EPOCHS, BATCH_SIZE, CUDA, eval_data=ANDROID_DATA)
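    # NOTE: the saved filename labels the score "auc" while the returned value is bound to the name mrr;
    # with eval_data=ANDROID_DATA the metric is presumably AUC.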
    torch.save(model.state_dict(), "./lstm_" +
                                    str(margin) + "_" +
                                    str(num_hidden_units) + "_" +
                                    str(learning_rate) + "_" +
                                    "auc=" + str(mrr))
    MODELS.append((mrr, margin, num_hidden_units, learning_rate))

##############################################################################
# Train models by adversarial domain adaptation and evaluate
##############################################################################
MAX_EPOCHS = 50
BATCH_SIZE = 32
Example #7
hidden_size = 1024
lr = 0.001

# list of uppercase letters to init bandnames
uppers = string.ascii_uppercase

rnn_type = str(sys.argv[1])

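# The first CLI argument picks the cell: 'milstm', 'lstm', or (any other value) layer-normalized miLSTM.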
if rnn_type == 'milstm':
    decoder = miLSTM(n_characters, hidden_size, 64, n_characters)
elif rnn_type == 'lstm':
    decoder = LSTM(n_characters, hidden_size, 64, n_characters)
else:
    decoder = LN_miLSTM(n_characters, hidden_size, 64, n_characters)

decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

decoder.cuda()

death_metal_bands = pd.read_csv('../data/death-metal/bands.csv')

band_raw = death_metal_bands['name'].tolist()

band_nms = []
for bnd in band_raw:
    band_nms.append(bnd + '<EOS>')

print('Found', len(band_nms), 'bands!')

Example #8
def main():
    global args, best_auc
    args = parser.parse_args()
    cuda_available = torch.cuda.is_available()
    print(args)

    embedding_file = 'data/glove/glove.pruned.txt.gz'
    embedding_iter = Embedding.iterator(embedding_file)
    embed_size = 300
    embedding = Embedding(embed_size, embedding_iter)
    print('Embeddings loaded.')

    android_corpus_file = 'data/android/corpus.tsv.gz'
    android_dataset = AndroidDataset(android_corpus_file)
    android_corpus = android_dataset.get_corpus()
    android_ids = embedding.corpus_to_ids(android_corpus)
    print('Got Android corpus ids.')

    ubuntu_corpus_file = 'data/askubuntu/text_tokenized.txt.gz'
    ubuntu_dataset = UbuntuDataset(ubuntu_corpus_file)
    ubuntu_corpus = ubuntu_dataset.get_corpus()
    ubuntu_ids = embedding.corpus_to_ids(ubuntu_corpus)
    print('Got AskUbuntu corpus ids.')

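    # Token id used to pad variable-length question sequences within a batch.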
    padding_id = embedding.vocab_ids['<padding>']

    ubuntu_train_file = 'data/askubuntu/train_random.txt'
    ubuntu_train_data = ubuntu_dataset.read_annotations(ubuntu_train_file)

    dev_pos_file = 'data/android/dev.pos.txt'
    dev_neg_file = 'data/android/dev.neg.txt'
    android_dev_data = android_dataset.read_annotations(
        dev_pos_file, dev_neg_file)

    android_dev_batches = batch_utils.generate_eval_batches(
        android_ids, android_dev_data, padding_id)

    assert args.model in ['lstm', 'cnn']
    if args.model == 'lstm':
        model_encoder = LSTM(embed_size, args.hidden)
    else:
        model_encoder = CNN(embed_size, args.hidden)
    model_classifier = FFN(args.hidden)
    print(model_encoder)
    print(model_classifier)

    optimizer_encoder = torch.optim.Adam(model_encoder.parameters(),
                                         lr=args.elr)
    criterion_encoder = nn.MultiMarginLoss(margin=args.margin)

    optimizer_classifier = torch.optim.Adam(model_classifier.parameters(),
                                            lr=args.clr)
    criterion_classifier = nn.CrossEntropyLoss()

    if cuda_available:
        criterion_encoder = criterion_encoder.cuda()
        criterion_classifier = criterion_classifier.cuda()

    if args.load:
        if os.path.isfile(args.load):
            print('Loading checkpoint.')
            checkpoint = torch.load(args.load)
            args.start_epoch = checkpoint['epoch']
            best_auc = checkpoint.get('best_auc', -1)
            model_encoder.load_state_dict(checkpoint['encoder_state_dict'])
            model_classifier.load_state_dict(
                checkpoint['classifier_state_dict'])

            print('Loaded checkpoint at epoch {}.'.format(checkpoint['epoch']))
        else:
            print('No checkpoint found here.')

    if args.eval:
        test_pos_file = 'data/android/test.pos.txt'
        test_neg_file = 'data/android/test.neg.txt'
        android_test_data = android_dataset.read_annotations(
            test_pos_file, test_neg_file)

        android_test_batches = batch_utils.generate_eval_batches(
            android_ids, android_test_data, padding_id)

        print('Evaluating on dev set.')
        train_utils.evaluate_auc(args, model_encoder, embedding,
                                 android_dev_batches, padding_id)

        print('Evaluating on test set.')
        train_utils.evaluate_auc(args, model_encoder, embedding,
                                 android_test_batches, padding_id)
        return

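    # Each epoch: regenerate batches, jointly train the encoder and domain classifier, then score AUC on the Android dev set.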
    for epoch in range(args.start_epoch, args.epochs):
        encoder_train_batches = batch_utils.generate_train_batches(
            ubuntu_ids, ubuntu_train_data, args.batch_size, padding_id)
        classifier_train_batches = \
            batch_utils.generate_classifier_train_batches(
                ubuntu_ids, android_ids, args.batch_size,
                len(encoder_train_batches), padding_id)

        train_utils.train_encoder_classifer(
            args, model_encoder, model_classifier, embedding,
            optimizer_encoder, optimizer_classifier, criterion_encoder,
            criterion_classifier,
            zip(encoder_train_batches,
                classifier_train_batches), padding_id, epoch, args.lmbda)

        auc = train_utils.evaluate_auc(args, model_encoder, embedding,
                                       android_dev_batches, padding_id)

        is_best = auc > best_auc
        best_auc = max(auc, best_auc)
        save(
            args, {
                'epoch': epoch + 1,
                'arch': 'lstm',
                'encoder_state_dict': model_encoder.state_dict(),
                'classifier_state_dict': model_classifier.state_dict(),
                'best_auc': best_auc,
            }, is_best)
Example #9
          'loss_train: {:.4f}'.format(train_loss / count),
          'time: {:.4f}s'.format(time.time() - t))

# Stores the DeepSets model to disk
torch.save(
    {
        'state_dict': deepsets.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, 'model_deepsets.pth.tar')

print("Finished training for DeepSets model")
print()

# Initializes LSTM model and optimizer
lstm = LSTM(n_digits, embedding_dim, hidden_dim).to(device)
optimizer = optim.Adam(lstm.parameters(), lr=learning_rate)
loss_function = nn.L1Loss()

# Trains the LSTM model
for epoch in range(epochs):
    t = time.time()
    lstm.train()

    train_loss = 0
    count = 0
    idx = np.random.permutation(n_train)
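    # idx reshuffles the sample order each epoch; the elided Task 5 body below presumably consumes it in mini-batches.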
    for i in range(0, n_train, batch_size):

        ############## Task 5

        ##################
Example #10
    def train_with_lstm(self):
        x_train, y_train, x_test, y_test = split_data(
            self.data, self.input_params['lookback'])
        print('x_train.shape = ', x_train.shape)
        print('y_train.shape = ', y_train.shape)
        print('x_test.shape = ', x_test.shape)
        print('y_test.shape = ', y_test.shape)
        x_train = torch.from_numpy(x_train).type(torch.Tensor)
        x_test = torch.from_numpy(x_test).type(torch.Tensor)
        y_train_lstm = torch.from_numpy(y_train).type(torch.Tensor)
        y_test_lstm = torch.from_numpy(y_test).type(torch.Tensor)
        y_train_gru = torch.from_numpy(y_train).type(torch.Tensor)
        y_test_gru = torch.from_numpy(y_test).type(torch.Tensor)
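        # The *_gru tensors appear unused in this method, likely left over from a parallel GRU experiment.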

        model = LSTM(input_dim=self.input_params['input_dim'],
                     hidden_dim=self.input_params['hidden_dim'],
                     output_dim=self.input_params['output_dim'],
                     num_layers=self.input_params['num_layers'])

        criterion = torch.nn.MSELoss(reduction='mean')
        # optimiser = torch.optim.Adam(model.parameters(), lr=0.01)
        # Adam: a first-order optimizer that can replace classic stochastic gradient descent, iteratively updating network weights from the training data
        optimiser = torch.optim.Adam(model.parameters(),
                                     lr=self.input_params['lr'])

        hist = np.zeros(self.input_params['num_epochs'])
        start_time = time.time()
        lstm = []

        # Stochastic gradient descent
        for t in range(self.input_params['num_epochs']):
            y_train_pred = model(x_train)

            loss = criterion(y_train_pred, y_train_lstm)
            print("Epoch ", t, "MSE: ", loss.item())
            hist[t] = loss.item()

            # Zero the parameter gradients
            optimiser.zero_grad()
            # Backpropagate to compute gradients
            loss.backward()
            # Update all parameters
            optimiser.step()

        training_time = time.time() - start_time
        print("Training time: {}".format(training_time))

        # Convert the normalized data back to the original scale
        predict = pd.DataFrame(
            self.scaler.inverse_transform(y_train_pred.detach().numpy()))
        original = pd.DataFrame(
            self.scaler.inverse_transform(y_train_lstm.detach().numpy()))

        print(predict)
        fig = plt.figure()
        # Adjust subplot spacing
        fig.subplots_adjust(hspace=0.2, wspace=0.2)

        # Stock price
        plt.subplot(1, 2, 1)
        ax = sns.lineplot(x=original.index,
                          y=original[0],
                          label="Data",
                          color='royalblue')
        ax = sns.lineplot(x=predict.index,
                          y=predict[0],
                          label="Training Prediction (LSTM)",
                          color='tomato')
        ax.set_title('Stock price', size=14, fontweight='bold')
        ax.set_xlabel("Days", size=14)
        ax.set_ylabel("Cost (USD)", size=14)
        ax.set_xticklabels('', size=10)
        plt.show()

        # # Training loss
        # plt.subplot(1, 2, 2)
        # print(hist)
        # ax = sns.lineplot(data=hist, color='royalblue')
        # ax.set_xlabel("Epoch", size=14)
        # ax.set_ylabel("Loss", size=14)
        # ax.set_title("Training Loss", size=14, fontweight='bold')
        # fig.set_figheight(6)
        # fig.set_figwidth(16)
        # fig.show()

        # > Data prediction
        # make predictions
        y_test_pred = model(x_test)

        # invert predictions
        # scaler.inverse_transform(X) converts the normalized data back to the original scale
        y_train_pred = self.scaler.inverse_transform(
            y_train_pred.detach().numpy())
        y_train = self.scaler.inverse_transform(y_train_lstm.detach().numpy())
        y_test_pred = self.scaler.inverse_transform(
            y_test_pred.detach().numpy())
        y_test = self.scaler.inverse_transform(y_test_lstm.detach().numpy())

        # mean_squared_error: mean squared error
        trainScore = math.sqrt(
            mean_squared_error(y_train[:, 0], y_train_pred[:, 0]))
        print('Train Score: %.2f RMSE' % (trainScore))
        testScore = math.sqrt(
            mean_squared_error(y_test[:, 0], y_test_pred[:, 0]))
        print('Test Score: %.2f RMSE' % (testScore))
        lstm.append(trainScore)
        lstm.append(testScore)
        lstm.append(training_time)

        # > train and test
        # np.empty_like creates an array with the same shape and dtype as the given array
        trainPredictPlot = np.empty_like(self.data)
        trainPredictPlot[:, :] = np.nan
        trainPredictPlot[self.input_params['lookback']:len(y_train_pred) +
                         self.input_params['lookback'], :] = y_train_pred

        # shift test predictions for plotting
        testPredictPlot = np.empty_like(self.data)
        testPredictPlot[:, :] = np.nan
        testPredictPlot[len(y_train_pred) + self.input_params['lookback'] -
                        1:len(self.data) - 1, :] = y_test_pred

        original = self.scaler.inverse_transform(
            self.data['Close'].values.reshape(-1, 1))

        predictions = np.append(trainPredictPlot, testPredictPlot, axis=1)
        predictions = np.append(predictions, original, axis=1)
        result = pd.DataFrame(predictions)

        # >> fig
        fig = go.Figure()
        fig.add_trace(
            go.Scatter(x=result.index,
                       y=result[0],
                       mode='lines',
                       name='Train prediction'))
        fig.add_trace(
            go.Scatter(x=result.index,
                       y=result[1],
                       mode='lines',
                       name='Test prediction'))
        fig.add_trace(
            go.Scatter(x=result.index,
                       y=result[2],
                       mode='lines',
                       name='Actual Value'))
        fig.update_layout(xaxis=dict(showline=True,
                                     showgrid=True,
                                     showticklabels=False,
                                     linecolor='white',
                                     linewidth=2),
                          yaxis=dict(
                              title_text='Close (USD)',
                              titlefont=dict(
                                  family='Rockwell',
                                  size=12,
                                  color='white',
                              ),
                              showline=True,
                              showgrid=True,
                              showticklabels=True,
                              linecolor='white',
                              linewidth=2,
                              ticks='outside',
                              tickfont=dict(
                                  family='Rockwell',
                                  size=12,
                                  color='white',
                              ),
                          ),
                          showlegend=True,
                          template='plotly_dark')

        annotations = []
        annotations.append(
            dict(xref='paper',
                 yref='paper',
                 x=0.0,
                 y=1.05,
                 xanchor='left',
                 yanchor='bottom',
                 text='Results (LSTM)',
                 font=dict(family='Rockwell', size=26, color='white'),
                 showarrow=False))
        fig.update_layout(annotations=annotations)

        fig.show()
        #   py.iplot(fig, filename='stock_prediction_lstm')
        return lstm
Example #11
File: adda.py  Project: k-weng/qa-retrieval
def main():
    global args, best_auc
    args = parser.parse_args()
    cuda_available = torch.cuda.is_available()
    print(args)

    embedding_file = 'data/glove/glove.pruned.txt.gz'
    embedding_iter = Embedding.iterator(embedding_file)
    embed_size = 300
    embedding = Embedding(embed_size, embedding_iter)
    print('Embeddings loaded.')

    android_corpus_file = 'data/android/corpus.tsv.gz'
    android_dataset = AndroidDataset(android_corpus_file)
    android_corpus = android_dataset.get_corpus()
    android_ids = embedding.corpus_to_ids(android_corpus)
    print('Got Android corpus ids.')

    ubuntu_corpus_file = 'data/askubuntu/text_tokenized.txt.gz'
    ubuntu_dataset = UbuntuDataset(ubuntu_corpus_file)
    ubuntu_corpus = ubuntu_dataset.get_corpus()
    ubuntu_ids = embedding.corpus_to_ids(ubuntu_corpus)
    print('Got AskUbuntu corpus ids.')

    padding_id = embedding.vocab_ids['<padding>']

    dev_pos_file = 'data/android/dev.pos.txt'
    dev_neg_file = 'data/android/dev.neg.txt'
    android_dev_data = android_dataset.read_annotations(
        dev_pos_file, dev_neg_file)

    android_dev_batches = batch_utils.generate_eval_batches(
        android_ids, android_dev_data, padding_id)

    assert args.model in ['lstm', 'cnn']
    if os.path.isfile(args.load):
        checkpoint = torch.load(args.load)
    else:
        print('No checkpoint found here.')
        return

    if args.model == 'lstm':
        encoder_src = LSTM(embed_size, args.hidden)
        encoder_tgt = LSTM(embed_size, args.hidden)
    else:
        encoder_src = CNN(embed_size, args.hidden)
        encoder_tgt = CNN(embed_size, args.hidden)
    encoder_src.load_state_dict(checkpoint['state_dict'])
    encoder_src.eval()
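    # ADDA: the source encoder stays fixed; only the target encoder and the discriminator are optimized below.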

    model_discrim = FFN(args.hidden)

    print(encoder_src)
    print(encoder_tgt)
    print(model_discrim)

    criterion = nn.CrossEntropyLoss()
    if cuda_available:
        criterion = criterion.cuda()

    betas = (0.5, 0.999)
    weight_decay = 1e-4
    optimizer_tgt = torch.optim.Adam(encoder_tgt.parameters(),
                                     lr=args.elr,
                                     betas=betas,
                                     weight_decay=weight_decay)
    optimizer_discrim = torch.optim.Adam(model_discrim.parameters(),
                                         lr=args.dlr,
                                         betas=betas,
                                         weight_decay=weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        train_batches = \
            batch_utils.generate_classifier_train_batches(
                ubuntu_ids, android_ids, args.batch_size,
                args.batch_count, padding_id)

        train_utils.train_adda(args, encoder_src, encoder_tgt, model_discrim,
                               embedding, optimizer_tgt, optimizer_discrim,
                               criterion, train_batches, padding_id, epoch)

        auc = train_utils.evaluate_auc(args, encoder_tgt, embedding,
                                       android_dev_batches, padding_id)

        is_best = auc > best_auc
        best_auc = max(auc, best_auc)
        save(
            args, {
                'epoch': epoch + 1,
                'arch': 'lstm',
                'encoder_tgt_state_dict': encoder_tgt.state_dict(),
                'discrim_state_dict': model_discrim.state_dict(),
                'best_auc': best_auc,
            }, is_best)
Example #12
File: qa.py  Project: k-weng/qa-retrieval
def main():
    global args, best_mrr, best_auc
    args = parser.parse_args()
    cuda_available = torch.cuda.is_available()
    print(args)

    corpus_file = 'data/askubuntu/text_tokenized.txt.gz'
    dataset = UbuntuDataset(corpus_file)
    corpus = dataset.get_corpus()

    if args.embedding == 'askubuntu':
        embedding_file = 'data/askubuntu/vector/vectors_pruned.200.txt.gz'
    else:
        embedding_file = 'data/glove/glove.pruned.txt.gz'

    embedding_iter = Embedding.iterator(embedding_file)
    embedding = Embedding(args.embed, embedding_iter)
    print('Embeddings loaded.')

    corpus_ids = embedding.corpus_to_ids(corpus)
    padding_id = embedding.vocab_ids['<padding>']

    train_file = 'data/askubuntu/train_random.txt'
    train_data = dataset.read_annotations(train_file)

    dev_file = 'data/askubuntu/dev.txt'
    dev_data = dataset.read_annotations(dev_file, max_neg=-1)
    dev_batches = batch_utils.generate_eval_batches(corpus_ids, dev_data,
                                                    padding_id)

    assert args.model in ['lstm', 'cnn']
    if args.model == 'lstm':
        model = LSTM(args.embed, args.hidden)
    else:
        model = CNN(args.embed, args.hidden)

    print(model)
    print('Parameters: {}'.format(params(model)))

    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    criterion = nn.MultiMarginLoss(margin=args.margin)

    if cuda_available:
        criterion = criterion.cuda()

    if args.load:
        if os.path.isfile(args.load):
            print('Loading checkpoint.')
            checkpoint = torch.load(args.load)
            args.start_epoch = checkpoint['epoch']
            best_mrr = checkpoint.get('best_mrr', -1)
            best_auc = checkpoint.get('best_auc', -1)
            model.load_state_dict(checkpoint['state_dict'])

            print('Loaded checkpoint at epoch {}.'.format(checkpoint['epoch']))
        else:
            print('No checkpoint found here.')

    if args.eval:
        test_file = 'data/askubuntu/test.txt'
        test_data = dataset.read_annotations(test_file, max_neg=-1)
        test_batches = batch_utils.generate_eval_batches(
            corpus_ids, test_data, padding_id)

        print('Evaluating on dev set.')
        train_utils.evaluate_metrics(args, model, embedding, dev_batches,
                                     padding_id)

        print('Evaluating on test set.')
        train_utils.evaluate_metrics(args, model, embedding, test_batches,
                                     padding_id)
        return

    if args.android:
        android_file = 'data/android/corpus.tsv.gz'
        android_dataset = AndroidDataset(android_file)
        android_ids = embedding.corpus_to_ids(android_dataset.get_corpus())

        dev_pos_file = 'data/android/dev.pos.txt'
        dev_neg_file = 'data/android/dev.neg.txt'
        android_data = android_dataset.read_annotations(
            dev_pos_file, dev_neg_file)

        android_batches = batch_utils.generate_eval_batches(
            android_ids, android_data, padding_id)

    for epoch in range(args.start_epoch, args.epochs):
        train_batches = batch_utils.generate_train_batches(
            corpus_ids, train_data, args.batch_size, padding_id)

        train_utils.train(args, model, embedding, optimizer, criterion,
                          train_batches, padding_id, epoch)

        map, mrr, p1, p5 = train_utils.evaluate_metrics(
            args, model, embedding, dev_batches, padding_id)

        auc = -1
        if args.android:
            auc = train_utils.evaluate_auc(args, model, embedding,
                                           android_batches, padding_id)

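        # Selection metric: Android dev AUC when --android is set, otherwise AskUbuntu dev MRR.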
        is_best = auc > best_auc if args.android else mrr > best_mrr
        best_mrr = max(mrr, best_mrr)
        best_auc = max(auc, best_auc)
        save(
            args, {
                'epoch': epoch + 1,
                'arch': 'lstm',
                'state_dict': model.state_dict(),
                'best_mrr': best_mrr,
                'best_auc': best_auc,
            }, is_best)