예제 #1
0
def predict_uncased_sadness(dropout=0.2):
    """Run the saved uncased-BERT 'sadness' model over every data split.

    Loads the train/dev/test frames returned by ``get_data('sadness')``,
    restores the weights stored at
    ``./data/models/uncased-bert/models/sadness`` onto the CPU, calls
    ``predict`` on each split using the ``'Clean_Tweet'`` column, and writes
    one CSV per split into ``./data/models/uncased-bert/output/``.

    Args:
        dropout: Value forwarded to ``BertForUncasedClassification``.

    Returns:
        None. The results are the three CSV files written to disk.
    """
    import os  # local import so this function does not rely on file-level imports

    output_dir = './data/models/uncased-bert/output'

    train_df, dev_df, test_df = get_data('sadness')
    bert_tokenizer_uncased = BertTokenizer.from_pretrained(
        'bert-base-uncased', do_lower_case=True)

    model = BertForUncasedClassification(dropout)
    model.load_state_dict(
        torch.load('./data/models/uncased-bert/models/sadness',
                   map_location='cpu'))
    # Loading weights does not change the train/eval flag; switch to eval so
    # dropout is disabled while predicting. Harmless if `predict` already
    # does this itself.
    model.eval()

    # Make sure the destination exists before spending time on inference.
    os.makedirs(output_dir, exist_ok=True)

    splits = (('train', train_df), ('dev', dev_df), ('test', test_df))

    # Predict every split first, then write, matching the original ordering.
    predictions = [
        (split_name,
         predict(frame, model, bert_tokenizer_uncased, 'Clean_Tweet'))
        for split_name, frame in splits
    ]

    for split_name, predicted_df in predictions:
        predicted_df.to_csv(
            output_dir + '/sadness_{}_out.csv'.format(split_name))
def train_individual_uncased_joy():
    """Grid-search uncased-BERT hyperparameters for the 'joy' task.

    Builds a fresh ``BertForUncasedClassification`` for every combination of
    dropout, weight decay, epsilon and learning rate (3 * 4 * 3 * 3 = 108
    runs), trains it with ``train_model`` on the 'joy' train/dev loaders, and
    accumulates the first element of each ``train_model`` result
    (presumably per-run training statistics -- confirm against
    ``train_model``). All accumulated rows are written to
    ``./data/models/uncased-bert/results/joy/results.csv``.

    Returns:
        None. The result is the CSV file written to disk.
    """
    # Local imports so this function does not rely on file-level imports.
    import itertools
    import os

    results_path = './data/models/uncased-bert/results/joy/results.csv'

    # Create the results directory *before* training: otherwise a missing
    # directory is only discovered after all runs finish and every collected
    # statistic is lost when to_csv raises.
    os.makedirs(os.path.dirname(results_path), exist_ok=True)

    train, dev, _ = get_bert_data_loader('joy')
    final_training_stats = []

    dropouts = [0.1, 0.2, 0.3]
    weight_decays = [0.8, 0.85, 0.9, 0.95]
    epsilons = [1e-06, 1e-07, 1e-08]
    learning_rates = [2e-5, 3e-5, 5e-5]

    # product() varies the last iterable fastest, i.e. the same order as the
    # equivalent nested loops (dropout outermost, learning rate innermost).
    grid = itertools.product(
        dropouts, weight_decays, epsilons, learning_rates)

    for d, w, e, lr in grid:
        uncased_model = BertForUncasedClassification(dropout=d)
        uncased_trained, _, _, _ = train_model(
            uncased_model,
            'joy_uncased',
            train,
            dev,
            filepath='./models/joy_uncased/',
            lr=lr,
            eps=e,
            weight_decay=w)

        final_training_stats.extend(uncased_trained)

    pd.DataFrame(final_training_stats).to_csv(results_path)