import logging

import torch
from simpletransformers.seq2seq import Seq2SeqModel, Seq2SeqArgs


class BartModelWrapper:
    # The original listing shows only this __init__; the enclosing class name is an assumption.
    def __init__(self, args):
        logging.basicConfig(level=logging.INFO)
        transformers_logger = logging.getLogger("transformers")
        transformers_logger.setLevel(logging.WARNING)

        model_args = Seq2SeqArgs()
        model_args.num_train_epochs = 1
        model_args.no_save = True
        model_args.train_batch_size = 4
        model_args.evaluate_during_training = False
        model_args.evaluate_generated_text = False
        model_args.evaluate_during_training_verbose = True
        model_args.use_multiprocessing = False
        model_args.max_seq_length = 5
        model_args.max_length = 6
        model_args.overwrite_output_dir = True
        self.model_args = model_args
        cuda_available = torch.cuda.is_available()

        # Initialize model
        self.model = Seq2SeqModel(
            encoder_decoder_type="bart",
            encoder_decoder_name="facebook/bart-base",
            args=model_args,
            use_cuda=cuda_available,
        )
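# Hypothetical usage, not in the original listing: the wrapper class name above is
# itself an assumption, and `args` is unused by this __init__.
wrapper = BartModelWrapper(args=None)
print(wrapper.model.predict(["paraphrase: The weather is nice today."]))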
Example #2
def create_network():
    # Configure the model
    model_args = Seq2SeqArgs()
    model_args.padding = "longest"
    model_args.length_penalty = 1
    model_args.truncation = True
    model_args.max_length = 512

    model = Seq2SeqModel(
        encoder_decoder_type="bart",
        encoder_decoder_name="facebook/bart-large-cnn",
        args=model_args,
    )
    return model
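# A minimal usage sketch, not part of the original listing: the returned model
# generates summaries through simpletransformers' predict(); the input text is
# illustrative only.
summarizer = create_network()
summaries = summarizer.predict(["Paste a long news article to summarize here."])
print(summaries[0])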
Example #3
import logging

import pandas as pd
from simpletransformers.seq2seq import Seq2SeqModel, Seq2SeqArgs


def main():
    model_args = Seq2SeqArgs()
    model_args.eval_batch_size = 4
    model_args.evaluate_during_training = True
    model_args.evaluate_during_training_steps = 2500
    model_args.evaluate_during_training_verbose = True
    model_args.fp16 = False
    model_args.learning_rate = 5e-5
    model_args.max_seq_length = 128
    model_args.num_train_epochs = 2
    model_args.overwrite_output_dir = False
    model_args.reprocess_input_data = True
    model_args.save_eval_checkpoints = False
    model_args.save_steps = -1
    model_args.train_batch_size = 16
    model_args.use_multiprocessing = False
    model_args.do_sample = True
    model_args.num_beams = None
    model_args.num_return_sequences = 3
    model_args.max_length = 128
    model_args.top_k = 50
    model_args.top_p = 0.95
    model_args.n_gpu = 1
    model_args.wandb_project = None

    logging.basicConfig(level=logging.INFO)
    transformers_logger = logging.getLogger("transformers")
    transformers_logger.setLevel(logging.ERROR)

    model = Seq2SeqModel(
        encoder_decoder_type="bart",
        encoder_decoder_name="outputs_23-04-2021/checkpoint-144205-epoch-5",
        args=model_args,
        cuda_device=2)

    while True:
        print('first sentence:')
        input_text = input()
        if input_text == 'exit':
            break
        print('second sentence:')
        target_text = input()
        prefix = 'paraphrase'
        eval_df = pd.DataFrame(
            [[input_text, target_text, prefix]],
            columns=["input_text", "target_text", "prefix"],
        )
        # project_inference_method appears to be a project-specific extension;
        # it is not part of the stock simpletransformers Seq2SeqModel API.
        prediction, losses = model.project_inference_method(eval_df)
        print(prediction)
        print(losses)
Example #4
import logging

from simpletransformers.seq2seq import Seq2SeqModel, Seq2SeqArgs

logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

model_args = Seq2SeqArgs()
model_args.reprocess_input_data = True
model_args.overwrite_output_dir = True
model_args.eval_batch_size = 64
model_args.use_multiprocessing = False
model_args.max_seq_length = 196
model_args.max_length = 512
model_args.num_beams = None
model_args.do_sample = True
model_args.top_k = 50
model_args.top_p = 0.95

use_cuda = True


def load_german():
    english_to_german_model = Seq2SeqModel(
        encoder_decoder_type="marian",
        encoder_decoder_name="Helsinki-NLP/opus-mt-en-de",
        use_cuda=use_cuda,
        args=model_args,
    )
    return english_to_german_model
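# A minimal usage sketch, not part of the original listing, assuming the
# module-level model_args above: predict() returns one sampled German
# translation per input sentence.
english_to_german_model = load_german()
print(english_to_german_model.predict(["How are you today?"])[0])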
Example #5

import logging

import pandas as pd
from simpletransformers.seq2seq import Seq2SeqModel, Seq2SeqArgs


def main():
    logging.basicConfig(level=logging.INFO)
    transformers_logger = logging.getLogger("transformers")
    transformers_logger.setLevel(logging.ERROR)

    model_args = Seq2SeqArgs()
    model_args.eval_batch_size = 4
    model_args.evaluate_during_training = True
    model_args.evaluate_during_training_steps = 5000
    model_args.evaluate_during_training_verbose = True
    model_args.fp16 = False
    model_args.learning_rate = 5e-5
    model_args.max_seq_length = 128
    model_args.num_train_epochs = 5
    model_args.overwrite_output_dir = False
    model_args.reprocess_input_data = True
    model_args.save_eval_checkpoints = True
    model_args.save_steps = -1
    model_args.save_model_every_epoch = True
    model_args.train_batch_size = 4
    model_args.use_multiprocessing = False
    model_args.do_sample = True
    model_args.num_beams = None
    model_args.num_return_sequences = 3
    model_args.max_length = 128
    model_args.top_k = 50
    model_args.top_p = 0.95
    model_args.n_gpu = 1
    experiment_name = "bart-large-paws"
    model_args.output_dir = experiment_name
    model_args.best_model_dir = 'best_model/' + experiment_name
    model_args.wandb_experiment = experiment_name
    model_args.wandb_project = "NLP Project experiments"

    encoder_decoder_name = "facebook/bart-large"

    train_df = pd.read_csv(
        '/home/fodl/asafmaman/PycharmProjects/nlp_final_project_private/'
        'paraphrasing/data/cleaned_labeled/'
        'paws_train_clean.csv')
    eval_df = pd.read_csv(
        '/home/fodl/asafmaman/PycharmProjects/nlp_final_project_private/'
        'paraphrasing/data/cleaned_labeled/'
        'paws_test_clean_no_train_overlap.csv')

    train_df = train_df[train_df['is_duplicate'] == 1][[
        'sentence1', 'sentence2'
    ]]
    train_df['prefix'] = 'paraphrase'
    train_df = train_df.rename(columns={
        "sentence1": "input_text",
        "sentence2": "target_text"
    })
    # positive = positive.rename(columns={"sentence2": "input_text", "sentence1": "target_text"})
    train_df = train_df[['input_text', 'target_text', 'prefix']]
    train_df = train_df.dropna()

    eval_df = eval_df[eval_df['is_duplicate'] == 1][['sentence1', 'sentence2']]
    eval_df['prefix'] = 'paraphrase'
    eval_df = eval_df.rename(columns={
        "sentence1": "input_text",
        "sentence2": "target_text"
    })
    # eval_df = eval_df.rename(columns={"sentence2": "input_text", "sentence1": "target_text"})
    eval_df = eval_df[['input_text', 'target_text', 'prefix']]
    eval_df = eval_df.dropna()

    model = Seq2SeqModel(encoder_decoder_type="bart",
                         encoder_decoder_name=encoder_decoder_name,
                         args=model_args,
                         cuda_device=3)
    print(train_df)
    model.train_model(train_df, eval_data=eval_df)
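    # A possible follow-up, not in the original: reload the best checkpoint written
    # to model_args.best_model_dir during evaluation and paraphrase a sample sentence
    # (the sentence below is illustrative only).
    best_model = Seq2SeqModel(encoder_decoder_type="bart",
                              encoder_decoder_name=model_args.best_model_dir,
                              args=model_args,
                              cuda_device=3)
    print(best_model.predict(["paraphrase: The meeting was moved to next Tuesday."]))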
Example #6
import logging
import os
from datetime import datetime

from simpletransformers.seq2seq import Seq2SeqModel, Seq2SeqArgs

# import_datasets is a project-specific helper defined elsewhere in the project.


def main():
    logging.basicConfig(level=logging.INFO)
    transformers_logger = logging.getLogger("transformers")
    transformers_logger.setLevel(logging.ERROR)

    eval_df, train_df = import_datasets()

    model_args = Seq2SeqArgs()
    model_args.eval_batch_size = 4
    model_args.evaluate_during_training = True
    model_args.evaluate_during_training_steps = 2500
    model_args.evaluate_during_training_verbose = True
    model_args.fp16 = False
    model_args.learning_rate = 5e-5
    model_args.max_seq_length = 128
    model_args.num_train_epochs = 2
    model_args.overwrite_output_dir = False
    model_args.reprocess_input_data = True
    model_args.save_eval_checkpoints = False
    model_args.save_steps = -1
    model_args.train_batch_size = 4
    model_args.use_multiprocessing = False
    model_args.do_sample = True
    model_args.num_beams = None
    model_args.num_return_sequences = 3
    model_args.max_length = 128
    model_args.top_k = 50
    model_args.top_p = 0.95
    model_args.n_gpu = 1
    model_args.output_dir = "outputs_19-04-2021"
    model_args.wandb_project = "Paraphrasing with BART_19-04-2021"

    model = Seq2SeqModel(encoder_decoder_type="bart",
                         encoder_decoder_name="facebook/bart-large",
                         args=model_args,
                         cuda_device=7)

    model.train_model(train_df, eval_data=eval_df)

    to_predict = [
        prefix + ": " + str(input_text) for prefix, input_text in zip(
            eval_df["prefix"].tolist(), eval_df["input_text"].tolist())
    ]

    truth = eval_df["target_text"].tolist()

    preds = model.predict(to_predict)

    # Saving the predictions if needed
    os.makedirs("predictions", exist_ok=True)
    with open(f"predictions/predictions_{datetime.now()}.txt", "w") as f:
        for i, text in enumerate(eval_df["input_text"].tolist()):
            f.write(str(text) + "\n\n")

            f.write("Truth:\n")
            f.write(truth[i] + "\n\n")

            f.write("Prediction:\n")
            for pred in preds[i]:
                f.write(str(pred) + "\n")
            f.write(
                "________________________________________________________________________________\n"
            )
Example #7
import logging

from simpletransformers.seq2seq import Seq2SeqModel, Seq2SeqArgs

# import_cleaned_data, plot_histograms and calculate_accuracy_and_f1 are
# project-specific helpers defined elsewhere in the project.


def main():
    model_args = Seq2SeqArgs()
    model_args.eval_batch_size = 1  # don't change
    model_args.evaluate_during_training = True
    model_args.evaluate_during_training_steps = 2500
    model_args.evaluate_during_training_verbose = True
    model_args.fp16 = False
    model_args.learning_rate = 5e-5
    model_args.max_seq_length = 128
    model_args.num_train_epochs = 2
    model_args.overwrite_output_dir = False
    model_args.reprocess_input_data = True
    model_args.save_eval_checkpoints = False
    model_args.save_steps = -1
    model_args.train_batch_size = 1  # DON'T CHANGE!!!
    model_args.use_multiprocessing = False
    model_args.do_sample = True
    model_args.num_beams = None
    model_args.num_return_sequences = 3
    model_args.max_length = 128
    model_args.top_k = 50
    model_args.top_p = 0.95
    model_args.n_gpu = 1
    model_args.wandb_project = None

    logging.basicConfig(level=logging.INFO)
    transformers_logger = logging.getLogger("transformers")
    transformers_logger.setLevel(logging.ERROR)

    dataset_to_evaluate = 'mrpc'
    cuda_device = 3
    threshold = None
    inversed = False
    score_type = 'loss'

    # encoder_decoder_name = "facebook/bart-base"
    # encoder_decoder_name = "facebook/bart-large"
    # encoder_decoder_name = "bart-base-all"
    # encoder_decoder_name = "bart-large-all"
    # encoder_decoder_name = "bart-base-mrpc"
    # encoder_decoder_name = "bart-large-mrpc"
    # encoder_decoder_name = "bart-base-paws"
    # encoder_decoder_name = "bart-large-paws"
    encoder_decoder_name = "bart-base-qqp"
    # encoder_decoder_name = "bart-large-qqp"

    print(dataset_to_evaluate)
    print(encoder_decoder_name)

    negative, positive = import_cleaned_data(dataset_to_evaluate, inversed)

    model = Seq2SeqModel(encoder_decoder_type="bart",
                         encoder_decoder_name=encoder_decoder_name,
                         args=model_args,
                         cuda_device=cuda_device)

    if score_type == 'probs':
        positive_losses = model.project_inference_method(positive)
        negative_losses = model.project_inference_method(negative)
    elif score_type == 'loss':
        positive_losses = model.ce_losses(positive)
        negative_losses = model.ce_losses(negative)
    else:
        raise AssertionError("score_type has to be one of 'loss' or 'probs'")

    plot_histograms(positive_losses=positive_losses,
                    negative_losses=negative_losses,
                    plot_title=(dataset_to_evaluate + '/' +
                                encoder_decoder_name),
                    x_min=0.5,
                    x_max=8)

    calculate_accuracy_and_f1(positive_losses,
                              negative_losses,
                              threshold=threshold)