def main(args):
    print("--- Starting with reading in the %s dataset ---" % args.dataset)
    (train_set,
     test_set), output_dimensions, target_names = multi_task_dataset_prep(
         args.dataset)
    print("--- Finished with reading in the %s dataset ---" % args.dataset)

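    # Pick the dataset-specific collate function for batching the BERT-embedded inputs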
    if args.dataset == "DAILYDIALOG-BERT":
        collate_fn = collate_fn_dailydialog
    elif args.dataset == "ENRON-BERT":
        collate_fn = collate_fn_enron
    else:
        raise ValueError(
            "The given dataset name is not recognised: %s" % args.dataset)

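    # Wrap the train and test splits in DataLoaders, forwarding the sequence-length
    # flag to the collate function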
    train_set = DataLoader(
        train_set,
        shuffle=True,
        batch_size=args.batch_size,
        collate_fn=lambda x: collate_fn(x, include_lens=args.use_lengths))
    test_set = DataLoader(
        test_set,
        shuffle=False,
        batch_size=args.batch_size,
        collate_fn=lambda x: collate_fn(x, include_lens=args.use_lengths))

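    # Clear the CUDA cache and seed torch/numpy for reproducibility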
    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

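    # One MLP tower per task; its input size matches the concatenated expert CNN
    # features (num_filters_experts * number of filter sizes)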
    towers = {
        MLP(args.num_filters_experts * len(args.filter_list_experts),
            args.linear_layers, output_dim): name
        for output_dim, name in zip(output_dimensions, target_names)
    }

    # initialize the gating networks: one LSTM gate per task
    # (alternative CNN-based gates, kept commented out for reference)
    # gating_networks = [
    #     ConvNet(input_channels=1, filter_list=args.filter_list_g,
    #             embed_matrix=torch.zeros(size=(1, 768)),
    #             num_filters=args.num_filters_g, output_dim=args.n_experts,
    #             use_bert_embeds=True) for _ in range(len(target_names))
    # ]

    gating_networks = [
        SimpleLSTM(torch.zeros(size=(1, 768)),
                   args.hidden_dim_g,
                   args.n_experts,
                   device=args.device,
                   use_lengths=args.use_lengths,
                   use_bert_embeds=True) for _ in range(len(target_names))
    ]

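    # Expert networks: one CNN per expert, operating directly on the BERT embeddings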
    shared_layers = [
        MultitaskConvNet(input_channels=1,
                         filter_list=args.filter_list_experts,
                         embed_matrix=torch.zeros(size=(1, 768)),
                         num_filters=args.num_filters_experts,
                         use_bert_embeds=True) for _ in range(args.n_experts)
    ]

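    # Assemble the multi-gate mixture-of-experts from shared experts, per-task gates
    # and per-task towers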
    model = MultiGateMixtureofExperts(
        shared_layers=shared_layers,
        gating_networks=gating_networks,
        towers=towers,
        device=args.device,
        include_lens=args.use_lengths,
        batch_size=args.batch_size,
        gating_drop=args.gate_dropout,
        mean_diff=args.mean_diff,
        weight_adjust_mode=args.balancing_strategy)

    # TODO: c
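    # Build one cross-entropy loss per task, optionally with per-class weights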
    if args.class_weighting:
        task_weights = multitask_class_weighting(train_set, target_names,
                                                 output_dimensions)
        losses = {
            name:
            nn.CrossEntropyLoss(weight=task_weights[name].to(args.device))
            for name in target_names
        }
    else:
        losses = {name: nn.CrossEntropyLoss() for name in target_names}

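    # Plain SGD with a step-decay learning-rate schedule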
    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate)
    scheduler = StepLR(optimizer,
                       step_size=args.scheduler_stepsize,
                       gamma=args.scheduler_gamma)

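    # Train the model, writing TensorBoard logs and periodic checkpoints to the log directory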
    train(model,
          losses,
          optimizer,
          scheduler,
          train_set,
          device=args.device,
          include_lengths=args.use_lengths,
          save_path=args.logdir,
          save_name="%s_datasets" % "_".join(target_names),
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip,
          balancing_epoch_num=args.balance_epoch_cnt)

    print("Evaluating model")
    model.load_state_dict(
        torch.load("%s/%s_datasets_epoch_%d.pt" %
                   (args.logdir, "_".join(target_names), args.n_epochs - 1)))
    evaluation(model, test_set, losses, device=args.device)
def main(args):

    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

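    # torchtext Field: lowercased, spaCy-tokenised text, optionally padded to a fixed length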
    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=args.use_lengths,
                 batch_first=True,
                 fix_length=args.fix_length)

    output_dimensions = get_num_classes_dataset(args.data_path,
                                                args.target_names)

    # Read the dataset from the CSV data directory for the requested target columns
    print("--- Starting with reading in the dataset ---")
    dataset = CSVDataset(
        text_field=TEXT,
        path_to_datadir=args.data_path).load(targets=args.target_names)
    print("--- Finished with reading in the dataset ---")

    towers = {
        MLP(args.hidden_dim, args.linear_layers, output_dim): name
        for output_dim, name in zip(output_dimensions, args.target_names)
    }
    # Load the dataset and split it into train and test portions

    dloader = CustomDataLoader(dataset, TEXT, args.target_names)
    data_iterators = dloader.construct_iterators(
        vectors="glove.6B.300d",
        vector_cache="../.vector_cache",
        batch_size=args.batch_size,
        device=torch.device("cpu"))

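    # Shared LSTM encoder over the pretrained GloVe embedding matrix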
    model = MultiTaskLSTM(vocab=TEXT.vocab.vectors,
                          hidden_dim=args.hidden_dim,
                          device=args.device,
                          use_lengths=args.use_lengths)

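    # Hard parameter sharing: one shared encoder body feeding the task-specific towers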
    multitask_model = MultiTaskModel(shared_layer=model,
                                     towers=towers,
                                     batch_size=args.batch_size,
                                     input_dimension=args.embedding_dim,
                                     device=args.device,
                                     include_lens=args.use_lengths)

    if args.class_weighting:
        task_weights = multitask_class_weighting(data_iterators[0],
                                                 args.target_names)
        losses = {
            name:
            nn.CrossEntropyLoss(weight=task_weights[name].to(args.device))
            for name in args.target_names
        }
    else:
        losses = {name: nn.CrossEntropyLoss() for name in args.target_names}

    optimizer = optim.SGD(multitask_model.parameters(), lr=args.learning_rate)
    scheduler = StepLR(optimizer,
                       step_size=args.scheduler_stepsize,
                       gamma=args.scheduler_gamma)

    train(multitask_model,
          losses,
          optimizer,
          scheduler,
          data_iterators[0],
          device=args.device,
          include_lengths=args.use_lengths,
          save_path=args.logdir,
          save_name="%s_datasets" % "_".join(args.target_names),
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip)

    print("Evaluating model")
    multitask_model.load_state_dict(
        torch.load(
            "%s/%s_datasets_epoch_%d.pt" %
            (args.logdir, "_".join(args.target_names), args.n_epochs - 1)))
    evaluation(multitask_model, data_iterators[-1], losses, device=args.device)
Example 3
def main(args):

    # TODO: clip gradients
    # for the multitask learning, make a dictionary containing "task": data
    # Set the random seed for experiments (check if I need to do this for all the other files as well)
    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=args.use_lengths,
                 batch_first=True,
                 fix_length=args.fix_length)
    # Load datasets

    output_dimensions = get_num_classes_dataset(args.data_path,
                                                args.target_names)

    # Read the dataset from the CSV data directory for the requested target columns
    print("--- Starting with reading in the dataset ---")
    dataset = CSVDataset(
        text_field=TEXT,
        path_to_datadir=args.data_path).load(targets=args.target_names)
    print("--- Finished with reading in the dataset ---")
    # Load the dataset and split it into train and test portions

    dloader = CustomDataLoader(dataset, TEXT, args.target_names)
    data_iterators = dloader.construct_iterators(
        vectors="glove.6B.300d",
        vector_cache="../.vector_cache",
        batch_size=args.batch_size,
        device=torch.device("cpu"))

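    # Tower input size equals the concatenated CNN feature maps (num_filters per filter size)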
    towers = {
        MLP(
            len(args.filter_list) * args.num_filters, args.linear_layers,
            output_dim): name
        for output_dim, name in zip(output_dimensions, args.target_names)
    }

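    # Shared convolutional feature extractor over the GloVe vocabulary vectors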
    model = MultitaskConvNet(1,
                             args.filter_list,
                             TEXT.vocab.vectors,
                             args.num_filters,
                             dropbout_probs=args.dropout)

    multitask_model = MultiTaskModel(
        shared_layer=model,
        towers=towers,
        batch_size=args.batch_size,
        input_dimension=TEXT.vocab.vectors.shape[1],
        device=args.device,
        include_lens=args.use_lengths)

    if args.class_weighting:
        task_weights = multitask_class_weighting(data_iterators[0],
                                                 args.target_names)
        losses = {
            name:
            nn.CrossEntropyLoss(weight=task_weights[name].to(args.device))
            for name in args.target_names
        }
    else:
        losses = {name: nn.CrossEntropyLoss() for name in args.target_names}

    optimizer = optim.SGD(multitask_model.parameters(), lr=args.learning_rate)
    scheduler = StepLR(optimizer,
                       step_size=args.scheduler_stepsize,
                       gamma=args.scheduler_gamma)

    train(multitask_model,
          losses,
          optimizer,
          scheduler,
          data_iterators[0],
          device=args.device,
          include_lengths=args.use_lengths,
          save_path=args.logdir,
          save_name="%s_datasets" % "_".join(args.target_names),
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip)

    print("Evaluating model")
    multitask_model.load_state_dict(
        torch.load(
            "%s/%s_datasets_epoch_%d.pt" %
            (args.logdir, "_".join(args.target_names), args.n_epochs - 1)))
    evaluation(multitask_model, data_iterators[-1], losses, device=args.device)
Example 4
def main(args):
    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=args.use_lengths,
                 batch_first=True,
                 fix_length=args.fix_length)

    output_dimensions = get_num_classes_dataset(args.data_path,
                                                args.target_names)

    towers = {
        MLP(args.num_filters_experts * len(args.filter_list_experts),
            args.linear_layers, output_dim): name
        for output_dim, name in zip(output_dimensions, args.target_names)
    }

    # Read the dataset from the CSV data directory for the requested target columns
    dataset = CSVDataset(
        text_field=TEXT,
        path_to_datadir=args.data_path).load(targets=args.target_names)
    # Load the dataset and split it into train and test portions

    dloader = CustomDataLoader(dataset, TEXT, args.target_names)
    data_iterators = dloader.construct_iterators(
        vectors="glove.6B.300d",
        vector_cache="../.vector_cache",
        batch_size=args.batch_size,
        device=torch.device("cpu"))

    # initialize the gating networks, one per task; the architecture is selected by args.gating_nets_type
    if args.gating_nets_type == "CNN":
        gating_networks = [
            ConvNet(input_channels=1,
                    filter_list=args.filter_list_g,
                    embed_matrix=TEXT.vocab.vectors,
                    num_filters=args.num_filters_g,
                    output_dim=args.n_experts)
            for _ in range(len(args.target_names))
        ]
    elif args.gating_nets_type == "LSTM":
        gating_networks = [
            SimpleLSTM(TEXT.vocab.vectors,
                       args.hidden_dim_g,
                       args.n_experts,
                       device=args.device,
                       use_lengths=args.use_lengths)
            for _ in range(len(args.target_names))
        ]

    elif args.gating_nets_type == "MLP":
        gating_networks = [
            MLPGate(args.fix_length, args.n_experts, TEXT.vocab.vectors)
            for _ in range(len(args.target_names))
        ]

    elif args.gating_nets_type == "TRANSFORMER":
        gating_networks = [
            TransformerModel(
                max_seq_len=args.fix_length,
                num_outputs=args.n_experts,
                word_embedding_matrix=TEXT.vocab.vectors,
                feed_fwd_dim=args.transformer_fwd_dim,
                num_transformer_layers=args.num_transformer_layers,
                num_transformer_heads=args.num_transformer_heads,
                pos_encoding_dropout=0.2,
                classification_dropout=0.3,
                batch_first=True,
                pad_index=TEXT.vocab.stoi[TEXT.pad_token])  # the field's pad token ('<pad>' by default)
            for _ in range(len(args.target_names))
        ]
    else:
        raise ValueError(
            "The given gating network type is not recognised: %s" %
            args.gating_nets_type)

    shared_layers = [
        MultitaskConvNet(input_channels=1,
                         filter_list=args.filter_list_experts,
                         embed_matrix=TEXT.vocab.vectors,
                         num_filters=args.num_filters_experts)
        for _ in range(args.n_experts)
    ]

    model = MultiGateMixtureofExperts(
        shared_layers=shared_layers,
        gating_networks=gating_networks,
        towers=towers,
        device=args.device,
        include_lens=args.use_lengths,
        batch_size=args.batch_size,
        gating_drop=args.gate_dropout,
        mean_diff=args.mean_diff,
        weight_adjust_mode=args.balancing_strategy)

    if args.class_weighting:
        task_weights = multitask_class_weighting(data_iterators[0],
                                                 args.target_names)
        losses = {
            name:
            nn.CrossEntropyLoss(weight=task_weights[name].to(args.device))
            for name in args.target_names
        }
    else:
        losses = {name: nn.CrossEntropyLoss() for name in args.target_names}

    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate)
    scheduler = StepLR(optimizer,
                       step_size=args.scheduler_stepsize,
                       gamma=args.scheduler_gamma)

    train(model,
          losses,
          optimizer,
          scheduler,
          data_iterators[0],
          device=args.device,
          include_lengths=args.use_lengths,
          save_path=args.logdir,
          save_name="%s_datasets" % "_".join(args.target_names),
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip,
          balancing_epoch_num=args.balance_epoch_cnt,
          balancing_mode=args.balancing_strategy)

    print("Evaluating model")
    model.load_state_dict(
        torch.load(
            "%s/%s_datasets_epoch_%d.pt" %
            (args.logdir, "_".join(args.target_names), args.n_epochs - 1)))
    evaluation(model, data_iterators[-1], losses, device=args.device)