def main(args):
    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)
    # The lines below make CUDA (almost) deterministic; they can slow down training
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=False,
                 batch_first=True,
                 fix_length=args.fix_length,
                 init_token="[cls]")

    # Use the name of the dataset to get the arguments needed
    output_dimensions = get_num_classes_dataset(args.data_path, args.target_name)

    print("--- Starting with reading in the dataset ---")
    dataset = CSVDataset(text_field=TEXT, path_to_datadir=args.data_path).load(targets=args.target_name)
    print("--- Finished with reading in the dataset ---")

    dloader = CustomDataLoader(dataset, TEXT, args.target_name)
    data_iterators = dloader.construct_iterators(vectors="glove.6B.300d",
                                                 vector_cache="../.vector_cache",
                                                 batch_size=args.batch_size,
                                                 device=torch.device("cpu"))

    model = TransformerModel(max_seq_len=args.fix_length,
                             num_outputs=output_dimensions,
                             word_embedding_matrix=TEXT.vocab.vectors,
                             feed_fwd_dim=args.fwd_dim,
                             num_transformer_layers=args.num_transformer_layers,
                             num_transformer_heads=args.num_transformer_heads,
                             pos_encoding_dropout=args.pos_encoding_dropout,
                             classification_dropout=args.fc_layer_dropout,
                             batch_first=True,
                             pad_index=TEXT.vocab.stoi['pad'])

    if args.class_weighting:
        weights = single_task_class_weighting(data_iterators[0])
        criterion = nn.CrossEntropyLoss(weight=weights.to(args.device))
    else:
        criterion = nn.CrossEntropyLoss()

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate, betas=(0.90, 0.98), eps=10e-9)
    scheduler = StepLR(optimizer, step_size=args.scheduler_stepsize, gamma=args.scheduler_gamma)

    train(model, criterion, optimizer, scheduler, data_iterators[0],
          device=args.device,
          include_lengths=False,
          save_path=args.logdir,
          save_name="csv_dataset",
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip)

    print("Evaluating model")
    model.load_state_dict(torch.load(args.logdir + "/csv_dataset_epoch_%d.pt" % (args.n_epochs - 1)))
    evaluation(model, data_iterators[-1], criterion,
               device=args.device,
               include_lengths=False)
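# A minimal sketch of the command-line interface this script appears to expect. The argument
# names are inferred from the attributes accessed on `args` above; the defaults shown here are
# illustrative assumptions, not values taken from the repository.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Train a Transformer classifier on a CSV dataset")
    parser.add_argument("--data_path", type=str, required=True)
    parser.add_argument("--target_name", type=str, required=True)
    parser.add_argument("--logdir", type=str, default="logs")
    parser.add_argument("--device", type=str, default="cpu")          # e.g. "cuda:0" when available
    parser.add_argument("--random_seed", type=int, default=42)
    parser.add_argument("--fix_length", type=int, default=128)
    parser.add_argument("--batch_size", type=int, default=64)
    parser.add_argument("--fwd_dim", type=int, default=512)
    parser.add_argument("--num_transformer_layers", type=int, default=2)
    parser.add_argument("--num_transformer_heads", type=int, default=6)  # must divide the 300-d GloVe embeddings
    parser.add_argument("--pos_encoding_dropout", type=float, default=0.1)
    parser.add_argument("--fc_layer_dropout", type=float, default=0.3)
    parser.add_argument("--class_weighting", action="store_true")
    parser.add_argument("--learning_rate", type=float, default=1e-4)
    parser.add_argument("--scheduler_stepsize", type=int, default=10)
    parser.add_argument("--scheduler_gamma", type=float, default=0.9)
    parser.add_argument("--n_epochs", type=int, default=25)
    parser.add_argument("--save_interval", type=int, default=5)
    parser.add_argument("--gradient_clip", type=float, default=1.0)
    main(parser.parse_args())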
def main(args):
    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=args.use_lengths,
                 batch_first=True,
                 fix_length=args.fix_length)

    # Use the name of the dataset to get the arguments needed
    output_dimensions = get_num_classes_dataset(args.data_path, args.target_name)

    print("--- Starting with reading in the dataset ---")
    dataset = CSVDataset(text_field=TEXT, path_to_datadir=args.data_path).load(targets=args.target_name)
    print("--- Finished with reading in the dataset ---")

    dloader = CustomDataLoader(dataset, TEXT, args.target_name)
    data_iterators = dloader.construct_iterators(vectors="glove.6B.300d",
                                                 vector_cache="../.vector_cache",
                                                 batch_size=args.batch_size,
                                                 device=torch.device("cpu"))

    model = SimpleLSTM(vocab=TEXT.vocab.vectors,
                       hidden_dim=args.hidden_dim,
                       output_dim=output_dimensions,
                       device=args.device,
                       use_lengths=args.use_lengths,
                       dropout=args.dropout)

    if args.class_weighting:
        weights = single_task_class_weighting(data_iterators[0])
        criterion = nn.CrossEntropyLoss(weight=weights.to(args.device))
    else:
        criterion = nn.CrossEntropyLoss()

    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate)
    scheduler = StepLR(optimizer, step_size=args.scheduler_stepsize, gamma=args.scheduler_gamma)

    train(model, criterion, optimizer, scheduler, data_iterators[0],
          device=args.device,
          include_lengths=args.use_lengths,
          save_path=args.logdir,
          save_name="csv_dataset",
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip)

    print("Evaluating model")
    model.load_state_dict(torch.load(args.logdir + "/csv_dataset_epoch_%d.pt" % (args.n_epochs - 1)))
    evaluation(model, data_iterators[-1], criterion,
               device=args.device,
               include_lengths=args.use_lengths)
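# The implementation of single_task_class_weighting is not shown in these scripts. A common
# choice is inverse-frequency weighting computed over the training iterator; the sketch below
# illustrates that idea and is only an assumption about what the helper does. The attribute
# name `y` on the batch is a placeholder, not necessarily what the real iterator exposes.
import torch


def inverse_frequency_weights(train_iterator, num_classes):
    """Return one weight per class, proportional to 1 / class frequency."""
    counts = torch.zeros(num_classes)
    for batch in train_iterator:
        labels = batch.y.view(-1)                                   # placeholder attribute name
        counts += torch.bincount(labels, minlength=num_classes).float()
    # Normalize so that the weights average to roughly 1; clamp avoids division by zero
    return counts.sum() / (num_classes * counts.clamp(min=1))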
def main(args):
    # TODO: clip gradients
    # For multitask learning, build a dictionary mapping "task" -> data
    # Set the random seed for the experiments (check whether this is needed in the other files as well)
    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=args.use_lengths,
                 batch_first=True,
                 fix_length=args.fix_length)

    # Load datasets: use the names of the datasets to get the arguments needed
    output_dimensions = get_num_classes_dataset(args.data_path, args.target_names)

    print("--- Starting with reading in the dataset ---")
    dataset = CSVDataset(text_field=TEXT, path_to_datadir=args.data_path).load(targets=args.target_names)
    print("--- Finished with reading in the dataset ---")

    # Load the dataset and split it into train and test portions
    dloader = CustomDataLoader(dataset, TEXT, args.target_names)
    data_iterators = dloader.construct_iterators(vectors="glove.6B.300d",
                                                 vector_cache="../.vector_cache",
                                                 batch_size=args.batch_size,
                                                 device=torch.device("cpu"))

    # One classification tower (MLP) per task, keyed by the tower module and valued by the task name
    towers = {MLP(len(args.filter_list) * args.num_filters, args.linear_layers, output_dim): name
              for output_dim, name in zip(output_dimensions, args.target_names)}

    model = MultitaskConvNet(1,
                             args.filter_list,
                             TEXT.vocab.vectors,
                             args.num_filters,
                             dropbout_probs=args.dropout)

    multitask_model = MultiTaskModel(shared_layer=model,
                                     towers=towers,
                                     batch_size=args.batch_size,
                                     input_dimension=TEXT.vocab.vectors.shape[1],
                                     device=args.device,
                                     include_lens=args.use_lengths)

    if args.class_weighting:
        task_weights = multitask_class_weighting(data_iterators[0], args.target_names)
        losses = {name: nn.CrossEntropyLoss(weight=task_weights[name].to(args.device))
                  for name in args.target_names}
    else:
        losses = {name: nn.CrossEntropyLoss() for name in args.target_names}

    optimizer = optim.SGD(multitask_model.parameters(), lr=args.learning_rate)
    scheduler = StepLR(optimizer, step_size=args.scheduler_stepsize, gamma=args.scheduler_gamma)

    train(multitask_model, losses, optimizer, scheduler, data_iterators[0],
          device=args.device,
          include_lengths=args.use_lengths,
          save_path=args.logdir,
          save_name="%s_datasets" % "_".join(args.target_names),
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip)

    print("Evaluating model")
    multitask_model.load_state_dict(
        torch.load("%s/%s_datasets_epoch_%d.pt" % (args.logdir, "_".join(args.target_names), args.n_epochs - 1)))
    evaluation(multitask_model, data_iterators[-1], losses, device=args.device)
def main(args):
    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=args.use_lengths,
                 batch_first=True,
                 fix_length=args.fix_length)

    # Use the names of the datasets to get the arguments needed
    output_dimensions = get_num_classes_dataset(args.data_path, args.target_names)

    print("--- Starting with reading in the dataset ---")
    dataset = CSVDataset(text_field=TEXT, path_to_datadir=args.data_path).load(targets=args.target_names)
    print("--- Finished with reading in the dataset ---")

    # One classification tower (MLP) per task, keyed by the tower module and valued by the task name
    towers = {MLP(args.hidden_dim, args.linear_layers, output_dim): name
              for output_dim, name in zip(output_dimensions, args.target_names)}

    # Load the dataset and split it into train and test portions
    dloader = CustomDataLoader(dataset, TEXT, args.target_names)
    data_iterators = dloader.construct_iterators(vectors="glove.6B.300d",
                                                 vector_cache="../.vector_cache",
                                                 batch_size=args.batch_size,
                                                 device=torch.device("cpu"))

    model = MultiTaskLSTM(vocab=TEXT.vocab.vectors,
                          hidden_dim=args.hidden_dim,
                          device=args.device,
                          use_lengths=args.use_lengths)

    multitask_model = MultiTaskModel(shared_layer=model,
                                     towers=towers,
                                     batch_size=args.batch_size,
                                     input_dimension=args.embedding_dim,
                                     device=args.device,
                                     include_lens=args.use_lengths)

    if args.class_weighting:
        task_weights = multitask_class_weighting(data_iterators[0], args.target_names)
        losses = {name: nn.CrossEntropyLoss(weight=task_weights[name].to(args.device))
                  for name in args.target_names}
    else:
        losses = {name: nn.CrossEntropyLoss() for name in args.target_names}

    optimizer = optim.SGD(multitask_model.parameters(), lr=args.learning_rate)
    scheduler = StepLR(optimizer, step_size=args.scheduler_stepsize, gamma=args.scheduler_gamma)

    train(multitask_model, losses, optimizer, scheduler, data_iterators[0],
          device=args.device,
          include_lengths=args.use_lengths,
          save_path=args.logdir,
          save_name="%s_datasets" % "_".join(args.target_names),
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip)

    print("Evaluating model")
    multitask_model.load_state_dict(
        torch.load("%s/%s_datasets_epoch_%d.pt" % (args.logdir, "_".join(args.target_names), args.n_epochs - 1)))
    evaluation(multitask_model, data_iterators[-1], losses, device=args.device)
def main(args):
    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)
    # The lines below make CUDA (almost) deterministic; they can slow down training
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=args.use_lengths,
                 batch_first=True,
                 fix_length=args.fix_length)

    # Use the name of the dataset to get the arguments needed
    output_dimensions = get_num_classes_dataset(args.data_path, args.target_name)

    print("--- Starting with reading in the dataset ---")
    dataset = CSVDataset(text_field=TEXT, path_to_datadir=args.data_path).load(targets=args.target_name)
    print("--- Finished with reading in the dataset ---")

    # Load the dataset and split it into train and test portions
    dloader = CustomDataLoader(dataset, TEXT, args.target_name)
    data_iterators = dloader.construct_iterators(vectors="glove.6B.300d",
                                                 vector_cache="../.vector_cache",
                                                 batch_size=args.batch_size,
                                                 device=torch.device("cpu"))

    model = ConvNet(input_channels=1,
                    output_dim=output_dimensions,
                    filter_list=args.kernel_sizes,
                    embed_matrix=TEXT.vocab.vectors,
                    num_filters=args.num_filters,
                    dropbout_probs=args.dropout)

    if args.class_weighting:
        weights = single_task_class_weighting(data_iterators[0])
        criterion = nn.CrossEntropyLoss(weight=weights.to(args.device))
    else:
        criterion = nn.CrossEntropyLoss()

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate, weight_decay=1e-5)
    scheduler = StepLR(optimizer, step_size=args.scheduler_stepsize, gamma=args.scheduler_gamma)

    train(model, criterion, optimizer, scheduler, data_iterators[0],
          device=args.device,
          include_lengths=args.use_lengths,
          save_path=args.logdir,
          save_name="csv_dataset",
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip)

    print("Evaluating model")
    model.load_state_dict(torch.load(args.logdir + "/csv_dataset_epoch_%d.pt" % (args.n_epochs - 1)))
    evaluation(model, data_iterators[-1], criterion,
               device=args.device,
               include_lengths=args.use_lengths)
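# ConvNet / MultitaskConvNet are defined elsewhere in the repository. Assuming they follow the
# usual Kim-style text CNN (one convolution per kernel size, max-pool over time, concatenate),
# the pooled feature vector has len(filter_list) * num_filters dimensions, which is why the
# tower MLPs in the multitask scripts use that number as their input size. The class below is
# an illustrative sketch under that assumption, not the repository's actual implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F


class TextCNNSketch(nn.Module):
    def __init__(self, embed_matrix, filter_list, num_filters, output_dim, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding.from_pretrained(embed_matrix, freeze=False)
        embed_dim = embed_matrix.shape[1]
        self.convs = nn.ModuleList(
            [nn.Conv2d(1, num_filters, (k, embed_dim)) for k in filter_list])
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(len(filter_list) * num_filters, output_dim)

    def forward(self, tokens):                         # tokens: (batch, seq_len)
        x = self.embedding(tokens).unsqueeze(1)        # (batch, 1, seq_len, embed_dim)
        pooled = []
        for conv in self.convs:
            c = F.relu(conv(x)).squeeze(3)             # (batch, num_filters, seq_len - k + 1)
            pooled.append(F.max_pool1d(c, c.shape[2]).squeeze(2))   # (batch, num_filters)
        features = torch.cat(pooled, dim=1)            # (batch, len(filter_list) * num_filters)
        return self.fc(self.dropout(features))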
def main(args):
    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=args.use_lengths,
                 batch_first=True,
                 fix_length=args.fix_length)

    # Use the names of the datasets to get the arguments needed
    output_dimensions = get_num_classes_dataset(args.data_path, args.target_names)

    # One classification tower (MLP) per task, keyed by the tower module and valued by the task name
    towers = {MLP(args.num_filters_experts * len(args.filter_list_experts), args.linear_layers, output_dim): name
              for output_dim, name in zip(output_dimensions, args.target_names)}

    dataset = CSVDataset(text_field=TEXT, path_to_datadir=args.data_path).load(targets=args.target_names)

    # Load the dataset and split it into train and test portions
    dloader = CustomDataLoader(dataset, TEXT, args.target_names)
    data_iterators = dloader.construct_iterators(vectors="glove.6B.300d",
                                                 vector_cache="../.vector_cache",
                                                 batch_size=args.batch_size,
                                                 device=torch.device("cpu"))

    # Initialize the expert networks and one gating network per task
    if args.gating_nets_type == "CNN":
        gating_networks = [ConvNet(input_channels=1,
                                   filter_list=args.filter_list_g,
                                   embed_matrix=TEXT.vocab.vectors,
                                   num_filters=args.num_filters_g,
                                   output_dim=args.n_experts)
                           for _ in range(len(args.target_names))]
    elif args.gating_nets_type == "LSTM":
        gating_networks = [SimpleLSTM(TEXT.vocab.vectors,
                                      args.hidden_dim_g,
                                      args.n_experts,
                                      device=args.device,
                                      use_lengths=args.use_lengths)
                           for _ in range(len(args.target_names))]
    elif args.gating_nets_type == "MLP":
        gating_networks = [MLPGate(args.fix_length, args.n_experts, TEXT.vocab.vectors)
                           for _ in range(len(args.target_names))]
    elif args.gating_nets_type == "TRANSFORMER":
        gating_networks = [TransformerModel(max_seq_len=args.fix_length,
                                            num_outputs=args.n_experts,
                                            word_embedding_matrix=TEXT.vocab.vectors,
                                            feed_fwd_dim=args.transformer_fwd_dim,
                                            num_transformer_layers=args.num_transformer_layers,
                                            num_transformer_heads=args.num_transformer_heads,
                                            pos_encoding_dropout=0.2,
                                            classification_dropout=0.3,
                                            batch_first=True,
                                            pad_index=TEXT.vocab.stoi['pad'])
                           for _ in range(len(args.target_names))]
    else:
        # Guard against an unrecognized type, which would otherwise leave gating_networks undefined
        raise ValueError("Unknown gating_nets_type: %s" % args.gating_nets_type)

    shared_layers = [MultitaskConvNet(input_channels=1,
                                      filter_list=args.filter_list_experts,
                                      embed_matrix=TEXT.vocab.vectors,
                                      num_filters=args.num_filters_experts)
                     for _ in range(args.n_experts)]

    model = MultiGateMixtureofExperts(shared_layers=shared_layers,
                                      gating_networks=gating_networks,
                                      towers=towers,
                                      device=args.device,
                                      include_lens=args.use_lengths,
                                      batch_size=args.batch_size,
                                      gating_drop=args.gate_dropout,
                                      mean_diff=args.mean_diff,
                                      weight_adjust_mode=args.balancing_strategy)

    if args.class_weighting:
        task_weights = multitask_class_weighting(data_iterators[0], args.target_names)
        losses = {name: nn.CrossEntropyLoss(weight=task_weights[name].to(args.device))
                  for name in args.target_names}
    else:
        losses = {name: nn.CrossEntropyLoss() for name in args.target_names}

    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate)
    scheduler = StepLR(optimizer, step_size=args.scheduler_stepsize, gamma=args.scheduler_gamma)

    train(model, losses, optimizer, scheduler, data_iterators[0],
          device=args.device,
          include_lengths=args.use_lengths,
          save_path=args.logdir,
          save_name="%s_datasets" % "_".join(args.target_names),
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip,
          balancing_epoch_num=args.balance_epoch_cnt,
          balancing_mode=args.balancing_strategy)

    print("Evaluating model")
    model.load_state_dict(
        torch.load("%s/%s_datasets_epoch_%d.pt" % (args.logdir, "_".join(args.target_names), args.n_epochs - 1)))
    evaluation(model, data_iterators[-1], losses, device=args.device)