except FileExistsError:
    pass

create_log(os.path.join(output_path, 'train.log'), params_log + '\n')

# use GPU if available
isCuda = torch.cuda.is_available()
if isCuda:
    device = 'cuda'
else:
    device = 'cpu'

# Set the random seed for reproducible experiments
torch.manual_seed(42)
if isCuda:
    torch.cuda.manual_seed(42)

text_field, label_field, train_dataset, valid_dataset, train_iterator, valid_iterator = loadDataset(
    dataset, batch_size=batch_size, device=device)

# Build Vocabulary
# vec = vocab.Vectors(embedding_path)
text_field.build_vocab(train_dataset, valid_dataset, max_size=35000, min_freq=2)
label_field.build_vocab(train_dataset, valid_dataset)
vocab_size = len(text_field.vocab)
label_size = len(label_field.vocab) - 1

# Serialize the fields so the same vocabulary can be restored later
with open(os.path.join(output_path, "text_field.field"), "wb") as f:
    dill.dump(text_field, f)
with open(os.path.join(output_path, "label_field.field"), "wb") as f:
    dill.dump(label_field, f)
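# Illustrative sketch (not from the original script): the pickled Field objects
# above can be restored with dill at inference time, assuming the same torchtext
# version is installed, e.g.:
#
#     with open(os.path.join(output_path, "text_field.field"), "rb") as f:
#         text_field = dill.load(f)
#     with open(os.path.join(output_path, "label_field.field"), "rb") as f:
#         label_field = dill.load(f)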
isCuda = torch.cuda.is_available()
if isCuda:
    device = 'cuda'
else:
    device = 'cpu'

# Set the random seed for reproducible experiments
random.seed(random_seed)
torch.manual_seed(random_seed)
if isCuda:
    torch.cuda.manual_seed(random_seed)
else:
    raise Exception('CUDA is required for this experiment but is not available')

text_field1, label_field1, train_dataset1, valid_dataset1, train_iterator1, valid_iterator1 = loadDataset(
    dataset1, batch_size, device)
text_field2, label_field2, train_dataset2, valid_dataset2, train_iterator2, valid_iterator2 = loadDataset(
    dataset2, batch_size, device)

# Build Vocabulary: each text field's vocabulary is built over both datasets
# and initialized with the pretrained vectors
vec = vocab.Vectors(embedding_path)
text_field1.build_vocab(train_dataset1, valid_dataset1, train_dataset2, valid_dataset2,
                        max_size=35000, min_freq=2, vectors=vec)
text_field2.build_vocab(train_dataset1, valid_dataset1, train_dataset2, valid_dataset2,
                        max_size=35000, min_freq=2, vectors=vec)
label_field1.build_vocab(train_dataset1, valid_dataset1)
label_field2.build_vocab(train_dataset2, valid_dataset2)
vocab_size1 = len(text_field1.vocab)
label_size1 = len(label_field1.vocab) - 1
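# Illustrative sketch (assumption, not from the original script): after
# build_vocab(..., vectors=vec), the pretrained vectors are exposed as
# text_field1.vocab.vectors and would typically be copied into the model's
# embedding layer, e.g.:
#
#     embedding = torch.nn.Embedding(vocab_size1, text_field1.vocab.vectors.size(1))
#     embedding.weight.data.copy_(text_field1.vocab.vectors)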