def build_model(vocab: Vocabulary, args, **kwargs) -> Model:
    print("Building the model")
    vocab_size = vocab.get_vocab_size("tokens")
    EMBED_DIMS = 200
    if args.pretrained_WE_path:
        # turn the tokens into EMBED_DIMS-dim embeddings, then turn the embeddings into encodings
        embedder = BasicTextFieldEmbedder(
            {"tokens": Embedding(embedding_dim=EMBED_DIMS,
                                 num_embeddings=vocab_size,
                                 pretrained_file=args.pretrained_WE_path,
                                 vocab=vocab)})
    else:
        embedder = BasicTextFieldEmbedder(
            {"tokens": Embedding(embedding_dim=EMBED_DIMS, num_embeddings=vocab_size)})
    encoder = CnnEncoder(embedding_dim=EMBED_DIMS, ngram_filter_sizes=(2, 3, 5), num_filters=5)
    # num_filters is a tad bit dangerous: we get this many filters for EACH ngram filter size
    # encoder = BertPooler("bert-base-cased")
    # the output dim is just num_filters * len(ngram_filter_sizes)

    # construct the regularizer applicator
    regularizer_applicator = None
    if args.use_reg:
        l2_reg = L2Regularizer()
        regexes = [("embedder", l2_reg),
                   ("encoder", l2_reg),
                   ("classifier", l2_reg)]
        regularizer_applicator = RegularizerApplicator(regexes)

    return MortalityClassifier(vocab, embedder, encoder, regularizer_applicator, **kwargs)
def build_model(vocab: Vocabulary, use_reg: bool = True) -> Model:
    print("Building the model")
    vocab_size = vocab.get_vocab_size("tokens")
    EMBED_DIMS = 300
    # turn the tokens into 300 dim embeddings, then turn the embeddings into encodings
    embedder = BasicTextFieldEmbedder({
        "tokens": Embedding(embedding_dim=EMBED_DIMS, num_embeddings=vocab_size)
    })
    encoder = CnnEncoder(
        embedding_dim=EMBED_DIMS,
        ngram_filter_sizes=(2, 3, 4, 5),
        num_filters=5
    )
    # num_filters is a tad bit dangerous: we get this many filters for EACH ngram filter size
    # encoder = BertPooler("bert-base-cased")
    # the output dim is just num_filters * len(ngram_filter_sizes)

    # construct the regularizer applicator
    regularizer_applicator = None
    if use_reg:
        l2_reg = L2Regularizer()
        regexes = [("embedder", l2_reg), ("encoder", l2_reg), ("classifier", l2_reg)]
        regularizer_applicator = RegularizerApplicator(regexes)

    return DecompensationClassifier(vocab, embedder, encoder, regularizer_applicator)
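The output-dim comment above can be checked directly; a minimal sketch, assuming AllenNLP's CnnEncoder behaves as described when no explicit output_dim (projection layer) is given:

from allennlp.modules.seq2vec_encoders import CnnEncoder

encoder = CnnEncoder(embedding_dim=300, ngram_filter_sizes=(2, 3, 4, 5), num_filters=5)
# without an output_dim projection, the encoder output is num_filters * len(ngram_filter_sizes)
assert encoder.get_output_dim() == 5 * len((2, 3, 4, 5))  # 20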
def test_l2_regularization(self):
    model = torch.nn.Sequential(
        torch.nn.Linear(5, 10),
        torch.nn.Linear(10, 5)
    )
    initializer = InitializerApplicator([(".*", lambda tensor: constant_(tensor, 0.5))])
    initializer(model)
    value = RegularizerApplicator([("", L2Regularizer(1.0))])(model)
    assert value.data.numpy() == 28.75
def test_regularizer_applicator_respects_regex_matching(self):
    model = torch.nn.Sequential(
        torch.nn.Linear(5, 10),
        torch.nn.Linear(10, 5)
    )
    initializer = InitializerApplicator([(".*", lambda tensor: constant_(tensor, 1.))])
    initializer(model)
    value = RegularizerApplicator([("weight", L2Regularizer(0.5)),
                                   ("bias", L1Regularizer(1.0))])(model)
    assert value.data.numpy() == 65.0
def test_l1_regularization(self):
    model = torch.nn.Sequential(
        torch.nn.Linear(5, 10),
        torch.nn.Linear(10, 5)
    )
    initializer = InitializerApplicator([(".*", lambda tensor: constant_(tensor, -1))])
    initializer(model)
    value = RegularizerApplicator([("", L1Regularizer(1.0))])(model)
    # 115 because of biases.
    assert value.data.numpy() == 115.0
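The expected values in the three tests above follow from the parameter counts of Sequential(Linear(5, 10), Linear(10, 5)); a quick sanity sketch of the arithmetic, assuming the same constant initializations:

# weights: 5*10 + 10*5 = 100, biases: 10 + 5 = 15, total parameters: 115
l2_all = 1.0 * 115 * 0.5 ** 2                      # 28.75  (L2, everything initialized to 0.5)
l2_w_l1_b = 0.5 * 100 * 1.0 ** 2 + 1.0 * 15 * 1.0  # 65.0   (L2 on weights, L1 on biases, all 1.0)
l1_all = 1.0 * 115 * abs(-1)                       # 115.0  (L1, everything initialized to -1)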
def test_l2_regularization(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    value = RegularizerApplicator([("", L2Regularizer(1.0))])(model)
    assert value.data.numpy() == 28.75
def test_regularizer_applicator_respects_regex_matching(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    value = RegularizerApplicator([("weight", L2Regularizer(0.5)),
                                   ("bias", L1Regularizer(1.0))])(model)
    assert value.data.numpy() == 65.0
def test_l1_regularization(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": -1}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    value = RegularizerApplicator([("", L1Regularizer(1.0))])(model)
    # 115 because of biases.
    assert value.data.numpy() == 115.0
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SentenceClassifier':
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)
    question_encoder = Seq2VecEncoder.from_params(params.pop("question_encoder"))
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               question_encoder=question_encoder,
               initializer=initializer,
               regularizer=regularizer)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'ToxicModel':
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)
    encoder = Seq2VecEncoder.from_params(params.pop("encoder"))
    classifier_feedforward = FeedForward.from_params(params.pop("classifier_feedforward"))
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               encoder=encoder,
               classifier_feedforward=classifier_feedforward,
               initializer=initializer,
               regularizer=regularizer)
def test_from_params(self):
    params = Params({"regularizers": [("conv", "l1"),
                                      ("linear", {"type": "l2", "alpha": 10})]})
    regularizer_applicator = RegularizerApplicator.from_params(params.pop("regularizers"))
    regularizers = regularizer_applicator._regularizers  # pylint: disable=protected-access

    conv = linear = None
    for regex, regularizer in regularizers:
        if regex == "conv":
            conv = regularizer
        elif regex == "linear":
            linear = regularizer

    assert isinstance(conv, L1Regularizer)
    assert isinstance(linear, L2Regularizer)
    assert linear.alpha == 10
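For reference, the Params above deserialize to the same applicator you would get by constructing it directly; a small sketch, assuming the string shorthand "l1" maps to an L1Regularizer with its default alpha:

equivalent = RegularizerApplicator([
    ("conv", L1Regularizer()),            # ("conv", "l1") with the default alpha
    ("linear", L2Regularizer(alpha=10)),  # ("linear", {"type": "l2", "alpha": 10})
])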
def test_frozen_params(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": -1}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    # freeze the parameters of the first linear
    for name, param in model.named_parameters():
        if re.search(r"0.*$", name):
            param.requires_grad = False
    value = RegularizerApplicator([("", L1Regularizer(1.0))])(model)
    # 55 because of bias (5*10 + 5)
    assert value.data.numpy() == 55
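The frozen-parameter test works because the applicator only visits parameters with requires_grad=True; with the first Linear(5, 10) frozen, only the second layer's 10*5 weights and 5 biases (all -1) remain. A one-line check of that arithmetic:

expected = 1.0 * (10 * 5 + 5) * abs(-1)  # 55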
def build_model_Transformer(vocab: Vocabulary, use_reg: bool = True) -> Model:
    print("Building the model")
    vocab_size = vocab.get_vocab_size("tokens")
    EMBED_DIMS = 300
    # embed the tokens with a pretrained transformer, then pool the embeddings into encodings
    embedder = PretrainedTransformerEmbedder(BERT_MODEL_NAME)
    encoder = BertPooler(BERT_MODEL_NAME)

    # construct the regularizer applicator
    regularizer_applicator = None
    if use_reg:
        l2_reg = L2Regularizer()
        regexes = [("embedder", l2_reg), ("encoder", l2_reg), ("classifier", l2_reg)]
        regularizer_applicator = RegularizerApplicator(regexes)

    return MortalityClassifier(vocab, embedder, encoder, regularizer_applicator)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = RegularizerApplicator()) -> None:
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size('labels')
    self.encoder = encoder
    self.classifier_feedforward = classifier_feedforward
    self.loss = torch.nn.BCEWithLogitsLoss()
    # self.loss = torch.nn.MultiLabelMarginLoss(reduction='sum')
    self.f1 = MultiLabelF1Measure()
    self.labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
    initializer(self)
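The regularizer handed to super().__init__ is what the base Model later applies. A minimal, self-contained sketch of that flow, assuming AllenNLP's Model.get_regularization_penalty() API; TinyModel is a made-up class for illustration, and a real trainer adds the penalty to the loss for you:

import torch
from allennlp.data import Vocabulary
from allennlp.models import Model
from allennlp.nn.regularizers import L2Regularizer, RegularizerApplicator


class TinyModel(Model):  # hypothetical model, just to show where the penalty comes from
    def __init__(self, vocab: Vocabulary, regularizer: RegularizerApplicator = None) -> None:
        super().__init__(vocab, regularizer)
        self.linear = torch.nn.Linear(4, 2)

    def forward(self):  # a real model would compute and return a loss here
        return {}


model = TinyModel(Vocabulary(), RegularizerApplicator([("linear", L2Regularizer(alpha=0.1))]))
penalty = model.get_regularization_penalty()  # the applicator summed over matching parameters
print(penalty)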
def train(self, args_hpo, index):
    """
    Trains the model and returns the metrics to the meta optimizer.
    :param args_hpo:
    :param index:
    :return:
    """
    PrintColors.prYellow('\n===== training with: {}'.format(args_hpo))
    PrintColors.prGreen('----- in {} mode -----'.format('train'))

    ''' ============ LOAD DATA ============================================================ '''
    starting_time = time.time()
    lm_dataset_reader = LanguageModelSegmentReader(global_constants=GLOBAL_CONSTANTS)
    train_data, val_data = (lm_dataset_reader.read(folder)
                            for folder in [_train_data_path, _val_data_path])
    lm_vocabulary = Vocabulary.from_instances(train_data + val_data)
    iterator = BasicIterator(batch_size=args_hpo.batch_size)
    iterator.index_with(lm_vocabulary)

    ''' ============ DEFINE MODEL ========================================================= '''
    ''' the Params class 'pops' its parameters, i.e. they disappear after first use, so we
    instantiate a Params instance for each model-defining execution. More than that, Params turns
    dicts into MutableMappings and destroys the original dict, so here's your copy, allennlp.
    Thanks. (I still love you) '''
    token_embedding = Embedding.from_params(
        vocab=lm_vocabulary,
        params=Params(copy.deepcopy(GLOBAL_CONSTANTS.GLOVE_PARAMS_CONFIG)))
    token_embedder: TextFieldEmbedder = BasicTextFieldEmbedder({'tokens': token_embedding})

    ''' define encoder to wrap up an lstm feature extractor '''
    contextualizer: Seq2SeqEncoder = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(input_size=args_hpo.word_embedding_size,
                      hidden_size=args_hpo.ed_ncoder_size,
                      bidirectional=False,
                      batch_first=True))
    model = LanguageModel(vocab=lm_vocabulary,
                          text_field_embedder=token_embedder,
                          contextualizer=contextualizer,
                          dropout=args_hpo.dropout,
                          regularizer=RegularizerApplicator([('l2', L2Regularizer(alpha=args_hpo.l2))]),
                          ).cuda(_device)

    ''' ============ TRAIN ================================================================ '''
    ''' callbacks '''
    if index == 0:
        for file in os.listdir(os.path.join(*['.', 'lm_models'])):
            path = os.path.join(*['.', 'lm_models', file])
            if os.path.isfile(path):
                os.remove(path)
            else:
                shutil.rmtree(path)
    serialization_path = 'models_lm_{}_{}'.format(_tag, index)
    serialization_path_longer = os.path.join(*['.', 'lm_models', serialization_path])
    vocab_path = 'vocab_lm_{}_{}'.format(_tag, index)
    vocab_dir_longer = os.path.join(*['.', 'lm_models', vocab_path])
    if not os.path.exists(serialization_path_longer):
        os.mkdir(serialization_path_longer)

    callbacks = list()
    ''' for validation '''
    callbacks.append(validate.Validate(validation_data=val_data, validation_iterator=iterator))
    ''' for early stopping. it tracks 'loss' returned by model.forward() '''
    callbacks.append(track_metrics.TrackMetrics(patience=3))
    ''' for grad clipping '''
    callbacks.append(gradient_norm_and_clip.GradientNormAndClip(grad_clipping=args_hpo.clip))
    ''' for checkpointing. TODO/NOTE: the serialization path cannot exist before training?? '''
    model_checkpointer = checkpointer.Checkpointer(serialization_dir=serialization_path_longer,
                                                   num_serialized_models_to_keep=1)
    callbacks.append(checkpoint.Checkpoint(checkpointer=model_checkpointer))
    ''' for sample generations '''

    callback_trainer = CallbackTrainer(
        model=model,
        training_data=train_data,
        iterator=iterator,
        optimizer=torch.optim.Adam(model.parameters(), lr=args_hpo.lr),
        num_epochs=_n_epochs,
        serialization_dir=serialization_path_longer,
        cuda_device=_device,
        callbacks=callbacks
    )

    ''' the trainer saves the model, but the vocabulary needs to be saved, too '''
    lm_vocabulary.save_to_files(vocab_dir_longer)
    ''' check the metric names to synchronize with the class '''
    metrics = callback_trainer.train()
    metrics['time_consumed(hrs)'] = round((time.time() - starting_time) / 3600, 4)
    return metrics
class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(2, 3)
        self.linear2 = torch.nn.Linear(3, 2)
        self.conv = torch.nn.Conv1d(2, 2, 2)

    def forward(self, inputs):
        pass


print("Using individual regularizers:")
model = Net()
init_const = ConstantInitializer(val=10.0)
init_const(model.linear1.weight)
init_const(model.linear2.weight)

l1_regularizer = L1Regularizer(alpha=0.01)
print(l1_regularizer(model.linear1.weight))  # 0.01 * 10 * 6 = 0.6

l2_regularizer = L2Regularizer(alpha=0.01)
print(l2_regularizer(model.linear2.weight))  # 0.01 * (10)^2 * 6

print("Using an applicator:")
applicator = RegularizerApplicator(regexes=[
    ("linear1.weight", L1Regularizer(alpha=0.01)),
    ("linear2.weight", L2Regularizer()),
])
print(applicator(model))  # 0.6 + 6
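As a check on the printed values above: the conv layer and both bias vectors match neither regex, so the applicator total is just the two weight penalties (L2Regularizer's default alpha is 0.01):

expected_total = 0.01 * 10 * 6 + 0.01 * 10 ** 2 * 6  # 0.6 + 6.0 = 6.6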
def train_valid_base_text_decision_fix_text_features_model(
        model_name: str, single_round_label: bool, use_only_prev_round: bool, train_data_file_name: str,
        validation_data_file_name: str, no_history: bool = False, func_batch_size: int = 9,
        numbers_columns: list = None, add_numeric_data: bool = True):
    """
    Train and validate a model that uses fixed text features only.
    :param model_name: the full name of the model we run
    :param single_round_label: the label to use: single round or total payoff
    :param use_only_prev_round: if to use all the history or only the previous round
    :param train_data_file_name: the name of the train_data to use
    :param validation_data_file_name: the name of the validation_data to use
    :param no_history: if we don't want to use any history data
    :param func_batch_size: the batch size to use
    :param numbers_columns: the names of the columns to use for the numeric data
    :param add_numeric_data: if we want to add numbers data
    :return:
    """
    reader = TextExpDataSetReader(add_numeric_data=add_numeric_data,
                                  use_only_prev_round=use_only_prev_round,
                                  single_round_label=single_round_label,
                                  three_losses=True,
                                  fix_text_features=True,
                                  no_history=no_history,
                                  numbers_columns_name=numbers_columns)
    train_data_file_inner_path = os.path.join(data_directory, train_data_file_name)
    validation_data_file_inner_path = os.path.join(data_directory, validation_data_file_name)
    train_instances = reader.read(train_data_file_inner_path)
    validation_instances = reader.read(validation_data_file_inner_path)
    vocab = Vocabulary()

    # TODO: change this if necessary
    # batch_size should be: 10 or 9 depends on the input
    # and not shuffle so all the data of the same pair will be in the same batch
    iterator = BasicIterator(batch_size=func_batch_size)  # , instances_per_epoch=10)
    # sorting_keys=[('sequence_review', 'list_num_tokens')])
    iterator.index_with(vocab)

    # the shape of the flattened data representation
    if 'bert' in train_data_file_name:  # fixed features are a BERT vector
        text_feedforward = FeedForward(input_dim=reader.max_tokens_len, num_layers=2,
                                       hidden_dims=[300, 50], activations=ReLU(),
                                       dropout=[0.0, 0.0])
        reader.max_tokens_len = 50
    else:
        text_feedforward = None

    feed_forward_input_dim = reader.max_seq_len * (reader.max_tokens_len + reader.number_length)
    feed_forward_classification = FeedForward(input_dim=feed_forward_input_dim, num_layers=1,
                                              hidden_dims=[2], activations=LeakyReLU(),
                                              dropout=[0.3])
    criterion_classification = nn.BCEWithLogitsLoss()

    metrics_dict = {
        'Accuracy': CategoricalAccuracy()  # BooleanAccuracy(),
        # 'auc': Auc(),
        # 'F1measure': F1Measure(positive_label=1),
    }

    model = models.BasicFixTextFeaturesDecisionModel(
        vocab=vocab,
        classifier_feedforward_classification=feed_forward_classification,
        criterion_classification=criterion_classification,
        metrics_dict=metrics_dict,
        max_tokens_len=reader.max_tokens_len,
        text_feedforward=text_feedforward,
        regularizer=RegularizerApplicator([("", L1Regularizer())]),
    )

    optimizer = optim.Adam(model.parameters(), lr=0.1)
    num_epochs = 100
    run_log_directory = utils.set_folder(
        datetime.now().strftime(f'{model_name}_{num_epochs}_epochs_%d_%m_%Y_%H_%M_%S'), 'logs')

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        iterator=iterator,
        train_dataset=train_instances,
        validation_dataset=validation_instances,
        num_epochs=num_epochs,
        shuffle=False,
        serialization_dir=run_log_directory,
        patience=10,
        histogram_interval=10,
    )

    model_dict = trainer.train()

    print(f'{model_name}: evaluation measures are:')
    for key, value in model_dict.items():
        if 'accuracy' in key:
            value = value * 100
        print(f'{key}: {value}')

    # save the model predictions
    model.predictions.to_csv(os.path.join(run_log_directory, 'predictions.csv'))
''' the language model used Glove but we just build an embedder to load the trained parameters '''
token_embedding = Embedding(
    num_embeddings=vocabulary.get_vocab_size(namespace='tokens'),
    embedding_dim=combination.word_embedding_size,
    padding_index=0)
token_embedder: TextFieldEmbedder = BasicTextFieldEmbedder({'tokens': token_embedding})

''' define encoder to wrap up an lstm feature extractor '''
contextualizer: Seq2SeqEncoder = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(input_size=combination.word_embedding_size,
                  hidden_size=combination.ed_ncoder_size,
                  bidirectional=False,
                  batch_first=True))

model = LanguageModel(vocab=vocabulary,
                      text_field_embedder=token_embedder,
                      contextualizer=contextualizer,
                      dropout=combination.dropout,
                      regularizer=RegularizerApplicator([('l2', L2Regularizer(alpha=combination.l2))]),
                      ).cuda(device)
model.load_state_dict(torch.load(open(language_model_path, 'rb')), strict=True)

dataset_reader = LanguageModelSegmentReader(global_constants=GLOBAL_CONSTANTS)
language_model_predictor = Predictor(model=model, dataset_reader=dataset_reader)
val_data_path = os.path.join('.', 'data_seg_val_toytoy')
instances = dataset_reader.read(val_data_path)
predictions = [language_model_predictor.predict_instance(instance) for instance in instances]
def build_model(
        vocab,
        embed_dim: int = 100,
        hid_dim: int = 100,
        min_dec_step: int = 2,
        max_decoding_steps: int = 3,
        fix_edu_num: int = -1,
        use_elmo: bool = False,
        dropout=0.5,
        dropout_emb=0.2,
        span_encoder_type='self_attentive',
        attn_type='dot',
        schedule_ratio_from_ground_truth=0.7,
        pretrain_embedding=None,
        nenc_lay: int = 1,
        mult_orac_sampling: bool = True,
        compression: bool = True,
        word_token_indexers=None,
        alpha: float = 1.0,
        dbg: bool = False,
        dec_avd_trigram_rep: bool = True,
        aggressive_compression: int = -1,
        keep_threshold: float = 0.5,
        weight_alpha=0.0,
        bias_alpha=0.0,
        abs_board_file: str = "/home/cc/exComp/board.txt",
        compress_leadn=-1,
        gather='mean',
        abs_dir_root: str = "/scratch/cluster/jcxu",
        serilization_name="",
        load_save_model: str = None):
    model = Seq2IdxSum(
        vocab=vocab,
        word_embedding_dim=embed_dim,
        hidden_dim=hid_dim,
        min_dec_step=min_dec_step,
        max_decoding_steps=max_decoding_steps,
        fix_edu_num=fix_edu_num,
        use_elmo=use_elmo,
        span_encoder_type=span_encoder_type,
        dropout=dropout,
        dropout_emb=dropout_emb,
        attn_type=attn_type,
        schedule_ratio_from_ground_truth=schedule_ratio_from_ground_truth,
        pretrain_embedding_file=pretrain_embedding,
        nenc_lay=nenc_lay,
        mult_orac_sampling=mult_orac_sampling,
        word_token_indexers=word_token_indexers,
        compression=compression,
        alpha=alpha,
        dbg=dbg,
        dec_avd_trigram_rep=dec_avd_trigram_rep,
        aggressive_compression=aggressive_compression,
        keep_threshold=keep_threshold,
        regularizer=RegularizerApplicator([("weight", L2Regularizer(weight_alpha)),
                                           ("bias", L1Regularizer(bias_alpha))]),
        abs_board_file=abs_board_file,
        gather=gather,
        compress_leadn=compress_leadn,
        abs_dir_root=abs_dir_root,
        serilization_name=serilization_name)
    if load_save_model:
        model.load_state_dict(torch.load(load_save_model, map_location=get_device()))
        # ``model.load_state_dict(torch.load("/path/to/model/weights.th"))``
    # model = torch.nn.DataParallel(model)
    device = get_device()
    model = model.to(device)
    return model
def run_training_loop():
    tokenizer = BERTTokenizer(
        vocab_file='/Users/tianhongzxy/Downloads/BiSentESIM/BiSentESIM/My-pipeline/allennlp_tutorial/BertTokenizer/vocab.txt')
    # tokenizer = BERTTokenizer('bert-base-multilingual-cased')  # same as above

    # Try to use ELMo
    # tokenindexer = ELMoTokenCharactersIndexer()
    # elmo_tokens = tokenindexer.tokens_to_indices([Token("happy")], None)
    # print(len(elmo_tokens["elmo_tokens"][0]), elmo_tokens)

    # Try to use BERT
    # tokenizer = PretrainedTransformerTokenizer(
    #     model_name="bert-base-multilingual-cased",
    #     add_special_tokens=True,
    #     max_length=512
    # )
    # token_indexer = PretrainedTransformerIndexer(
    #     model_name="bert-base-multilingual-cased",
    #     max_length=512,
    # )

    cached_directory = None  # "cached_dir"
    dataset_reader = ClassificationTsvReader(tokenizer=tokenizer, cache_directory=cached_directory)
    print("Reading data")
    train_data = dataset_reader.read(
        file_path='/Users/tianhongzxy/Downloads/contradictory-my-dear-watson/train.txt')

    pretrained_files = None  # {"tokens": "/Users/tianhongzxy/Downloads/BiSentESIM/BiSentESIM/embedding/glove.6B.300d.txt"}
    cuda_device = -1
    batch_size = 8
    vocab = build_vocab(train_data, pretrained_files=pretrained_files,
                        include_full_pretrained_words=False)

    init_uniform = XavierUniformInitializer()
    # init_uniform(model.embedder.token_embedder_tokens.weight)
    init_const = ConstantInitializer(val=0)
    # init_const(model.classifier.bias)
    init_normal = NormalInitializer(mean=0., std=1.)
    # init_normal(model.classifier.weight)
    applicator = InitializerApplicator(
        regexes=[
            ('embedder.*', init_uniform),
            ('classifier.*weight', init_normal),
            ('classifier.*bias', init_const)
        ]
    )
    regularizer = RegularizerApplicator(
        regexes=[
            ('embedder.*', L2Regularizer(alpha=1e-3)),
            ('classifier.*weight', L2Regularizer(alpha=1e-3)),
            # ('classifier.*bias', L1Regularizer(alpha=1e-2))  # do not regularize the bias, otherwise the model easily underfits
        ]
    )
    model = build_model(vocab,
                        embedding_dim=10,
                        pretrained_file=None,  # pretrained_files["tokens"]
                        initializer=applicator,
                        regularizer=regularizer)
    if cuda_device >= 0:
        model = model.cuda(cuda_device)

    # split train data into train & dev data
    from allennlp.data.dataset_readers import AllennlpDataset
    print('origin train data size: ', len(train_data))
    train_data, dev_data = train_test_split(train_data, test_size=0.2, random_state=20020206)
    assert type(train_data[0]) == type(dev_data[0]) == Instance
    train_data, dev_data = AllennlpDataset(train_data), AllennlpDataset(dev_data)
    print('train data size: ', len(train_data), 'dev data size', len(dev_data))
    assert type(train_data) == type(dev_data) == AllennlpDataset
    train_data.index_with(vocab)
    dev_data.index_with(vocab)
    train_loader, dev_loader = build_data_loaders(train_data=train_data,
                                                  dev_data=dev_data,
                                                  batch_size=batch_size)

    with tempfile.TemporaryDirectory() as serialization_dir:
        # serialization_dir = 'temp_dir/'
        trainer = build_trainer(
            model=model,
            serialization_dir=serialization_dir,
            train_loader=train_loader,
            dev_loader=dev_loader,
            num_epochs=5,
            cuda_device=cuda_device,
            patience=5
        )
        print("Starting training")
        trainer.train()
        print("Finished training")

    # Evaluate model on test data
    # print("Starting testing")
    # test_data = dataset_reader.read('test.txt')
    # test_data.index_with(vocab)
    # data_loader = DataLoader(test_data, batch_size=batch_size)
    # results = evaluate(model, data_loader, cuda_device=cuda_device)
    # print('Test results: ', results)
    # outputs = model.forward_on_instances(instances)
    # print(outputs)

    return model, dataset_reader