def __init__(self, settings, vocabs):
    """Wire up external embeddings, the BiLSTM model and its Adam optimizer.

    Args:
        settings: Configuration object. This method reads
            ``disable_external``, ``external``, ``tree``, ``ot``, ``pt``,
            ``device``, ``loss_interpolation``, ``model_interpolation``,
            ``batch_size``, ``beta1``, ``beta2`` and ``l2``.
        vocabs: Vocabularies forwarded to ``BiLSTMModel``.
    """
    self.vocabs = vocabs

    # ``External`` is always instantiated; it is given None when disabled.
    external_source = None if settings.disable_external else settings.external
    self.external = External(external_source)

    self.train_data = None
    self.test_data = None
    self.epoch_offset = 0
    self.settings = settings

    # ``dec`` is only a flag here: True when tree decoding is requested.
    self.dec = True if settings.tree else None

    # which targets to take
    self.ot = settings.ot
    self.pt = settings.pt

    self.device = settings.device
    self.loss_interpolation = settings.loss_interpolation
    self.model_interpolation = settings.model_interpolation
    self.batch_size = settings.batch_size

    network = BiLSTMModel(self.vocabs, self.external, settings)
    self.model = network.to(self.settings.device)
    self.optimizer = torch.optim.Adam(
        self.model.parameters(),
        betas=(settings.beta1, settings.beta2),
        weight_decay=settings.l2,
    )

    self._store_settings()
# Rebuild the dependency parser and restore its weights; the identity
# map_location keeps every loaded storage where torch deserialized it (CPU).
parser_model = ParserModel(dep_vocab, parser_config, dep_vec)
parser_model.load_state_dict(
    torch.load(parser_config.load_model_path,
               map_location=lambda storage, loc: storage))

# Build the vocabulary from the training file, load the pretrained
# embeddings, then persist the vocabulary.
vocab = creatVocab(config.train_file, config.min_occur_count)
vec = vocab.load_pretrained_embs(config.pretrained_embeddings_file)
# Use a context manager so the file is flushed and closed deterministically.
with open(config.save_vocab_path, 'wb') as vocab_file:
    pickle.dump(vocab, vocab_file)

# Select the device: CUDA only when ``gpu`` is truthy and a non-negative
# id was requested on the command line.
config.use_cuda = False
gpu_id = -1
if gpu and args.gpu >= 0:
    torch.cuda.set_device(args.gpu)
    config.use_cuda = True
    print("GPU ID: ", args.gpu)
    gpu_id = args.gpu

model = BiLSTMModel(vocab, config, parser_config, vec)
if config.use_cuda:
    model = model.cuda()
    parser_model = parser_model.cuda()

classifier = SentenceClassifier(config, model, vocab, parser_model, dep_vocab)

data = read_corpus(config.train_file)
dev_data = read_corpus(config.dev_file)
test_data = read_corpus(config.test_file)

train(data, dev_data, test_data, classifier, vocab, dep_vocab, config)
# Parse known CLI arguments; the unrecognised ones are handed to the config.
args, extra_args = argparser.parse_known_args()
config = Configurable(args.config_file, extra_args)
torch.set_num_threads(args.thread)

# Build the vocabulary, load pretrained embeddings, then persist the vocab.
vocab = creatVocab(config.train_file, config.min_occur_count)
vec = vocab.load_pretrained_embs(config.pretrained_embeddings_file)
# Use a context manager so the file is flushed and closed deterministically.
with open(config.save_vocab_path, 'wb') as vocab_file:
    pickle.dump(vocab, vocab_file)

# CUDA is used only when ``gpu`` is truthy and a non-negative id was given.
config.use_cuda = False
if gpu and args.gpu >= 0:
    config.use_cuda = True
    torch.cuda.set_device(args.gpu)
    print("GPU ID: ", args.gpu)
print("\nGPU using status: ", config.use_cuda)

model = BiLSTMModel(vocab, config, vec)
model = model.cpu()
if config.use_cuda:
    model = model.cuda()

classifier = SentenceClassifier(model, vocab)

data = read_corpus(config.train_file)
dev_data = read_corpus(config.dev_file)
test_data = read_corpus(config.test_file)

train(data, dev_data, test_data, classifier, vocab, config)
# Build the vocabulary from the training file and the BERT vocab file,
# then persist it.
vocab = creat_vocab(config.train_file, config.bert_vocab_file, config.min_occur_count)
# Use a context manager so the file is flushed and closed deterministically.
with open(config.save_vocab_path, 'wb') as vocab_file:
    pickle.dump(vocab, vocab_file)

# CUDA is used only when ``gpu`` is truthy and a non-negative id was given.
config.use_cuda = False
gpu_id = -1
if gpu and args.gpu >= 0:
    torch.cuda.set_device(args.gpu)
    config.use_cuda = True
    print("GPU ID: ", args.gpu)
    gpu_id = args.gpu

# The BiLSTM is sized from the BERT extractor's hidden size and layer count.
bert = BertExtractor(config)
model = BiLSTMModel(vocab, config, parser_config,
                    bert.bert_hidden_size, bert.bert_layers)
if config.use_cuda:
    model = model.cuda()
    bert = bert.cuda()
    parser_model = parser_model.cuda()

labeler = SequenceLabeler(model, bert, parser_model)

data = read_corpus(config.train_file)
dev_data = read_corpus(config.dev_file)
test_data = read_corpus(config.test_file)

train(data, dev_data, test_data, labeler, vocab, dep_vocab, config)
def train(self):
    """Build the BiLSTM tagging graph, train it, and print CoNLL evaluations.

    Reads ``self.config`` (``model_dir``, ``data_paths``, ``learning_rate``,
    ``learning_rate_decay``, ``n_epochs``, ``batch_size``,
    ``dropout_keep_probability``, ``checkpoint_interval``, ``test_interval``)
    and the token/tag lists stored on ``self``. Checkpoints are written under
    ``self.config.model_dir``.

    Any exception raised while building or training is printed together with
    its traceback and the method returns early without the final evaluation.
    """
    checkpoint_path = os.path.join(self.config.model_dir, 'model.ckpt')
    data_dirs = self.config.data_paths
    data_helper = DataHelper(self.train_tokens, self.validation_tokens,
                             self.train_tags)
    data_helper.dump_data_to_file(self.config.model_dir)
    tf.reset_default_graph()

    print('=' * 50)
    print('=' * 50)
    print(' [*] Checkpoint path: %s' % checkpoint_path)
    print(' [*] Loading training data from: %s' % data_dirs)
    print(' [*] Using model: %s' % self.config.model_dir)
    print(hparams_debug_string())

    token_count = data_helper.gettokencount()
    tag_count = data_helper.gettagcount()
    cap_feat_count = 4
    pad_index = data_helper.word2idx('<PAD>')
    print('Tag count : %d' % tag_count)
    print('Token count : %d' % token_count)
    print('Cap Features count : %d' % cap_feat_count)
    print('PAD index : %d' % pad_index)

    with tf.Graph().as_default() as graph:
        sess_config = tf.ConfigProto(log_device_placement=True,
                                     allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:
            try:
                global_step = tf.Variable(0, name='global_step',
                                          trainable=False)
                saver = tf.train.Saver(max_to_keep=None,
                                       keep_checkpoint_every_n_hours=2)
                # NOTE(review): this second config is built after the session
                # already exists and is not passed anywhere in this method.
                sess_config = tf.ConfigProto(log_device_placement=False,
                                             allow_soft_placement=True)
                sess_config.gpu_options.allow_growth = True

                with tf.variable_scope('model'):
                    model = BiLSTMModel()
                    model.init()
                    train_vars = model.build_layers(
                        hparams=hparams,
                        vocabulary_size=token_count,
                        n_cap_feats=cap_feat_count,
                        n_tags=tag_count)
                    model.compute_predictions()
                    model.compute_loss(n_tags=tag_count, PAD_index=pad_index)
                    model.perform_optimization(global_step)
                    train_stat = model.add_stats()
                print('No. of trainable variables : %d' % train_vars)

                sess.run(tf.global_variables_initializer())
                summary_writer = tf.summary.FileWriter(self.config.model_dir)
                # Replaces the earlier saver now that all variables exist.
                saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
                start_step = sess.run(global_step)

                print('-' * 20 + ' Starting new training ' + '-' * 20)
                learning_rate = self.config.learning_rate
                for epoch in range(self.config.n_epochs):
                    print('-' * 20 + ' Epoch {}'.format(epoch + 1) + '-' * 20)
                    for x_batch, c_batch, y_batch, lengths in data_helper.batches_generator(
                            self.config.batch_size, self.train_tokens,
                            self.train_tags):
                        step, loss = model.train_on_batch(
                            sess, global_step, x_batch, c_batch, y_batch,
                            lengths, learning_rate,
                            self.config.dropout_keep_probability)
                        if step % 100 == 0:
                            print('[Step : %d] loss : %f' % (step, loss))

                    # Decay the learning rate once per epoch.
                    learning_rate = learning_rate / self.config.learning_rate_decay

                    if (epoch + 1) % self.config.checkpoint_interval == 0:
                        # NOTE(review): the message reports ``epoch + 1`` but the
                        # checkpoint is saved with ``global_step=epoch``; confirm
                        # which one restore code expects before aligning them.
                        print('Saving checkpoint to : %s-%d' %
                              (checkpoint_path, epoch + 1))
                        saver.save(sess, checkpoint_path, global_step=epoch)

                    if (epoch + 1) % self.config.test_interval == 0:
                        print('Train data evaluation')
                        data_helper.eval_conll(model, sess, self.train_tokens,
                                               self.train_tags,
                                               short_report=True)
                        print('Validation data evaluation:')
                        data_helper.eval_conll(model, sess,
                                               self.validation_tokens,
                                               self.validation_tags,
                                               short_report=True)
            except Exception as e:
                print('Exitin due to exception : [%s]!!!' % e)
                traceback.print_exc()
                # NOTE(review): ``log`` is not defined in this method;
                # presumably a module-level handle - verify it exists.
                log.close()
                return

            print('-' * 20 + ' Training completed! ' + '-' * 20)
            print('Training data evaluation:')
            data_helper.eval_conll(model, sess, self.train_tokens,
                                   self.train_tags, short_report=False)
            print('Validation data evaluation:')
            data_helper.eval_conll(model, sess, self.validation_tokens,
                                   self.validation_tags, short_report=False)
            # Fixed label: this call evaluates the test split, not training.
            print('Test data evaluation:')
            data_helper.eval_conll(model, sess, self.test_tokens,
                                   self.test_tags, short_report=True)
            return
# Build the vocabulary from the training file and persist it.
vocab = creatVocab(config.train_file, config.min_occur_count)
# Use a context manager so the file is flushed and closed deterministically.
with open(config.save_vocab_path, 'wb') as vocab_file:
    pickle.dump(vocab, vocab_file)

# CUDA is used only when ``gpu`` is truthy and a non-negative id was given;
# gpu_id stays -1 otherwise and is forwarded to the ELMo embedder.
config.use_cuda = False
gpu_id = -1
if gpu and args.gpu >= 0:
    torch.cuda.set_device(args.gpu)
    config.use_cuda = True
    print("GPU ID: ", args.gpu)
    gpu_id = args.gpu

# The model receives the ELMo layer count and output dimension as a tuple.
elmo = ElmoEmbedder(config.elmo_option_file, config.elmo_weight_file, gpu_id)
elmo_layers = elmo.elmo_bilm.num_layers
elmo_dims = elmo.elmo_bilm.get_output_dim()

model = BiLSTMModel(vocab, config, (elmo_layers, elmo_dims))
if config.use_cuda:
    model = model.cuda()

classifier = SentenceClassifier(model, elmo, vocab)

data = read_corpus(config.train_file)
dev_data = read_corpus(config.dev_file)
test_data = read_corpus(config.test_file)

train(data, dev_data, test_data, classifier, vocab, config)
# Build the vocabulary, initialise embeddings from the pretrained file,
# then persist the vocabulary.
vocab = creatVocab(config.train_file, config.min_occur_count)
vec = vocab.load_initialize_embs(config.pretrained_embeddings_file)
# Use a context manager so the file is flushed and closed deterministically.
with open(config.save_vocab_path, 'wb') as vocab_file:
    pickle.dump(vocab, vocab_file)

# CUDA is used only when ``gpu`` is truthy and the requested id is not -1;
# gpu_id stays -1 otherwise and is forwarded to the ELMo embedder.
config.use_cuda = False
gpu_id = -1
if gpu and args.gpu != -1:
    config.use_cuda = True
    torch.cuda.set_device(args.gpu)
    print('GPU ID:' + str(args.gpu))
    gpu_id = args.gpu
print("\nGPU using status: ", config.use_cuda)

# The model receives the ELMo layer count and output dimension as a tuple.
elmo = ElmoEmbedder(config.elmo_option_file, config.elmo_weight_file, gpu_id)
elmo_layers = elmo.elmo_bilm.num_layers
elmo_dims = elmo.elmo_bilm.get_output_dim()

model = BiLSTMModel(vocab, config, vec, (elmo_layers, elmo_dims))
if config.use_cuda:
    # cuDNN is explicitly switched off for the CUDA run.
    torch.backends.cudnn.enabled = False
    model = model.cuda(args.gpu)

classifier = BiSententClassifier(model, elmo, vocab)

data = read_corpus(config.train_file)
dev_data = read_corpus(config.dev_file)
test_data = read_corpus(config.test_file)

train(data, dev_data, test_data, classifier, vocab, config)
# Parse known CLI arguments; the unrecognised ones are handed to the config.
args, extra_args = argparser.parse_known_args()
config = Configurable(args.config_file, extra_args)
torch.set_num_threads(args.thread)

# Two embedding tables are derived from the same pretrained file: vec1 feeds
# the BiLSTM model, vec2 feeds the ExtWord module.
vocab = creatVocab(config.train_file, config.min_occur_count)
vec1 = vocab.load_initialize_embs(config.pretrained_embeddings_file)
vec2 = vocab.load_pretrained_embs(config.pretrained_embeddings_file)
# Use a context manager so the file is flushed and closed deterministically.
with open(config.save_vocab_path, 'wb') as vocab_file:
    pickle.dump(vocab, vocab_file)

# CUDA is used only when ``gpu`` is truthy and the requested id is not -1.
config.use_cuda = False
gpu_id = -1
if gpu and args.gpu != -1:
    config.use_cuda = True
    torch.cuda.set_device(args.gpu)
    print('GPU ID:' + str(args.gpu))
    gpu_id = args.gpu
print("\nGPU using status: ", config.use_cuda)

model = BiLSTMModel(vocab, config, vec1)
extword_embed = ExtWord(vocab, config, vec2)
if config.use_cuda:
    # cuDNN is explicitly switched off for the CUDA run.
    torch.backends.cudnn.enabled = False
    model = model.cuda(args.gpu)
    extword_embed = extword_embed.cuda(args.gpu)

classifier = BiSententClassifier(model, extword_embed, vocab)

data = read_corpus(config.train_file)
dev_data = read_corpus(config.dev_file)
test_data = read_corpus(config.test_file)

train(data, dev_data, test_data, classifier, vocab, config)
def main():
    """Train the BiLSTM tagger and print per-class test metrics.

    Reads the module-level ``args`` (``DATA_DIR``, ``MAX_SEQ_LEN``,
    ``EMBEDDING``, ``LSTM_HIDDEN_UNITS``, ``LSTM_DENSE_DIM``, ``BATCH_SIZE``,
    ``NUM_EPOCHS``), fits the model with data generators, predicts on the
    test split one sentence at a time, and prints precision, recall and F1
    computed against the padded gold tags.
    """
    # Read datasets
    data = Dataset(args.DATA_DIR)
    sents, tags = data.get_all_data()

    # Construct the model
    MyModel = BiLSTMModel(args.MAX_SEQ_LEN, args.EMBEDDING,
                          args.LSTM_HIDDEN_UNITS, args.LSTM_DENSE_DIM,
                          data.get_nwords(), data.get_ntags())
    model = MyModel.define_model()

    num_train_sents = len(data.train_sents)
    num_val_sents = len(data.val_sents)
    num_test_sents = len(data.test_sents)
    print(
        "# train sents = {0} \n # of val sents = {1} \n # of test sents = {2}".
        format(num_train_sents, num_val_sents, num_test_sents),
        flush=True)

    # indexes to train, val and test data
    partition = {
        "train": list(range(num_train_sents)),
        "val": list(range(num_val_sents)),
        "test": list(range(num_test_sents))
    }

    # Parameters
    params = {
        'dim': args.MAX_SEQ_LEN,
        'batch_size': args.BATCH_SIZE,
        'n_classes': data.get_ntags(),
        'shuffle': True,
        'word2idx': data.get_word2idx(),
        'tag2idx': data.get_tag2idx()
    }

    # Generators
    training_generator = DG.DataGenerator(partition['train'],
                                          data.train_sents, data.train_tags,
                                          **params)
    validation_generator = DG.DataGenerator(partition['val'], data.val_sents,
                                            data.val_tags, **params)

    # Train model on dataset
    model.fit_generator(generator=training_generator,
                        validation_data=validation_generator,
                        use_multiprocessing=True,
                        epochs=args.NUM_EPOCHS,
                        verbose=1)

    # Test-time parameters: batch size 1 and no shuffling, so predictions
    # come back in the same order as ``data.test_sents``.
    params_test = {
        'dim': args.MAX_SEQ_LEN,
        'batch_size': 1,
        'n_classes': data.get_ntags(),
        'shuffle': False,
        'word2idx': data.get_word2idx(),
        'tag2idx': data.get_tag2idx()
    }

    # Make predictions.
    # Fixed: the test sentences were previously paired with the *training*
    # tags; the generator now receives the matching test tags.
    testing_generator = DG.DataGenerator(partition['test'], data.test_sents,
                                         data.test_tags, **params_test)
    pred_test = model.predict_generator(generator=testing_generator,
                                        steps=num_test_sents)
    pred_test = np.argmax(pred_test, axis=-1)

    def pad(x):
        """Right-pad each tag list with PAD and truncate to MAX_SEQ_LEN."""
        pad_id = data.get_tag2idx()["PAD"]  # looked up once, not per row
        x1 = [tgs + ([pad_id] * (args.MAX_SEQ_LEN - len(tgs))) for tgs in x]
        x2 = [tgs[:args.MAX_SEQ_LEN] for tgs in x1]
        return np.array(x2)

    test_tags_padded = pad(data.test_tags)

    def get_measures(yTrue, yPred):
        """Flatten both arrays and print per-class precision, recall and F1."""
        y1 = yTrue.reshape(1, -1).squeeze()
        y2 = yPred.reshape(1, -1).squeeze()

        P = precision_score(y1, y2, average=None)
        R = recall_score(y1, y2, average=None)
        F1 = f1_score(y1, y2, average=None)

        print("Precision=", flush=True)
        print(P, flush=True)
        print("Recall=", flush=True)
        print(R, flush=True)
        print("F1 score=", flush=True)
        print(F1, flush=True)

    print("Test...", flush=True)
    get_measures(test_tags_padded, pred_test)
class ModelInteractor:
    """Responsible for training the model and using it to make predictions"""

    @staticmethod
    def factory(settings, vocabs):
        """Return the interactor subclass selected by ``settings.unfactorized``."""
        if settings.unfactorized:
            return ModelInteractorUnfactorized(settings, vocabs)
        else:
            return ModelInteractorfactorized(settings, vocabs)

    def __init__(self, settings, vocabs):
        """Create the external embeddings, the BiLSTM model and its optimizer.

        Args:
            settings: Configuration object; its attributes drive every choice
                made here (device, interpolation weights, Adam betas, ...).
            vocabs: Vocabularies forwarded to ``BiLSTMModel`` and datasets.
        """
        self.vocabs = vocabs
        # ``External`` is always built; it is given None when disabled.
        if not settings.disable_external:
            self.external = External(settings.external)
        else:
            self.external = External(None)

        self.model = None
        self.optimizer = None
        self.train_data = None
        self.test_data = None
        self.epoch_offset = 0
        self.settings = settings

        # ``dec`` acts as a flag only: True when tree decoding is requested.
        if settings.tree:
            self.dec = True
        else:
            self.dec = None

        # which targets to take
        self.ot = settings.ot
        self.pt = settings.pt

        self.device = settings.device
        self.loss_interpolation = settings.loss_interpolation
        self.model_interpolation = settings.model_interpolation
        self.batch_size = settings.batch_size

        self.model = BiLSTMModel(self.vocabs, self.external, settings)
        self.model = self.model.to(self.settings.device)

        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          betas=(settings.beta1,
                                                 settings.beta2),
                                          weight_decay=settings.l2)

        self._store_settings()

    def _store_settings(self):
        """Dump every setting except ``device`` to ``<settings.dir>settings.json``."""
        serializable = {
            k: v
            for k, v in self.settings.__dict__.items() if k != "device"
        }
        with open(self.settings.dir + "settings.json", "w") as fh:
            json.dump(serializable, fh)

    def upd_from_other(self, other, *args):
        """Copy parameters whose names start with any prefix in ``args``.

        Args:
            other: Another interactor whose ``model`` supplies the values.
            *args: Name prefixes selecting which state-dict entries to copy.
        """
        other_dict = other.model.state_dict()
        print(other_dict.keys())
        model_dict = self.model.state_dict()
        # str.startswith accepts a tuple, so one call covers all prefixes.
        od = {k: v for k, v in other_dict.items() if k.startswith(args)}
        # 2. overwrite entries in the existing state dict
        print(od.keys())
        model_dict.update(od)
        # 3. load the new state dict
        self.model.load_state_dict(model_dict)

    def freeze_params(self, *freeze):
        """Disable gradients for parameters whose name starts with a prefix.

        Args:
            *freeze: Name prefixes; each matching parameter is frozen once.
        """
        froze = []
        for name, param in self.model.named_parameters():
            if name.startswith(freeze):
                froze.append(name)
                param.requires_grad = False
        print(f"froze {froze} parameters")

    def _init_training_data(self, train_path):
        """Load the training set and return a shuffling ``DataLoader`` over it."""
        self.train_data = MyDataset(train_path,
                                    vocabs=self.vocabs,
                                    external=self.external,
                                    settings=self.settings,
                                    elmo=self.settings.elmo_train,
                                    vec_dim=self.settings.vec_dim)
        return DataLoader(self.train_data,
                          batch_size=self.batch_size,
                          shuffle=True,
                          collate_fn=padded_collate)

    def _init_test_data(self, test_path, elmo_path=None):
        """Load an evaluation set and return an in-order ``DataLoader`` over it."""
        self.test_data = MyDataset(test_path,
                                   vocabs=self.vocabs,
                                   external=self.external,
                                   settings=self.settings,
                                   elmo=elmo_path,
                                   vec_dim=self.settings.vec_dim)
        return DataLoader(self.test_data,
                          batch_size=self.batch_size,
                          shuffle=False,
                          collate_fn=padded_collate)

    def _report_cuda_memory(self, header=None):
        """Print allocated/cached CUDA memory (in 10**6 bytes) around a cache flush.

        Does nothing when CUDA is unavailable. ``header`` is printed first
        when given.
        """
        if not torch.cuda.is_available():
            return
        if header is not None:
            print(header)
        # ``memory_cached`` was renamed to ``memory_reserved``; prefer the new
        # name when present so no deprecation warning is emitted per batch.
        cached = (getattr(torch.cuda, "memory_reserved", None)
                  or torch.cuda.memory_cached)
        print(torch.cuda.memory_allocated(self.device) / 10**6)
        print(cached(self.device) / 10**6)
        torch.cuda.empty_cache()
        print(cached(self.device) / 10**6)

    @staticmethod
    def _score_predictions(entries, predictions, target):
        """Return the first value of ``sc.score`` for gold vs. predicted arrays.

        Gold arrays come from ``entry[1][target]`` and predictions are looked
        up by ``entry[0]``.
        """
        pairs = ((entry[1][target].numpy(), predictions[entry[0]].numpy())
                 for entry in entries)
        f1, _ = sc.score(*zip(*pairs))
        return f1

    def _run_train_batch(self, batch, optimizer, gradient_clipping=True):
        """Train on one batch and return its loss; implemented by subclasses."""
        raise NotImplementedError()

    def _run_train_epoch(self,
                         data,
                         epoch,
                         verbose=True,
                         gradient_clipping=True):
        """Run one pass over ``data`` and return ``(total_loss, sequences_trained)``.

        When ``verbose`` is set, progress is printed roughly every 1% of the
        batches together with the mean loss since the previous report.
        """
        self.model.train()
        print_every = int(len(data) / 100) + 1

        total_loss = 0
        sequences_trained = 0

        debug_loss = []
        debug_timer = time.time()

        for i, batch in enumerate(data):
            batch.to(self.device)
            loss = self._run_train_batch(batch, self.optimizer,
                                         gradient_clipping)
            debug_loss.append(loss)
            self._report_cuda_memory()

            if verbose and (i + 1) % print_every == 0:
                percentage = int((i + 1) / print_every)
                print("{}% of epoch {} ".format(percentage, epoch) +
                      "completed, current loss is {}".format(
                          round(sum(debug_loss) / len(debug_loss), 6)) +
                      " averaged over the past {} sentences".format(
                          len(debug_loss) * batch.sentence_count) +
                      " (took {} seconds)".format(
                          round(time.time() - debug_timer, 2)),
                      flush=True)
                debug_loss = []
                debug_timer = time.time()

            total_loss += loss
            sequences_trained += batch.sentence_count

        return total_loss, sequences_trained

    def train(self):
        """Train for ``settings.epochs`` epochs with optional early stopping.

        After each epoch the validation set is scored (unless
        ``settings.disable_val_eval``); the best-scoring model is saved as
        ``best_model.save`` and training stops once ``settings.early_stopping``
        epochs have passed without improvement. A checkpoint is also written
        at the end of every epoch that is not cut short by early stopping.
        """
        settings = self.settings
        print("Training is starting for {} epochs using ".format(
            settings.epochs) +
              "{} with the following settings:".format(self.device))
        print()
        for key, val in settings.__dict__.items():
            print("{}: {}".format(key, val))
        print(flush=True)

        train_dataloader = self._init_training_data(settings.train)

        best_f1 = 0
        best_f1_epoch = 1 + self.epoch_offset
        for epoch in range(1 + self.epoch_offset,
                           settings.epochs + 1 + self.epoch_offset):
            start_time = time.time()
            total_loss, sequences_trained = self._run_train_epoch(
                train_dataloader, epoch, not settings.quiet,
                not settings.disable_gradient_clip)
            total_time = round(time.time() - start_time, 2)
            print("#" * 50)
            print("Epoch {}".format(epoch))
            print("loss {}".format(total_loss))
            print("execution time {}s".format(total_time) \
                + " ({} trained sequences/s)".format(round(sequences_trained/(total_time))))
            print("#" * 50, flush=True)

            if not settings.disable_val_eval:
                entries, predicted, other_predicted = self.predict(
                    settings.val, settings.elmo_dev)
                f1 = self._score_predictions(entries, predicted, self.pt)
                print("Primary Dev F1 on epoch {} is {:.2%}".format(epoch, f1))

                if len(other_predicted) > 0:
                    other_f1 = self._score_predictions(
                        entries, other_predicted, self.ot)
                    print("Secondary Dev F1 on epoch {} is {:.2%}".format(
                        epoch, other_f1))

                improvement = f1 > best_f1
                elapsed = epoch - best_f1_epoch
                es_active = settings.early_stopping > 0

                if (es_active and not improvement
                        and elapsed == settings.early_stopping):
                    print("Have not seen any improvement for {} epochs".format(
                        elapsed))
                    print("Best F1 was {} seen at epoch #{}".format(
                        best_f1, best_f1_epoch))
                    break
                else:
                    if improvement:
                        best_f1 = f1
                        best_f1_epoch = epoch
                        print("Saving {} model".format(best_f1_epoch))
                        self.save("best_model.save", epoch)
                    else:
                        print("Have not seen any improvement for {} epochs".
                              format(elapsed))
                        print("Best F1 was {:.2%} seen at epoch #{}".format(
                            best_f1, best_f1_epoch))

            if settings.enable_train_eval:
                entries, predicted, other_predicted = self.predict(
                    settings.train, settings.elmo_train)
                train_f1 = self._score_predictions(entries, predicted,
                                                   self.pt)
                print("Sem Train F1 on epoch {} is {:.2%}".format(
                    epoch, train_f1))

                if len(other_predicted) > 0:
                    other_train_f1 = self._score_predictions(
                        entries, other_predicted, self.ot)
                    print("Syn Train F1 on epoch {} is {:.2%}".format(
                        epoch, other_train_f1))

            if settings.save_every:
                self.save("{}_epoch{}.save".format(int(time.time()), epoch),
                          epoch)
            else:
                self.save("last_epoch.save", epoch)

    def _run_test_batch(self, batch):
        """Predict one batch; implemented by subclasses.

        ``predict`` unpacks the result as ``(pred, other_pred)`` and merges
        both into dictionaries with ``dict.update``.
        """
        raise NotImplementedError()

    def _clip_grad(self, gradient_clipping):
        """Clip the model's gradient norm to 5 when ``gradient_clipping`` is truthy."""
        if gradient_clipping:
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5)

    def predict(self, data_path, elmo_path=None):
        """Run the model over ``data_path`` without gradient tracking.

        Returns:
            ``(dataset, predictions, other_predictions)`` where both
            prediction values are dicts merged from the per-batch results.
        """
        print("Predicting data from", data_path)
        test_loader = self._init_test_data(data_path, elmo_path)
        self.model.eval()
        predictions = {}
        other_predictions = {}
        for batch in test_loader:
            batch.to(self.device)
            # One dot per batch as a lightweight progress indicator.
            print(".", end="")
            sys.stdout.flush()
            with torch.no_grad():
                pred, other_pred = self._run_test_batch(batch)
                predictions.update(pred)
                other_predictions.update(other_pred)
        print("Done")
        return self.test_data, predictions, other_predictions

    def save(self, path, epoch):
        """Write model, optimizer, vocabs, RNG states and ``epoch`` to ``settings.dir + path``."""
        cuda_state = torch.cuda.get_rng_state() if torch.cuda.is_available(
        ) else None
        state = {
            "model": self.model.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            "vocabs": self.vocabs,
            "rng_state": torch.get_rng_state(),
            "cuda_rng_state": cuda_state,
            "epoch": epoch
        }
        torch.save(state, self.settings.dir + path)

    def load(self, path):
        """Restore a checkpoint written by ``save``.

        The checkpoint is deserialized onto the CPU so that a file written on
        a GPU machine can be restored anywhere; ``load_state_dict`` then
        copies the values onto the devices of the existing parameters.
        """
        print("Restoring model from {}".format(path))
        state = torch.load(path, map_location="cpu")
        self.model.load_state_dict(state["model"])
        self.model = self.model.to(self.settings.device)
        self.optimizer.load_state_dict(state["optimizer"])
        self.vocabs = state["vocabs"]
        torch.set_rng_state(state["rng_state"])
        # ``save`` stores None here when CUDA was unavailable; skip restoring
        # in that case instead of crashing on a CUDA-enabled machine.
        cuda_rng_state = state.get("cuda_rng_state")
        if torch.cuda.is_available() and cuda_rng_state is not None:
            torch.cuda.set_rng_state(cuda_rng_state)
        self.epoch_offset = state["epoch"]

    def other_loss(self, other_edge_scores, other_label_scores, batch, loss):
        """Blend the secondary-target loss into ``loss`` and return it.

        The secondary loss mixes label and edge losses with
        ``loss_interpolation``; the result is mixed into ``loss`` (updated in
        place) with ``model_interpolation``.
        """
        self._report_cuda_memory("other_loss")

        other_label_scores_transposed = other_label_scores.transpose(0, 1)
        # An edge exists wherever the secondary target is positive.
        other_edge_targets = (batch.targetss[self.ot] > 0)

        other_unpadded_edge_scores = other_edge_scores[batch.unpadding_mask]
        other_unpadded_edge_targets = other_edge_targets[batch.unpadding_mask]

        other_edge_loss = F.binary_cross_entropy_with_logits(
            other_unpadded_edge_scores, other_unpadded_edge_targets.float())

        other_gold_mask = other_edge_targets
        other_gold_mask_expanded = other_gold_mask.unsqueeze(0).expand_as(
            other_label_scores_transposed)
        other_gold_targets = batch.targetss[self.ot][other_gold_mask]

        if len(other_gold_targets) > 0:
            # Extract the scores for the existing labels
            other_scores = other_label_scores_transposed[
                other_gold_mask_expanded]
            # (labels x predictions)
            other_scores = other_scores.view(-1, len(other_gold_targets))

            # scores.t() => [#predictions x #labels], gold_target [#predictions]
            # gold_target needs to contain the indices of the correct labels.
            # Since gold_target labels are in the range 1..#labels, 1 is subtracted
            other_label_loss = F.cross_entropy(other_scores.t(),
                                               other_gold_targets - 1)
            other_loss = self.loss_interpolation * other_label_loss + (
                1 - self.loss_interpolation) * other_edge_loss
        else:
            # No gold edges: only the edge loss contributes.
            other_loss = (1 - self.loss_interpolation) * other_edge_loss

        loss *= 1 - self.model_interpolation
        loss += other_loss * self.model_interpolation

        return loss

    def other_predict(self, other_edge_scores, other_label_scores, i, size,
                      other_predictions, batch):
        """Store the secondary prediction for item ``i`` of ``batch``.

        Scores are cropped to ``size`` along the last two axes, edge and
        label predictions are multiplied element-wise, and the CPU copy is
        written to ``other_predictions[batch.graph_ids[i]]``.
        """
        other_unpadded_edge_scores = other_edge_scores[i, :size, :size]
        other_unpadded_label_scores = other_label_scores[i, :, :size, :size]

        other_edge_prediction = self.predict_edges(other_unpadded_edge_scores)
        other_label_prediction = self.predict_labels(
            other_unpadded_label_scores)
        other_combined_prediction = (other_edge_prediction *
                                     other_label_prediction)
        other_predictions[batch.graph_ids[i]] = other_combined_prediction.cpu()
default=-1, type=int, help='Use id of gpu, -1 if cpu.') argparser.add_argument('--input', default='dev/dev.txt') argparser.add_argument('--output', default='dev/dev.txt.out') args, extra_args = argparser.parse_known_args() config = Configurable(args.config_file, extra_args) torch.set_num_threads(args.thread) vocab = pickle.load(open(config.load_vocab_path, 'rb+')) vec = vocab.create_placeholder_embs(config.pretrained_embeddings_file) config.use_cuda = False # if gpu and args.use_cuda: if gpu and args.gpu >= 0: config.use_cuda = True torch.cuda.set_device(args.gpu) print('GPU ID:' + str(args.gpu)) model = BiLSTMModel(vocab, config, vec) model.load_state_dict( torch.load(config.load_model_path, map_location=lambda storage, loc: storage)) if config.use_cuda: # torch.backends.cudnn.enabled = True model = model.cuda(device=args.gpu) classifier = CloneDetection(model, vocab, config.use_cosine) evaluate(args.input, classifier, vocab, args.output)
# Parse known CLI arguments; the unrecognised ones are handed to the config.
args, extra_args = argparser.parse_known_args()
config = Configurable(args.config_file, extra_args)
torch.set_num_threads(args.thread)

# Build the vocabulary from the training file and the BERT vocab file,
# then persist it.
vocab = creatVocab(config.train_file, config.bert_vocab_file, config.min_occur_count)
# Use a context manager so the file is flushed and closed deterministically.
with open(config.save_vocab_path, 'wb') as vocab_file:
    pickle.dump(vocab, vocab_file)

# CUDA is used only when ``gpu`` is truthy and the requested id is not -1.
config.use_cuda = False
gpu_id = -1
if gpu and args.gpu != -1:
    config.use_cuda = True
    torch.cuda.set_device(args.gpu)
    print('GPU ID:' + str(args.gpu))
    gpu_id = args.gpu
print("\nGPU using status: ", config.use_cuda)

# The BiLSTM is sized from the BERT extractor's hidden size and layer count.
bert = BertExtractor(config)
model = BiLSTMModel(vocab, config, bert.bert_hidden_size, bert.bert_layers)
if config.use_cuda:
    # cuDNN is explicitly switched off for the CUDA run.
    torch.backends.cudnn.enabled = False
    model = model.cuda(args.gpu)
    bert = bert.cuda()

bisent_classfier = BiSententClassifier(model, bert, vocab)

data = read_corpus(config.train_file)
dev_data = read_corpus(config.dev_file)
test_data = read_corpus(config.test_file)

train(data, dev_data, test_data, bisent_classfier, vocab, config)