Example #1
0
    def __init__(self, settings, vocabs):
        """Wire up the model, optimizer and bookkeeping from ``settings``.

        Args:
            settings: Run configuration. This method reads
                ``disable_external``, ``external``, ``tree``, ``ot``, ``pt``,
                ``device``, ``loss_interpolation``, ``model_interpolation``,
                ``batch_size``, ``beta1``, ``beta2`` and ``l2`` from it.
            vocabs: Vocabularies forwarded to ``BiLSTMModel``.
        """
        self.vocabs = vocabs
        # ``External`` receives ``None`` when external resources are disabled.
        self.external = External(
            None if settings.disable_external else settings.external)

        # Placeholders; model and optimizer are filled in further down.
        self.model = None
        self.optimizer = None
        self.train_data = None
        self.test_data = None
        self.epoch_offset = 0
        self.settings = settings

        # ``dec`` is only a flag here: True when tree decoding is requested.
        self.dec = True if settings.tree else None

        # which targets to take
        self.ot = settings.ot
        self.pt = settings.pt

        self.device = settings.device

        self.loss_interpolation = settings.loss_interpolation
        self.model_interpolation = settings.model_interpolation
        self.batch_size = settings.batch_size

        network = BiLSTMModel(self.vocabs, self.external, settings)
        self.model = network.to(self.settings.device)

        adam_betas = (settings.beta1, settings.beta2)
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          betas=adam_betas,
                                          weight_decay=settings.l2)
        self._store_settings()
Example #2
0
    # Restore the pre-trained parser; the identity ``map_location`` keeps the
    # loaded storages where deserialization puts them (CPU) until the explicit
    # ``.cuda()`` call below.
    parser_model = ParserModel(dep_vocab, parser_config, dep_vec)
    parser_model.load_state_dict(
        torch.load(parser_config.load_model_path,
                   map_location=lambda storage, loc: storage))

    vocab = creatVocab(config.train_file, config.min_occur_count)
    vec = vocab.load_pretrained_embs(config.pretrained_embeddings_file)
    # Use a context manager so the vocabulary file is flushed and closed
    # immediately instead of whenever the handle is garbage-collected.
    with open(config.save_vocab_path, 'wb') as vocab_file:
        pickle.dump(vocab, vocab_file)

    # CUDA is used only when the ``gpu`` flag (defined outside this fragment,
    # presumably CUDA availability -- TODO confirm) is set and a non-negative
    # device id was requested.
    config.use_cuda = False
    gpu_id = -1
    if gpu and args.gpu >= 0:
        torch.cuda.set_device(args.gpu)
        config.use_cuda = True
        print("GPU ID: ", args.gpu)
        gpu_id = args.gpu

    model = BiLSTMModel(vocab, config, parser_config, vec)
    if config.use_cuda:
        #torch.backends.cudnn.enabled = True
        model = model.cuda()
        parser_model = parser_model.cuda()

    classifier = SentenceClassifier(config, model, vocab, parser_model,
                                    dep_vocab)

    data = read_corpus(config.train_file)
    dev_data = read_corpus(config.dev_file)
    test_data = read_corpus(config.test_file)

    train(data, dev_data, test_data, classifier, vocab, dep_vocab, config)
Example #3
0
    # Parse known CLI arguments; everything unrecognised is handed to the
    # configuration object.
    args, extra_args = argparser.parse_known_args()
    config = Configurable(args.config_file, extra_args)
    torch.set_num_threads(args.thread)

    vocab = creatVocab(config.train_file, config.min_occur_count)
    vec = vocab.load_pretrained_embs(config.pretrained_embeddings_file)
    # Use a context manager so the vocabulary file is flushed and closed
    # immediately instead of whenever the handle is garbage-collected.
    with open(config.save_vocab_path, 'wb') as vocab_file:
        pickle.dump(vocab, vocab_file)

    # ``gpu`` is defined outside this fragment (presumably CUDA
    # availability -- TODO confirm).
    config.use_cuda = False
    if gpu and args.gpu >= 0:
        config.use_cuda = True
        torch.cuda.set_device(args.gpu)
        print("GPU ID: ", args.gpu)
    print("\nGPU using status: ", config.use_cuda)

    # print(config.use_cuda)

    model = BiLSTMModel(vocab, config, vec)
    model = model.cpu()
    if config.use_cuda:
        #torch.backends.cudnn.enabled = True
        model = model.cuda()

    classifier = SentenceClassifier(model, vocab)

    data = read_corpus(config.train_file)
    dev_data = read_corpus(config.dev_file)
    test_data = read_corpus(config.test_file)

    train(data, dev_data, test_data, classifier, vocab, config)
Example #4
0
    vocab = creat_vocab(config.train_file, config.bert_vocab_file,
                        config.min_occur_count)
    # Use a context manager so the vocabulary file is flushed and closed
    # immediately instead of whenever the handle is garbage-collected.
    with open(config.save_vocab_path, 'wb') as vocab_file:
        pickle.dump(vocab, vocab_file)

    # ``gpu`` is defined outside this fragment (presumably CUDA
    # availability -- TODO confirm).
    config.use_cuda = False
    gpu_id = -1
    if gpu and args.gpu >= 0:
        torch.cuda.set_device(args.gpu)
        config.use_cuda = True
        print("GPU ID: ", args.gpu)
        gpu_id = args.gpu

    bert = BertExtractor(config)

    # ``parser_config``, ``parser_model`` and ``dep_vocab`` come from code
    # above this fragment.
    model = BiLSTMModel(vocab, config, parser_config, bert.bert_hidden_size,
                        bert.bert_layers)
    if config.use_cuda:
        # torch.backends.cudnn.enabled = True
        model = model.cuda()
        bert = bert.cuda()
        parser_model = parser_model.cuda()

    labeler = SequenceLabeler(model, bert, parser_model)

    data = read_corpus(config.train_file)
    dev_data = read_corpus(config.dev_file)
    test_data = read_corpus(config.test_file)

    train(data, dev_data, test_data, labeler, vocab, dep_vocab, config)
Example #5
0
    def train(self):
        """Train the BiLSTM tagger in a TensorFlow 1.x session and evaluate it.

        The method dumps the data helper's state to ``config.model_dir``,
        builds the model inside a new graph, trains for ``config.n_epochs``
        epochs (dividing the learning rate by ``config.learning_rate_decay``
        after every epoch), periodically saves checkpoints and prints
        CoNLL-style evaluations, and finally evaluates on the train,
        validation and test splits.

        Any exception raised while building or training is printed together
        with its traceback and the method returns early without running the
        final evaluation.

        Returns:
            None.
        """
        checkpoint_path = os.path.join(self.config.model_dir, 'model.ckpt')
        data_dirs = self.config.data_paths
        data_helper = DataHelper(self.train_tokens, self.validation_tokens,
                                 self.train_tags)
        data_helper.dump_data_to_file(self.config.model_dir)

        tf.reset_default_graph()
        print('=' * 50)
        print('=' * 50)
        print(' [*] Checkpoint path: %s' % checkpoint_path)
        print(' [*] Loading training data from: %s' % data_dirs)
        print(' [*] Using model: %s' % self.config.model_dir)
        print(hparams_debug_string())

        token_count = data_helper.gettokencount()
        tag_count = data_helper.gettagcount()
        cap_feat_count = 4
        pad_index = data_helper.word2idx('<PAD>')

        print('Tag count : %d' % tag_count)
        print('Token count : %d' % token_count)
        print('Cap Features count : %d' % cap_feat_count)
        print('PAD index : %d' % pad_index)

        with tf.Graph().as_default() as graph:
            sess_config = tf.ConfigProto(log_device_placement=True,
                                         allow_soft_placement=True)
            sess_config.gpu_options.allow_growth = True

            with tf.Session(config=sess_config) as sess:
                try:

                    global_step = tf.Variable(0,
                                              name='global_step',
                                              trainable=False)
                    # NOTE(review): this saver is replaced below before it is
                    # ever used; it is kept only so the graph contents stay
                    # exactly as before.
                    saver = tf.train.Saver(max_to_keep=None,
                                           keep_checkpoint_every_n_hours=2)

                    with tf.variable_scope('model'):
                        model = BiLSTMModel()
                        model.init()
                        train_vars = model.build_layers(
                            hparams=hparams,
                            vocabulary_size=token_count,
                            n_cap_feats=cap_feat_count,
                            n_tags=tag_count)
                        model.compute_predictions()
                        model.compute_loss(n_tags=tag_count,
                                           PAD_index=pad_index)
                        model.perform_optimization(global_step)
                        train_stat = model.add_stats()

                        print('No. of trainable variables : %d' % train_vars)

                        sess.run(tf.global_variables_initializer())
                        summary_writer = tf.summary.FileWriter(
                            self.config.model_dir)
                        # The saver actually used for checkpoints: all global
                        # variables, keeping the five most recent files.
                        saver = tf.train.Saver(tf.global_variables(),
                                               max_to_keep=5)

                        start_step = sess.run(global_step)
                        print('-' * 20 + ' Starting new training ' + '-' * 20)
                        learning_rate = self.config.learning_rate

                        for epoch in range(self.config.n_epochs):
                            print('-' * 20 + ' Epoch {}'.format(epoch + 1) +
                                  '-' * 20)

                            for x_batch, c_batch, y_batch, lengths in data_helper.batches_generator(
                                    self.config.batch_size, self.train_tokens,
                                    self.train_tags):
                                step, loss = model.train_on_batch(
                                    sess, global_step, x_batch, c_batch,
                                    y_batch, lengths, learning_rate,
                                    self.config.dropout_keep_probability)
                                if step % 100 == 0:
                                    print('[Step : %d] loss : %f' %
                                          (step, loss))

                            # Per-epoch learning-rate decay.
                            learning_rate = learning_rate / self.config.learning_rate_decay

                            if (epoch +
                                    1) % self.config.checkpoint_interval == 0:
                                # The checkpoint is written with the 0-based
                                # epoch as its suffix, so log that same
                                # suffix (the message previously announced
                                # ``epoch + 1``, a file that is never
                                # created).
                                print('Saving checkpoint to : %s-%d' %
                                      (checkpoint_path, epoch))
                                saver.save(sess,
                                           checkpoint_path,
                                           global_step=epoch)

                            if (epoch + 1) % self.config.test_interval == 0:
                                print('Train data evaluation')
                                data_helper.eval_conll(model,
                                                       sess,
                                                       self.train_tokens,
                                                       self.train_tags,
                                                       short_report=True)

                                print('Validation data evaluation:')
                                data_helper.eval_conll(model,
                                                       sess,
                                                       self.validation_tokens,
                                                       self.validation_tags,
                                                       short_report=True)

                except Exception as e:
                    print('Exitin due to exception : [%s]!!!' % e)
                    traceback.print_exc()
                    # NOTE(review): ``log`` is not defined in this method; it
                    # must exist at module level or this handler itself
                    # raises NameError -- confirm.
                    log.close()
                    return

                print('-' * 20 + ' Training completed! ' + '-' * 20)
                print('Training data evaluation:')
                data_helper.eval_conll(model,
                                       sess,
                                       self.train_tokens,
                                       self.train_tags,
                                       short_report=False)

                print('Validation data evaluation:')
                data_helper.eval_conll(model,
                                       sess,
                                       self.validation_tokens,
                                       self.validation_tags,
                                       short_report=False)

                # This evaluation runs on the test split; the heading used to
                # say "Training data evaluation:".
                print('Test data evaluation:')
                data_helper.eval_conll(model,
                                       sess,
                                       self.test_tokens,
                                       self.test_tags,
                                       short_report=True)
        return
Example #6
0
    vocab = creatVocab(config.train_file, config.min_occur_count)
    # Use a context manager so the vocabulary file is flushed and closed
    # immediately instead of whenever the handle is garbage-collected.
    with open(config.save_vocab_path, 'wb') as vocab_file:
        pickle.dump(vocab, vocab_file)

    # ``gpu`` is defined outside this fragment (presumably CUDA
    # availability -- TODO confirm). ``gpu_id`` stays -1 unless CUDA is used
    # and is passed to the ELMo embedder below.
    config.use_cuda = False
    gpu_id = -1
    if gpu and args.gpu >= 0:
        torch.cuda.set_device(args.gpu)
        config.use_cuda = True
        print("GPU ID: ", args.gpu)
        gpu_id = args.gpu

    elmo = ElmoEmbedder(config.elmo_option_file, config.elmo_weight_file,
                        gpu_id)

    elmo_layers = elmo.elmo_bilm.num_layers
    elmo_dims = elmo.elmo_bilm.get_output_dim()

    model = BiLSTMModel(vocab, config, (elmo_layers, elmo_dims))

    if config.use_cuda:
        # torch.backends.cudnn.enabled = True
        model = model.cuda()

    classifier = SentenceClassifier(model, elmo, vocab)

    data = read_corpus(config.train_file)
    dev_data = read_corpus(config.dev_file)
    test_data = read_corpus(config.test_file)

    train(data, dev_data, test_data, classifier, vocab, config)
Example #7
0
    vocab = creatVocab(config.train_file, config.min_occur_count)
    vec = vocab.load_initialize_embs(config.pretrained_embeddings_file)
    # Use a context manager so the vocabulary file is flushed and closed
    # immediately instead of whenever the handle is garbage-collected.
    with open(config.save_vocab_path, 'wb') as vocab_file:
        pickle.dump(vocab, vocab_file)

    # ``gpu`` is defined outside this fragment (presumably CUDA
    # availability -- TODO confirm). ``gpu_id`` stays -1 unless CUDA is used
    # and is passed to the ELMo embedder below.
    config.use_cuda = False
    gpu_id = -1
    if gpu and args.gpu != -1:
        config.use_cuda = True
        torch.cuda.set_device(args.gpu)
        print('GPU ID:' + str(args.gpu))
        gpu_id = args.gpu
    print("\nGPU using status: ", config.use_cuda)

    elmo = ElmoEmbedder(config.elmo_option_file, config.elmo_weight_file,
                        gpu_id)
    elmo_layers = elmo.elmo_bilm.num_layers
    elmo_dims = elmo.elmo_bilm.get_output_dim()

    model = BiLSTMModel(vocab, config, vec, (elmo_layers, elmo_dims))

    if config.use_cuda:
        # cuDNN is switched off explicitly for GPU runs.
        torch.backends.cudnn.enabled = False
        model = model.cuda(args.gpu)

    classifier = BiSententClassifier(model, elmo, vocab)
    data = read_corpus(config.train_file)
    dev_data = read_corpus(config.dev_file)
    test_data = read_corpus(config.test_file)
    train(data, dev_data, test_data, classifier, vocab, config)
Example #8
0
    # Parse known CLI arguments; everything unrecognised is handed to the
    # configuration object.
    args, extra_args = argparser.parse_known_args()
    config = Configurable(args.config_file, extra_args)
    torch.set_num_threads(args.thread)

    vocab = creatVocab(config.train_file, config.min_occur_count)
    # Two embedding tables are derived from the same pretrained file: one for
    # the model itself and one for the separate ``ExtWord`` module.
    vec1 = vocab.load_initialize_embs(config.pretrained_embeddings_file)
    vec2 = vocab.load_pretrained_embs(config.pretrained_embeddings_file)
    # Use a context manager so the vocabulary file is flushed and closed
    # immediately instead of whenever the handle is garbage-collected.
    with open(config.save_vocab_path, 'wb') as vocab_file:
        pickle.dump(vocab, vocab_file)

    # ``gpu`` is defined outside this fragment (presumably CUDA
    # availability -- TODO confirm).
    config.use_cuda = False
    gpu_id = -1
    if gpu and args.gpu != -1:
        config.use_cuda = True
        torch.cuda.set_device(args.gpu)
        print('GPU ID:' + str(args.gpu))
        gpu_id = args.gpu
    print("\nGPU using status: ", config.use_cuda)

    model = BiLSTMModel(vocab, config, vec1)
    extword_embed = ExtWord(vocab, config, vec2)
    if config.use_cuda:
        # cuDNN is switched off explicitly for GPU runs.
        torch.backends.cudnn.enabled = False
        model = model.cuda(args.gpu)
        extword_embed = extword_embed.cuda(args.gpu)

    classifier = BiSententClassifier(model, extword_embed, vocab)
    data = read_corpus(config.train_file)
    dev_data = read_corpus(config.dev_file)
    test_data = read_corpus(config.test_file)
    train(data, dev_data, test_data, classifier, vocab, config)
Example #9
0
def main():
    """Train the BiLSTM tagger and print per-class test metrics.

    Reads the dataset from ``args.DATA_DIR``, builds and fits the model with
    batch generators over the train/validation splits, predicts on the test
    split one sentence at a time, and prints precision, recall and F1 per
    class against the gold test tags padded/truncated to
    ``args.MAX_SEQ_LEN``.
    """
    # Read datasets
    data = Dataset(args.DATA_DIR)
    # The returned values are not needed here; the call is kept in case it
    # populates the dataset as a side effect.
    data.get_all_data()

    # Construct the model
    MyModel = BiLSTMModel(args.MAX_SEQ_LEN, args.EMBEDDING,
                          args.LSTM_HIDDEN_UNITS, args.LSTM_DENSE_DIM,
                          data.get_nwords(), data.get_ntags())
    model = MyModel.define_model()

    num_train_sents = len(data.train_sents)
    num_val_sents = len(data.val_sents)
    num_test_sents = len(data.test_sents)

    print(
        "# train sents = {0} \n # of val sents = {1} \n # of test sents = {2}".
        format(num_train_sents, num_val_sents, num_test_sents),
        flush=True)

    # indexes to train, val and test data
    partition = {
        "train": list(range(num_train_sents)),
        "val": list(range(num_val_sents)),
        "test": list(range(num_test_sents))
    }

    # Parameters
    params = {
        'dim': args.MAX_SEQ_LEN,
        'batch_size': args.BATCH_SIZE,
        'n_classes': data.get_ntags(),
        'shuffle': True,
        'word2idx': data.get_word2idx(),
        'tag2idx': data.get_tag2idx()
    }

    # Generators
    training_generator = DG.DataGenerator(partition['train'], data.train_sents,
                                          data.train_tags, **params)
    validation_generator = DG.DataGenerator(partition['val'], data.val_sents,
                                            data.val_tags, **params)

    # Train model on dataset
    history = model.fit_generator(generator=training_generator,
                                  validation_data=validation_generator,
                                  use_multiprocessing=True,
                                  epochs=args.NUM_EPOCHS,
                                  verbose=1)

    # Parameters: one sentence per batch and no shuffling, so predictions
    # line up with ``data.test_tags`` by position.
    params_test = {
        'dim': args.MAX_SEQ_LEN,
        'batch_size': 1,
        'n_classes': data.get_ntags(),
        'shuffle': False,
        'word2idx': data.get_word2idx(),
        'tag2idx': data.get_tag2idx()
    }

    # Make predictions. The generator is paired with the *test* tags; it was
    # previously handed ``data.train_tags``, which do not belong to
    # ``data.test_sents``.
    testing_generator = DG.DataGenerator(partition['test'], data.test_sents,
                                         data.test_tags, **params_test)

    pred_test = model.predict_generator(generator=testing_generator,
                                        steps=num_test_sents)
    pred_test = np.argmax(pred_test, axis=-1)

    # print(pred_test.shape)

    max_len = args.MAX_SEQ_LEN
    pad_idx = data.get_tag2idx()["PAD"]

    def pad(x):
        """Right-pad with the PAD tag index, then truncate, to ``max_len``."""
        return np.array([
            (tgs + [pad_idx] * (max_len - len(tgs)))[:max_len] for tgs in x
        ])

    test_tags_padded = pad(data.test_tags)

    # print(test_tags_padded.shape)

    def get_measures(yTrue, yPred):
        """Print per-class precision, recall and F1 over flattened labels."""
        # ``ravel`` always yields a 1-D array, even for a single element.
        y1 = yTrue.ravel()
        y2 = yPred.ravel()

        P = precision_score(y1, y2, average=None)
        R = recall_score(y1, y2, average=None)
        F1 = f1_score(y1, y2, average=None)

        print("Precision=", flush=True)
        print(P, flush=True)
        print("Recall=", flush=True)
        print(R, flush=True)
        print("F1 score=", flush=True)
        print(F1, flush=True)

    print("Test...", flush=True)
    get_measures(test_tags_padded, pred_test)
Example #10
0
class ModelInteractor:
    """Responsible for training the model and using it to make predictions"""
    @staticmethod
    def factory(settings, vocabs):
        """Instantiate the interactor variant selected by ``settings.unfactorized``.

        Args:
            settings: Run configuration; only ``unfactorized`` is read here.
            vocabs: Vocabularies forwarded to the chosen interactor.

        Returns:
            A ``ModelInteractorUnfactorized`` when ``settings.unfactorized`` is
            truthy, otherwise a ``ModelInteractorfactorized``.
        """
        interactor_cls = (ModelInteractorUnfactorized
                          if settings.unfactorized else
                          ModelInteractorfactorized)
        return interactor_cls(settings, vocabs)

    def __init__(self, settings, vocabs):
        """Wire up the model, optimizer and bookkeeping from ``settings``.

        Args:
            settings: Run configuration. This method reads
                ``disable_external``, ``external``, ``tree``, ``ot``, ``pt``,
                ``device``, ``loss_interpolation``, ``model_interpolation``,
                ``batch_size``, ``beta1``, ``beta2`` and ``l2`` from it.
            vocabs: Vocabularies forwarded to ``BiLSTMModel``.
        """
        self.vocabs = vocabs
        # ``External`` receives ``None`` when external resources are disabled.
        self.external = External(
            None if settings.disable_external else settings.external)

        # Placeholders; model and optimizer are filled in further down.
        self.model = None
        self.optimizer = None
        self.train_data = None
        self.test_data = None
        self.epoch_offset = 0
        self.settings = settings

        # ``dec`` is only a flag here: True when tree decoding is requested.
        self.dec = True if settings.tree else None

        # which targets to take
        self.ot = settings.ot
        self.pt = settings.pt

        self.device = settings.device

        self.loss_interpolation = settings.loss_interpolation
        self.model_interpolation = settings.model_interpolation
        self.batch_size = settings.batch_size

        network = BiLSTMModel(self.vocabs, self.external, settings)
        self.model = network.to(self.settings.device)

        adam_betas = (settings.beta1, settings.beta2)
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          betas=adam_betas,
                                          weight_decay=settings.l2)
        self._store_settings()

    def _store_settings(self):
        with open(self.settings.dir + "settings.json", "w") as fh:
            json.dump(
                {
                    k: v
                    for k, v in self.settings.__dict__.items()
                    if k not in "device".split()
                }, fh)
            #for key, val in self.settings.__dict__.items():
            #    print("{}: {}".format(key,val), file=fw)

    def upd_from_other(self, other, *args):
        other_dict = other.model.state_dict()
        print(other_dict.keys())
        model_dict = self.model.state_dict()
        od = {}
        for k, v in other_dict.items():
            for a in args:
                if k.startswith(a):
                    od[k] = v
        #other_dict = {k: v for k, v in other_dict.items() if k in args}
        # 2. overwrite entries in the existing state dict
        print(od.keys())
        model_dict.update(od)
        # 3. load the new state dict
        self.model.load_state_dict(model_dict)

    def freeze_params(self, *freeze):
        froze = []
        for name, param in self.model.named_parameters():
            for f in freeze:
                if name.startswith(f):
                    froze.append(name)
                    param.requires_grad = False
        print(f"froze {froze} parameters")

    def _init_training_data(self, train_path):
        """Load the training set and wrap it in a shuffling ``DataLoader``.

        Args:
            train_path: Location of the training data handed to ``MyDataset``.

        Returns:
            A ``DataLoader`` over ``self.train_data`` using ``self.batch_size``
            and ``padded_collate``.
        """
        cfg = self.settings
        dataset = MyDataset(train_path,
                            vocabs=self.vocabs,
                            external=self.external,
                            settings=cfg,
                            elmo=cfg.elmo_train,
                            vec_dim=cfg.vec_dim)
        self.train_data = dataset
        loader_options = dict(batch_size=self.batch_size,
                              shuffle=True,
                              collate_fn=padded_collate)
        return DataLoader(dataset, **loader_options)

    def _init_test_data(self, test_path, elmo_path=None):
        """Load an evaluation set and wrap it in an order-preserving loader.

        Args:
            test_path: Location of the data handed to ``MyDataset``.
            elmo_path: Value forwarded as ``elmo`` to ``MyDataset``.

        Returns:
            A non-shuffling ``DataLoader`` over ``self.test_data`` using
            ``self.batch_size`` and ``padded_collate``.
        """
        cfg = self.settings
        dataset = MyDataset(test_path,
                            vocabs=self.vocabs,
                            external=self.external,
                            settings=cfg,
                            elmo=elmo_path,
                            vec_dim=cfg.vec_dim)
        self.test_data = dataset
        loader_options = dict(batch_size=self.batch_size,
                              shuffle=False,
                              collate_fn=padded_collate)
        return DataLoader(dataset, **loader_options)

    def _run_train_batch(self, batch, optimizer, gradient_clipping=True):
        raise NotImplementedError()

    def _run_train_epoch(self,
                         data,
                         epoch,
                         verbose=True,
                         gradient_clipping=True):
        self.model.train()
        print_every = int(len(data) / 100) + 1

        total_loss = 0
        sequences_trained = 0

        debug_loss = []
        debug_timer = time.time()
        for i, batch in enumerate(data):
            batch.to(self.device)
            loss = self._run_train_batch(batch, self.optimizer,
                                         gradient_clipping)
            debug_loss.append(loss)
            if torch.cuda.is_available():
                print(torch.cuda.memory_allocated(self.device) / 10**6)
                print(torch.cuda.memory_cached(self.device) / 10**6)
                torch.cuda.empty_cache()
                print(torch.cuda.memory_cached(self.device) / 10**6)

            if verbose and (i + 1) % print_every == 0:
                percentage = int((i + 1) / print_every)
                print("{}% of epoch {} ".format(percentage, epoch) +
                      "completed, current loss is {}".format(
                          round(sum(debug_loss) / len(debug_loss), 6)) +
                      " averaged over the past {} sentences".format(
                          len(debug_loss) * batch.sentence_count) +
                      " (took {} seconds)".format(
                          round(time.time() - debug_timer, 2)),
                      flush=True)
                debug_loss = []
                debug_timer = time.time()
            total_loss += loss
            sequences_trained += batch.sentence_count
        return total_loss, sequences_trained

    def train(self):
        settings = self.settings

        print("Training is starting for {} epochs using ".format(
            settings.epochs) +
              "{} with the following settings:".format(self.device))
        print()
        for key, val in settings.__dict__.items():
            print("{}: {}".format(key, val))
        print(flush=True)

        train_dataloader = self._init_training_data(settings.train)
        best_f1 = 0
        best_f1_epoch = 1 + self.epoch_offset

        for epoch in range(1 + self.epoch_offset,
                           settings.epochs + 1 + self.epoch_offset):
            start_time = time.time()
            total_loss, sequences_trained = self._run_train_epoch(
                train_dataloader, epoch, not settings.quiet,
                not settings.disable_gradient_clip)
            total_time = round(time.time() - start_time, 2)
            print("#" * 50)
            print("Epoch {}".format(epoch))
            print("loss {}".format(total_loss))
            print("execution time {}s".format(total_time) \
            + " ({} trained sequences/s)".format(round(sequences_trained/(total_time))))
            print("#" * 50, flush=True)
            if not settings.disable_val_eval:
                entries, predicted, other_predicted = self.predict(
                    settings.val, settings.elmo_dev)
                #a,d,b,c = zip(*((entry[0], len(entry[4]), entry[1].numpy().shape, predicted[entry[0]].numpy().shape) for entry in entries))
                #print([(x,w,y,z) for x,w,y,z in zip(a,d,b,c) if y!=z])
                f1, _ = sc.score(*zip(*((entry[1][self.pt].numpy(),
                                         predicted[entry[0]].numpy())
                                        for entry in entries)))
                print("Primary Dev F1 on epoch {} is {:.2%}".format(epoch, f1))

                if len(other_predicted) > 0:
                    other_f1, _ = sc.score(*zip(
                        *((entry[1][self.ot].numpy(),
                           other_predicted[entry[0]].numpy())
                          for entry in entries)))
                    print("Secondary Dev F1 on epoch {} is {:.2%}".format(
                        epoch, other_f1))
                #f1 = sc.score()
                improvement = f1 > best_f1
                elapsed = epoch - best_f1_epoch
                es_active = settings.early_stopping > 0

                if (es_active and not improvement
                        and elapsed == settings.early_stopping):
                    print("Have not seen any improvement for {} epochs".format(
                        elapsed))
                    print("Best F1 was {} seen at epoch #{}".format(
                        best_f1, best_f1_epoch))
                    break
                else:
                    if improvement:
                        best_f1 = f1
                        best_f1_epoch = epoch
                        print("Saving {} model".format(best_f1_epoch))
                        self.save("best_model.save", epoch)
                    else:
                        print("Have not seen any improvement for {} epochs".
                              format(elapsed))
                    print("Best F1 was {:.2%} seen at epoch #{}".format(
                        best_f1, best_f1_epoch))

            if settings.enable_train_eval:
                entries, predicted, other_predicted = self.predict(
                    settings.train, settings.elmo_train)
                train_f1, _ = sc.score(*zip(*((entry[1][self.pt].numpy(),
                                               predicted[entry[0]].numpy())
                                              for entry in entries)))
                print("Sem Train F1 on epoch {} is {:.2%}".format(
                    epoch, train_f1))

                if len(other_predicted) > 0:
                    other_train_f1, _ = sc.score(*zip(
                        *((entry[1][self.ot].numpy(),
                           other_predicted[entry[0]].numpy())
                          for entry in entries)))
                    print("Syn Train F1 on epoch {} is {:.2%}".format(
                        epoch, other_train_f1))

            if settings.save_every:
                self.save("{}_epoch{}.save".format(int(time.time()), epoch),
                          epoch)
            else:
                self.save("last_epoch.save", epoch)

    def _run_test_batch(self, batch):
        raise NotImplementedError()

    def _clip_grad(self, gradient_clipping):
        if gradient_clipping:
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5)

    def predict(self, data_path, elmo_path=None):
        print("Predicting data from", data_path)
        test_loader = self._init_test_data(data_path, elmo_path)
        self.model.eval()
        predictions = {}
        other_predictions = {}
        for batch in test_loader:
            batch.to(self.device)
            print(".", end="")
            sys.stdout.flush()
            with torch.no_grad():
                pred, other_pred = self._run_test_batch(batch)
                predictions.update(pred)
                other_predictions.update(other_pred)
        #for k,v in predictions.items():
        #    print(k, v.shape)
        print("Done")

        #return self.test_data.data, predictions
        return self.test_data, predictions, other_predictions

    def save(self, path, epoch):
        cuda_state = torch.cuda.get_rng_state() if torch.cuda.is_available(
        ) else None
        state = {
            "model": self.model.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            "vocabs": self.vocabs,
            "rng_state": torch.get_rng_state(),
            "cuda_rng_state": cuda_state,
            "epoch": epoch
        }
        torch.save(state, self.settings.dir + path)

    def load(self, path):
        print("Restoring model from {}".format(path))
        state = torch.load(path)
        self.model.load_state_dict(state["model"])
        self.model = self.model.to(self.settings.device)
        self.optimizer.load_state_dict(state["optimizer"])
        self.vocabs = state["vocabs"]
        torch.set_rng_state(state["rng_state"])
        if torch.cuda.is_available():
            torch.cuda.set_rng_state(state["cuda_rng_state"])
        self.epoch_offset = state["epoch"]

    def other_loss(self, other_edge_scores, other_label_scores, batch, loss):
        #####
        if torch.cuda.is_available():
            print("other_loss")
            print(torch.cuda.memory_allocated(self.device) / 10**6)
            print(torch.cuda.memory_cached(self.device) / 10**6)
            torch.cuda.empty_cache()
            print(torch.cuda.memory_cached(self.device) / 10**6)

        other_label_scores_transposed = other_label_scores.transpose(0, 1)
        other_edge_targets = (batch.targetss[self.ot] > 0)
        other_unpadded_edge_scores = other_edge_scores[batch.unpadding_mask]
        other_unpadded_edge_targets = other_edge_targets[batch.unpadding_mask]
        other_edge_loss = F.binary_cross_entropy_with_logits(
            other_unpadded_edge_scores, other_unpadded_edge_targets.float())
        other_gold_mask = other_edge_targets
        other_gold_mask_expanded = other_gold_mask.unsqueeze(0).expand_as(
            other_label_scores_transposed)
        other_gold_targets = batch.targetss[self.ot][other_gold_mask]
        if len(other_gold_targets) > 0:
            # Extract the scores for the existing labels
            other_scores = other_label_scores_transposed[
                other_gold_mask_expanded]
            # (labels x predictions)
            other_scores = other_scores.view(-1, len(other_gold_targets))

            # scores.t() => [#predictions x #labels], gold_target [#predictions]
            # gold_target needs to contain the indices of the correct labels.
            # Since gold_target labels are in the range 1..#labels, 1 is subtracted
            other_label_loss = F.cross_entropy(other_scores.t(),
                                               other_gold_targets - 1)

            other_loss = self.loss_interpolation * other_label_loss + (
                1 - self.loss_interpolation) * other_edge_loss
        else:
            other_loss = (1 - self.loss_interpolation) * other_edge_loss

        loss *= 1 - self.model_interpolation
        loss += other_loss * self.model_interpolation

        return loss
        #####
    def other_predict(self, other_edge_scores, other_label_scores, i, size,
                      other_predictions, batch):
        ####
        other_unpadded_edge_scores = other_edge_scores[i, :size, :size]
        other_unpadded_label_scores = other_label_scores[i, :, :size, :size]
        other_edge_prediction = self.predict_edges(other_unpadded_edge_scores)
        other_label_prediction = self.predict_labels(
            other_unpadded_label_scores)  #.cpu().numpy()
        other_combined_prediction = (other_edge_prediction *
                                     other_label_prediction)
        other_predictions[batch.graph_ids[i]] = other_combined_prediction.cpu()
Example #11
0
                           default=-1,
                           type=int,
                           help='Use id of gpu, -1 if cpu.')
    argparser.add_argument('--input', default='dev/dev.txt')
    argparser.add_argument('--output', default='dev/dev.txt.out')

    # Parse known CLI arguments; everything unrecognised is handed to the
    # configuration object.
    args, extra_args = argparser.parse_known_args()
    config = Configurable(args.config_file, extra_args)
    torch.set_num_threads(args.thread)

    # The vocabulary is only read, so open it read-only ('rb+' needlessly
    # required write permission) and close it as soon as it is unpickled.
    # NOTE: unpickling executes arbitrary code -- only load trusted files.
    with open(config.load_vocab_path, 'rb') as vocab_file:
        vocab = pickle.load(vocab_file)
    vec = vocab.create_placeholder_embs(config.pretrained_embeddings_file)

    # ``gpu`` is defined outside this fragment (presumably CUDA
    # availability -- TODO confirm).
    config.use_cuda = False
    # if gpu and args.use_cuda:
    if gpu and args.gpu >= 0:
        config.use_cuda = True
        torch.cuda.set_device(args.gpu)
        print('GPU ID:' + str(args.gpu))

    # Load the trained weights with an identity ``map_location`` and move
    # the model to the GPU afterwards if requested.
    model = BiLSTMModel(vocab, config, vec)
    model.load_state_dict(
        torch.load(config.load_model_path,
                   map_location=lambda storage, loc: storage))
    if config.use_cuda:
        # torch.backends.cudnn.enabled = True
        model = model.cuda(device=args.gpu)

    classifier = CloneDetection(model, vocab, config.use_cosine)
    evaluate(args.input, classifier, vocab, args.output)
Example #12
0
    # Parse known CLI arguments; everything unrecognised is handed to the
    # configuration object.
    args, extra_args = argparser.parse_known_args()
    config = Configurable(args.config_file, extra_args)
    torch.set_num_threads(args.thread)

    vocab = creatVocab(config.train_file, config.bert_vocab_file,
                       config.min_occur_count)
    # Use a context manager so the vocabulary file is flushed and closed
    # immediately instead of whenever the handle is garbage-collected.
    with open(config.save_vocab_path, 'wb') as vocab_file:
        pickle.dump(vocab, vocab_file)

    # ``gpu`` is defined outside this fragment (presumably CUDA
    # availability -- TODO confirm).
    config.use_cuda = False
    gpu_id = -1
    if gpu and args.gpu != -1:
        config.use_cuda = True
        torch.cuda.set_device(args.gpu)
        print('GPU ID:' + str(args.gpu))
        gpu_id = args.gpu
    print("\nGPU using status: ", config.use_cuda)

    bert = BertExtractor(config)
    model = BiLSTMModel(vocab, config, bert.bert_hidden_size, bert.bert_layers)

    if config.use_cuda:
        # cuDNN is switched off explicitly for GPU runs.
        torch.backends.cudnn.enabled = False
        model = model.cuda(args.gpu)
        bert = bert.cuda()

    bisent_classfier = BiSententClassifier(model, bert, vocab)
    data = read_corpus(config.train_file)
    dev_data = read_corpus(config.dev_file)
    test_data = read_corpus(config.test_file)
    train(data, dev_data, test_data, bisent_classfier, vocab, config)