Example #1
    def train(self, conll_file, model_file, epochs = 10):
        model = ParserModel()
        instances = self.__get_instances(model, conll_file, model.register_feature)

        print 'start training ...'
        q = 0
        for epoch in xrange(epochs):
            correct = 0
            total = 0      
            # random.shuffle(instances)
            for (gold, head_vectors) in iter(instances):
                q += 1
                pred = model.predict(head_vectors)
                if gold != pred:
                    model.update(head_vectors[gold], head_vectors[pred], q)
                else:
                    correct += 1
                total += 1
            print '\nepoch %d done, %6.2f%% correct' % (epoch,100.0*correct/total)

        model.average(q)
        model.save(model_file)
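The loop above relies on a ParserModel whose predict/update/average methods are not shown. For orientation only, here is a minimal sketch of an averaged perceptron with that interface; it assumes feature vectors are plain dicts mapping feature ids to counts, and all names are illustrative rather than the original implementation.

# Illustrative averaged perceptron in the style the training loop assumes.
from collections import defaultdict

class AveragedPerceptron(object):
    def __init__(self):
        self.weights = defaultdict(float)   # current weights
        self._totals = defaultdict(float)   # accumulated weights for averaging
        self._stamps = defaultdict(int)     # last update step per feature

    def score(self, vector):
        return sum(self.weights.get(f, 0.0) * v for f, v in vector.items())

    def predict(self, head_vectors):
        # head_vectors: list of candidate feature vectors; return best index
        return max(range(len(head_vectors)),
                   key=lambda h: self.score(head_vectors[h]))

    def _tick(self, f, q):
        # bring the running total for feature f up to date before changing it
        self._totals[f] += (q - self._stamps[f]) * self.weights[f]
        self._stamps[f] = q

    def update(self, gold_vector, pred_vector, q):
        # reward gold features, penalize predicted features (update step q)
        for f, v in gold_vector.items():
            self._tick(f, q)
            self.weights[f] += v
        for f, v in pred_vector.items():
            self._tick(f, q)
            self.weights[f] -= v

    def average(self, q):
        # replace each weight by its average over all q update steps
        for f in self.weights:
            self._tick(f, q)
            self.weights[f] = self._totals[f] / q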
Example #2
    argparser.add_argument('--thread', default=1, type=int, help='thread num')
    argparser.add_argument('--gpu',
                           default=-1,
                           type=int,
                           help='Use id of gpu, -1 if cpu.')

    args, extra_args = argparser.parse_known_args()
    config = Configurable(args.config_file, extra_args)
    torch.set_num_threads(args.thread)
    parser_config = ParserConfigurable(args.parser_config_file)

    dep_vocab = pickle.load(open(parser_config.load_vocab_path, 'rb'))
    dep_vec = dep_vocab.create_placeholder_embs(
        parser_config.pretrained_embeddings_file)

    parser_model = ParserModel(dep_vocab, parser_config, dep_vec)
    parser_model.load_state_dict(torch.load(parser_config.load_model_path, \
                                map_location=lambda storage, loc: storage))

    vocab = creatVocab(config.train_file, config.min_occur_count)
    vec = vocab.load_pretrained_embs(config.pretrained_embeddings_file)
    pickle.dump(vocab, open(config.save_vocab_path, 'wb'))

    config.use_cuda = False
    gpu_id = -1
    if args.gpu >= 0:
        torch.cuda.set_device(args.gpu)
        config.use_cuda = True
        print("GPU ID: ", args.gpu)
        gpu_id = args.gpu
Example #3
def train(save_dir='saved_weights',
          parser_name='parser',
          num_epochs=5,
          max_iters=-1,
          print_every_iters=10):
    """
    Trains the model.

    parser_name is the string prefix used for the filename where the parser is
    saved after every epoch
    """

    # load dataset
    load_existing_dump = False
    print('Loading dataset for training')
    dataset = load_datasets(load_existing_dump)
    # HINT: Look in the ModelConfig class for the model's hyperparameters
    config = dataset.model_config

    print('Loading embeddings')
    word_embeddings, pos_embeddings, dep_embeddings = load_embeddings(config)
    # TODO: For Optional Task, add Twitter and Wikipedia embeddings (do this last)

    if False:
        # Switch to True if you want to print examples of feature types
        print('words: ', len(dataset.word2idx))
        print('examples: ', [(k, v)
                             for i, (k,
                                     v) in enumerate(dataset.word2idx.items())
                             if i < 30])
        print('\n')
        print('POS-tags: ', len(dataset.pos2idx))
        print(dataset.pos2idx)
        print('\n')
        print('dependencies: ', len(dataset.dep2idx))
        print(dataset.dep2idx)
        print('\n')
        print("some hyperparameters")
        print(vars(config))

    # load parser object (used for Task 2)
    parser = ParserModel(config, word_embeddings, pos_embeddings, dep_embeddings)

    # Uncomment the following parser for Task 3
    # parser = AnotherParserModel(config, word_embeddings, pos_embeddings, dep_embeddings)

    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    parser.to(device)

    # set save_dir for model
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # create object for loss function
    loss_fn = F.cross_entropy

    # create object for an optimizer that updates the weights of our parser
    # model.  Be sure to set the learning rate based on the parameters!
    optimizer = optim.Adam(parser.parameters(), lr=config.lr)

    for epoch in range(1, num_epochs + 1):

        ###### Training #####

        # load training set in minibatches
        for i, (train_x, train_y) in enumerate(get_minibatches([dataset.train_inputs,
                                                                dataset.train_targets], \
                                                               config.batch_size,
                                                               is_multi_feature_input=True)):

            word_inputs_batch, pos_inputs_batch, dep_inputs_batch = train_x

            # Convert the numpy data to pytorch's tensor representation.  They're
            # numpy objects initially.  NOTE: In general, when using Pytorch,
            # you want to send them to the device that will do the computation
            # (either a GPU or CPU).  You do this by saying "obj.to(device)"
            # where we've already created the device for you (see above where we
            # did this for the parser).  This ensures your data is running on
            # the processor you expect it to!
            word_inputs_batch = torch.from_numpy(np.array(word_inputs_batch)).to(device)
            pos_inputs_batch = torch.from_numpy(np.array(pos_inputs_batch)).to(device)
            dep_inputs_batch = torch.from_numpy(np.array(dep_inputs_batch)).to(device)

            # Convert the labels from 1-hot vectors to a list of which index was
            # 1, which is what Pytorch expects.  HINT: look for the "argmax"
            # function in numpy.
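            # For example, if a batch of one-hot targets were
            # [[0, 1, 0], [1, 0, 0]], argmax along axis 1 would give [1, 0].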
            labels = np.argmax(train_y, axis=1)

            # Convert the label to pytorch's tensor
            labels = torch.from_numpy(labels).to(device)

            # This is just a quick hack so you can cut training short to see how
            # things are working.  In the final model, make sure to use all the data!
            if max_iters >= 0 and i > max_iters:
                break

            # Some debugging information for you
            if i == 0 and epoch == 1:
                print("size of word inputs: ", word_inputs_batch.size())
                print("size of pos inputs: ", pos_inputs_batch.size())
                print("size of dep inputs: ", dep_inputs_batch.size())
                print("size of labels: ", labels.size())

            #
            #### Backprop & Update weights ####
            #

            # Before the backward pass, use the optimizer object to zero all of
            # the gradients for the variables
            optimizer.zero_grad()

            # For the current batch of inputs, run a full forward pass through the
            # data and get the outputs for each item's prediction.
            # These are the raw outputs, which represent the activations for
            # prediction over valid transitions.
            outputs = parser.forward(word_inputs_batch, pos_inputs_batch, dep_inputs_batch)

            # Compute the loss for the outputs with the labels.  Note that for
            # your particular loss (cross-entropy) it will compute the softmax
            # for you, so you can safely pass in the raw activations.
            loss = loss_fn(outputs, labels)

            # Backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()

            # Perform 1 update using the optimizer
            optimizer.step()

            # Every 10 batches, print out some reporting so we can see convergence
            if i % print_every_iters == 0:
                print ('Epoch: %d [%d], loss: %1.3f, acc: %1.3f' \
                       % (epoch, i, loss.item(),
                          int((outputs.argmax(1)==labels).sum())/len(labels)))

        print("End of epoch")

        # save model
        save_file = os.path.join(save_dir, '%s-epoch-%d.mdl' % (parser_name,
                                                                epoch))
        print('Saving current state of model to %s' % save_file)
        torch.save(parser, save_file)

        ###### Validation #####
        print('Evaluating on validation data after epoch %d' % epoch)

        # Once we're in test/validation time, we need to indicate that we are in
        # "evaluation" mode.  This will turn off things like Dropout so that
        # we're not randomly zero-ing out weights when it might hurt performance
        parser.eval()

        # Compute the current model's UAS score on the validation (development)
        # dataset.  Note that we can use this held-out data to tune the
        # hyper-parameters of the model but we should never look at the test
        # data until we want to report the very final result.
        compute_dependencies(parser, device, dataset.valid_data, dataset)
        valid_UAS = get_UAS(dataset.valid_data)
        print("- validation UAS: {:.2f}".format(valid_UAS * 100.0))

        # Once we're done with test/validation, we need to indicate that we are back in
        # "train" mode.  This will turn back on things like Dropout
        parser.train()

    return parser
class SentParser:
    def __init__(self, parser_model_file = None):
        if parser_model_file:
            self.model = ParserModel(parser_model_file)
        else:
            self.model = ParserModel()

    def __get_scores_for_MST(self, sent, model, map_func, feats, factor):
        scores = {}
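        # scores maps each candidate arc (head, dependent) to a (score, [arc])
        # pair; this is the input format the MST decoder used in predict expects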
        unigrams = make_unigram_features(sent)    
        for d in xrange(1, len(sent)):
            for h in xrange(len(sent)):
                if h != d:
                    vector = make_features_for_parser(sent, unigrams, h, d, map_func, feats)
                    s = model.score(vector)
                    if h != 0 and h == sent[d].unithead and s > 0:
                        s *= factor
                    scores[(h,d)] = (s, [(h,d)])
        return scores


    def train(self, instances, model_file, epochs = 10, factor = 1.0):
        print 'start training ...'
        self.model.make_weights()
        q = 0
        for epoch in xrange(epochs):
            correct = 0
            total = 0      
            for (gold, ch, head_vectors) in iter(instances):
                q += 1
                pred = self.model.predict(head_vectors)
                if gold != pred:
                    self.model.update(head_vectors[gold], head_vectors[pred], q)
                else:
                    correct += 1
                total += 1
            print '\nepoch %d done, %6.2f%% correct' % (epoch,100.0*correct/total)

        self.model.average(q)
        self.model.save(model_file)

    # def train(self, instances, model_file, epochs = 10):
    #     print 'start training ...'
    #     self.model.make_weights()
    #     q = 0
    #     for epoch in xrange(epochs):
    #         correct = 0
    #         total = 0      
    #         for (gold, head_vectors) in iter(instances):
    #             q += 1
    #             pred = self.model.predict(head_vectors)
    #             if gold != pred:
    #                 self.model.update(head_vectors[gold], head_vectors[pred], q)
    #             else:
    #                 correct += 1
    #             total += 1
    #         print '\nepoch %d done, %6.2f%% correct' % (epoch,100.0*correct/total)

    #     self.model.average(q)
    #     self.model.save(model_file)

    def predict(self, sent, feats, factor = 1.0):
        score = self.__get_scores_for_MST(sent, self.model, self.model.map_feature, feats, factor)
        graph = MST(score)
        sent.add_heads(graph.edges())
        return sent
class UnitParser:
    def __init__(self, parser_model_file = None):
        if parser_model_file:
            self.model = ParserModel(parser_model_file)
        else:
            self.model = ParserModel()

    def __get_scores_for_MST(self, sent, unit, model, map_func, feats):
        scores = {}
        unigrams = make_unigram_features(sent)    
        for d in unit:
            for h in unit + [0]:
                if h != d:
                    vector = make_features_for_parser(sent, unigrams, h, d, map_func, feats)
                    scores[(h,d)] = (model.score(vector), [(h,d)])
        return scores

    def train(self, instances, model_file, epochs = 10):
        self.model.make_weights()
        print 'start training ...'
        q = 0
        for epoch in xrange(epochs):
            correct = 0
            total = 0      
            for (gold, head_vectors) in iter(instances):
                q += 1
                pred = self.model.predict(head_vectors)
                if gold != pred:
                    self.model.update(head_vectors[gold], head_vectors[pred], q)
                else:
                    correct += 1
                total += 1
            print '\nepoch %d done, %6.2f%% correct' % (epoch,100.0*correct/total)

        self.model.average(q)
        self.model.save(model_file)


    # def train_CLE(self, instances, model_file, epoch = 10):
    #     self.model.make_weights()
    #     for epoch in xrange(epochs):
    #         correct = 0
    #         total = 0
    #         for sent in instances:

    def train_CLE(self, instances, model_file, epochs = 10):
        print 'start training ...'
        self.model.make_weights()
        q = 0
        for epoch in xrange(epochs):
            correct = 0
            total = 0      
            for (gold_arcs, vectors) in iter(instances):
                q += 1
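                # score every candidate arc, decode the highest-scoring tree
                # with MST, then apply perceptron updates wherever the
                # predicted head of a token differs from the gold head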
                scores = {}
                for a in vectors:
                    scores[a] = (self.model.score(vectors[a]), [a])
                pred_arcs = MST(scores).edges()
                gold_heads = dict([(d, h) for (h, d) in gold_arcs])
                pred_heads = dict([(d, h) for (h, d) in pred_arcs])
                for d in gold_heads:
                    if gold_heads[d] != pred_heads[d]:
                        self.model.update(vectors[(gold_heads[d], d)], vectors[(pred_heads[d], d)], q)
                    else:
                        correct += 1
                    total += 1
            print '\nepoch %d done, %6.2f%% correct' % (epoch,100.0*correct/total)
        self.model.average(q)
        self.model.save(model_file)


    def predict(self, sent, unit, feats):
        score = self.__get_scores_for_MST(sent, unit, self.model, self.model.map_feature, feats)
        graph = MST(score)
        sent.add_unitheads(graph.edges())
        # use to check unit accuracy
        # sent.add_heads(graph.edges())
        return sent
Example #7
    else:
        parser, embeddings, train_data, train_set, dev_data, test_data, \
        pad_action = load_and_preprocess_data(opt,debug)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_batched, max_seq_length, mean_seq_length = batch_train(
        train_data, opt.batchsize, pad_action, parser)
    dev_batched = batch_dev_test(dev_data, opt.batchsize, parser.NULL,
                                 parser.P_NULL, parser)
    test_batched = batch_dev_test(test_data, opt.batchsize, parser.NULL,
                                  parser.P_NULL, parser)

    start = time.time()

    model = ParserModel(embeddings.shape[0], device, parser, pad_action, opt,
                        embeddings.shape[1])
    print("number of pars:{}".format(
        sum(p.numel() for p in model.parameters() if p.requires_grad)))
    if opt.pretrained:
        state_dict = torch.load(opt.mainpath + '/output/' +
                                str(opt.modelpath) + "model.weights" +
                                "pretrained")
        model.load_state_dict(state_dict['model'])
        del state_dict

    if opt.multigpu:
        print('multi')
        print(torch.cuda.device_count())
        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
Example #8
    with open(opt2.mainpath+'/vocab/'+str(opt2.model_name)+'.pkl', 'rb') as f:
        parser = pickle.load(f)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    print(80 * "=")
    print("INITIALIZING")
    print(80 * "=")
    start = time.time()
    debug = False
    test_data, pad_action = load_and_preprocess_data_test(opt, parser, debug)
    
    test_batched = batch_dev_test(test_data, opt.batchsize, parser.NULL, parser.P_NULL,
                                  parser, no_sort=False)

    model = ParserModel(parser.embedding_shape, device, parser, pad_action, opt)
    
    model.load_state_dict(checkpoint['model'],strict=False)
    
    model = model.to(device)
    print("took {:.2f} seconds\n".format(time.time() - start))
    
    print(80 * "=")
    print("TESTING")
    print(80 * "=")
    print("Final evaluation on test set", )
    model.eval()

    UAS, LAS = validate(model, parser, test_batched, test_data, device, opt.batchsize, pad_action['P'], opt)
    print("- test UAS: {:.2f}".format(UAS * 100.0))
    print("- test LAS: {:.2f}".format(LAS * 100.0))
Example #9
def train(save_dir='saved_weights',
          parser_name='parser',
          num_epochs=5,
          max_iters=-1,
          print_every_iters=10):
    """
    Trains the model.

    parser_name is the string prefix used for the filename where the parser is
    saved after every epoch
    """

    # load dataset
    load_existing_dump = False
    print('Loading dataset for training')
    dataset = load_datasets(load_existing_dump)
    config = dataset.model_config

    print('Loading embeddings')
    word_embeddings, pos_embeddings, dep_embeddings = load_embeddings(config)

    if False:
        # Switch to True if you want to print examples of feature types
        print('words: ', len(dataset.word2idx))
        print('examples: ',
              [(k, v)
               for i, (k, v) in enumerate(dataset.word2idx.items()) if i < 30])
        print('\n')
        print('POS-tags: ', len(dataset.pos2idx))
        print(dataset.pos2idx)
        print('\n')
        print('dependencies: ', len(dataset.dep2idx))
        print(dataset.dep2idx)
        print('\n')
        print("some hyperparameters")
        print(vars(config))

    # load parser object
    parser = ParserModel(config, word_embeddings, pos_embeddings,
                         dep_embeddings)
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    parser.to(device)

    # set save_dir for model
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # create object for loss function
    loss_fn = nn.CrossEntropyLoss()

    # create object for an optimizer that updates the weights of the parser model.
    optimizer = torch.optim.SGD(parser.parameters(), lr=config.lr)

    loss_list = []
    acc_list = []
    uas_list = []
    for epoch in range(1, num_epochs + 1):

        ###### Training #####

        # load training set in minibatches
        for i, (train_x, train_y) in enumerate(get_minibatches([dataset.train_inputs,
                                                                dataset.train_targets], \
                                                               config.batch_size,
                                                               is_multi_feature_input=True)):

            word_inputs_batch, pos_inputs_batch, dep_inputs_batch = train_x

            # Convert the numpy data to pytorch's tensor representation.  They're
            # numpy objects initially.
            word_inputs_batch = torch.tensor(word_inputs_batch).to(device)
            pos_inputs_batch = torch.tensor(pos_inputs_batch).to(device)
            dep_inputs_batch = torch.tensor(dep_inputs_batch).to(device)

            # Convert the labels from 1-hot vectors to a list of which index was
            # 1, which is what Pytorch expects.
            labels = np.argmax(train_y, axis=1)

            # Convert the label to pytorch's tensor
            labels = torch.tensor(labels).to(device)

            if max_iters >= 0 and i > max_iters:
                break
            if i == 0 and epoch == 1:
                print("size of word inputs: ", word_inputs_batch.size())
                print("size of pos inputs: ", pos_inputs_batch.size())
                print("size of dep inputs: ", dep_inputs_batch.size())
                print("size of labels: ", labels.size())

            #### Backprop & Update weights ####

            # Before the backward pass, use the optimizer object to zero all of
            # the gradients for the variables

            optimizer.zero_grad()

            # For the current batch of inputs, run a full forward pass through the
            # data and get the outputs for each item's prediction.
            # These are the raw outputs, which represent the activations for
            # prediction over valid transitions.

            outputs = parser(word_inputs_batch, pos_inputs_batch,
                             dep_inputs_batch)

            # Compute the loss for the outputs with the labels.

            loss = loss_fn(outputs, labels)

            # Backward pass: compute gradient of the loss with respect to model parameters

            loss.backward()
            # Perform 1 update using the optimizer

            optimizer.step()
            # Every 10 batches, print out some reporting so I can see convergence
            if i % print_every_iters == 0:
                print ('Epoch: %d [%d], loss: %1.3f, acc: %1.3f' \
                       % (epoch, i, loss.item(),
                          int((outputs.argmax(1)==labels).sum())/len(labels)))

        print("End of epoch")

        # save model
        save_file = os.path.join(save_dir,
                                 '%s-epoch-%d.mdl' % (parser_name, epoch))
        print('Saving current state of model to %s' % save_file)
        torch.save(parser, save_file)

        ###### Validation #####
        print('Evaluating on validation data after epoch %d' % epoch)

        # Once we're in test/validation time, we need to indicate that we are in
        # "evaluation" mode.  This will turn off things like Dropout so that
        # we're not randomly zero-ing out weights when it might hurt performance
        parser.eval()

        # Compute the current model's UAS score on the validation (development)
        # dataset.
        compute_dependencies(parser, device, dataset.valid_data, dataset)
        valid_UAS = get_UAS(dataset.valid_data)
        print("- validation UAS: {:.2f}".format(valid_UAS * 100.0))
        loss_list.append(loss.item())
        acc_list.append(int((outputs.argmax(1) == labels).sum()) / len(labels))
        uas_list.append(valid_UAS * 100.0)

        # Once we're done with test/validation, we need to indicate that we are back in
        # "train" mode.  This will turn back on things like Dropout
        parser.train()

    score = pd.DataFrame({'loss': loss_list, 'acc': acc_list, 'uas': uas_list})
    score.to_csv(r"score.csv", index=True, header=True)

    return parser
Example #10
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--config_file', default='default.cfg')
    argparser.add_argument('--parser_config_file', default='parser.cfg')
    argparser.add_argument('--thread', default=1, type=int, help='thread num')
    argparser.add_argument('--gpu',
                           default=-1,
                           type=int,
                           help='Use id of gpu, -1 if cpu.')

    args, extra_args = argparser.parse_known_args()
    config = Configurable(args.config_file, extra_args)
    parser_config = ParserConfigurable(args.parser_config_file)
    torch.set_num_threads(args.thread)

    dep_vocab = pickle.load(open(parser_config.load_vocab_path, 'rb'))
    parser_model = ParserModel(dep_vocab, parser_config)
    dump_model = torch.load(parser_config.load_model_path,
                            map_location=lambda storage, loc: storage)
    dep_vec = dump_model["extword_embed.weight"].detach().cpu().numpy()
    del dump_model["extword_embed.weight"]
    parser_model.load_state_dict(dump_model)
    parser_extembed = ExtWord(dep_vocab, parser_config, dep_vec)
    torch.save(parser_model.state_dict(), config.save_model_path + ".synbasic")
    torch.save(parser_extembed.state_dict(),
               config.save_model_path + ".synvec")
    pickle.dump(dep_vocab, open(config.save_vocab_path + ".syn", 'wb'))

    vocab = creatVocab(config.train_file, config.min_occur_count)
    vec = vocab.load_initialize_embs(config.pretrained_embeddings_file)
    pickle.dump(vocab, open(config.save_vocab_path, 'wb'))
Example #11
if __name__ == "__main__":
    # Note: Set debug to False, when training on entire corpus
    # debug = True
    debug = False

    assert (torch.__version__ == "1.0.0"), "Please install torch version 1.0.0"

    print(80 * "=")
    print("INITIALIZING")
    print(80 * "=")
    parser, embeddings, train_data, dev_data, test_data = load_and_preprocess_data(
        debug)

    start = time.time()
    model = ParserModel(embeddings)
    parser.model = model
    print("took {:.2f} seconds\n".format(time.time() - start))

    print(80 * "=")
    print("TRAINING")
    print(80 * "=")
    output_dir = "results/{:%Y%m%d_%H%M%S}/".format(datetime.now())
    output_path = output_dir + "model.weights"

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    train(parser,
          train_data,
          dev_data,
Example #12
def train(save_dir='saved_weights',
          parser_name='parser',
          num_epochs=5,
          max_iters=-1,
          print_every_iters=10,
          layer_num=1):
    """
    Trains the model.

    parser_name is the string prefix used for the filename where the parser is
    saved after every epoch
    """

    # load dataset
    load_existing_dump = False
    print('Loading dataset for training')
    dataset = load_datasets(load_existing_dump)
    # HINT: Look in the ModelConfig class for the model's hyperparameters
    config = dataset.model_config

    print('Loading embeddings')
    word_embeddings, pos_embeddings, dep_embeddings = load_embeddings(config)
    # TODO: For Task 3, add Twitter and Wikipedia embeddings (do this last)

    if False:
        # Switch to True if you want to print examples of feature types
        print('words: ', len(dataset.word2idx))
        print('examples: ', [(k, v) for i, (k, v) in enumerate(dataset.word2idx.items()) if i < 30])
        print('\n')
        print('POS-tags: ', len(dataset.pos2idx))
        print(dataset.pos2idx)
        print('\n')
        print('dependencies: ', len(dataset.dep2idx))
        print(dataset.dep2idx)
        print('\n')
        print("some hyperparameters")
        print(vars(config))

    # load parser object
    if layer_num <= 1:
        parser = ParserModel(config, word_embeddings, pos_embeddings, dep_embeddings)
    else:
        parser = MultiLayer_ParserModel(config, word_embeddings, pos_embeddings, dep_embeddings, layer_num)
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    parser.to(device)

    # set save_dir for model
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # create object for loss function
    loss_fn = nn.CrossEntropyLoss()

    # create object for an optimizer that updates the weights of our parser model
    optimizer = torch.optim.Adam(parser.parameters(), lr=config.lr)

    # initialize lists to plot data
    loss_list, acc_list, uas_list = [], [], []

    for epoch in range(1, num_epochs + 1):

        ###### Training #####

        # load training set in minibatches
        for i, (train_x, train_y) in enumerate(get_minibatches([dataset.train_inputs, dataset.train_targets], config.batch_size,
                                                               is_multi_feature_input=True)):

            word_inputs_batch, pos_inputs_batch, dep_inputs_batch = train_x

            # Convert the numpy data to pytorch's tensor representation.
            word_inputs_batch = torch.tensor(word_inputs_batch).to(device)
            pos_inputs_batch = torch.tensor(pos_inputs_batch).to(device)
            dep_inputs_batch = torch.tensor(dep_inputs_batch).to(device)

            # Convert the labels from 1-hot vectors to a list of which index was 1, then to pytorch tensor
            labels = torch.tensor(np.argmax(train_y, axis=1)).to(device)

            # This is just a quick hack so you can cut training short to see how things are working
            if max_iters >= 0 and i > max_iters:
                break

            # Some debugging information for you
            if i == 0 and epoch == 1:
                print("size of word inputs: ", word_inputs_batch.size())
                print("size of pos inputs: ", pos_inputs_batch.size())
                print("size of dep inputs: ", dep_inputs_batch.size())
                print("size of labels: ", labels.size())

            #### Backprop & Update weights ####

            # Before the backward pass, use the optimizer object to zero all of the gradients for the variables
            optimizer.zero_grad()

            # For the current batch of inputs, run a full forward pass through the data and get the outputs for each item's prediction
            outputs = parser(word_inputs_batch, pos_inputs_batch, dep_inputs_batch)

            # Compute the loss for the outputs with the labels
            loss = loss_fn(outputs, labels)

            # Backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()

            # Perform 1 update using the optimizer
            optimizer.step()

            # Every 10 batches, print out some reporting so we can see convergence
            if i % print_every_iters == 0:
                print ('Epoch: %d [%d], loss: %1.3f, acc: %1.3f' \
                       % (epoch, i, loss.item(),
                          int((outputs.argmax(1)==labels).sum())/len(labels)))

        print("End of epoch")

        # save model
        save_file = os.path.join(save_dir, '%s-epoch-%d.mdl' % (parser_name,
                                                                epoch))
        print('Saving current state of model to %s' % save_file)
        torch.save(parser, save_file)

        ###### Validation #####
        print('Evaluating on validation data after epoch %d' % epoch)

        # Once we're in test/validation time, we need to indicate that we are in "evaluation" mode
        parser.eval()

        # Compute the current model's UAS score on the validation (development) dataset
        compute_dependencies(parser, device, dataset.valid_data, dataset)
        valid_UAS = get_UAS(dataset.valid_data)
        print("- validation UAS: {:.2f}".format(valid_UAS * 100.0))

        # Append the computed values to plotting lists
        loss_list.append(loss.item())
        acc_list.append(int((outputs.argmax(1)==labels).sum())/len(labels))
        uas_list.append(valid_UAS*100.0)

        # Once we're done with test/validation, we need to indicate that we are back in "train" mode
        parser.train()

    # Plot the data!
    epoch_size = np.arange(1, num_epochs + 1)

    loss_plot = {"Epoch":epoch_size, "Loss":np.array(loss_list)}
    seaborn.lineplot(x="Epoch", y="Loss", data=loss_plot)
    plot.xlabel("Epoch")
    plot.ylabel("Loss")
    plot.title("Training Loss vs Time")
    plot.show()

    acc_plot = {"Epoch":epoch_size, "Accuracy":np.array(acc_list)}
    seaborn.lineplot(x="Epoch", y="Accuracy", data=acc_plot)
    plot.xlabel("Epoch")
    plot.ylabel("Accuracy")
    plot.title("Training Accuracy vs Time")
    plot.show()

    uas_plot = {"Epoch":epoch_size, "UAS":np.array(uas_list)}
    seaborn.lineplot(x="Epoch", y="UAS", data=uas_plot)
    plot.xlabel("Epoch")
    plot.ylabel("UAS")
    plot.title("Unlabeled Attachment Score vs Time")
    plot.show()

    return parser
Example #13
        # This includes the representation for "padding" and "OOV"
        embedding_matrix[i] = embedding_vector

from model import ParserModel

n_features = len(train_x[0][0])
n_pos = len(parser.pos2id)
n_tags = len(parser.dep2id)
tag_size = 20  # size of embeddings for POS and DEPRELs
n_actions = 2 * n_tags + 1  # L- + R- + S
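# e.g. with 40 dependency labels: 40 LEFT-ARC + 40 RIGHT-ARC + 1 SHIFT = 81 actions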
hidden_size = 200

model = ParserModel(embeddings=embedding_matrix,
                    n_features=n_features,
                    n_pos=n_pos,
                    n_tags=n_tags,
                    tag_size=tag_size,
                    n_actions=n_actions,
                    hidden_size=hidden_size)

# Compile the model
from tensorflow import keras

model.compile(
    # Optimizer
    optimizer=keras.optimizers.Adam(),
    # Loss function to minimize
    loss=keras.losses.SparseCategoricalCrossentropy(name='train_loss'),
    # List of metrics to monitor
    metrics=[keras.metrics.SparseCategoricalAccuracy(name='train acc')],
)
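For completeness, a hypothetical training call for the compiled model; it assumes train_x and train_y have already been flattened into a feature-index matrix and a vector of gold transition ids (the exact shapes, batching, and hyperparameters are not shown in the snippet, so treat this purely as a sketch).

# Hypothetical usage: argument values are placeholders, not from the source.
history = model.fit(
    train_x,            # feature indices, one row per parser configuration
    train_y,            # integer transition ids, as SparseCategoricalCrossentropy expects
    batch_size=32,
    epochs=10,
    validation_split=0.1,
)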