def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", help="Input .npy training data", required=True)
    parser.add_argument("-s", "--seqlen", help="Sequence length")
    args = parser.parse_args()

    if not args.seqlen:
        sequence_length = DEFAULT_SEQ_LEN
    else:
        sequence_length = int(args.seqlen)

    df = pd.read_csv("data/songdata.zip")
    path = Path("chars.pkl")
    chars = list()
    if path.is_file():
        chars = util.load_vocab(path)
        print("Loaded from file")
    else:
        vocab = set()
        for song in df["text"]:
            vocab.update(song)
        chars = list(vocab)
        util.write_vocab(path, chars)
        print("Generated from source")
        
    vocab_size = len(chars)
    print("Vocab size:", vocab_size)
    
    data = np.load(args.input)
    X = data[:, :-1]
    Y = data[:, -1]

    # 4-fold cross-validation: each fold trains a fresh model on 3/4 of the
    # data and reports perplexity on the held-out quarter.
    kfold = KFold(n_splits=4)
    scores = np.zeros((4,))
    for (i, (train_index, test_index)) in enumerate(kfold.split(X)):
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]

        checkpoint = ModelCheckpoint("weights/weights_char_k{}_{}.h5".format(i, "{epoch:01d}"),
            monitor='loss',
            verbose=1,
            mode='auto',
            period=1,
            save_weights_only=True)

        model = build_model(sequence_length, vocab_size)
        model.fit_generator(generate_batches(X_train, Y_train, BATCH_SIZE, vocab_size),
                            samples_per_epoch=300, epochs=10,
                            callbacks=[checkpoint])
        perp = perplexity_score(model, X_test, Y_test, vocab_size)
        print("Local perplexity:", perp)
        scores[i] = perp

        del X_train, X_test, Y_train, Y_test
        del model
        gc.collect()
    print("Perplexity: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--seqlen", help="Sequence length")
    parser.add_argument("-o",
                        "--output",
                        help="Output .npy path",
                        required=True)
    args = parser.parse_args()

    if not args.seqlen:
        seq_len = DEFAULT_SEQ_LEN
    else:
        seq_len = int(args.seqlen)

    df = pd.read_csv(SRC_PATH)
    path = Path("chars.pkl")
    chars = list()
    if path.is_file():
        chars = util.load_vocab(path)
        print("Loaded vocabulary from file")
    else:
        vocab = set()
        for song in df["text"]:
            vocab.update(song)
        chars = list(vocab)
        util.write_vocab(path, chars)
        print("Generated character vocabulary as chars.pkl")

    vocab_size = len(chars)
    print("Vocab size:", vocab_size)
    char2idx = {char: i for i, char in enumerate(chars)}

    print("Generating training samples...")
    buffer_size = BUFFER_INC
    buffer = np.zeros((buffer_size, seq_len + 1), dtype=np.int64)
    i = 0
    for song in tqdm(df['text']):
        for xs in build_samples(song, seq_len):
            buffer[i] = [char2idx[x] for x in xs]
            i += 1

            if i >= buffer_size:
                buffer_size += BUFFER_INC
                buffer.resize((buffer_size, seq_len + 1))
    buffer.resize((i, seq_len + 1))
    print("Saving to {}...".format(args.output))
    np.save(args.output, buffer)
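build_samples is referenced above but not shown in this listing. A minimal sketch, assuming it slides a window of seq_len + 1 characters over each song so that the final character of every window becomes the prediction target:

def build_samples(song, seq_len):
    """Yield overlapping character windows of length seq_len + 1."""
    for start in range(len(song) - seq_len):
        yield song[start:start + seq_len + 1]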
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--threshold", help="Threshold to drop words")
    parser.add_argument("-o",
                        "--output",
                        help="Output .pkl path",
                        required=True)
    args = parser.parse_args()

    if not args.threshold:
        threshold = DEFAULT_THRESH
    else:
        threshold = float(args.threshold)
    print("Threshold:", threshold)

    print("Calculating word frequencies...")
    freqs = dict()
    with open(SRC_PATH, "r") as f:
        for line in f:
            for token in line.rstrip().split(" "):
                if token not in freqs:
                    freqs[token] = 1
                else:
                    freqs[token] += 1

    total_words = len(freqs)  # number of unique word types
    discard = set()
    for word, count in freqs.items():
        z = count / total_words
        # Subsampling-style score: words whose relative frequency z is below
        # roughly 2.6x the threshold get p >= 1 and are always discarded,
        # while very common words usually survive.
        p = (math.sqrt(z / threshold) + 1) * (threshold / z)
        if random() <= p:
            discard.add(word)

    print("Unique words:", total_words)
    print("Discarded words:", len(discard))
    print("Target vocab size:", total_words - len(discard))

    words = util.load_vocab(SRC_VOCAB_PATH)
    new_words = list(set(words).difference(discard))
    util.write_vocab(args.output, new_words)
    print("Vocab written to:", args.output)
Example #4
def run(data_file, is_train=False, **args):
    is_test = not is_train
    batchsize = args['batchsize']
    model_name = args['model_name']
    optimizer_name = args['optimizer']
    save_dir = args['save_dir']
    print(args)
    if save_dir[-1] != '/':
        save_dir = save_dir + '/'

    # TODO: check that save_dir exists
    if not os.path.isdir(save_dir):
        err_msg = 'There is no dir : {}\n'.format(save_dir)
        err_msg += '##############################\n'
        err_msg += '## Please run the following:\n'
        err_msg += '## $ mkdir {}\n'.format(save_dir)
        err_msg += '##############################\n'
        raise ValueError(err_msg)

    save_name = args['save_name']
    if save_name == '':
        save_name = '_'.join([model_name, optimizer_name])

    save_name = save_dir + save_name

    xp = cuda.cupy if args['gpu'] >= 0 else np
    efficient_gpu = False
    if args['gpu'] >= 0:
        cuda.get_device(args['gpu']).use()
        xp.random.seed(1234)
        efficient_gpu = args.get('efficient_gpu', False)

    def to_gpu(x):
        if args['gpu'] >= 0:
            return chainer.cuda.to_gpu(x)
        return x

    # load files
    dev_file = args['dev_file']
    test_file = args['test_file']
    delimiter = args['delimiter']
    input_idx = list(map(int, args['input_idx'].split(',')))
    output_idx = list(map(int, args['output_idx'].split(',')))
    word_input_idx = input_idx[0]  # NOTE: word_idx is first column!
    additional_input_idx = input_idx[1:]
    sentences_train = []
    if is_train:
        sentences_train = util.read_conll_file(filename=data_file,
                                               delimiter=delimiter)
        if len(sentences_train) == 0:
            s = str(len(sentences_train))
            err_msg = 'Invalid training sizes: {} sentences. '.format(s)
            raise ValueError(err_msg)
    else:
        # Predict
        sentences_train = util.read_raw_file(filename=data_file,
                                             delimiter=u' ')

    # sentences_train = sentences_train[:100]

    sentences_dev = []
    sentences_test = []
    if dev_file:
        sentences_dev = util.read_conll_file(dev_file, delimiter=delimiter)
    if test_file:
        sentences_test = util.read_conll_file(test_file, delimiter=delimiter)

    save_vocab = save_name + '.vocab'
    save_vocab_char = save_name + '.vocab_char'
    save_tags_vocab = save_name + '.vocab_tag'
    save_train_config = save_name + '.train_config'

    # TODO: check unknown POS tags
    # TODO: compute unk words
    vocab_adds = []
    if is_train:
        sentences_words_train = [[w_obj[word_input_idx] for w_obj in sentence]
                                 for sentence in sentences_train]
        vocab = util.build_vocab(sentences_words_train)
        vocab_char = util.build_vocab(util.flatten(sentences_words_train))
        vocab_tags = util.build_tag_vocab(sentences_train)

        # Additional setup
        for ad_feat_id in additional_input_idx:
            sentences_additional_train = [[feat_obj[ad_feat_id] for feat_obj in sentence]
                                          for sentence in sentences_train]
            vocab_add = util.build_vocab(sentences_additional_train)
            vocab_adds.append(vocab_add)
    elif is_test:
        vocab = util.load_vocab(save_vocab)
        vocab_char = util.load_vocab(save_vocab_char)
        vocab_tags = util.load_vocab(save_tags_vocab)

    if args.get('word_emb_file', False):
        # set Pre-trained embeddings
        # emb_file = './emb/glove.6B.100d.txt'
        emb_file = args['word_emb_file']
        word_emb_vocab_type = args.get('word_emb_vocab_type')

        def assert_word_emb_shape(shape1, shape2):
            err_msg = '''Pre-trained embedding size is not equal to `--n_word_emb` ({} != {})'''
            if shape1 != shape2:
                err_msg = err_msg.format(str(shape1), str(shape2))
                raise ValueError(err_msg)

        def assert_no_emb(word_vecs):
            err_msg = '''There is no-embeddings! Please check your file `--word_emb_file`'''
            if word_vecs.shape[0] == 0:
                raise ValueError(err_msg)

        if word_emb_vocab_type == 'replace_all':
            # replace all vocab by Pre-trained embeddings
            word_vecs, vocab_glove = util.load_glove_embedding_include_vocab(emb_file)
            vocab = vocab_glove
        elif word_emb_vocab_type == 'replace_only':
            word_ids, word_vecs = util.load_glove_embedding(emb_file, vocab)
            assert_no_emb(word_vecs)

        elif word_emb_vocab_type == 'additional':
            word_vecs, vocab_glove = util.load_glove_embedding_include_vocab(emb_file)
            additional_vecs = []
            for word, word_idx in sorted(vocab_glove.items(), key=lambda x: x[1]):
                if word not in vocab:
                    vocab[word] = len(vocab)
                    additional_vecs.append(word_vecs[word_idx])
            additional_vecs = np.array(additional_vecs, dtype=np.float32)

    if args.get('vocab_file', False):
        vocab_file = args['vocab_file']
        vocab = util.load_vocab(vocab_file)

    if args.get('vocab_char_file', False):
        vocab_char_file = args['vocab_char_file']
        vocab_char = util.load_vocab(vocab_char_file)

    vocab_tags_inv = dict((v, k) for k, v in vocab_tags.items())
    PAD_IDX = vocab[PADDING]
    UNK_IDX = vocab[UNKWORD]

    CHAR_PAD_IDX = vocab_char[PADDING]
    CHAR_UNK_IDX = vocab_char[UNKWORD]

    tmp_xp = xp
    if efficient_gpu:
        tmp_xp = np  # use CPU (numpy)

    def parse_to_word_ids(sentences, word_input_idx, vocab):
        return util.parse_to_word_ids(sentences, xp=tmp_xp, vocab=vocab,
                                      UNK_IDX=UNK_IDX, idx=word_input_idx)

    def parse_to_char_ids(sentences):
        return util.parse_to_char_ids(sentences, xp=tmp_xp, vocab_char=vocab_char,
                                      UNK_IDX=CHAR_UNK_IDX, idx=word_input_idx)

    def parse_to_tag_ids(sentences):
        return util.parse_to_tag_ids(sentences, xp=tmp_xp, vocab=vocab_tags,
                                     UNK_IDX=-1, idx=-1)

    x_train = parse_to_word_ids(sentences_train, word_input_idx, vocab)
    x_char_train = parse_to_char_ids(sentences_train)
    y_train = parse_to_tag_ids(sentences_train)
    x_train_additionals = [parse_to_word_ids(sentences_train, ad_feat_id, vocab_adds[i])
                           for i, ad_feat_id in enumerate(additional_input_idx)]

    x_dev = parse_to_word_ids(sentences_dev, word_input_idx, vocab)
    x_char_dev = parse_to_char_ids(sentences_dev)
    y_dev = parse_to_tag_ids(sentences_dev)
    x_dev_additionals = [parse_to_word_ids(sentences_dev, ad_feat_id, vocab_adds[i])
                         for i, ad_feat_id in enumerate(additional_input_idx)]

    y_dev_cpu = [[w[-1] for w in sentence]
                 for sentence in sentences_dev]
    # tag_names = []
    tag_names = list(set([tag[2:] if len(tag) >= 2 else tag[0] for tag in vocab_tags.keys()]))

    x_test = parse_to_word_ids(sentences_test, word_input_idx, vocab)
    x_char_test = parse_to_char_ids(sentences_test)
    y_test = parse_to_tag_ids(sentences_test)
    x_test_additionals = [parse_to_word_ids(sentences_test, ad_feat_id, vocab_adds[i])
                          for i, ad_feat_id in enumerate(additional_input_idx)]

    cnt_train_unk = sum([tmp_xp.sum(d == UNK_IDX) for d in x_train])
    cnt_train_word = sum([d.size for d in x_train])
    unk_train_unk_rate = float(cnt_train_unk) / cnt_train_word

    cnt_dev_unk = sum([tmp_xp.sum(d == UNK_IDX) for d in x_dev])
    cnt_dev_word = sum([d.size for d in x_dev])
    unk_dev_unk_rate = float(cnt_dev_unk) / max(cnt_dev_word, 1)

    logging.info('train:' + str(len(x_train)))
    logging.info('dev  :' + str(len(x_dev)))
    logging.info('test :' + str(len(x_test)))
    logging.info('vocab     :' + str(len(vocab)))
    logging.info('vocab_tags:' + str(len(vocab_tags)))
    logging.info('unk count (train):' + str(cnt_train_unk))
    logging.info('unk rate  (train):' + str(unk_train_unk_rate))
    logging.info('cnt all words (train):' + str(cnt_train_word))
    logging.info('unk count (dev):' + str(cnt_dev_unk))
    logging.info('unk rate  (dev):' + str(unk_dev_unk_rate))
    logging.info('cnt all words (dev):' + str(cnt_dev_word))
    # show model config
    logging.info('######################')
    logging.info('## Model Config')
    logging.info('model_name:' + str(model_name))
    logging.info('batchsize:' + str(batchsize))
    logging.info('optimizer:' + str(optimizer_name))
    # Save model config
    logging.info('######################')
    logging.info('## Model Save Config')
    logging.info('save_dir :' + str(save_dir))

    # save vocab
    logging.info('save_vocab        :' + save_vocab)
    logging.info('save_vocab_char   :' + save_vocab_char)
    logging.info('save_tags_vocab   :' + save_tags_vocab)
    logging.info('save_train_config :' + save_train_config)

    init_emb = None

    if is_train:
        util.write_vocab(save_vocab, vocab)
        util.write_vocab(save_vocab_char, vocab_char)
        util.write_vocab(save_tags_vocab, vocab_tags)
        util.write_vocab(save_train_config, args)

    n_vocab_add = [len(_vadd) for _vadd in vocab_adds]

    net = BiLSTM_CNN_CRF(n_vocab=len(vocab), n_char_vocab=len(vocab_char),
                         emb_dim=args['n_word_emb'],
                         hidden_dim=args['n_hidden'],
                         n_layers=args['n_layer'], init_emb=init_emb,
                         char_input_dim=args['n_char_emb'],
                         char_hidden_dim=args['n_char_hidden'],
                         n_label=len(vocab_tags),
                         n_add_feature_dim=args['n_add_feature_emb'],
                         n_add_feature=len(n_vocab_add),
                         n_vocab_add=n_vocab_add,
                         use_cudnn=args['use_cudnn'])
    my_cudnn(args['use_cudnn'])

    if args.get('word_emb_file', False):

        if word_emb_vocab_type == 'replace_all':
            # replace all vocab by Pre-trained embeddings
            assert_word_emb_shape(word_vecs.shape[1], net.word_embed.W.shape[1])
            net.word_embed.W.data = word_vecs[:]
        elif word_emb_vocab_type == 'replace_only':
            assert_no_emb(word_vecs)
            assert_word_emb_shape(word_vecs.shape[1], net.word_embed.W.shape[1])
            net.word_embed.W.data[word_ids] = word_vecs[:]

        elif word_emb_vocab_type == 'additional':
            assert_word_emb_shape(word_vecs.shape[1], net.word_embed.W.shape[1])
            v_size = additional_vecs.shape[0]
            net.word_embed.W.data[-v_size:] = additional_vecs[:]

    if args.get('return_model', False):
        return net

    if args['gpu'] >= 0:
        net.to_gpu()

    init_alpha = args['init_lr']
    if optimizer_name == 'adam':
        opt = optimizers.Adam(alpha=init_alpha, beta1=0.9, beta2=0.9)
    elif optimizer_name == 'adadelta':
        opt = optimizers.AdaDelta()
    elif optimizer_name == 'sgd_mom':
        opt = optimizers.MomentumSGD(lr=init_alpha, momentum=0.9)
    elif optimizer_name == 'sgd':
        opt = optimizers.SGD(lr=init_alpha)
    else:
        raise ValueError('Unknown optimizer: {}'.format(optimizer_name))

    opt.setup(net)
    opt.add_hook(chainer.optimizer.GradientClipping(5.0))

    def eval_loop(x_data, x_char_data, y_data, x_train_additionals=[]):
        # dev or test
        net.set_train(train=False)
        iteration_list = range(0, len(x_data), batchsize)
        # perm = np.random.permutation(len(x_data))
        sum_loss = 0.0
        predict_lists = []
        for i_index, index in enumerate(iteration_list):
            x = x_data[index:index + batchsize]
            x_char = x_char_data[index:index + batchsize]
            target_y = y_data[index:index + batchsize]

            if efficient_gpu:
                x = [to_gpu(_) for _ in x]
                x_char = [[to_gpu(_) for _ in words] for words in x_char]
                target_y = [to_gpu(_) for _ in target_y]

            x_additional = []
            if len(x_train_additionals):
                x_additional = [[to_gpu(_) for _ in x_ad[index:index + batchsize]]
                                for x_ad in x_train_additionals]

            output = net(x_data=x, x_char_data=x_char, x_additional=x_additional)
            predict, loss = net.predict(output, target_y)

            sum_loss += loss.data
            predict_lists.extend(predict)

        _, predict_tags = zip(*predict_lists)
        predicted_results = []
        for predict in predict_tags:
            predicted = [vocab_tags_inv[tag_idx] for tag_idx in to_cpu(predict)]
            predicted_results.append(predicted)

        return predict_lists, sum_loss, predicted_results

    if args['model_filename']:
        model_filename = args['model_filename']
        serializers.load_hdf5(model_filename, net)

    if is_test:
        # predict
        # model_filename = args['model_filename']
        # model_filename = save_dir + model_filename
        # serializers.load_hdf5(model_filename, net)
        vocab_tags_inv = dict([(v, k) for k, v in vocab_tags.items()])
        x_predict = x_train
        x_char_predict = x_char_train
        y_predict = y_train

        if dev_file:
            predict_dev, loss_dev, predict_dev_tags = eval_loop(x_dev, x_char_dev, y_dev)
            gold_predict_pairs = [y_dev_cpu, predict_dev_tags]
            result, phrase_info = util.conll_eval(
                gold_predict_pairs, flag=False, tag_class=tag_names)
            all_result = result['All_Result']
            print('all_result:', all_result)

        predict_pairs, _, _tmp = eval_loop(x_predict, x_char_predict, y_predict)
        _, predict_tags = zip(*predict_pairs)
        predicted_output = args['predicted_output']
        predicted_results = []
        for predict in predict_tags:
            predicted = [vocab_tags_inv[tag_idx] for tag_idx in to_cpu(predict)]
            predicted_results.append(predicted)

        with open(predicted_output, 'w') as f:
            for predicted in predicted_results:
                for tag in predicted:
                    f.write(tag + '\n')
                f.write('\n')

        return False

    tmax = args['max_iter']
    t = 0.0
    prev_dev_accuracy = 0.0
    prev_dev_f = 0.0
    for epoch in range(args['max_iter']):

        # train
        net.set_train(train=True)
        iteration_list = range(0, len(x_train), batchsize)
        perm = np.random.permutation(len(x_train))
        sum_loss = 0.0
        predict_train = []
        for i_index, index in enumerate(iteration_list):
            data = [(x_train[i], x_char_train[i], y_train[i])
                    for i in perm[index:index + batchsize]]
            x, x_char, target_y = zip(*data)

            x_additional = []
            if len(x_train_additionals):
                x_additional = [[to_gpu(x_ad[add_i]) for add_i in perm[index:index + batchsize]]
                                for x_ad in x_train_additionals]

            if efficient_gpu:
                x = [to_gpu(_) for _ in x]
                x_char = [[to_gpu(_) for _ in words] for words in x_char]
                target_y = [to_gpu(_) for _ in target_y]

            output = net(x_data=x, x_char_data=x_char, x_additional=x_additional)
            predict, loss = net.predict(output, target_y)

            # loss
            sum_loss += loss.data

            # update
            net.zerograds()
            loss.backward()
            opt.update()

            predict_train.extend(predict)

        # Evaluation
        train_accuracy = util.eval_accuracy(predict_train)

        logging.info('epoch:' + str(epoch))
        logging.info(' [train]')
        logging.info('  loss     :' + str(sum_loss))
        logging.info('  accuracy :' + str(train_accuracy))

        # Dev
        predict_dev, loss_dev, predict_dev_tags = eval_loop(
            x_dev, x_char_dev, y_dev, x_dev_additionals)

        gold_predict_pairs = [y_dev_cpu, predict_dev_tags]
        result, phrase_info = util.conll_eval(gold_predict_pairs, flag=False, tag_class=tag_names)
        all_result = result['All_Result']

        # Evaluation
        dev_accuracy = util.eval_accuracy(predict_dev)
        logging.info(' [dev]')
        logging.info('  loss     :' + str(loss_dev))
        logging.info('  accuracy :' + str(dev_accuracy))
        logging.info('  f_measure :' + str(all_result[-1]))

        dev_f = all_result[-1]

        if prev_dev_f < dev_f:
            logging.info(' [update best model on dev set!]')
            dev_list = [prev_dev_f, dev_f]
            dev_str = '       ' + ' => '.join(map(str, dev_list))
            logging.info(dev_str)
            prev_dev_f = dev_f

            # Save model
            model_filename = save_name + '_epoch' + str(epoch)
            serializers.save_hdf5(model_filename + '.model', net)
            serializers.save_hdf5(model_filename + '.state', opt)
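run() pulls a large set of keys out of **args. The call below is only an illustrative sketch of a training invocation for the variant above: every keyword name is taken from the lookups inside run(), but the values and file paths are placeholders, not the project's real configuration.

run('data/train.conll', is_train=True,
    batchsize=32, model_name='bilstm_cnn_crf', optimizer='adam',
    save_dir='save_model_dir', save_name='', gpu=-1, efficient_gpu=False,
    dev_file='data/dev.conll', test_file='data/test.conll', delimiter='\t',
    input_idx='0', output_idx='-1',
    n_word_emb=100, n_hidden=200, n_layer=1,
    n_char_emb=30, n_char_hidden=50, n_add_feature_emb=10,
    use_cudnn=False, init_lr=0.001, max_iter=50,
    model_filename='', predicted_output='predicted.txt')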
Example #5
def run(data_file, is_train=False, **args):
    is_test = not is_train
    batchsize = args['batchsize']
    model_name = args['model_name']
    optimizer_name = args['optimizer']
    save_dir = args['save_dir']
    print(args)
    if save_dir[-1] != '/':
        save_dir = save_dir + '/'

    # TODO: check that save_dir exists
    if not os.path.isdir(save_dir):
        err_msg = 'There is no dir : {}\n'.format(save_dir)
        err_msg += '##############################\n'
        err_msg += '## Please run the following:\n'
        err_msg += '## $ mkdir {}\n'.format(save_dir)
        err_msg += '##############################\n'
        raise ValueError(err_msg)

    save_name = args['save_name']
    if save_name == '':
        save_name = '_'.join([model_name, optimizer_name])

    save_name = save_dir + save_name

    xp = cuda.cupy if args['gpu'] >= 0 else np
    if args['gpu'] >= 0:
        cuda.get_device(args['gpu']).use()
        xp.random.seed(1234)

    # load files
    dev_file = args['dev_file']
    test_file = args['test_file']
    delimiter = args['delimiter']
    sentences_train = []
    if is_train:
        sentences_train = util.read_conll_file(filename=data_file,
                                               delimiter=delimiter,
                                               input_idx=0,
                                               output_idx=-1)
        if len(sentences_train) == 0:
            s = str(len(sentences_train))
            err_msg = 'Invalid training sizes: {} sentences. '.format(s)
            raise ValueError(err_msg)
    else:
        # Predict
        sentences_train = util.read_raw_file(filename=data_file,
                                             delimiter=u' ')

    # sentences_train = sentences_train[:100]

    sentences_dev = []
    sentences_test = []
    if dev_file:
        sentences_dev = util.read_conll_file(dev_file,
                                             delimiter=delimiter,
                                             input_idx=0,
                                             output_idx=-1)
    if test_file:
        sentences_test = util.read_conll_file(test_file,
                                              delimiter=delimiter,
                                              input_idx=0,
                                              output_idx=-1)

    save_vocab = save_name + '.vocab'
    save_vocab_char = save_name + '.vocab_char'
    save_tags_vocab = save_name + '.vocab_tag'
    save_train_config = save_name + '.train_config'

    # TODO: check unknown POS tags
    # TODO: compute unk words
    if is_train:
        sentences_words_train = [w_obj[0] for w_obj in sentences_train]
        vocab = util.build_vocab(sentences_words_train)
        vocab_char = util.build_vocab(util.flatten(sentences_words_train))
        vocab_tags = util.build_tag_vocab(sentences_train)
    elif is_test:
        vocab = util.load_vocab(save_vocab)
        vocab_char = util.load_vocab(save_vocab_char)
        vocab_tags = util.load_vocab(save_tags_vocab)

    PAD_IDX = vocab[PADDING]
    UNK_IDX = vocab[UNKWORD]

    CHAR_PAD_IDX = vocab_char[PADDING]
    CHAR_UNK_IDX = vocab_char[UNKWORD]

    def parse_to_word_ids(sentences):
        return util.parse_to_word_ids(sentences,
                                      xp=xp,
                                      vocab=vocab,
                                      UNK_IDX=UNK_IDX,
                                      idx=0)

    def parse_to_char_ids(sentences):
        return util.parse_to_char_ids(sentences,
                                      xp=xp,
                                      vocab_char=vocab_char,
                                      UNK_IDX=CHAR_UNK_IDX,
                                      idx=0)

    def parse_to_tag_ids(sentences):
        return util.parse_to_tag_ids(sentences,
                                     xp=xp,
                                     vocab=vocab_tags,
                                     UNK_IDX=-1,
                                     idx=-1)

    # if is_train:
    x_train = parse_to_word_ids(sentences_train)
    x_char_train = parse_to_char_ids(sentences_train)
    y_train = parse_to_tag_ids(sentences_train)

    # elif is_test:
    #     x_predict = parse_to_word_ids(sentences_predict)
    #     x_char_predict = parse_to_char_ids(sentences_predict)
    #     y_predict = parse_to_tag_ids(sentences_predict)

    x_dev = parse_to_word_ids(sentences_dev)
    x_char_dev = parse_to_char_ids(sentences_dev)
    y_dev = parse_to_tag_ids(sentences_dev)

    x_test = parse_to_word_ids(sentences_test)
    x_char_test = parse_to_char_ids(sentences_test)
    y_test = parse_to_tag_ids(sentences_test)

    cnt_train_unk = sum([xp.sum(d == UNK_IDX) for d in x_train])
    cnt_train_word = sum([d.size for d in x_train])
    unk_train_unk_rate = float(cnt_train_unk) / cnt_train_word

    cnt_dev_unk = sum([xp.sum(d == UNK_IDX) for d in x_dev])
    cnt_dev_word = sum([d.size for d in x_dev])
    unk_dev_unk_rate = float(cnt_dev_unk) / max(cnt_dev_word, 1)

    logging.info('train:' + str(len(x_train)))
    logging.info('dev  :' + str(len(x_dev)))
    logging.info('test :' + str(len(x_test)))
    logging.info('vocab     :' + str(len(vocab)))
    logging.info('vocab_tags:' + str(len(vocab_tags)))
    logging.info('unk count (train):' + str(cnt_train_unk))
    logging.info('unk rate  (train):' + str(unk_train_unk_rate))
    logging.info('cnt all words (train):' + str(cnt_train_word))
    logging.info('unk count (dev):' + str(cnt_dev_unk))
    logging.info('unk rate  (dev):' + str(unk_dev_unk_rate))
    logging.info('cnt all words (dev):' + str(cnt_dev_word))
    # show model config
    logging.info('######################')
    logging.info('## Model Config')
    logging.info('model_name:' + str(model_name))
    logging.info('batchsize:' + str(batchsize))
    logging.info('optimizer:' + str(optimizer_name))
    # Save model config
    logging.info('######################')
    logging.info('## Model Save Config')
    logging.info('save_dir :' + str(save_dir))

    # save vocab
    logging.info('save_vocab        :' + save_vocab)
    logging.info('save_vocab_char   :' + save_vocab_char)
    logging.info('save_tags_vocab   :' + save_tags_vocab)
    logging.info('save_train_config :' + save_train_config)
    util.write_vocab(save_vocab, vocab)
    util.write_vocab(save_vocab_char, vocab_char)
    util.write_vocab(save_tags_vocab, vocab_tags)
    util.write_vocab(save_train_config, args)

    net = BiLSTM_CNN_CRF(n_vocab=len(vocab),
                         n_char_vocab=len(vocab_char),
                         emb_dim=args['n_word_emb'],
                         hidden_dim=args['n_hidden'],
                         n_layers=args['n_layer'],
                         init_emb=None,
                         n_label=len(vocab_tags))

    if args['word_emb_file']:
        # set Pre-trained embeddings
        # emb_file = './emb/glove.6B.100d.txt'
        emb_file = args['word_emb_file']
        word_ids, word_vecs = util.load_glove_embedding(emb_file, vocab)
        net.word_embed.W.data[word_ids] = word_vecs

    if args['gpu'] >= 0:
        net.to_gpu()

    init_alpha = args['init_lr']
    if optimizer_name == 'adam':
        opt = optimizers.Adam(alpha=init_alpha, beta1=0.9, beta2=0.9)
    elif optimizer_name == 'adadelta':
        opt = optimizers.AdaDelta()
    elif optimizer_name == 'sgd_mom':
        opt = optimizers.MomentumSGD(lr=init_alpha, momentum=0.9)
    elif optimizer_name == 'sgd':
        opt = optimizers.SGD(lr=init_alpha)
    else:
        raise ValueError('Unknown optimizer: {}'.format(optimizer_name))

    opt.setup(net)
    opt.add_hook(chainer.optimizer.GradientClipping(5.0))

    def eval_loop(x_data, x_char_data, y_data):
        # dev or test
        net.set_train(train=False)
        iteration_list = range(0, len(x_data), batchsize)
        # perm = np.random.permutation(len(x_data))
        sum_loss = 0.0
        predict_lists = []
        for i_index, index in enumerate(iteration_list):
            x = x_data[index:index + batchsize]
            x_char = x_char_data[index:index + batchsize]
            target_y = y_data[index:index + batchsize]

            output = net(x_data=x, x_char_data=x_char)
            predict, loss = net.predict(output, target_y)

            sum_loss += loss.data
            predict_lists.extend(predict)
        return predict_lists, sum_loss

    if is_test:
        # predict
        model_filename = args['model_filename']
        model_filename = save_dir + model_filename
        serializers.load_hdf5(model_filename, net)

        vocab_tags_inv = dict([(v, k) for k, v in vocab_tags.items()])
        x_predict = x_train
        x_char_predict = x_char_train
        y_predict = y_train
        predict_pairs, _ = eval_loop(x_predict, x_char_predict, y_predict)
        _, predict_tags = zip(*predict_pairs)
        predicted_output = args['predicted_output']
        predicted_results = []
        for predict in predict_tags:
            predicted = [
                vocab_tags_inv[tag_idx] for tag_idx in to_cpu(predict)
            ]
            predicted_results.append(predicted)

        with open(predicted_output, 'w') as f:
            for predicted in predicted_results:
                for tag in predicted:
                    f.write(tag + '\n')
                f.write('\n')

        return False

    tmax = args['max_iter']
    t = 0.0
    for epoch in range(args['max_iter']):

        # train
        net.set_train(train=True)
        iteration_list = range(0, len(x_train), batchsize)
        perm = np.random.permutation(len(x_train))
        sum_loss = 0.0
        predict_train = []
        for i_index, index in enumerate(iteration_list):
            data = [(x_train[i], x_char_train[i], y_train[i])
                    for i in perm[index:index + batchsize]]
            x, x_char, target_y = zip(*data)

            output = net(x_data=x, x_char_data=x_char)
            predict, loss = net.predict(output, target_y)

            # loss
            sum_loss += loss.data

            # update
            net.zerograds()
            loss.backward()
            opt.update()

            predict_train.extend(predict)

        # Evaluation
        train_accuracy = util.eval_accuracy(predict_train)

        logging.info('epoch:' + str(epoch))
        logging.info(' [train]')
        logging.info('  loss     :' + str(sum_loss))
        logging.info('  accuracy :' + str(train_accuracy))

        # Dev
        predict_dev, loss_dev = eval_loop(x_dev, x_char_dev, y_dev)

        # Evaluation
        dev_accuracy = util.eval_accuracy(predict_dev)
        logging.info(' [dev]')
        logging.info('  loss     :' + str(loss_dev))
        logging.info('  accuracy :' + str(dev_accuracy))

        # Save model
        model_filename = save_name + '_epoch' + str(epoch)
        serializers.save_hdf5(model_filename + '.model', net)
        serializers.save_hdf5(model_filename + '.state', opt)
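The parsing helpers (util.parse_to_word_ids and its char/tag counterparts) are used throughout these examples but live in util. The sketch below only reflects what the callers here rely on, namely one integer ID array per sentence with out-of-vocabulary tokens mapped to UNK_IDX, so that expressions like d.size and (d == UNK_IDX) work; the real helper may differ.

import numpy as np

def parse_to_word_ids(sentences, xp=np, vocab=None, UNK_IDX=0, idx=0):
    """Map column `idx` of each token to a word ID, one array per sentence."""
    return [xp.array([vocab.get(w_obj[idx], UNK_IDX) for w_obj in sentence],
                     dtype=xp.int32)
            for sentence in sentences]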
Example #6
def run(data_file, is_train=False, **args):
    is_test = not is_train
    batchsize = args['batchsize']
    model_name = args['model_name']
    optimizer_name = args['optimizer']
    save_dir = args['save_dir']
    print(args)
    if save_dir[-1] != '/':
        save_dir = save_dir + '/'

    # TODO: check that save_dir exists
    if not os.path.isdir(save_dir):
        err_msg = 'There is no dir : {}\n'.format(save_dir)
        err_msg += '##############################\n'
        err_msg += '## Please run the following:\n'
        err_msg += '## $ mkdir {}\n'.format(save_dir)
        err_msg += '##############################\n'
        raise ValueError(err_msg)

    save_name = args['save_name']
    if save_name == '':
        save_name = '_'.join([model_name, optimizer_name])

    save_name = save_dir + save_name

    xp = cuda.cupy if args['gpu'] >= 0 else np
    efficient_gpu = False
    if args['gpu'] >= 0:
        cuda.get_device(args['gpu']).use()
        xp.random.seed(1234)
        efficient_gpu = args.get('efficient_gpu', False)

    def to_gpu(x):
        if args['gpu'] >= 0:
            return chainer.cuda.to_gpu(x)
        return x

    # load files
    dev_file = args['dev_file']
    test_file = args['test_file']
    delimiter = args['delimiter']
    input_idx = list(map(int, args['input_idx'].split(',')))
    output_idx = list(map(int, args['output_idx'].split(',')))
    word_input_idx = input_idx[0]  # NOTE: word_idx is first column!
    additional_input_idx = input_idx[1:]
    sentences_train = []
    if is_train:
        sentences_train = util.read_conll_file(filename=data_file,
                                               delimiter=delimiter)
        if len(sentences_train) == 0:
            s = str(len(sentences_train))
            err_msg = 'Invalid training sizes: {} sentences. '.format(s)
            raise ValueError(err_msg)
    else:
        # Predict
        sentences_train = util.read_raw_file(filename=data_file,
                                             delimiter=u' ')

    # sentences_train = sentences_train[:100]

    sentences_dev = []
    sentences_test = []
    if dev_file:
        sentences_dev = util.read_conll_file(dev_file, delimiter=delimiter)
    if test_file:
        sentences_test = util.read_conll_file(test_file, delimiter=delimiter)

    save_vocab = save_name + '.vocab'
    save_vocab_char = save_name + '.vocab_char'
    save_tags_vocab = save_name + '.vocab_tag'
    save_train_config = save_name + '.train_config'

    # TODO: check unknown POS tags
    # TODO: compute unk words
    vocab_adds = []
    if is_train:
        sentences_words_train = [[w_obj[word_input_idx] for w_obj in sentence]
                                 for sentence in sentences_train]
        vocab = util.build_vocab(sentences_words_train)
        vocab_char = util.build_vocab(util.flatten(sentences_words_train))
        vocab_tags = util.build_tag_vocab(sentences_train)

        # Additional setup
        for ad_feat_id in additional_input_idx:
            sentences_additional_train = [[
                feat_obj[ad_feat_id] for feat_obj in sentence
            ] for sentence in sentences_train]
            vocab_add = util.build_vocab(sentences_additional_train)
            vocab_adds.append(vocab_add)
    elif is_test:
        vocab = util.load_vocab(save_vocab)
        vocab_char = util.load_vocab(save_vocab_char)
        vocab_tags = util.load_vocab(save_tags_vocab)

    if args.get('word_emb_file', False):
        # set Pre-trained embeddings
        # emb_file = './emb/glove.6B.100d.txt'
        emb_file = args['word_emb_file']
        word_emb_vocab_type = args.get('word_emb_vocab_type')

        def assert_word_emb_shape(shape1, shape2):
            err_msg = '''Pre-trained embedding size is not equal to `--n_word_emb` ({} != {})'''
            if shape1 != shape2:
                err_msg = err_msg.format(str(shape1), str(shape2))
                raise ValueError(err_msg)

        def assert_no_emb(word_vecs):
            err_msg = '''There is no-embeddings! Please check your file `--word_emb_file`'''
            if word_vecs.shape[0] == 0:
                raise ValueError(err_msg)

        if word_emb_vocab_type == 'replace_all':
            # replace all vocab by Pre-trained embeddings
            word_vecs, vocab_glove = util.load_glove_embedding_include_vocab(
                emb_file)
            vocab = vocab_glove
        elif word_emb_vocab_type == 'replace_only':
            word_ids, word_vecs = util.load_glove_embedding(emb_file, vocab)
            assert_no_emb(word_vecs)

        elif word_emb_vocab_type == 'additional':
            word_vecs, vocab_glove = util.load_glove_embedding_include_vocab(
                emb_file)
            additional_vecs = []
            for word, word_idx in sorted(vocab_glove.items(),
                                         key=lambda x: x[1]):
                if word not in vocab:
                    vocab[word] = len(vocab)
                    additional_vecs.append(word_vecs[word_idx])
            additional_vecs = np.array(additional_vecs, dtype=np.float32)

    if args.get('vocab_file', False):
        vocab_file = args['vocab_file']
        vocab = util.load_vocab(vocab_file)

    if args.get('vocab_char_file', False):
        vocab_char_file = args['vocab_char_file']
        vocab_char = util.load_vocab(vocab_char_file)

    vocab_tags_inv = dict((v, k) for k, v in vocab_tags.items())
    PAD_IDX = vocab[PADDING]
    UNK_IDX = vocab[UNKWORD]

    CHAR_PAD_IDX = vocab_char[PADDING]
    CHAR_UNK_IDX = vocab_char[UNKWORD]

    tmp_xp = xp
    if efficient_gpu:
        tmp_xp = np  # use CPU (numpy)

    def parse_to_word_ids(sentences, word_input_idx, vocab):
        return util.parse_to_word_ids(sentences,
                                      xp=tmp_xp,
                                      vocab=vocab,
                                      UNK_IDX=UNK_IDX,
                                      idx=word_input_idx)

    def parse_to_char_ids(sentences):
        return util.parse_to_char_ids(sentences,
                                      xp=tmp_xp,
                                      vocab_char=vocab_char,
                                      UNK_IDX=CHAR_UNK_IDX,
                                      idx=word_input_idx)

    def parse_to_tag_ids(sentences):
        return util.parse_to_tag_ids(sentences,
                                     xp=tmp_xp,
                                     vocab=vocab_tags,
                                     UNK_IDX=-1,
                                     idx=-1)

    x_train = parse_to_word_ids(sentences_train, word_input_idx, vocab)
    x_char_train = parse_to_char_ids(sentences_train)
    y_train = parse_to_tag_ids(sentences_train)
    x_train_additionals = [
        parse_to_word_ids(sentences_train, ad_feat_id, vocab_adds[i])
        for i, ad_feat_id in enumerate(additional_input_idx)
    ]

    x_dev = parse_to_word_ids(sentences_dev, word_input_idx, vocab)
    x_char_dev = parse_to_char_ids(sentences_dev)
    y_dev = parse_to_tag_ids(sentences_dev)
    x_dev_additionals = [
        parse_to_word_ids(sentences_dev, ad_feat_id, vocab_adds[i])
        for i, ad_feat_id in enumerate(additional_input_idx)
    ]

    y_dev_cpu = [[w[-1] for w in sentence] for sentence in sentences_dev]
    # tag_names = []
    tag_names = list(
        set([
            tag[2:] if len(tag) >= 2 else tag[0] for tag in vocab_tags.keys()
        ]))

    x_test = parse_to_word_ids(sentences_test, word_input_idx, vocab)
    x_char_test = parse_to_char_ids(sentences_test)
    y_test = parse_to_tag_ids(sentences_test)
    x_test_additionals = [
        parse_to_word_ids(sentences_test, ad_feat_id, vocab_adds[i])
        for i, ad_feat_id in enumerate(additional_input_idx)
    ]

    cnt_train_unk = sum([tmp_xp.sum(d == UNK_IDX) for d in x_train])
    cnt_train_word = sum([d.size for d in x_train])
    unk_train_unk_rate = float(cnt_train_unk) / cnt_train_word

    cnt_dev_unk = sum([tmp_xp.sum(d == UNK_IDX) for d in x_dev])
    cnt_dev_word = sum([d.size for d in x_dev])
    unk_dev_unk_rate = float(cnt_dev_unk) / max(cnt_dev_word, 1)

    logging.info('train:' + str(len(x_train)))
    logging.info('dev  :' + str(len(x_dev)))
    logging.info('test :' + str(len(x_test)))
    logging.info('vocab     :' + str(len(vocab)))
    logging.info('vocab_tags:' + str(len(vocab_tags)))
    logging.info('unk count (train):' + str(cnt_train_unk))
    logging.info('unk rate  (train):' + str(unk_train_unk_rate))
    logging.info('cnt all words (train):' + str(cnt_train_word))
    logging.info('unk count (dev):' + str(cnt_dev_unk))
    logging.info('unk rate  (dev):' + str(unk_dev_unk_rate))
    logging.info('cnt all words (dev):' + str(cnt_dev_word))
    # show model config
    logging.info('######################')
    logging.info('## Model Config')
    logging.info('model_name:' + str(model_name))
    logging.info('batchsize:' + str(batchsize))
    logging.info('optimizer:' + str(optimizer_name))
    # Save model config
    logging.info('######################')
    logging.info('## Model Save Config')
    logging.info('save_dir :' + str(save_dir))

    # save vocab
    logging.info('save_vocab        :' + save_vocab)
    logging.info('save_vocab_char   :' + save_vocab_char)
    logging.info('save_tags_vocab   :' + save_tags_vocab)
    logging.info('save_train_config :' + save_train_config)

    init_emb = None

    if is_train:
        util.write_vocab(save_vocab, vocab)
        util.write_vocab(save_vocab_char, vocab_char)
        util.write_vocab(save_tags_vocab, vocab_tags)
        util.write_vocab(save_train_config, args)

    n_vocab_add = [len(_vadd) for _vadd in vocab_adds]

    net = BiLSTM_CNN_CRF(n_vocab=len(vocab),
                         n_char_vocab=len(vocab_char),
                         emb_dim=args['n_word_emb'],
                         hidden_dim=args['n_hidden'],
                         n_layers=args['n_layer'],
                         init_emb=init_emb,
                         char_input_dim=args['n_char_emb'],
                         char_hidden_dim=args['n_char_hidden'],
                         n_label=len(vocab_tags),
                         n_add_feature_dim=args['n_add_feature_emb'],
                         n_add_feature=len(n_vocab_add),
                         n_vocab_add=n_vocab_add,
                         use_cudnn=args['use_cudnn'])
    my_cudnn(args['use_cudnn'])

    if args.get('word_emb_file', False):

        if word_emb_vocab_type == 'replace_all':
            # replace all vocab by Pre-trained embeddings
            assert_word_emb_shape(word_vecs.shape[1],
                                  net.word_embed.W.shape[1])
            net.word_embed.W.data = word_vecs[:]
        elif word_emb_vocab_type == 'replace_only':
            assert_no_emb(word_vecs)
            assert_word_emb_shape(word_vecs.shape[1],
                                  net.word_embed.W.shape[1])
            net.word_embed.W.data[word_ids] = word_vecs[:]

        elif word_emb_vocab_type == 'additional':
            assert_word_emb_shape(word_vecs.shape[1],
                                  net.word_embed.W.shape[1])
            v_size = additional_vecs.shape[0]
            net.word_embed.W.data[-v_size:] = additional_vecs[:]

    if args.get('return_model', False):
        return net

    if args['gpu'] >= 0:
        net.to_gpu()

    init_alpha = args['init_lr']
    if optimizer_name == 'adam':
        opt = optimizers.Adam(alpha=init_alpha, beta1=0.9, beta2=0.9)
    elif optimizer_name == 'adadelta':
        opt = optimizers.AdaDelta()
    elif optimizer_name == 'sgd_mom':
        opt = optimizers.MomentumSGD(lr=init_alpha, momentum=0.9)
    elif optimizer_name == 'sgd':
        opt = optimizers.SGD(lr=init_alpha)
    else:
        raise ValueError('Unknown optimizer: {}'.format(optimizer_name))

    opt.setup(net)
    opt.add_hook(chainer.optimizer.GradientClipping(5.0))

    def eval_loop(x_data, x_char_data, y_data, x_train_additionals=[]):
        # dev or test
        net.set_train(train=False)
        iteration_list = range(0, len(x_data), batchsize)
        # perm = np.random.permutation(len(x_data))
        sum_loss = 0.0
        predict_lists = []
        for i_index, index in enumerate(iteration_list):
            x = x_data[index:index + batchsize]
            x_char = x_char_data[index:index + batchsize]
            target_y = y_data[index:index + batchsize]

            if efficient_gpu:
                x = [to_gpu(_) for _ in x]
                x_char = [[to_gpu(_) for _ in words] for words in x_char]
                target_y = [to_gpu(_) for _ in target_y]

            x_additional = []
            if len(x_train_additionals):
                x_additional = [[
                    to_gpu(_) for _ in x_ad[index:index + batchsize]
                ] for x_ad in x_train_additionals]

            output = net(x_data=x,
                         x_char_data=x_char,
                         x_additional=x_additional)
            predict, loss = net.predict(output, target_y)

            sum_loss += loss.data
            predict_lists.extend(predict)

        _, predict_tags = zip(*predict_lists)
        predicted_results = []
        for predict in predict_tags:
            predicted = [
                vocab_tags_inv[tag_idx] for tag_idx in to_cpu(predict)
            ]
            predicted_results.append(predicted)

        return predict_lists, sum_loss, predicted_results

    if args['model_filename']:
        model_filename = args['model_filename']
        serializers.load_hdf5(model_filename, net)

    if is_test:
        # predict
        # model_filename = args['model_filename']
        # model_filename = save_dir + model_filename
        # serializers.load_hdf5(model_filename, net)
        vocab_tags_inv = dict([(v, k) for k, v in vocab_tags.items()])
        x_predict = x_train
        x_char_predict = x_char_train
        y_predict = y_train

        if dev_file:
            predict_dev, loss_dev, predict_dev_tags = eval_loop(
                x_dev, x_char_dev, y_dev)
            gold_predict_pairs = [y_dev_cpu, predict_dev_tags]
            result, phrase_info = util.conll_eval(gold_predict_pairs,
                                                  flag=False,
                                                  tag_class=tag_names)
            all_result = result['All_Result']
            print('all_result:', all_result)

        predict_pairs, _, _tmp = eval_loop(x_predict, x_char_predict,
                                           y_predict)
        _, predict_tags = zip(*predict_pairs)
        predicted_output = args['predicted_output']
        predicted_results = []
        for predict in predict_tags:
            predicted = [
                vocab_tags_inv[tag_idx] for tag_idx in to_cpu(predict)
            ]
            predicted_results.append(predicted)

        with open(predicted_output, 'w') as f:
            for predicted in predicted_results:
                for tag in predicted:
                    f.write(tag + '\n')
                f.write('\n')

        return False

    tmax = args['max_iter']
    t = 0.0
    prev_dev_accuracy = 0.0
    prev_dev_f = 0.0
    for epoch in range(args['max_iter']):

        # train
        net.set_train(train=True)
        iteration_list = range(0, len(x_train), batchsize)
        perm = np.random.permutation(len(x_train))
        sum_loss = 0.0
        predict_train = []
        for i_index, index in enumerate(iteration_list):
            data = [(x_train[i], x_char_train[i], y_train[i])
                    for i in perm[index:index + batchsize]]
            x, x_char, target_y = zip(*data)

            x_additional = []
            if len(x_train_additionals):
                x_additional = [[
                    to_gpu(x_ad[add_i])
                    for add_i in perm[index:index + batchsize]
                ] for x_ad in x_train_additionals]

            if efficient_gpu:
                x = [to_gpu(_) for _ in x]
                x_char = [[to_gpu(_) for _ in words] for words in x_char]
                target_y = [to_gpu(_) for _ in target_y]

            output = net(x_data=x,
                         x_char_data=x_char,
                         x_additional=x_additional)
            predict, loss = net.predict(output, target_y)

            # loss
            sum_loss += loss.data

            # update
            net.zerograds()
            loss.backward()
            opt.update()

            predict_train.extend(predict)

        # Evaluation
        train_accuracy = util.eval_accuracy(predict_train)

        logging.info('epoch:' + str(epoch))
        logging.info(' [train]')
        logging.info('  loss     :' + str(sum_loss))
        logging.info('  accuracy :' + str(train_accuracy))

        # Dev
        predict_dev, loss_dev, predict_dev_tags = eval_loop(
            x_dev, x_char_dev, y_dev, x_dev_additionals)

        gold_predict_pairs = [y_dev_cpu, predict_dev_tags]
        result, phrase_info = util.conll_eval(gold_predict_pairs,
                                              flag=False,
                                              tag_class=tag_names)
        all_result = result['All_Result']

        # Evaluation
        dev_accuracy = util.eval_accuracy(predict_dev)
        logging.info(' [dev]')
        logging.info('  loss     :' + str(loss_dev))
        logging.info('  accuracy :' + str(dev_accuracy))
        logging.info('  f_measure :' + str(all_result[-1]))

        dev_f = all_result[-1]

        if prev_dev_f < dev_f:
            logging.info(' [update best model on dev set!]')
            dev_list = [prev_dev_f, dev_f]
            dev_str = '       ' + ' => '.join(map(str, dev_list))
            logging.info(dev_str)
            prev_dev_f = dev_f

            # Save model
            model_filename = save_name + '_epoch' + str(epoch)
            serializers.save_hdf5(model_filename + '.model', net)
            serializers.save_hdf5(model_filename + '.state', opt)
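util.build_vocab and util.build_tag_vocab are also left to util in this listing. A minimal sketch consistent with how the vocabularies are used above (plain dicts mapping symbol to index, with PADDING and UNKWORD reserved so that vocab[PADDING] and vocab[UNKWORD] exist) might look like this; the sentinel strings and index order are assumptions:

PADDING = '<PAD>'   # assumed sentinel values; the real constants live elsewhere
UNKWORD = '<UNK>'

def build_vocab(sequences):
    """Assign an index to every distinct item, reserving PADDING and UNKWORD."""
    vocab = {PADDING: 0, UNKWORD: 1}
    for seq in sequences:
        for item in seq:
            if item not in vocab:
                vocab[item] = len(vocab)
    return vocab

def build_tag_vocab(sentences, tag_idx=-1):
    """Collect the tag column (the last field of each token) into a vocabulary."""
    return build_vocab([[w_obj[tag_idx] for w_obj in sentence]
                        for sentence in sentences])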