Example 1
    def read_lstm_model(self, params, train):

        assert train == False  # reading a model to continue training is currently not supported

        words_file = params['config_path'] + params['words_file']
        model_file = params['config_path'] + params['model_file']
        unit = int(params['unit'])
        deep = (params['deep'] == 'yes')
        drop_ratio = float(params['drop_ratio'])

        #read and normalize target word embeddings
        w, word2index, index2word = self.read_words(words_file)
        s = numpy.sqrt((w * w).sum(1))
        s[s == 0.] = 1.
        w /= s.reshape((s.shape[0], 1))  # normalize

        context_word_units = unit
        lstm_hidden_units = IN_TO_OUT_UNITS_RATIO * unit
        target_word_units = IN_TO_OUT_UNITS_RATIO * unit

        cs = [1 for _ in range(len(word2index))]  # dummy word counts - not used for eval
        loss_func = L.NegativeSampling(
            target_word_units, cs,
            NEGATIVE_SAMPLING_NUM)  # dummy loss func - not used for eval

        model = BiLstmContext(deep, self.gpu, word2index, context_word_units,
                              lstm_hidden_units, target_word_units, loss_func,
                              train, drop_ratio)
        S.load_npz(model_file, model)

        return w, word2index, index2word, model
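For reference, a minimal sketch of how a chainer.links.NegativeSampling link such as the dummy loss_func above is constructed and evaluated. All counts, sizes, and inputs below are made-up placeholder values, assuming the standard Chainer signature NegativeSampling(in_size, counts, sample_size).

import numpy as np
import chainer.links as L

# Placeholder setup: 5-word vocabulary, 3-dimensional target-word embeddings.
counts = [10, 5, 2, 5, 2]                     # per-word corpus frequencies
loss_func = L.NegativeSampling(3, counts, 2)  # in_size=3, 2 negative samples

x = np.random.uniform(-1, 1, (4, 3)).astype(np.float32)  # batch of predicted context vectors
t = np.array([0, 2, 1, 3], dtype=np.int32)                # target word ids

loss = loss_func(x, t)   # scalar negative-sampling loss (summed over the batch by default)
w = loss_func.W.data     # output word embeddings, shape (len(counts), in_size)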
Example 2
 def __init__(self,
              n_documents=100,
              n_document_topics=10,
              n_units=256,
              n_vocab=1000,
              dropout_ratio=0.5,
              train=True,
              counts=None,
              n_samples=15,
              word_dropout_ratio=0.0):
     em = EmbedMixture(n_documents,
                       n_document_topics,
                       n_units,
                       dropout_ratio=dropout_ratio)
     kwargs = {}
     kwargs['mixture'] = em
     kwargs['sampler'] = L.NegativeSampling(n_units, counts, n_samples)
     super(LDA2Vec, self).__init__(**kwargs)
     rand = np.random.random(self.sampler.W.data.shape)
     self.sampler.W.data[:, :] = rand[:, :]
     self.n_units = n_units
     self.train = train
     self.dropout_ratio = dropout_ratio
     self.word_dropout_ratio = word_dropout_ratio
     self.n_samples = n_samples
Example 3
 def create_link(self, rng=None):
     if rng is None:
         rng = numpy.random.RandomState()
     link = links.NegativeSampling(self.in_size, [10, 5, 2, 5, 2],
                                   self.sample_size)
     link.cleargrads()
     # W is initialized with zero. Inject random values for meaningful test.
     link.W.array[:] = rng.uniform(-1, 1, link.W.shape)
     return link
Example 4
 def __init__(self, counts, n_docs, n_topics, n_dim, n_vocab, n_samples=5):
     factors = np.random.random((n_topics, n_dim)).astype('float32')
     loss_func = L.NegativeSampling(n_dim, counts, n_samples)
     loss_func.W.data[:, :] = np.random.randn(*loss_func.W.data.shape)
     loss_func.W.data[:, :] /= np.sqrt(np.prod(loss_func.W.data.shape))
     super(NSLDA, self).__init__(proportions=L.EmbedID(n_docs, n_topics),
                                 factors=L.Parameter(factors),
                                 loss_func=loss_func)
     self.n_docs = n_docs
     self.n_topics = n_topics
     self.n_vocab = n_vocab
     self.n_dim = n_dim
Example 5
    def read_model(self, params):
        user_file = os.path.join(params['config_path'],
                                 params['user_filename'])
        item_file = os.path.join(params['config_path'],
                                 params['item_filename'])
        vocab_file = os.path.join(params['config_path'],
                                  params['vocab_filename'])
        aspect_file = os.path.join(params['config_path'],
                                   params['aspect_filename'])
        opinion_file = os.path.join(params['config_path'],
                                    params['opinion_filename'])
        aspect_opinions_file = os.path.join(params['config_path'],
                                            params['aspect_opinions_filename'])
        model_file = os.path.join(params['config_path'],
                                  params['model_filename'])

        context_word_units = int(params['unit'])
        lstm_hidden_units = IN_TO_OUT_UNITS_RATIO * context_word_units
        target_word_units = IN_TO_OUT_UNITS_RATIO * context_word_units

        user2index = load_dict(user_file)
        item2index = load_dict(item_file)
        word2index = load_dict(vocab_file)
        aspect2index = load_dict(aspect_file)
        opinion2index = load_dict(opinion_file)
        aspect_opinions = load_json(aspect_opinions_file)

        n_user = max(user2index.values()) + 1
        n_item = max(item2index.values()) + 1
        n_vocab = max(word2index.values()) + 1
        n_aspect = max(aspect2index.values()) + 1

        n_encode = n_aspect

        # dummy word counts - not used for eval
        cs = [1 for _ in range(n_vocab)]
        # dummy loss func - not used for eval
        loss_func = L.NegativeSampling(target_word_units, cs,
                                       NEGATIVE_SAMPLING_NUM)

        if params['model_type'] == 'c2v':
            model = Context2Vec(self.gpu, n_vocab, context_word_units,
                                lstm_hidden_units, target_word_units,
                                loss_func, self.resume)
        elif params['model_type'] in ['asc2v', 'asc2v-mter']:
            model = AspectSentiContext2Vec(self.gpu, n_vocab, n_encode,
                                           context_word_units,
                                           lstm_hidden_units,
                                           target_word_units, loss_func,
                                           self.resume)
        S.load_npz(model_file, model)
        w = model.loss_func.W.data
        return user2index, item2index, w, word2index, aspect2index, opinion2index, aspect_opinions, model
Example 6
 def setUp(self):
     # Create two identical datasets except that 2nd dataset has the
     # negative targets explicitly removed. Both cases should have identical
     # outcomes.
     self.link = links.NegativeSampling(3, [10, 5, 2, 5, 2], 2)
     self.link.zerograds()
     self.x = numpy.random.uniform(-1, 1, (3, 3)).astype(numpy.float32)
     self.t = numpy.array([-1, 1, 2]).astype(numpy.int32)
     self.gy = numpy.random.uniform(-1, 1, ()).astype(numpy.float32)
     self.idx = self.t > -1
     self.x0 = self.x.copy()[self.idx]
     self.t0 = self.t.copy()[self.idx]
     self.gy0 = self.gy.copy()
Example 7
    def setUp(self):
        batch = len(self.t)
        x_shape = (batch, self.in_size)
        self.link = links.NegativeSampling(self.in_size, [10, 5, 2, 5, 2],
                                           self.sample_size)
        self.link.cleargrads()
        self.x = numpy.random.uniform(-1, 1, x_shape).astype(numpy.float32)
        self.t = numpy.array(self.t).astype(numpy.int32)

        if self.reduce == 'no':
            g_shape = self.t.shape
        elif self.reduce == 'sum':
            g_shape = ()
        self.gy = numpy.random.uniform(-1, 1, g_shape).astype(numpy.float32)
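The test above exercises both reduce modes of the link's forward call. A small sketch of the difference, assuming the standard call signature and using placeholder counts and shapes:

import numpy as np
import chainer.links as L

link = L.NegativeSampling(3, [10, 5, 2, 5, 2], 2)  # in_size=3, sample_size=2
x = np.random.uniform(-1, 1, (4, 3)).astype(np.float32)
t = np.array([0, 2, 1, 3], dtype=np.int32)

loss_sum = link(x, t)              # reduce='sum' (default): 0-dimensional scalar loss
loss_no = link(x, t, reduce='no')  # reduce='no': per-example losses, shape (4,)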
Example 8
def get_context_model(args, data_loader):
    if args.resume:
        model_reader = ModelReader(args.resume, args.gpu, True, data_loader.word2count)
        model = model_reader.model
    else:
        n_vocab = data_loader.n_vocab
        if args.context in ["sc2v", "sc2v-mter"]:
            n_aspect = 1
        else:
            n_aspect = data_loader.n_aspect
        context_word_units = args.unit
        lstm_hidden_units = IN_TO_OUT_UNITS_RATIO * args.unit
        target_word_units = IN_TO_OUT_UNITS_RATIO * args.unit
        cs = [data_loader.word2count[w] for w in range(n_vocab)]
        loss_func = L.NegativeSampling(
            target_word_units, cs, NEGATIVE_SAMPLING_NUM, args.ns_power
        )
        loss_func.W.data[...] = 0
        if args.context == "c2v":
            model = Context2Vec(
                args.gpu,
                n_vocab,
                context_word_units,
                lstm_hidden_units,
                target_word_units,
                loss_func,
                True,
                args.dropout,
            )
        elif args.context in [
            "ac2v",
            "sc2v",
            "asc2v",
            "sc2v-mter",
            "asc2v-mter",
            "aoc2v",
            "rasc2v",
        ]:
            model = AspectSentiContext2Vec(
                args.gpu,
                n_vocab,
                n_aspect,
                context_word_units,
                lstm_hidden_units,
                target_word_units,
                loss_func,
                True,
                args.dropout,
            )
    return model
Example 9
 def __init__(self, n_documents=100, n_document_topics=10,
              n_units=256, n_vocab=1000, dropout_ratio=0.5, train=True,
              counts=None, n_samples=15):
     em = EmbedMixture(n_documents, n_document_topics, n_units,
                       dropout_ratio=dropout_ratio)
     kwargs = {}
     kwargs['mixture'] = em
     kwargs['embed'] = L.EmbedID(n_vocab, n_units)
     kwargs['sampler'] = L.NegativeSampling(n_units, counts, n_samples)
     super(LDA2Vec, self).__init__(**kwargs)
     self.n_units = n_units
     self.train = train
     self.dropout_ratio = dropout_ratio
     self.n_samples = n_samples
Example 10
    def __init__(self, n_lemma_vocab, n_emb_size, hidden_size=100,
                 n_units=100, counts=None, k=15, init_embed=None, dropout=0, freeze=0):
        super(Unsp_Model, self).__init__()
        with self.init_scope():
            self.lemma_embed = L.EmbedID(n_lemma_vocab, n_emb_size, initialW=init_embed)
            if freeze == 1:
                self.lemma_embed.disable_update()

            self.l1 = L.Linear(hidden_size)
            self.l2 = L.Linear(n_units)
            self.path_ns = L.NegativeSampling(n_units, counts, k)

            self.n_units = n_units
            self.n_lemma_vocab = n_lemma_vocab
            self.counts = counts
Example 11
def get_loss_func(args, vocab_context):
    word_counts = vocab_context.lst_frequencies
    if args.out_type == 'hsm':
        HSM = L.BinaryHierarchicalSoftmax
        d_counts = {i: word_counts[i] for i in range(len(word_counts))}
        tree = HSM.create_huffman_tree(d_counts)
        loss_func = HSM(args.dimensions, tree)
        loss_func.W.data[...] = 0
    elif args.out_type == 'ns':
        cs = [word_counts[w] for w in range(len(word_counts))]
        loss_func = L.NegativeSampling(args.dimensions, cs, args.negative_size)
        loss_func.W.data[...] = 0
    elif args.out_type == 'original':
        loss_func = SoftmaxCrossEntropyLoss(args.dimensions, vocab_context.cnt_words)

    return loss_func
Example 12
 def finalize(self):
     loss_func = L.NegativeSampling(self.n_hidden, self.counts,
                                    self.n_samples)
     data = np.random.randn(len(self.counts), self.n_hidden)
     data /= np.sqrt(np.prod(data.shape))
     loss_func.W.data[:] = data[:].astype('float32')
     kwargs = dict(vocab=L.EmbedID(self.n_words, self.n_hidden),
                   loss_func=loss_func)
     for name, (em, transform, lf, cp) in self.categorical_features.items():
         kwargs[name + '_mixture'] = em
         if transform is not None:
             kwargs[name + '_linear'] = transform
     super(LDA2Vec, self).__init__(**kwargs)
     self._setup()
     self._finalized = True
     self.logger.info("Finalized the class")
Example 13
    def __init__(self,
                 n_documents=100,
                 n_document_topics=10,
                 n_units=256,
                 n_vocab=1000,
                 dropout_ratio=0.5,
                 train=True,
                 counts=None,
                 n_samples=15,
                 word_dropout_ratio=0.0,
                 power=0.75,
                 temperature=1.0,
                 vocab=None,
                 docu_initialW=None):
        em = EmbedMixture(n_documents,
                          n_document_topics,
                          n_units,
                          dropout_ratio=dropout_ratio,
                          temperature=temperature,
                          docu_initialW=docu_initialW)
        kwargs = {}
        kwargs['mixture'] = em

        # (Pdb) self.sampler.W.data.shape -> (4891, 300)
        # (Pdb) n_units -> 300, embedding dimensions
        # (Pdb) counts -> array([ 0,  0,  0, ..., 30, 30, 29], dtype=int32)
        # (Pdb) counts.shape -> (4891,)
        # (Pdb) len(vocab) -> 4891
        # (Pdb) vocab[0] -> '<SKIP>', vocab[1] -> 'out_of_vocabulary',  vocab[2] -> '-PRON-'
        kwargs['sampler'] = L.NegativeSampling(n_units,
                                               counts,
                                               n_samples,
                                               power=power)
        super(LDA2Vec, self).__init__(**kwargs)

        # note that sample.W.data will be loaded with pre-trained GoogleNews
        # word2vec data later in lda2vec_run.py
        rand = np.random.random(self.sampler.W.data.shape)
        self.sampler.W.data[:, :] = rand[:, :]

        self.n_units = n_units
        self.train = train
        self.dropout_ratio = dropout_ratio
        self.word_dropout_ratio = word_dropout_ratio
        self.n_samples = n_samples
        self.vocab = vocab
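The constructor above forwards power to the sampler; in chainer.links.NegativeSampling this exponent flattens the unigram noise distribution (negatives are drawn with probability proportional to the counts raised to power, 0.75 by default). A brief sketch with placeholder frequencies:

import chainer.links as L

counts = [100, 10, 1]  # placeholder word frequencies
sampler = L.NegativeSampling(300, counts, 15, power=0.75)  # noise distribution proportional to counts ** 0.75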
Example 14
def skipgram_embedding(data,
                       dim=50,
                       batchsize=32,
                       window=10,
                       negative_sample=5,
                       epochs=10) -> list:

    cs = [data.counts[w] for w in range(len(data.counts))]
    loss_func = L.NegativeSampling(dim, cs, negative_sample)
    model = models.SkipGram(data.n_vocab, dim, loss_func)

    # Set up an optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    # Set up an iterator
    train_iter = models.WindowIterator(data.x_train, window, batchsize)
    val_iter = models.WindowIterator(data.x_test,
                                     window,
                                     batchsize,
                                     repeat=False)

    # Set up an updater
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       converter=convert,
                                       device=-1)

    # Set up a trainer
    trainer = training.Trainer(updater, (epochs, 'epoch'), out="result")
    trainer.extend(
        extensions.Evaluator(val_iter, model, converter=convert, device=-1))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())
    chainer.config.train = True
    trainer.run()
    return model.predictor.embed.W.data
Example 15
    val = val[:100]

vocab = chainer.datasets.get_ptb_words_vocabulary()
index2word = {wid: word for word, wid in six.iteritems(vocab)}

print('n_vocab: %d' % n_vocab)
print('data length: %d' % len(train))

if args.out_type == 'hsm':
    HSM = L.BinaryHierarchicalSoftmax
    tree = HSM.create_huffman_tree(counts)
    loss_func = HSM(args.unit, tree)
    loss_func.W.data[...] = 0
elif args.out_type == 'ns':
    cs = [counts[w] for w in range(len(counts))]
    loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
    loss_func.W.data[...] = 0
elif args.out_type == 'original':
    loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
else:
    raise Exception('Unknown output type: {}'.format(args.out_type))

if args.model == 'skipgram':
    model = SkipGram(n_vocab, args.unit, loss_func)
elif args.model == 'cbow':
    model = ContinuousBoW(n_vocab, args.unit, loss_func)
else:
    raise Exception('Unknown model type: {}'.format(args.model))

if args.gpu >= 0:
    model.to_gpu()
Example 16
def execute_c():
    index2word = {}
    word2index = {}
    dataset = []
    counts = collections.Counter()
    with open(args["data"]) as f:
        for line in f:
            for word in line.split():
                if word not in word2index:
                    ind = len(word2index)
                    word2index[word] = ind
                    index2word[ind] = word
                counts[word2index[word]] += 1
                dataset.append(word2index[word])

    n_vocab = len(word2index)

    print("n_vocab: %d" % n_vocab)
    print("data length: %d" % len(dataset))

    if args["out_type"] == "hsm":
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args["unit"], tree)
    elif args["out_type"] == "ns":
        cs = [counts[w] for w in range(len(counts))]
        loss_func = L.NegativeSampling(args["unit"], cs, 20)
    elif args["out_type"] == "original":
        loss_func = SoftmaxCrossEntropyLoss(args["unit"], n_vocab)
    else:
        raise Exception("Unknown output type: {}".format(args["out_type"]))

    if args["model"] == "skipgram":
        model = SkipGram(n_vocab, args["unit"], loss_func)
    elif args["model"] == "cbow":
        model = ContinuousBow(n_vocab, args["unit"], loss_func)
    else:
        raise Exception('Unknown model type: {}'.format(args["model"]))

    dataset = np.array(dataset, dtype=np.int32)

    optimizer = O.Adam()
    optimizer.setup(model)

    begin_time = time.time()
    cur_at = begin_time
    word_count = 0
    skip = (len(dataset) - args["window"] * 2) // args["batchsize"]
    next_count = 100000
    for epoch in range(args["epoch"]):
        accum_loss = 0
        print('epoch: {0}'.format(epoch))
        indexes = np.random.permutation(skip)
        for i in indexes:
            if word_count >= next_count:
                now = time.time()
                duration = now - cur_at
                throughput = 100000. / duration
                print('{} words, {:.2f} sec, {:.2f} words/sec'.format(
                    word_count, duration, throughput))
                next_count += 100000
                cur_at = now

            position = np.array(range(0, args["batchsize"])) * skip + (
                args["window"] + i)
            loss = calculate_loss(model, dataset, position)
            accum_loss += loss.data
            word_count += args["batchsize"]

            model.zerograds()
            loss.backward()
            optimizer.update()

        print(accum_loss)

    with open('word2vec.model', 'w') as f:
        f.write('%d %d\n' % (len(index2word), args["unit"]))
        w = model.weight_xi.W.data
        for i in range(w.shape[0]):
            v = ' '.join(['%f' % v for v in w[i]])
            f.write('%s %s\n' % (index2word[i], v))
Example 17
context_word_units = args.unit
lstm_hidden_units = IN_TO_OUT_UNITS_RATIO*args.unit
target_word_units = IN_TO_OUT_UNITS_RATIO*args.unit

# if args.gpu >= 0:
#     cuda.check_cuda_available()
#     cuda.get_device(args.gpu).use()
# xp = cuda.cupy if args.gpu >= 0 else np
xp = np
    
reader = SentenceReaderDir(args.indir, args.trimfreq, args.batchsize)
print('n_vocab: %d' % (len(reader.word2index)-3)) # excluding the three special tokens
print('corpus size: %d' % (reader.total_words))

cs = [reader.trimmed_word2count[w] for w in range(len(reader.trimmed_word2count))]
loss_func = L.NegativeSampling(target_word_units, cs, NEGATIVE_SAMPLING_NUM, args.ns_power)

if args.context == 'lstm':
    model = BiLstmContext(args.deep, args.gpu, reader.word2index, context_word_units, lstm_hidden_units, target_word_units, loss_func, True, args.dropout)
else:
    raise Exception('Unknown context type: {}'.format(args.context))

optimizer = O.Adam(alpha=args.alpha)
optimizer.setup(model)

if args.grad_clip:
    optimizer.add_hook(GradientClipping(args.grad_clip))

STATUS_INTERVAL = 1000000

for epoch in range(args.epoch):
Example 18
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--device',
                        '-d',
                        type=str,
                        default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--unit',
                        '-u',
                        default=200,
                        type=int,
                        help='number of units')
    parser.add_argument('--window',
                        '-w',
                        default=10,
                        type=int,
                        help='window size')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=1000,
                        help='learning minibatch size')
    parser.add_argument('--epoch',
                        '-e',
                        default=3,
                        type=int,
                        help='number of epochs to learn')
    parser.add_argument('--model',
                        '-m',
                        choices=['skipgram', 'cbow'],
                        default='skipgram',
                        help='model type ("skipgram", "cbow")')
    parser.add_argument('--negative-size',
                        default=5,
                        type=int,
                        help='number of negative samples')
    parser.add_argument('--out-type',
                        '-o',
                        choices=['hsm', 'ns', 'original'],
                        default='hsm',
                        help='output model type ("hsm": hierarchical softmax, '
                        '"ns": negative sampling, "original": '
                        'no approximation)')
    parser.add_argument('--out',
                        default='result',
                        help='Directory to output the result')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu',
                       '-g',
                       dest='device',
                       type=int,
                       nargs='?',
                       const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    if chainer.get_dtype() == np.float16:
        warnings.warn('This example may cause NaN in FP16 mode.',
                      RuntimeWarning)

    device = chainer.get_device(args.device)
    device.use()

    with open('tokenized_data.txt', 'r', encoding='utf-8') as f:
        data = f.read().split()

    import collections

    index2word = {}
    word2index = {}

    idx = 0
    for w in set(data):
        word2index[w] = idx
        index2word[idx] = w
        idx += 1

    data_array = []

    for w in data:
        data_array.append(word2index[w])

    data_array = np.array(data_array, dtype='int32')

    # Set up the dataset
    train = data_array[:]

    counts = collections.Counter(train)
    n_vocab = max(train) + 1

    vocab = word2index

    print('Device: {}'.format(device))
    print('# unit: {}'.format(args.unit))
    print('Window: {}'.format(args.window))
    print('Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('Training model: {}'.format(args.model))
    print('Output type: {}'.format(args.out_type))
    print('')

    print('n_vocab: %d' % n_vocab)
    print('data length: %d' % len(train))

    if args.out_type == 'hsm':
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args.unit, tree)
        loss_func.W.array[...] = 0
    elif args.out_type == 'ns':
        cs = [counts[w] for w in range(len(counts))]
        loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
        loss_func.W.array[...] = 0
    elif args.out_type == 'original':
        loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
    else:
        raise Exception('Unknown output type: {}'.format(args.out_type))

    # Choose the model
    if args.model == 'skipgram':
        model = SkipGram(n_vocab, args.unit, loss_func)

    elif args.model == 'cbow':
        model = ContinuousBoW(n_vocab, args.unit, loss_func)

    else:
        raise Exception('Unknown model type: {}'.format(args.model))

    model.to_device(device)

    # Set up an optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    # Set up an iterator
    train_iter = WindowIterator(train, args.window, args.batchsize)

    # Set up an updater
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                converter=convert,
                                                device=device)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss']))
    trainer.extend(extensions.ProgressBar())

    trainer.run()

    # Save the word2vec model
    with open('word2vec.model', 'w', encoding='utf-8') as f:
        f.write('%d %d\n' % (len(index2word), args.unit))
        w = cuda.to_cpu(model.embed.W.array)
        for i, wi in enumerate(w):
            v = ' '.join(map(str, wi))
            f.write('%s %s\n' % (index2word[i], v))
Example 19
def main():
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()

    train, val, _ = chainer.datasets.get_ptb_words()
    counts = collections.Counter(train)
    counts.update(collections.Counter(val))

    n_vocab = max(train) + 1

    if args.test:
        train = train[:100]
        val = val[:100]
    vocab = chainer.datasets.get_ptb_words_vocabulary()  # dict which maps word2index
    index2word = {wid: word for word, wid in six.iteritems(vocab)}  # dict which maps index2word

    print("n_vocab: %d" % n_vocab)
    print("data length: %d" % len(train))

    if args.out_type == "hsm":
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args.unit, tree)
        loss_func.W.data[...] = 0
    elif args.out_type == "ns":
        cs = [counts[w] for w in range(len(counts))]
        loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
        loss_func.W.data[...] = 0
    elif args.out_type == "original":
        loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
    else:
        raise Exception("Unknown output type: {}".format(args.out_type))

    if args.model == "skipgram":
        model = SkipGram(n_vocab, args.unit, loss_func)
    elif args.model == "cbow":
        model = ContinuousBoW(n_vocab, args.unit, loss_func)
    else:
        raise Exception("Unknown model type: {}".format(args.model))

    if args.gpu >= 0:
        model.to_gpu()

    optimizer = O.Adam()
    optimizer.setup(model)

    train_iter = WindowIterator(train, args.window, args.batchsize)
    val_iter = WindowIterator(val, args.window, args.batchsize, repeat=False)

    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       converter=convert,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, "epoch"), out=args.out)

    trainer.extend(
        extensions.Evaluator(val_iter,
                             model,
                             converter=convert,
                             device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())

    trainer.run()

    with open("word2vec.model", "w") as f:
        f.write("%d %d\n" % (len(index2word), args.unit))
        w = cuda.to_cpu(model.embed.W.data)
        for i, wi in enumerate(w):
            v = " ".join(map(str, wi))
            f.write("%s %s\n" % (index2word[i], v))
Example 20
 def setUp(self):
     self.link = links.NegativeSampling(3, [10, 5, 2, 5, 2], 2)
     self.link.zerograds()
     self.x = numpy.random.uniform(-1, 1, (2, 3)).astype(numpy.float32)
     self.t = numpy.array([0, 2]).astype(numpy.int32)
     self.gy = numpy.random.uniform(-1, 1, ()).astype(numpy.float32)
Example 21
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--device',
                        '-d',
                        type=str,
                        default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--unit',
                        '-u',
                        default=100,
                        type=int,
                        help='number of units')
    parser.add_argument('--window',
                        '-w',
                        default=5,
                        type=int,
                        help='window size')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=1000,
                        help='learning minibatch size')
    parser.add_argument('--epoch',
                        '-e',
                        default=20,
                        type=int,
                        help='number of epochs to learn')
    parser.add_argument('--model',
                        '-m',
                        choices=['skipgram', 'cbow'],
                        default='skipgram',
                        help='model type ("skipgram", "cbow")')
    parser.add_argument('--negative-size',
                        default=5,
                        type=int,
                        help='number of negative samples')
    parser.add_argument('--out-type',
                        '-o',
                        choices=['hsm', 'ns', 'original'],
                        default='hsm',
                        help='output model type ("hsm": hierarchical softmax, '
                        '"ns": negative sampling, "original": '
                        'no approximation)')
    parser.add_argument('--out',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        type=str,
                        help='Resume the training from snapshot')
    parser.add_argument('--snapshot-interval',
                        type=int,
                        help='Interval of snapshots')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu',
                       '-g',
                       dest='device',
                       type=int,
                       nargs='?',
                       const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = chainer.get_device(args.device)
    device.use()

    if args.snapshot_interval is None:
        args.snapshot_interval = args.epoch
    args.snapshot_interval = min(args.snapshot_interval, args.epoch)

    print('Device: {}'.format(device))
    print('# unit: {}'.format(args.unit))
    print('Window: {}'.format(args.window))
    print('Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('Training model: {}'.format(args.model))
    print('Output type: {}'.format(args.out_type))
    print('')

    # Load the dataset
    train, val, _ = chainer.datasets.get_ptb_words()
    counts = collections.Counter(train)
    counts.update(collections.Counter(val))
    n_vocab = max(train) + 1

    if args.test:
        train = train[:100]
        val = val[:100]

    vocab = chainer.datasets.get_ptb_words_vocabulary()
    index2word = {wid: word for word, wid in six.iteritems(vocab)}

    print('n_vocab: %d' % n_vocab)
    print('data length: %d' % len(train))

    if args.out_type == 'hsm':
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args.unit, tree)
        loss_func.W.array[...] = 0
    elif args.out_type == 'ns':
        cs = [counts[w] for w in range(len(counts))]
        loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
        loss_func.W.array[...] = 0
    elif args.out_type == 'original':
        loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
    else:
        raise Exception('Unknown output type: {}'.format(args.out_type))

    # Choose the model
    if args.model == 'skipgram':
        model = SkipGram(n_vocab, args.unit, loss_func)
    elif args.model == 'cbow':
        model = ContinuousBoW(n_vocab, args.unit, loss_func)
    else:
        raise Exception('Unknown model type: {}'.format(args.model))

    model.to_device(device)

    # Set up an optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    # Set up an iterator
    train_iter = WindowIterator(train, args.window, args.batchsize)
    val_iter = WindowIterator(val, args.window, args.batchsize, repeat=False)

    # Set up an updater
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                converter=convert,
                                                device=device)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(
        extensions.Evaluator(val_iter, model, converter=convert,
                             device=device))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())

    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=(args.snapshot_interval, 'epoch'))

    if args.resume is not None:
        chainer.serializers.load_npz(args.resume, trainer)
    trainer.run()

    # Save the word2vec model
    with open(os.path.join(args.out, 'word2vec.model'), 'w') as f:
        f.write('%d %d\n' % (len(index2word), args.unit))
        w = cuda.to_cpu(model.embed.W.array)
        for i, wi in enumerate(w):
            v = ' '.join(map(str, wi))
            f.write('%s %s\n' % (index2word[i], v))
Example 22
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit', '-u', default=100, type=int,
                        help='number of units')
    parser.add_argument('--window', '-w', default=5, type=int,
                        help='window size')
    parser.add_argument('--batchsize', '-b', type=int, default=1000,
                        help='learning minibatch size')
    parser.add_argument('--epoch', '-e', default=20, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--model', '-m', choices=['skipgram', 'cbow'],
                        default='skipgram',
                        help='model type ("skipgram", "cbow")')
    parser.add_argument('--negative-size', default=5, type=int,
                        help='number of negative samples')
    parser.add_argument('--out-type', '-o', choices=['hsm', 'ns', 'original'],
                        default='hsm',
                        help='output model type ("hsm": hierarchical softmax, '
                        '"ns": negative sampling, "original": '
                        'no approximation)')
    parser.add_argument('--out', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.add_argument('--wakati_corpus_list')
    parser.add_argument('--num_tokens', type=int, default=None, help='If not set, we count words as the 1st-pash.')
    parser.add_argument('--word_count_threshold', default=5, type=int)
    parser.set_defaults(test=False)
    args = parser.parse_args()

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        cuda.check_cuda_available()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('Window: {}'.format(args.window))
    print('Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('Training model: {}'.format(args.model))
    print('Output type: {}'.format(args.out_type))
    print('')

    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()

    wakati_corpus_list = [line.rstrip() for line in open(args.wakati_corpus_list, 'r').readlines() if not re.match(r'^\s*#', line)]

    # Create vocab.
    vocab = word2vec_module.create_vocab(wakati_corpus_list, count_threshold=args.word_count_threshold)
    index2word = dict([(wid, word) for (word, wid) in vocab.items()])

    # Load the dataset
    words_generator = word2vec_module.WordsGenerator(wakati_corpus_list, batch_size=1000)

    class WidsGenerator:
    
        def __init__(self, words_generator, vocab):
            self.words_generator = words_generator
            self.vocab = vocab

        def __call__(self):
            for words in self.words_generator():
                wids = [vocab[word] if word in vocab else 0 for word in words]
                yield wids

    class WidGenerator:
 
        def __init__(self, wids_generator):
            self.wids_generator = wids_generator

        def __call__(self):
            for wids in self.wids_generator():
                for wid in wids:
                    yield wid

    wids_generator = WidsGenerator(words_generator, vocab)   # Generator call returns iterator object.
    wid_generator = WidGenerator(wids_generator)
    # train, val, _ = chainer.datasets.get_ptb_words()
    num_tokens = len([wid for wid in wid_generator()]) if args.num_tokens is None else args.num_tokens
    train = itertools.islice(wid_generator(), min(int(num_tokens*0.05), 10000), sys.maxsize)
    val = itertools.islice(wid_generator(), 0, min(int(num_tokens*0.05), 10000))
    counts = collections.Counter(wid_generator())
    # counts.update(collections.Counter(WidGenerator(val)()))
    # n_vocab = max(train) + 1
    n_vocab = len(vocab)

    # if args.test:
    #     train = train[:100]
    #     val = val[:100]

    print('n_vocab: %d' % n_vocab)
    # print('data length: %d' % len(train))

    if args.out_type == 'hsm':
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args.unit, tree)
        loss_func.W.data[...] = 0
    elif args.out_type == 'ns':
        cs = [counts[w] for w in range(len(counts))]
        loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
        loss_func.W.data[...] = 0
    elif args.out_type == 'original':
        loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
    else:
        raise Exception('Unknown output type: {}'.format(args.out_type))

    # Choose the model
    if args.model == 'skipgram':
        model = SkipGram(n_vocab, args.unit, loss_func)
    elif args.model == 'cbow':
        model = ContinuousBoW(n_vocab, args.unit, loss_func)
    else:
        raise Exception('Unknown model type: {}'.format(args.model))

    if args.gpu >= 0:
        model.to_gpu()

    # Set up an optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    # Set up an iterator
    train = itertools.islice(wids_generator(), min(int(num_tokens*0.05), 10000), sys.maxsize)
    val = itertools.islice(wids_generator(), 0, min(int(num_tokens*0.05), 10000))
    train_iter = WindowIteratorIterator(train, args.window, args.batchsize)
    val_iter = WindowIteratorIterator(val, args.window, args.batchsize, repeat=False)
    # train_iter = WindowIterator(train, args.window, args.batchsize)
    # val_iter = WindowIterator(val, args.window, args.batchsize, repeat=False)

    # Set up an updater
    updater = training.updater.StandardUpdater(
        train_iter, optimizer, converter=convert, device=args.gpu)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.Evaluator(
        val_iter, model, converter=convert, device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    # Save the word2vec model
    with open('word2vec.model', 'w') as f:
        f.write('%d %d\n' % (len(index2word), args.unit))
        w = cuda.to_cpu(model.embed.W.data)
        for i, wi in enumerate(w):
            v = ' '.join(map(str, wi))
            f.write('%s %s\n' % (index2word[i], v))
Example 23
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu',
                        '-g',
                        default=-1,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit',
                        '-u',
                        default=100,
                        type=int,
                        help='number of units')
    parser.add_argument('--window',
                        '-w',
                        default=5,
                        type=int,
                        help='window size')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=1000,
                        help='learning minibatch size')
    parser.add_argument('--epoch',
                        '-e',
                        default=20,
                        type=int,
                        help='number of epochs to learn')
    parser.add_argument('--model',
                        '-m',
                        choices=['skipgram', 'cbow'],
                        default='skipgram',
                        help='model type ("skipgram", "cbow")')
    parser.add_argument('--negative-size',
                        default=5,
                        type=int,
                        help='number of negative samples')
    parser.add_argument('--out-type',
                        '-o',
                        choices=['hsm', 'ns', 'original'],
                        default='hsm',
                        help='output model type ("hsm": hierarchical softmax, '
                        '"ns": negative sampling, "original": '
                        'no approximation)')
    parser.add_argument('--out',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        cuda.check_cuda_available()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('Window: {}'.format(args.window))
    print('Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('Training model: {}'.format(args.model))
    print('Output type: {}'.format(args.out_type))
    print('')

    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()

    # Load the dataset
    train, val, _ = chainer.datasets.get_ptb_words()
    counts = collections.Counter(train)
    counts.update(collections.Counter(val))
    n_vocab = max(train) + 1

    if args.test:
        train = train[:100]
        val = val[:100]

    vocab = chainer.datasets.get_ptb_words_vocabulary()
    index2word = {wid: word for word, wid in six.iteritems(vocab)}

    print('n_vocab: %d' % n_vocab)
    print('data length: %d' % len(train))

    if args.out_type == 'hsm':
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args.unit, tree)
        loss_func.W.data[...] = 0
    elif args.out_type == 'ns':
        cs = [counts[w] for w in range(len(counts))]
        loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
        loss_func.W.data[...] = 0
    elif args.out_type == 'original':
        loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
    else:
        raise Exception('Unknown output type: {}'.format(args.out_type))

    # Choose the model
    if args.model == 'skipgram':
        model = SkipGram(n_vocab, args.unit, loss_func)
    elif args.model == 'cbow':
        model = ContinuousBoW(n_vocab, args.unit, loss_func)
    else:
        raise Exception('Unknown model type: {}'.format(args.model))

    if args.gpu >= 0:
        model.to_gpu()

    # Set up an optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    # Set up an iterator
    train_iter = WindowIterator(train, args.window, args.batchsize)
    val_iter = WindowIterator(val, args.window, args.batchsize, repeat=False)

    # Set up an updater
    updater = training.updater.StandardUpdater(train_iter,
                                               optimizer,
                                               converter=convert,
                                               device=args.gpu)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(
        extensions.Evaluator(val_iter,
                             model,
                             converter=convert,
                             device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    # Save the word2vec model
    with open('word2vec.model', 'w') as f:
        f.write('%d %d\n' % (len(index2word), args.unit))
        w = cuda.to_cpu(model.embed.W.data)
        for i, wi in enumerate(w):
            v = ' '.join(map(str, wi))
            f.write('%s %s\n' % (index2word[i], v))
Example 24
                index2word[ind] = word
            counts[word2index[word]] += 1
            dataset.append(word2index[word])

n_vocab = len(word2index)

print('n_vocab: %d' % n_vocab)
print('data length: %d' % len(dataset))

if args.out_type == 'hsm':
    HSM = L.BinaryHierarchicalSoftmax
    tree = HSM.create_huffman_tree(counts)
    loss_func = HSM(args.unit, tree)
elif args.out_type == 'ns':
    cs = [counts[w] for w in range(len(counts))]
    loss_func = L.NegativeSampling(args.unit, cs, 20)
elif args.out_type == 'original':
    loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
else:
    raise Exception('Unknown output type: {}'.format(args.out_type))

if args.model == 'skipgram':
    model = SkipGram(n_vocab, args.unit, loss_func)
elif args.model == 'cbow':
    model = ContinuousBoW(n_vocab, args.unit, loss_func)
else:
    raise Exception('Unknown model type: {}'.format(args.model))

if args.gpu >= 0:
    model.to_gpu()
Example 25
def main():
    args = get_args()

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        cuda.check_cuda_available()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('Window: {}'.format(args.window))
    print('Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('Training model: {}'.format(args.model))
    print('Output type: {}'.format(args.out_type))
    print('')

    train, val, _ = chainer.datasets.get_ptb_words()
    train: np.ndarray = train
    val: np.ndarray = val
    counts = collections.Counter(train)
    counts.update(collections.Counter(val))
    n_vocab: int = max(train) + 1

    assert len(train.shape) == 1
    assert len(val.shape) == 1

    if args.test:
        train: np.ndarray = train[:100]
        val: np.ndarray = val[:100]

    vocab: Dict[str, int] = chainer.datasets.get_ptb_words_vocabulary()
    index2word: Dict[int, str] = {wid: word for word, wid in vocab.items()}

    print('n_vocab: %d' % n_vocab)
    print('data length: %d' % len(train))

    if args.out_type == 'hsm':
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args.unit, tree)
        loss_func.W.data[...] = 0
    elif args.out_type == 'ns':
        cs = [counts[w] for w in range(len(counts))]
        loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
        loss_func.W.data[...] = 0
    elif args.out_type == 'original':
        loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
    else:
        raise Exception('Unknown output type: {}'.format(args.out_type))

    if args.model == 'skipgram':
        model = SkipGram(n_vocab, args.unit, loss_func)
    elif args.model == 'cbow':
        model = ContinuousBoW(n_vocab, args.unit, loss_func)
    else:
        raise Exception('Unknown model type: {}'.format(args.model))

    if args.gpu >= 0:
        model.to_gpu()

    optimizer = O.Adam()
    optimizer.setup(model)

    train_iter = WindowIterator(train, args.window, args.batchsize)
    val_iter = WindowIterator(val, args.window, args.batchsize, repeat=False)
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       converter=convert,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(
        extensions.Evaluator(val_iter,
                             model,
                             converter=convert,
                             device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    with open('word2vec.model', 'w') as f:
        f.write('%d %d\n' % (len(index2word), args.unit))
        w = cuda.to_cpu(model.embed.W.data)
        for i, wi in enumerate(w):
            v = ' '.join(map(str, wi))
            f.write('%s %s\n' % (index2word[i], v))
Example 26
def train(**args):
    set_seed(42)
    args = EasyDict(args)
    logger.info(args)
    dataset_file = Path(args.dataset_file)

    data = json.loads(dataset_file.read_text())
    ladder = data['ladder']
    train_data, valid_data = data['train'], data['valid']

    counter = Counter()
    pokes = train_data + valid_data
    for poke in pokes:
        counter.update(poke)

    counts = [0] * (args.topk + 1)
    index2poke = ['<unk>']
    for i, (name, freq) in enumerate(counter.most_common()):
        if i < args.topk:
            counts[i + 1] = freq
            index2poke.append(name)
        else:
            counts[0] += freq
    vocab = {x: i for i, x in enumerate(index2poke)}
    n_vocab = len(vocab)
    logger.info('n_vocab = {}'.format(n_vocab))

    train_data = vectorize(train_data, vocab)
    valid_data = vectorize(valid_data, vocab)

    X_valid, y_valid = convert(valid_data)
    X_train, y_train = convert(train_data)

    train = TupleDataset(X_train, y_train)
    valid = TupleDataset(X_valid, y_valid)

    logger.info('train size = {}'.format(len(train)))
    logger.info('valid size = {}'.format(len(valid)))

    train_iter = chainer.iterators.SerialIterator(train, 32)
    valid_iter = chainer.iterators.SerialIterator(valid,
                                                  32,
                                                  repeat=False,
                                                  shuffle=False)
    if args.loss_func == 'softmax':
        loss_func = SoftmaxCrossEntropyLoss(args.n_units, n_vocab)
    elif args.loss_func == 'ns':
        loss_func = L.NegativeSampling(args.n_units, counts,
                                       args.negative_size)
        loss_func.W.data[...] = 0
    else:
        raise ValueError('invalid loss_func: {}'.format(args.loss_func))

    prefix = '{}_{}_{}'.format(ladder, args.loss_func, args.n_units)

    model = ContinuousBoW(n_vocab, args.n_units, loss_func)
    optimizer = O.Adam()
    optimizer.setup(model)

    updater = training.updater.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (10, 'epoch'), out='results')
    trainer.extend(extensions.Evaluator(valid_iter, model))
    trainer.extend(extensions.LogReport(log_name='{}_log'.format(prefix)))
    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())

    trainer.run()

    # Save the word2vec model
    Path('results').mkdir(exist_ok=True)
    poke2vec_file = 'results/{}_poke2vec.model'.format(prefix)
    with open(poke2vec_file, 'w') as f:
        f.write('%d %d\n' % (n_vocab, args.n_units))
        w = model.embed.W.data
        for i, wi in enumerate(w):
            v = ' '.join(map(str, wi))
            f.write('%s %s\n' % (index2poke[i], v))