Exemple #1
0
                   ) and params.context_var_types[i] == 'numerical':
            context_vocabs[context_var] = None
            continue

        v = Vocab.MakeFromData([[u] for u in dataset.GetColumn(context_var)],
                               min_count=50,
                               no_special_syms=True)
        context_vocabs[context_var] = v
        print 'num {0}: {1}'.format(context_var, len(v))

    vocab.Save(os.path.join(args.expdir, 'word_vocab.pickle'))
    print 'vocab size {0}'.format(len(vocab))
    with open(os.path.join(args.expdir, 'context_vocab.pickle'), 'wb') as f:
        pickle.dump(context_vocabs, f)

    dataset.Prepare(vocab, context_vocabs)

else:
    # Reload the vocabularies from files stored under args.expdir instead of
    # rebuilding them from the dataset.
    vocab = Vocab.Load(os.path.join(args.expdir, 'word_vocab.pickle'))
    if params.splitter == 'word':
        # A character vocabulary is only loaded when the splitter is 'word';
        # otherwise char_vocab is explicitly set to None.
        char_vocab = Vocab.Load(os.path.join(args.expdir, 'char_vocab.pickle'))
    else:
        char_vocab = None
    # NOTE(review): pickle.load can execute arbitrary code while
    # deserializing -- only point args.expdir at a directory you trust.
    with open(os.path.join(args.expdir, 'context_vocab.pickle'), 'rb') as f:
        context_vocabs = pickle.load(f)

# Decide whether the NCE loss is used, from the run mode and vocabulary size:
#   * 'train'    -> NCE on, except for small vocabularies (< 5000 entries)
#   * 'classify' -> NCE on only when the vocabulary has more than 5000 entries
#   * any other mode -> NCE off
# NOTE(review): with exactly 5000 entries 'train' enables NCE while
# 'classify' does not; that asymmetry is kept as-is -- confirm it is intended.
use_nce_loss = (
    (args.mode == 'train' and len(vocab) >= 5000)
    or (args.mode == 'classify' and len(vocab) > 5000)
)