def main():
    """Train a character-level language model on 'input.txt' and print samples.

    Builds paired train/eval configurations (the eval copy uses
    batch_size = seq_length = 1 and disables dropout via keep_prob = 1.0),
    shares weights between the two models through TF variable-scope reuse,
    trains for 50 epochs, then prints 10 sampled sentences seeded with 'The'.

    NOTE(review): this definition is shadowed by a second `def main()` later
    in this file, so only the later one is reachable by name — confirm which
    entry point is intended and rename/remove one of them.
    """
    util = TextUtil('input.txt')
    train_config = CharacterModelConfig(util.vocab_size)
    train_config.hidden_depth = 2
    train_config.batch_size = 256

    # Eval config mirrors the training one but runs one token at a time and
    # disables dropout, as expected for step-by-step sampling.
    eval_config = deepcopy(train_config)
    eval_config.batch_size = eval_config.seq_length = 1
    eval_config.keep_prob = 1.0

    # print() with a single argument is valid in both Python 2 and 3; the
    # original bare `print x` statements were Python-2-only syntax.
    print(train_config)

    tf.reset_default_graph()  # TF1-style graph API

    # Same variable scope with reuse=True on the second model: the eval model
    # reuses (shares) the training model's weights.
    with tf.variable_scope('model', reuse=None):
        train_model = CharacterModel(train_config)
    with tf.variable_scope('model', reuse=True):
        eval_model = CharacterModel(eval_config)

    with tf.Session() as sess:
        loss_pp_iter = train(sess,
                             train_model,
                             util,
                             num_epochs=50,
                             eval_model=eval_model)
        # range works under both Python 2 and 3; xrange was Python-2-only.
        for i in range(10):
            print('\n\nSample sentence %d' % (i + 1))
            print(sample(sess, eval_model, util, 'The', length=60))
            print('\n')
def main():
    """Encode a text dataset with a pretrained embedding and dump it to disk.

    Loads an embedding, index-encodes the train/valid/test splits, then
    writes two msgpack files under args.data_folder: the encoded examples
    with labels (args.data_out_file) and a metadata record holding the
    embedding matrix, its dimension, and the vocabulary size
    (args.meta_out_file).
    """
    args = arg_parse()
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
    log = logging.getLogger(__name__)
    log.info(vars(args))

    embedding = TextUtil(args.words_file, args.embedding_file)
    log.info(
        'embedding file loaded, # of words = {:d}, dimension = {:d}'.format(
            len(embedding), embedding.vec_len))

    data = {}
    for target in ['train', 'valid', 'test']:
        # The original concatenated args.data_folder + '{:s}.txt' with no
        # path separator, silently requiring a trailing slash on the folder
        # argument; build the path the same '{:s}/{:s}' way as the output
        # files below (a doubled slash from a trailing-slash argument is
        # harmless on every major OS).
        raw_data, label = load_raw_text(
            '{:s}/{:s}.txt'.format(args.data_folder, target),
            is_test=args.is_test == 1)
        enc = onehot_enc(raw_data, embedding, args.sent_len, args.list_len,
                         args.mode)
        data[target] = {'data': enc, 'label': label}
        log.info('# of {:s} examples = {:d}'.format(target, len(enc)))

    with open('{:s}/{:s}'.format(args.data_folder, args.data_out_file),
              'wb') as f:
        msgpack.dump(data, f)

    meta = {
        'embedding_file': args.embedding_file,
        'embedding_dim': embedding.vec_len,
        'vocab_size': len(embedding),
        # NOTE(review): presumably the full index->vector embedding table;
        # it must be a msgpack-serializable type — confirm idx_to_vec is not
        # a raw ndarray.
        'embedding': embedding.idx_to_vec,
    }
    with open('{:s}/{:s}'.format(args.data_folder, args.meta_out_file),
              'wb') as f:
        msgpack.dump(meta, f)