def main():
    args = parse_args()
    gen_data(SEQ_LEN, args.start_range, args.end_range)

    batch_size = 100
    buckets = []
    num_hidden = 300
    num_embed = 512
    num_lstm_layer = 2

    num_epoch = 1
    learning_rate = 0.1
    momentum = 0.9

    if args.cpu:
        contexts = [mx.context.cpu(i) for i in range(1)]
    else:
        contexts = [mx.context.gpu(i) for i in range(1)]

    vocab = default_build_vocab(os.path.join(DATA_DIR, TRAIN_FILE))

    def sym_gen(seq_len):
        return bi_lstm_unroll(seq_len, len(vocab),
                              num_hidden=num_hidden, num_embed=num_embed,
                              num_label=len(vocab))

    # Initial LSTM cell and hidden states, one pair per layer.
    init_c = [('l%d_init_c' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
    init_h = [('l%d_init_h' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
    init_states = init_c + init_h

    data_train = BucketSentenceIter(os.path.join(DATA_DIR, TRAIN_FILE), vocab,
                                    buckets, batch_size, init_states)
    data_val = BucketSentenceIter(os.path.join(DATA_DIR, VALID_FILE), vocab,
                                  buckets, batch_size, init_states)

    if len(buckets) == 1:
        # A single bucket: unroll the symbol once for that fixed length.
        symbol = sym_gen(buckets[0])
    else:
        # Multiple (or auto-generated) buckets: pass the factory so a
        # symbol is unrolled per bucket length.
        symbol = sym_gen

    model = mx.model.FeedForward(ctx=contexts,
                                 symbol=symbol,
                                 num_epoch=num_epoch,
                                 learning_rate=learning_rate,
                                 momentum=momentum,
                                 wd=0.00001,
                                 initializer=mx.init.Xavier(factor_type="in",
                                                            magnitude=2.34))

    model.fit(X=data_train, eval_data=data_val,
              eval_metric=mx.metric.np(Perplexity),
              batch_end_callback=mx.callback.Speedometer(batch_size, 50))

    model.save("sort")
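# `mx.metric.np` above wraps a plain numpy function as an evaluation metric.
# The Perplexity helper itself is defined outside this section; the following
# is a minimal sketch of a perplexity metric consistent with how it is used
# here (an assumption about its shape handling, not the original definition):
import numpy as np

def Perplexity(label, pred):
    # label: (batch, seq_len) token ids; pred: (batch * seq_len, vocab)
    # softmax outputs. Flatten labels to line up with the prediction rows.
    label = label.T.reshape((-1,))
    loss = 0.
    for i in range(pred.shape[0]):
        # Clamp to avoid log(0) on zero-probability entries.
        loss += -np.log(max(1e-10, pred[i][int(label[i])]))
    return np.exp(loss / label.size)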
def main():
    tks = sys.argv[1:]
    assert len(tks) >= 5, "Please provide at least 5 numbers to sort, since the sequence length is 5"

    batch_size = 1
    buckets = []
    num_hidden = 300
    num_embed = 512
    num_lstm_layer = 2

    num_epoch = 1
    learning_rate = 0.1
    momentum = 0.9

    contexts = [mx.context.cpu(i) for i in range(1)]

    vocab = default_build_vocab(os.path.join(DATA_DIR, TRAIN_FILE))
    # Reverse vocabulary, for mapping predicted ids back to tokens.
    rvocab = {}
    for k, v in vocab.items():
        rvocab[v] = k

    # Load the parameters saved by the training script (prefix "sort", epoch 1).
    _, arg_params, __ = mx.model.load_checkpoint("sort", 1)

    for tk in tks:
        assert tk in vocab, \
            "{} not in range of numbers that the model trained for.".format(tk)

    model = BiLSTMInferenceModel(SEQ_LEN, len(vocab),
                                 num_hidden=num_hidden, num_embed=num_embed,
                                 num_label=len(vocab), arg_params=arg_params,
                                 ctx=contexts, dropout=0.0)

    data = np.zeros((1, len(tks)))
    for k in range(len(tks)):
        data[0][k] = vocab[tks[k]]
    data = mx.nd.array(data)

    prob = model.forward(data)
    for k in range(len(tks)):
        print(rvocab[np.argmax(prob, axis=1)[k]])
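# Both entry points rely on default_build_vocab to map tokens to integer ids.
# It is defined elsewhere (alongside the bucketing iterator); a minimal sketch
# of the expected behaviour, assuming ids start at 1 so that 0 stays reserved
# for the iterator's zero-padding (an assumption, not the original code):
def default_build_vocab(path):
    vocab = {}
    idx = 1  # 0 is left free for zero-padding
    with open(path) as f:
        for line in f:
            for word in line.split():
                if word not in vocab:
                    vocab[word] = idx
                    idx += 1
    return vocab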
if __name__ == '__main__':
    batch_size = 1
    buckets = []
    num_hidden = 300
    num_embed = 512
    num_lstm_layer = 2

    num_epoch = 1
    learning_rate = 0.1
    momentum = 0.9

    contexts = [mx.context.gpu(i) for i in range(1)]

    vocab = default_build_vocab("./data/sort.train.txt")
    # Reverse vocabulary for decoding predicted ids back to tokens.
    rvocab = {}
    for k, v in vocab.items():
        rvocab[v] = k

    _, arg_params, __ = mx.model.load_checkpoint("sort", 1)

    model = BiLSTMInferenceModel(5, len(vocab),
                                 num_hidden=num_hidden, num_embed=num_embed,
                                 num_label=len(vocab), arg_params=arg_params,
                                 ctx=contexts, dropout=0.0)

    tks = sys.argv[1:]
    data = np.zeros((1, len(tks)))
    for k in range(len(tks)):
        data[0][k] = vocab[tks[k]]
    data = mx.nd.array(data)

    # Forward pass and decoding, as in main() above.
    prob = model.forward(data)
    for k in range(len(tks)):
        print(rvocab[np.argmax(prob, axis=1)[k]])
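# End-to-end usage, for reference. Script and flag names here are
# illustrative (this section does not show them); only the checkpoint
# prefix "sort" and epoch 1 are fixed by the code above:
#
#   python train_sort.py --start-range 100 --end-range 1000
#   python infer_sort.py 234 189 785 763 231
#
# Training writes a checkpoint that mx.model.load_checkpoint("sort", 1)
# can read; inference then prints the given numbers one per line in
# sorted order.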