def main(_):
    vocab = reader.get_vocab(FLAGS.vocab)
    test_ids, test_meta = reader.make_test(PDPATH('/test_data/' + FLAGS.test), vocab)
    model_path = PDPATH('/trained_models/') + FLAGS.model
    config = load_configs(model_path)

    with tf.Graph().as_default():
        with tf.Session() as session:
            test_input = TestData(config=config, test_data=test_ids,
                                  test_meta=test_meta, vocab=vocab, name="TestInput")
            with tf.variable_scope("Model"):
                mtest = Basic_LSTM_Model(is_training=False, config=config, input_=test_input)

            saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
            # Restore from the first checkpoint whose .meta file is found in the model directory.
            for file in os.listdir(model_path):
                if '.meta' in file:
                    ckpt = file.split(sep='.')[0]
                    saver.restore(session, os.path.join(model_path, ckpt))
                    break

            np.set_printoptions(precision=4, suppress=False, linewidth=100)
            b = run_test(session=session, model=mtest, model_input=test_input)
            print(b)
            # Normalize each row into a probability distribution before printing.
            b = b / np.sum(b, axis=1).reshape([-1, 1])
            print(b)
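# The restore loop above scans model_path for a '.meta' file and rebuilds the
# checkpoint prefix from its name. A minimal alternative sketch (an assumption,
# not part of the original script): if the directory also holds the standard
# 'checkpoint' index file written by tf.train.Saver, TensorFlow can return the
# newest prefix directly.
ckpt_prefix = tf.train.latest_checkpoint(model_path)
if ckpt_prefix is not None:
    saver.restore(session, ckpt_prefix)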
def __init__(self, data_bundle, vocabulary=None):  # , c_len=None, q_len=None):
    (self.contexts, self.questions, self.choices, self.labels,
     self.choices_map, self.context_lens, self.qs_lens) = data_bundle
    if vocabulary:
        self.vocab = vocabulary
    else:
        self.vocab = rn.get_vocab(self.questions, self.contexts, min_frequency=10)
    self.vocab_size = len(self.vocab.vocabulary_)
    self.labels_idx = sorted(
        list(set([choice for choices in self.choices for choice in choices])))
    print(self.contexts[:10])
    print(self.questions[:10])
    print(self.labels[:10])
    self.transformed_labels_idx = [
        x[0] for x in list(self.vocab.transform(self.labels_idx))
    ]
    print(self.transformed_labels_idx)
    self.contexts = rn.vocab_transform(self.contexts, self.vocab)
    self.questions = rn.vocab_transform(self.questions, self.vocab)
def __init__(self, data_bundle, vocabulary=None):  # , c_len=None, q_len=None):
    (self.contexts, self.questions, self.choices, self.labels,
     self.choices_map, self.context_lens, self.qs_lens) = data_bundle
    if vocabulary:
        self.vocab = vocabulary
    else:
        self.vocab = rn.get_vocab(self.questions, self.contexts, min_frequency=10)
    self.vocab_size = len(self.vocab.vocabulary_)
    self.labels_idx = sorted(
        list(set([choice for choices in self.choices for choice in choices])))
    self.transformed_labels_idx = [
        x[0] for x in list(self.vocab.transform(self.labels_idx))
    ]
    print(self.transformed_labels_idx)
    print([x for x in self.questions if '@placeholder' not in x.split(" ")])
    self.contexts = rn.vocab_transform(self.contexts, self.vocab)
    self.questions = rn.vocab_transform(self.questions, self.vocab)
    placeholder_token = ['@placeholder']
    placeholder_idx = rn.vocab_transform(placeholder_token, self.vocab)[0][0]
    print(placeholder_idx)
    print(len([x for x in self.questions if placeholder_idx in x]))
    self.placeholder_inds = np.array(
        [list(x).index(placeholder_idx) for x in self.questions]).astype(int)
    print(self.placeholder_inds.shape)
def vocab_demo():
    v = rd.get_vocab('ptb.voc')
    items = [
        'the', 'dog', 'dogs', 'boy', 'boys', 'is', 'are', 'has', 'have',
        'was', 'were'
    ]
    for i in items:
        print(i, v.getid(i))
def infer():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.vocab_size
    tar_vocab_size = args.vocab_size
    batch_size = args.batch_size
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size
    attr_init = args.attr_init
    latent_size = 32

    if args.enable_ce:
        fluid.default_main_program().random_seed = 102
        framework.default_startup_program().random_seed = 102

    model = VAE(hidden_size,
                latent_size,
                src_vocab_size,
                tar_vocab_size,
                batch_size,
                num_layers=num_layers,
                init_scale=init_scale,
                attr_init=attr_init)

    beam_size = args.beam_size
    trans_res = model.build_graph(mode='sampling', beam_size=beam_size)
    # clone from default main program and use it as the validation program
    main_program = fluid.default_main_program()

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    dir_name = args.reload_model
    print("dir name", dir_name)
    dir_name = os.path.join(dir_name, "checkpoint")
    fluid.load(main_program, dir_name, exe)

    vocab, tar_id2vocab = get_vocab(args.dataset_prefix)
    infer_output = np.ones((batch_size, 1), dtype='int64') * BOS_ID

    fetch_outs = exe.run(feed={'tar': infer_output},
                         fetch_list=[trans_res.name],
                         use_program_cache=False)

    with io.open(args.infer_output_file, 'w', encoding='utf-8') as out_file:
        for line in fetch_outs[0]:
            end_id = -1
            if EOS_ID in line:
                end_id = np.where(line == EOS_ID)[0][0]
            new_line = [tar_id2vocab[e[0]] for e in line[1:end_id]]
            out_file.write(space_tok.join(new_line))
            out_file.write(line_tok)
def main():
    import reader as reader
    from trainer import Configs
    from PDPATH import PDPATH

    ptb_vocab = get_vocab('ptb.voc')
    raw_test_data = reader.make_test(PDPATH('/RNN/test_data/coffee.txt'), ptb_vocab)
    test_input = TestData(config=Configs(),
                          test_data=raw_test_data,
                          vocab=ptb_vocab,
                          name="TestInput")
def __init__(self, config, data_path=None, vocabulary=None, name=None):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    raw_context, raw_questions, raw_choices, raw_labels, self.choices_map = \
        read.load_data(data_path)
    all_choices = read.build_choices(raw_choices)

    self.epoch_size = ((len(raw_context) // batch_size) - 1) // num_steps

    # build vocab for train data
    if not vocabulary:
        self.vocabulary = read.get_vocab(raw_questions, raw_context, min_frequency=500)
    else:
        self.vocabulary = vocabulary

    # Join each choice list into a single string once; joining a second time
    # below would space-separate individual characters.
    raw_choices = [" ".join(x) for x in raw_choices]
    self.all_choices = read.vocab_transform(all_choices, self.vocabulary)
    self.questions = read.vocab_transform(raw_questions, self.vocabulary)
    self.context = read.vocab_transform(raw_context, self.vocabulary)
    self.labels = read.vocab_transform(raw_labels, self.vocabulary)
    self.choices = read.vocab_transform(raw_choices, self.vocabulary)
def __init__(self, data_bundle, vocabulary=None):
    (contexts, questions, self.choices, self.labels,
     self.choices_map, self.context_lens, self.qs_lens) = data_bundle
    if vocabulary:
        self.vocab = vocabulary
    else:
        self.vocab = rn.get_vocab(questions, contexts, min_frequency=FLAGS.min_freq)
    self.vocab_size = len(self.vocab.vocabulary_)
    self.labels_idx = sorted(
        list(set([choice for choices in self.choices for choice in choices])))
    contexts = rn.vocab_transform(contexts, self.vocab)
    self.contexts = rn.pad_eval(contexts, FLAGS.context_steps)
    questions = rn.vocab_transform(questions, self.vocab)
    self.questions = rn.pad_eval(questions, FLAGS.question_steps)
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    datefmt='%m-%d %H:%M',
    filename=datetime.now().strftime('mylogfile_%H_%M_%d_%m_%Y.log'),
    filemode='w')

if __name__ == "__main__":
    # tf.reset_default_graph()
    options = get_params()
    root = "/Users/liuhongbing/Documents/tensorflow/data/snli_1.0/"
    train = [l.strip().split('\t') for l in open(root + 'snli_1.0_train.txt')]
    dev = [l.strip().split('\t') for l in open(root + 'snli_1.0_dev.txt')]
    test = [l.strip().split('\t') for l in open(root + 'snli_1.0_test.txt')]

    vocab = get_vocab(train)
    print("vocab (incr. maxfeatures accordingly):", len(vocab))

    X_train, Y_train, Z_train = load_data(train, vocab)
    X_dev, Y_dev, Z_dev = load_data(dev, vocab)
    X_test, Y_test, Z_test = load_data(test, vocab)

    print('Build model...')
    model = build_model(options)

    config_str = getConfig(options)
    MODEL_ARCH = root + "/Attention_neural/arch_att" + config_str + ".yaml"
    MODEL_WGHT = root + "/Attention_neural/weights_att" + config_str + ".weights"

    MAXLEN = options.xmaxlen
    X_train = pad_sequences(X_train,
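# The pad_sequences call above is cut off in the snippet. A minimal sketch of the
# padding step, assuming the Keras helper and the MAXLEN computed just above;
# the keyword arguments are an assumption, not taken from the original.
from keras.preprocessing.sequence import pad_sequences

X_train = pad_sequences(X_train, maxlen=MAXLEN)  # pad/truncate each sequence to MAXLEN ids
X_dev = pad_sequences(X_dev, maxlen=MAXLEN)
X_test = pad_sequences(X_test, maxlen=MAXLEN)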
# Note: aNs / aNp (the animate-noun singular and plural lists used below) are
# presumably loaded from a matching animate-noun file earlier in the script.
with open('POS/noun_inanim.txt', 'r') as file:
    all = file.readlines()
    iNs = all[0][:-2].split(sep=',')
    iNp = all[1].split(sep=',')

with open('POS/verb_trans.txt', 'r') as file:
    all = file.readlines()
    tVs = all[0][:-2].split(sep=',')
    tVp = all[1].split(sep=',')

with open('POS/verb_intrans.txt', 'r') as file:
    all = file.readlines()
    iVs = all[0][:-2].split(sep=',')
    iVp = all[1].split(sep=',')

with open('ptb_adjs.txt', 'r') as file:
    A = file.readline().split(sep=',')

sample = random.randint(20, 50)
print('Here are some singular samples:')
print(' Anim: {}\n Inanim: {}\n Trans: {}\n Intrans: {}\n Adj: {}\n'.format(
    aNs[sample], iNs[sample], tVs[sample], iVs[sample], A[sample]))
print('And here are some plural samples:')
print(' Anim: {}\n Inanim: {}\n Trans: {}\n Intrans: {}\n Adj: {}\n'.format(
    aNp[sample], iNp[sample], tVp[sample], iVp[sample], A[sample + 1]))

V = rd.get_vocab('big_ptb.voc')
from reader import load_data
from reader import get_vocab
from reader import vocab_transform
from reader import batch_iter

# 1. Load the raw test split.
contexts, questions, choices, labels, choices_map, context_lens, qs_lens = \
    load_data(data_path="wdw/test")

# 2. Fit vocabulary with questions and context.
vocab = get_vocab(contexts, questions)

# 3. Transform context and questions.
contexts = vocab_transform(contexts, vocab)
questions = vocab_transform(questions, vocab)

# 4. Give to batch_iter.
readers = batch_iter(contexts, questions, choices, labels, choices_map,
                     context_lens, qs_lens)

# for q, c, ch, lab, ch_map, c_lens, q_lens in readers:
#     print(c.shape)
#     break