def main(mutations_file, data_dir):
    """Apply the mutations in *mutations_file* to the matching entity file in *data_dir*."""
    mutations = read_data(mutations_file)
    # The mutations payload names the entity whose snapshot it patches.
    target_path = f"{data_dir}/{mutations['entity'].lower()}.json"
    current = read_data(target_path)
    updated = process_data(cur_data=current, mutations=mutations)
    # The patched data overwrites the entity file in place.
    write_data(updated, target_path)
def main(new_file, cur_dir, out_dir):
    """Diff *new_file* against the current entity snapshot and write the mutations."""
    new_data = read_data(new_file)
    entity_name = new_data["entity"].lower()
    cur_data = read_data(f"{cur_dir}/{entity_name}.json")
    # process_data produces the mutation set that turns cur_data into new_data.
    mutations = process_data(cur_data=cur_data, new_data=new_data)
    write_data(mutations, f"{out_dir}/mutations.json")
def lstm_classifier(data_dir, label_dir, to_dir):
    """Train a small LSTM binary classifier and save it to *to_dir*.

    Loads the Shakespearean-content dataset via ``reader.read_data``,
    trains for 5 epochs, prints validation loss/accuracy, saves the
    trained model, and returns the Keras ``History`` object.
    """
    batch_size = 256
    # Read in the local data with Shakespearean content.
    x_train, y_train, x_valid, y_valid, vocab = reader.read_data(data_dir, label_dir)
    # Linear stack: embedding -> LSTM -> single-unit output.
    model = Sequential()
    model.add(Embedding(len(vocab) + 1, 300, input_length=15))
    model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
    # BUG FIX: binary_crossentropy expects probabilities in [0, 1];
    # 'tanh' emits values in [-1, 1], producing invalid losses for
    # negative outputs. Use 'sigmoid' for binary classification.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=5)
    score = model.evaluate(x_valid, y_valid, batch_size=batch_size)
    print('test loss:', score[0])
    print('test accuracy:', score[1])
    # Persist the trained model for later evaluation.
    model.save(to_dir)
    return history
def main(input_dir, output_dir, entity):
    """Transform one entity's JSON file into labels/lines and write them to *output_dir*."""
    source = f"{input_dir}/{entity.lower()}.json"
    records = read_data(source)
    labels, lines = process_data(cur_data=records)
    write_data(labels, lines, output_dir)
def main(_):
    # Train a tagger on FLAGS.data_path, evaluating on the dev split each
    # epoch and decaying the learning rate whenever dev loss stops improving.
    # NOTE: legacy code — Python 2 print statements, pre-1.0 TensorFlow APIs
    # (tf.Session, tf.initialize_all_variables, xrange).
    if not FLAGS.data_path:
        raise ValueError('Must set --data_path')
    print ' '.join(sys.argv)
    config = Config()
    train, dev, word2id, tag2id = \
        reader.read_data(FLAGS.data_path, config.batch_size)
    # Invert the vocabularies: position i holds the token whose id is i.
    id2word = sorted(word2id, key=word2id.get)
    id2tag = sorted(tag2id, key=tag2id.get)
    config.word_vocab_size, config.tag_vocab_size = len(word2id), len(tag2id)
    # Echo the full hyper-parameter configuration for the run log.
    print 'batch_size: %d' % config.batch_size
    print 'init_scale: %.2f' % config.init_scale
    print 'keep_prob: %.2f' % config.keep_prob
    print 'learning_rate: %.5f' % config.learning_rate
    print 'lr_decay: %.2f' % config.lr_decay
    print 'max_epoch: %d' % config.max_epoch
    print 'max_grad_norm: %d' % config.max_grad_norm
    print 'max_max_epoch: %d' % config.max_max_epoch
    print 'num_layers: %d' % config.num_layers
    print 'rnn_size: %d' % config.rnn_size
    print 'use_peepholes: %r' % config.use_peepholes
    sys.stdout.flush()
    with tf.Graph().as_default(), tf.Session() as sess:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        # Same variable scope, reuse=True: train and dev models share weights.
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = Model(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            m_dev = Model(is_training=False, config=config)
        tf.initialize_all_variables().run()
        prev = float('inf')  # best dev loss so far
        lr_decay = 1.
        for i in xrange(config.max_max_epoch):
            start_time = time.time()
            shuffle(train)
            # lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(sess, config.learning_rate * lr_decay)
            print 'epoch: %d learning rate: %.3e' % (i + 1, sess.run(m.lr))
            train_loss, train_perp, train_acc = \
                run_epoch(sess, m, train, m.train_op, verbose=True)
            print '%d, train loss: %.2f, perp: %.4f, acc: %.2f' \
                % (i+1, train_loss, train_perp, train_acc)
            dev_loss, dev_perp, dev_acc = run_epoch(sess, m_dev, dev, tf.no_op())
            print '%d, dev loss: %.2f, perp: %.4f, acc: %.2f' % \
                (i+1, dev_loss, dev_perp, dev_acc)
            # Decay the learning rate whenever dev loss fails to improve.
            if prev < dev_loss:
                lr_decay *= config.lr_decay
            prev = dev_loss
            print 'it took %.2f seconds' % (time.time() - start_time)
            sys.stdout.flush()
def load_model_(data_dir, label_dir, dir):
    """Load a saved Keras model from *dir* and report its validation accuracy."""
    model = load_model(dir)
    # Print the architecture summary before evaluating.
    print(model.summary())
    _, _, x_valid, y_valid, _ = reader.read_data(data_dir, label_dir)
    score = model.evaluate(x_valid, y_valid)
    print("%s: %.2f%%" % (model.metrics_names[1], score[1] * 100))  # acc: 94.85%
def main(input_dir, output_dir, entity):
    """Read one entity's JSON, process it, and write the result under *output_dir*."""
    name = entity.lower()
    cur_data = read_data(f"{input_dir}/{name}.json")
    new_data = process_data(cur_data=cur_data)
    # Output keeps the same <entity>.json filename in the output directory.
    write_data(new_data, f"{output_dir}/{name}.json")
def test_reader(self):
    """read_data should parse test.pcap into the expected src/dst records."""
    expected = [
        {'src': '13.43.13.123', 'dst': '85.123.34.1'},
        {'src': '45.14.153.12', 'dst': '198.12.155.62'},
    ]
    self.assertEqual(expected, reader.read_data('test.pcap'))
def main(args):
    """Training/evaluation entry point for the Corel5k multi-label model.

    Sets up (optionally distributed) data loaders, model, BCE loss, SGD
    optimizer and a step LR scheduler; optionally resumes from a
    checkpoint and, in --test-only mode, evaluates and returns.
    """
    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)
    torch.backends.cudnn.benchmark = True

    # Data loading code
    print("Loading data")
    traindir = r'./datasets/Corel5k/train.txt'
    valdir = r'./datasets/Corel5k/val.txt'
    print("Creating data loaders")
    data_loader, data_loader_test = reader.read_data(traindir=traindir,
                                                     valdir=valdir,
                                                     batch_size=args.batch_size,
                                                     num_works=args.workers)

    print("Creating model")
    model = models.__dict__[args.model](pretrained=True)
    model.to(device)
    if args.distributed:
        # BUG FIX: the converter lives on torch.nn.SyncBatchNorm, not
        # torch.nn.utils — the original call raised AttributeError.
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    criterion = nn.BCELoss()
    optimizer = torch.optim.SGD(
        model.parameters(), lr=args.lr, momentum=args.momentum,
        weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)

    if args.resume:
        # map_location='cpu' so the checkpoint restores regardless of the
        # device it was saved from.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])

    if args.test_only:
        evaluate(model, criterion, data_loader, device=device)
        # NOTE(review): `feat_test` is not defined anywhere in this function,
        # so this line raises NameError as written — confirm which features
        # were meant to be saved (likely a value returned by evaluate()).
        sio.savemat('feat_train.mat', {'feat_train': feat_test})
        return
def plot_image(path):
    """Animate successive depth slices of the volume stored at *path*."""
    volume = reader.read_data(path)
    fig = matplotlib.pyplot.figure(figsize=(16, 16))
    axes = fig.add_subplot(111)

    def animate(frame):
        # Transpose then flip vertically so the slice displays upright.
        image = axes.imshow(np.flipud(volume[:, :, frame].transpose()),
                            cmap='viridis')
        return [image]

    return matplotlib.animation.FuncAnimation(fig, animate,
                                              frames=volume.shape[2],
                                              interval=200, blit=True)
def __init__(self, info):
    """Initialize the object from a config mapping read from *info*.

    Copies the geometry/physics fields out of the config dict, then calls
    ``set_params`` to derive the remaining state.
    """
    from reader import read_data
    config = read_data(info)
    self.shape = config['shape']
    self.x_o1, self.y0 = config['x_o1'], config['y0']
    # FIX: z_r was previously assigned twice from the same config key
    # (once alongside x_o1/y0 and again here); read it exactly once.
    self.z_o, self.z_r = config['z_o'], config['z_r']
    self.S_o, self.S_b = config['S_o'], config['S_b']
    self.w_b = config['w_b']
    self.R_br, self.R_lb, self.R_bt = (config['R_br'], config['R_lb'],
                                       config['R_bt'])
    self.cell_size = config['cell_size']
    self.set_params()
def find_R(nazir_ip, mix_ip, m, pcap_file):
    """Collect the recipient sets the mix emits to after Nazir sends into it.

    Walks the packet stream from *pcap_file*; once Nazir sends a packet to
    the mix, every destination the mix subsequently forwards to is recorded
    until the mix stops sending, yielding one recipient set per round.
    """
    rounds = []
    watching = False
    recipients = []
    for pkt in reader.read_data(pcap_file):
        if pkt['src'] != mix_ip and recipients:
            # The mix went quiet: close out the current round.
            rounds.append(set(recipients))
            recipients = []
            watching = False
        if pkt['dst'] == mix_ip and pkt['src'] == nazir_ip:
            watching = True
        elif pkt['src'] == mix_ip and watching:
            recipients.append(pkt['dst'])
    return rounds
def sample(FLAGS, n=10):
    # Print up to *n* QA examples from the test split alongside the model's
    # predicted answers. NOTE: Python 2 code (dict.iteritems below).
    data_path = os.path.join(FLAGS.data_dir, FLAGS.data_name + "test.txt")
    # Restore the vocabulary/memory configuration saved at training time.
    with open(os.path.join(FLAGS.checkpoint_dir, FLAGS.data_name + ".config"), "rb") as f:
        config = pkl.load(f)
    FLAGS.mem_size = config["mem_size"]
    FLAGS.nwords = config["nwords"]
    FLAGS.vocab = config["vocab"]
    with tf.Session() as sess:
        m = QAModelN2N(FLAGS, sess)
        m.build_model()
        m.load(FLAGS.checkpoint_dir)
        generator = read_data(data_path, m.vocab)
        # Invert the vocab so token ids can be printed back as words.
        rev_vocab = {v: k for k, v in m.vocab.iteritems()}
        for i, (x, q, a) in enumerate(generator):
            if i == n:
                break
            print("CONTEXT: " + " ".join([rev_vocab[xi] for xi in x]))
            print("QUESTION: " + " ".join([rev_vocab[xi] for xi in q]))
            print("PREDICTED ANSWER: " + m.sample(x, q, rev_vocab))
            print("ACTUAL ANSWER: " + rev_vocab[a[0]])
            print("=" * 80)
def see(ls, ext, path):
    # Interactive slice viewer for the volumes named in *ls* (under *path*,
    # with extension *ext*): 'n'/'p' step to the next/previous file, 'b'
    # steps back one slice, any other key advances a slice, 'q' quits.
    index = 0
    key = 0
    slc = 0
    while key != 'q':
        key = 0
        data = reader.read_data(path + ls[index] + ext)
        # (horizontal axis(?), depth axis(front->back), height(floor->ceil)
        data = np.flipud(data.transpose([1, 0, 2]))
        data /= np.max(data)  # normalize to [0, 1] for display
        while key not in ['q', 'n', 'p']:
            cv2.imshow('viewer', data[:, :, slc])
            key = chr(cv2.waitKey() % 256)
            if key in ['n', 'p']:
                continue  # file navigation handled by the outer loop
            elif key == 'b':
                slc = (slc - 1 + data.shape[2]) % data.shape[2]  # wrap backward
            else:
                slc = (slc + 1) % data.shape[2]  # wrap forward
        if key == 'n':
            index += 1
        elif key == 'p':
            index -= 1
        index = max(0, min(index, len(ls) - 1))  # clamp to the valid file range
def train(self, data_path, epochs=100):
    # Train the model on data_path, logging summaries under
    # ./logs/<model-name> and checkpointing every 500 steps.
    # NOTE: legacy TF (<1.0) summary APIs and Python 2 xrange.
    merged_sum = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter("./logs/{}".format(self.get_model_name()),
                                    self.sess.graph)
    data_size = 1  # replaced by the real steps-per-epoch after the first pass
    for epoch in xrange(epochs):
        generator = read_data(data_path, self.vocab)
        lr = self.init_lr
        # NOTE(review): lr is reset to init_lr every epoch, so this halving
        # only takes effect within epochs 25/50/75 — confirm whether a
        # cumulative decay was intended.
        if epoch % 25 == 0 and 0 < epoch < 100:
            lr /= 2.
        for step, (x, q, a) in enumerate(generator):
            _, loss, summary = self.sess.run(
                [self.optim, self.loss, merged_sum],
                feed_dict={self.inputs: x, self.q: q, self.a: a, self.lr: lr})
            if step % 10 == 0:
                print("Epoch: {}, Step: {}, loss: {}".format(epoch, epoch * data_size + step, loss))
            if step % 2 == 0:
                writer.add_summary(summary, epoch * data_size + step)
            if step % 500 == 0:
                self.save(global_step=step)
        data_size = step + 1
def main(_):
    # Entry point for the CGMH-style sentence-correction pipeline.
    # Three modes (config.mode): 'forward' / 'backward' train the forward and
    # backward language models; 'use' runs Metropolis-Hastings-style sampling
    # that edits a sentence via word replacement / insertion / deletion.
    # NOTE: Python 2 (raw_input) with TF 1.x session APIs.
    # Clear stale log/output files for the selected mode.
    if os.path.exists(config.forward_log_path) and config.mode == 'forward':
        os.system('rm ' + config.forward_log_path)
    if os.path.exists(config.backward_log_path) and config.mode == 'backward':
        os.system('rm ' + config.backward_log_path)
    if os.path.exists(config.use_output_path):
        os.system('rm ' + config.use_output_path)
    for item in config.record_time:
        if os.path.exists(config.use_output_path + str(item)):
            os.system('rm ' + config.use_output_path + str(item))
    if os.path.exists(config.use_log_path):
        os.system('rm ' + config.use_log_path)
    # Build forward LM graphs (training + inference share weights via reuse).
    if config.mode == 'forward' or config.mode == 'use':
        with tf.name_scope("forward_train"):
            with tf.variable_scope("forward", reuse=None):
                m_forward = PTBModel(is_training=True)
        with tf.name_scope("forward_test"):
            with tf.variable_scope("forward", reuse=True):
                mtest_forward = PTBModel(is_training=False)
        var = tf.trainable_variables()
        var_forward = [x for x in var if x.name.startswith('forward')]
        saver_forward = tf.train.Saver(var_forward, max_to_keep=1)
    # Build backward LM graphs the same way.
    if config.mode == 'backward' or config.mode == 'use':
        with tf.name_scope("backward_train"):
            with tf.variable_scope("backward", reuse=None):
                m_backward = PTBModel(is_training=True)
        with tf.name_scope("backward_test"):
            with tf.variable_scope("backward", reuse=True):
                mtest_backward = PTBModel(is_training=False)
        var = tf.trainable_variables()
        var_backward = [x for x in var if x.name.startswith('backward')]
        saver_backward = tf.train.Saver(var_backward, max_to_keep=1)
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        if config.mode == 'forward':
            # Train forward language model; checkpoint when test NLL improves.
            train_data, test_data = reader.read_data(config.data_path,
                                                     config.num_steps)
            test_mean_old = 15.0  # initial "best" test NLL threshold
            for epoch in range(config.max_epoch):
                train_ppl_list = []
                test_ppl_list = []
                for i in range(train_data.length // config.batch_size):
                    input, sequence_length, target = train_data(
                        m_forward.batch_size, i)
                    train_perplexity = run_epoch(session, m_forward, input,
                                                 sequence_length, target,
                                                 mode='train')
                    train_ppl_list.append(train_perplexity)
                    print("Epoch:%d, Iter: %d Train NLL: %.3f" %
                          (epoch, i + 1, train_perplexity))
                for i in range(test_data.length // config.batch_size):
                    input, sequence_length, target = test_data(
                        mtest_forward.batch_size, i)
                    test_perplexity = run_epoch(session, mtest_forward, input,
                                                sequence_length, target,
                                                mode='test')
                    test_ppl_list.append(test_perplexity)
                    print("Epoch:%d, Iter: %d Test NLL: %.3f" %
                          (epoch, i + 1, test_perplexity))
                test_mean = np.mean(test_ppl_list)
                if test_mean < test_mean_old:
                    test_mean_old = test_mean
                    saver_forward.save(session, config.forward_save_path)
                write_log(
                    'train ppl:' + str(np.mean(train_ppl_list)) + '\t' +
                    'test ppl:' + str(test_mean), config.forward_log_path)
        if config.mode == 'backward':
            # Train backward LM: identical loop, but sequences are reversed.
            train_data, test_data = reader.read_data(config.data_path,
                                                     config.num_steps)
            test_mean_old = 15.0
            for epoch in range(config.max_epoch):
                train_ppl_list = []
                test_ppl_list = []
                for i in range(train_data.length // config.batch_size):
                    input, sequence_length, target = train_data(
                        m_backward.batch_size, i)
                    input, sequence_length, target = reverse_seq(
                        input, sequence_length, target)
                    train_perplexity = run_epoch(session, m_backward, input,
                                                 sequence_length, target,
                                                 mode='train')
                    train_ppl_list.append(train_perplexity)
                    print("Epoch:%d, Iter: %d Train NLL: %.3f" %
                          (epoch, i + 1, train_perplexity))
                for i in range(test_data.length // config.batch_size):
                    input, sequence_length, target = test_data(
                        mtest_backward.batch_size, i)
                    input, sequence_length, target = reverse_seq(
                        input, sequence_length, target)
                    test_perplexity = run_epoch(session, mtest_backward, input,
                                                sequence_length, target,
                                                mode='test')
                    test_ppl_list.append(test_perplexity)
                    print("Epoch:%d, Iter: %d Test NLL: %.3f" %
                          (epoch, i + 1, test_perplexity))
                test_mean = np.mean(test_ppl_list)
                if test_mean < test_mean_old:
                    test_mean_old = test_mean
                    saver_backward.save(session, config.backward_save_path)
                write_log(
                    'train ppl:' +
                    str(np.mean(train_ppl_list)) + '\t' + 'test ppl:' +
                    str(test_mean), config.backward_log_path)
        if config.mode == 'use':
            # CGMH sampling for sentence correction.
            sim = config.sim
            sta_vec = list(np.zeros([config.num_steps - 1]))
            saver_forward.restore(session, config.forward_save_path)
            saver_backward.restore(session, config.backward_save_path)
            config.shuffle = False
            # Erroneous sentence input.
            if config.keyboard_input == True:
                # Input from keyboard if key_input is not empty.
                key_input = raw_input('please input a sentence\n')
                if key_input == '':
                    use_data = reader.read_data_use(config.use_data_path,
                                                    config.num_steps)
                else:
                    sta_vec_list = [sen2sta_vec(key_input)]
                    key_input = key_input.split()
                    # key_input=sen2id(key_input)
                    use_data = [key_input]
            else:
                # Load keywords from file.
                use_data = []
                with open(config.use_data_path) as f:
                    for line in f:
                        use_data.append(line.strip().split())
            config.batch_size = 1
            for sen_id in range(len(use_data)):
                # Generate for each sentence.
                input_ = use_data[sen_id]
                pos = 0
                for iter in range(config.sample_time):
                    # ind is the index of the selected word, regardless of
                    # the beginning token.
                    sta_vec = sen2sta_vec(' '.join(input_))
                    input__ = reader.array_data([sen2id(input_)],
                                                config.num_steps,
                                                config.dict_size)
                    input, sequence_length, _ = input__(1, 0)
                    input_original = input[0]
                    ind = pos % (sequence_length[0] - 1)
                    print(' '.join(input_))
                    # Periodically record the current sentence state.
                    if iter in config.record_time:
                        with open(config.use_output_path + str(iter), 'a') as g:
                            g.write(' '.join(input_) + '\n')
                    # --- score the current sentence and "change" candidates ---
                    if True:
                        prob_old = run_epoch(session, mtest_forward, input,
                                             sequence_length, mode='use')
                        if config.double_LM == True:
                            # Average forward and backward LM probabilities.
                            input_backward, _, _ = reverse_seq(
                                input, sequence_length, input)
                            prob_old = (prob_old + run_epoch(
                                session, mtest_backward, input_backward,
                                sequence_length, mode='use')) * 0.5
                        # Sentence probability = product of per-token probs,
                        # times the end-of-sequence probability.
                        tem = 1
                        for j in range(sequence_length[0] - 1):
                            tem *= prob_old[0][j][input[0][j + 1]]
                        tem *= prob_old[0][j + 1][config.dict_size + 1]
                        prob_old_prob = tem
                        if sim != None:
                            similarity_old = similarity(
                                input[0], input_original)
                            prob_old_prob *= similarity_old
                        else:
                            similarity_old = -1
                        input_candidate_ = generate_change_candidate(
                            input_, ind)
                        tem = reader.array_data(
                            [sen2id(x) for x in input_candidate_],
                            config.num_steps, config.dict_size)
                        input_candidate, sequence_length_candidate, _ = tem(
                            len(input_candidate_), 0)
                        prob_candidate_pre = run_epoch(
                            session, mtest_forward, input_candidate,
                            sequence_length_candidate, mode='use')
                        if config.double_LM == True:
                            input_candidate_backward, _, _ = reverse_seq(
                                input_candidate, sequence_length_candidate,
                                input_candidate)
                            prob_candidate_pre = (
                                prob_candidate_pre +
                                run_epoch(session, mtest_backward,
                                          input_candidate_backward,
                                          sequence_length_candidate,
                                          mode='use')) * 0.5
                        prob_candidate = []
                        for i in range(len(input_candidate_)):
                            tem = 1
                            for j in range(sequence_length[0] - 1):
                                tem *= prob_candidate_pre[i][j][
                                    input_candidate[i][j + 1]]
                            tem *= prob_candidate_pre[i][j + 1][config.dict_size
                                                                + 1]
                            prob_candidate.append(tem)
                        prob_candidate = np.array(prob_candidate)
                        if sim != None:
                            similarity_candidate = similarity_batch(
                                input_candidate, input_original)
                            prob_candidate = prob_candidate * similarity_candidate
                        prob_candidate_norm = normalize(prob_candidate)
                        # Sample one candidate proportional to its probability.
                        prob_candidate_ind = sample_from_candidate(
                            prob_candidate_norm)
                        prob_change_prob = prob_candidate[prob_candidate_ind]
                        input_change_ = input_candidate_[prob_candidate_ind]
                    # Word replacement (action: 0).
                    if True:
                        if False:
                            pass
                        else:
                            input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point(
                                input, sequence_length, ind, mode=0)
                            prob_forward = run_epoch(
                                session, mtest_forward, input_forward,
                                sequence_length_forward,
                                mode='use')[0, ind %
                                            (sequence_length[0] - 1), :]
                            prob_backward = run_epoch(
                                session, mtest_backward, input_backward,
                                sequence_length_backward,
                                mode='use')[0, sequence_length[0] - 1 - ind %
                                            (sequence_length[0] - 1), :]
                            # Proposal distribution: product of both LMs.
                            prob_mul = (prob_forward * prob_backward)
                            # NOTE(review): mode=1 here matches the insertion
                            # branch below — confirm mode=0 was not intended
                            # for replacement.
                            input_candidate, sequence_length_candidate = generate_candidate_input(
                                input, sequence_length, ind, prob_mul,
                                config.search_size, mode=1)
                            prob_candidate_pre = run_epoch(
                                session, mtest_forward, input_candidate,
                                sequence_length_candidate, mode='use')
                            if config.double_LM == True:
                                input_candidate_backward, _, _ = reverse_seq(
                                    input_candidate,
                                    sequence_length_candidate,
                                    input_candidate)
                                prob_candidate_pre = (
                                    prob_candidate_pre +
                                    run_epoch(session, mtest_backward,
                                              input_candidate_backward,
                                              sequence_length_candidate,
                                              mode='use')) * 0.5
                            prob_candidate = []
                            for i in range(config.search_size):
                                tem = 1
                                for j in range(sequence_length_candidate[0] - 1):
                                    tem *= prob_candidate_pre[i][j][
                                        input_candidate[i][j + 1]]
                                tem *= prob_candidate_pre[i][j + 1][
                                    config.dict_size + 1]
                                prob_candidate.append(tem)
                            prob_candidate = np.array(prob_candidate)
                            if config.sim_word == True:
                                # Similarity restricted to the replaced word.
                                similarity_candidate = similarity_batch(
                                    input_candidate[:, ind + 1:ind + 2],
                                    input_original[ind + 1:ind + 2])
                                prob_candidate = prob_candidate * similarity_candidate
                            prob_candidate_norm = normalize(prob_candidate)
                            prob_candidate_ind = sample_from_candidate(
                                prob_candidate_norm)
                            prob_candidate_prob = prob_candidate[
                                prob_candidate_ind]
                            prob_changeanother_prob = prob_candidate_prob
                            word = id2sen(
                                input_candidate[prob_candidate_ind])[ind]
                            input_changeanother_ = input_[:ind] + [
                                word
                            ] + input_[ind + 1:]
                    # Word insertion (action: 1).
                    if True:
                        if sequence_length[0] >= config.num_steps:
                            # Sentence already at max length; cannot insert.
                            prob_add_prob = 0
                            pass
                        else:
                            input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point(
                                input, sequence_length, ind, mode=1)
                            prob_forward = run_epoch(
                                session, mtest_forward, input_forward,
                                sequence_length_forward,
                                mode='use')[0, ind %
                                            (sequence_length[0] - 1), :]
                            prob_backward = run_epoch(
                                session, mtest_backward, input_backward,
                                sequence_length_backward,
                                mode='use')[0, sequence_length[0] - 1 - ind %
                                            (sequence_length[0] - 1), :]
                            prob_mul = (prob_forward * prob_backward)
                            input_candidate, sequence_length_candidate = generate_candidate_input(
                                input, sequence_length, ind, prob_mul,
                                config.search_size, mode=1)
                            prob_candidate_pre = run_epoch(
                                session, mtest_forward, input_candidate,
                                sequence_length_candidate, mode='use')
                            if config.double_LM == True:
                                input_candidate_backward, _, _ = reverse_seq(
                                    input_candidate,
                                    sequence_length_candidate,
                                    input_candidate)
                                prob_candidate_pre = (
                                    prob_candidate_pre +
                                    run_epoch(session, mtest_backward,
                                              input_candidate_backward,
                                              sequence_length_candidate,
                                              mode='use')) * 0.5
                            prob_candidate = []
                            for i in range(config.search_size):
                                tem = 1
                                for j in range(sequence_length_candidate[0] - 1):
                                    tem *= prob_candidate_pre[i][j][
                                        input_candidate[i][j + 1]]
                                tem *= prob_candidate_pre[i][j + 1][
                                    config.dict_size + 1]
                                prob_candidate.append(tem)
                            prob_candidate = np.array(prob_candidate)
                            # similarity_candidate=np.array([similarity(x, input_original) for x in input_candidate])
                            if sim != None:
                                similarity_candidate = similarity_batch(
                                    input_candidate, input_original)
                                prob_candidate = prob_candidate * similarity_candidate
                            prob_candidate_norm = normalize(prob_candidate)
                            prob_candidate_ind = sample_from_candidate(
                                prob_candidate_norm)
                            prob_candidate_prob = prob_candidate[
                                prob_candidate_ind]
                            prob_add_prob = prob_candidate_prob
                            word = id2sen(
                                input_candidate[prob_candidate_ind])[ind]
                            input_add_ = input_[:ind] + [word] + input_[ind:]
                    # Word deletion (action: 2).
                    if True:
                        if sequence_length[0] <= 2:
                            # Too short to delete from.
                            prob_delete_prob = 0
                            pass
                        else:
                            input_candidate, sequence_length_candidate = generate_candidate_input(
                                input, sequence_length, ind, None,
                                config.search_size, mode=2)
                            prob_new = run_epoch(session, mtest_forward,
                                                 input_candidate,
                                                 sequence_length_candidate,
                                                 mode='use')
                            tem = 1
                            for j in range(sequence_length_candidate[0] - 1):
                                tem *= prob_new[0][j][input_candidate[0][j + 1]]
                            tem *= prob_new[0][j + 1][config.dict_size + 1]
                            prob_new_prob = tem
                            if sim != None:
                                similarity_new = similarity_batch(
                                    input_candidate, input_original)
                                prob_new_prob = prob_new_prob * similarity_new
                            prob_delete_prob = prob_new_prob
                            input_delete_ = input_[:ind] + input_[ind + 1:]
                    # Pick the action with the highest (weighted) probability;
                    # the weights bias toward keeping/changing over add/delete.
                    b = np.argmax([
                        prob_old_prob, prob_change_prob,
                        prob_changeanother_prob * 0.3, prob_add_prob * 0.1,
                        prob_delete_prob * 0.001
                    ])
                    print([
                        prob_old_prob, prob_change_prob,
                        prob_changeanother_prob, prob_add_prob,
                        prob_delete_prob
                    ])
                    print([
                        input_, input_change_, input_changeanother_,
                        input_add_, input_delete_
                    ])
                    input_ = [
                        input_, input_change_, input_changeanother_,
                        input_add_, input_delete_
                    ][b]
                    pos += 1
#!/usr/bin/env python import reader import sys import pypsignifit as pf import pypsignifit.psignipriors as pfp import pylab as pl import numpy as np import swignifit.swignifit_raw as sfr import integrate as ig # import pypsignifit.psigniplot as pp d,s = reader.read_data ( sys.argv[1] ) d = np.array(d) # stimulus_intensities = [0.0,2.0,4.0,6.0,8.0,10.0] # number_of_correct = [34,32,40,48,50,48] # number_of_trials = [50]*len(stimulus_intensities) # data = zip(stimulus_intensities,number_of_correct,number_of_trials) # d = np.array ( data ) model = {'nafc':1, 'sigmoid':"logistic", 'core':'mw0.1'} m = 4.0 w = 4.0 l = 0.05 g = 0.02 priors = ["Gauss(%f,%f)" % (m, m), "Gauss(%f,%f)" % (w, w*2), "Beta(2,50)", "Beta(1,50)"] # priors = (pfp.default_mid(d[:,0])[0],"Gamma(2,4)",pfp.default_lapse(),pfp.default_lapse()) # priors = ("Gauss(4,.1)","Gauss(4,.1)","Beta(2,50)","Beta(1,50)") print priors x,fx,priors = ig.integration_grid ( d )
from keras.layers import LSTM, Dense from reader import read_data def get_model(): my_model = Sequential() my_model.add(Dense(1024, activation="relu", input_shape=(1152, ))) my_model.add(Dense(2048, activation="relu")) my_model.add(Dense(4096, activation="relu")) my_model.add(Dense(3862, activation="sigmoid")) my_model.compile("adam", loss="binary_crossentropy", metrics=["accuracy"]) return my_model train = read_data("datasets/video_sample/train00.tfrecord") validation = read_data("datasets/video_sample/train01.tfrecord") model = get_model() early_stop = EarlyStopping(patience=4, monitor='val_loss') checkpoint = ModelCheckpoint( "weights.h5", monitor='val_loss', verbose=1, save_best_only=True, ) csv_logger = CSVLogger('v.csv') model.fit_generator(train, steps_per_epoch=50, epochs=50, validation_data=validation,
# Load the dataset and its vocabulary, then print the vocabulary.
# NOTE: Python 2 (bare print statement).
from reader import read_data

dataset,vocab = read_data()
print vocab
#!/usr/bin/env python from __future__ import print_function from reader import read_data import numpy as np def intersect(a, b): if a.right < b.left or b.right < a.left: return False if a.top < b.bottom or b.top < a.bottom: return False return True with open('data.txt', 'r') as f: data = read_data(f) if False: test = ('#1 @ 1,3: 4x4', '#2 @ 3,1: 4x4', '#3 @ 5,5: 2x2') data = read_data(test) #print(data) for i, a in enumerate(data): if all(map(lambda x: not intersect(a, x), data[:i] + data[i + 1:])): answer = a.id print('Answer:', answer) # 346
def main(_):
    """Train the forward/backward language models or run CGMH-style
    Metropolis-Hastings sentence sampling, depending on config.mode.

    Modes:
      * 'forward'  -- train the left-to-right LM, checkpoint on best test NLL.
      * 'backward' -- same, but every batch is reversed with reverse_seq.
      * 'use'      -- load both LMs and iteratively edit sentences via word
                      replacement (action 0), insertion (1), deletion (2),
                      or no-op (3).

    NOTE(review): depends on module-level names defined elsewhere in this
    file (config, tf, np, os, reader, PTBModel, run_epoch, reverse_seq,
    cut_from_point, generate_candidate_input, similarity, similarity_batch,
    normalize, sample_from_candidate, choose_action, just_acc, sen2id,
    id2sen, write_log).
    """
    # Remove stale logs/outputs for the selected mode before starting.
    if os.path.exists(config.forward_log_path) and config.mode=='forward':
        os.system('rm '+config.forward_log_path)
    if os.path.exists(config.backward_log_path) and config.mode=='backward':
        os.system('rm '+config.backward_log_path)
    if os.path.exists(config.use_output_path):
        os.system('rm '+config.use_output_path)
    for item in config.record_time:
        if os.path.exists(config.use_output_path+str(item)):
            os.system('rm '+config.use_output_path+str(item))
    if os.path.exists(config.use_log_path):
        os.system('rm '+config.use_log_path)
    # Build the forward LM graph: a training copy and a variable-sharing
    # test copy, plus a saver restricted to the 'forward' variables.
    if config.mode=='forward' or config.mode=='use':
        with tf.name_scope("forward_train"):
            with tf.variable_scope("forward", reuse=None):
                m_forward = PTBModel(is_training=True)
        with tf.name_scope("forward_test"):
            with tf.variable_scope("forward", reuse=True):
                mtest_forward = PTBModel(is_training=False)
        var=tf.trainable_variables()
        var_forward=[x for x in var if x.name.startswith('forward')]
        saver_forward=tf.train.Saver(var_forward, max_to_keep=1)
    # Build the backward LM graph the same way.
    if config.mode=='backward' or config.mode=='use':
        with tf.name_scope("backward_train"):
            with tf.variable_scope("backward", reuse=None):
                m_backward = PTBModel(is_training=True)
        with tf.name_scope("backward_test"):
            with tf.variable_scope("backward", reuse=True):
                mtest_backward = PTBModel(is_training=False)
        var=tf.trainable_variables()
        var_backward=[x for x in var if x.name.startswith('backward')]
        saver_backward=tf.train.Saver(var_backward, max_to_keep=1)
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        if config.mode=='forward':
            # Train the forward LM; keep only the best-on-test checkpoint.
            train_data, test_data = reader.read_data(config.data_path, config.num_steps)
            test_mean_old=15.0
            for epoch in range(config.max_epoch):
                train_ppl_list=[]
                test_ppl_list=[]
                for i in range(train_data.length//config.batch_size):
                    input, sequence_length, target=train_data(m_forward.batch_size, i)
                    train_perplexity = run_epoch(session, m_forward,input, sequence_length, target, mode='train')
                    train_ppl_list.append(train_perplexity)
                    print("Epoch:%d, Iter: %d Train NLL: %.3f" % (epoch, i + 1, train_perplexity))
                for i in range(test_data.length//config.batch_size):
                    input, sequence_length, target=test_data(mtest_forward.batch_size, i)
                    test_perplexity = run_epoch(session, mtest_forward, input, sequence_length, target, mode='test')
                    test_ppl_list.append(test_perplexity)
                    print("Epoch:%d, Iter: %d Test NLL: %.3f" % (epoch, i + 1, test_perplexity))
                test_mean=np.mean(test_ppl_list)
                if test_mean<test_mean_old:
                    # New best mean test NLL: snapshot the model.
                    test_mean_old=test_mean
                    saver_forward.save(session, config.forward_save_path)
                write_log('train ppl:'+str(np.mean(train_ppl_list))+'\t'+'test ppl:'+str(test_mean), config.forward_log_path)
        if config.mode=='backward':
            # Same training loop, but batches are reversed before use.
            train_data, test_data = reader.read_data(config.data_path, config.num_steps)
            test_mean_old=15.0
            for epoch in range(config.max_epoch):
                train_ppl_list=[]
                test_ppl_list=[]
                for i in range(train_data.length//config.batch_size):
                    input, sequence_length, target=train_data(m_backward.batch_size, i)
                    input, sequence_length, target=reverse_seq(input, sequence_length, target)
                    train_perplexity = run_epoch(session, m_backward,input, sequence_length, target, mode='train')
                    train_ppl_list.append(train_perplexity)
                    print("Epoch:%d, Iter: %d Train NLL: %.3f" % (epoch, i + 1, train_perplexity))
                for i in range(test_data.length//config.batch_size):
                    input, sequence_length, target=test_data(mtest_backward.batch_size, i)
                    input, sequence_length, target=reverse_seq(input, sequence_length, target)
                    test_perplexity = run_epoch(session, mtest_backward, input, sequence_length, target, mode='test')
                    test_ppl_list.append(test_perplexity)
                    print("Epoch:%d, Iter: %d Test NLL: %.3f" % (epoch, i + 1, test_perplexity))
                test_mean=np.mean(test_ppl_list)
                if test_mean<test_mean_old:
                    test_mean_old=test_mean
                    saver_backward.save(session, config.backward_save_path)
                write_log('train ppl:'+str(np.mean(train_ppl_list))+'\t'+'test ppl:'+str(test_mean), config.backward_log_path)
        if config.mode=='use':
            # MH sampling: both LMs are restored and used jointly.
            sim=config.sim
            #keyword stable
            sta_vec=list(np.zeros([config.num_steps-1]))
            saver_forward.restore(session, config.forward_save_path)
            saver_backward.restore(session, config.backward_save_path)
            config.shuffle=False
            if config.keyboard_input==True:
                # Interactive mode (Python 2 raw_input); empty input falls
                # back to the file-based data.
                key_input=raw_input('please input a sentence in lower case\n')
                if key_input=='':
                    use_data = reader.read_data_use(config.use_data_path, config.num_steps)
                else:
                    key_input=key_input.split()
                    key_input=sen2id(key_input)
                    use_data = reader.array_data([key_input], config.num_steps, config.dict_size)
            else:
                # Batch mode: sentences plus keyword-flag vectors from file.
                use_data, sta_vec_list = reader.read_data_use(config.use_data_path, config.num_steps)
            config.batch_size=1
            #use_data.length=1
            #######################################
            for sen_id in range(use_data.length):
                if config.keyboard_input==False:
                    sta_vec=sta_vec_list[sen_id%len(sta_vec)]
                    print(sta_vec)
                input, sequence_length, _=use_data(1, sen_id)
                input_original=input[0]
                # Flag rare in-vocabulary words as keywords to preserve.
                for i in range(1,config.num_steps):
                    if input[0][i]>config.rare_since and input[0][i]<config.dict_size:
                        sta_vec[i-1]=1
                pos=0
                for iter in range(config.sample_time):
                    #ind is the index of the selected word, regardless of the beginning token.
                    ind=pos%(sequence_length[0]-1)
                    action=choose_action(config.action_prob)
                    #tem
                    print(' '.join(id2sen(input[0])))
                    if iter in config.record_time:
                        # Periodically dump the current sentence.
                        with open(config.use_output_path+str(iter), 'a') as g:
                            g.write(' '.join(id2sen(input[0]))+'\n')
                    #tem_end
                    #print(sta_vec, sequence_length[0], ind)
                    '''
                    if sta_vec[ind]==1 and action in [0, 2]: #stop skipping words
                        action=3
                    '''
                    #change word
                    if action==0:
                        # Probability of the current sentence under the
                        # forward LM (optionally averaged with backward LM).
                        prob_old=run_epoch(session, mtest_forward, input, sequence_length, mode='use')
                        if config.double_LM==True:
                            input_backward, _, _ =reverse_seq(input, sequence_length, input)
                            prob_old=(prob_old+run_epoch(session, mtest_backward, input_backward, sequence_length, mode='use'))*0.5
                        # Sentence probability = product of per-step word
                        # probabilities, times the end-of-sentence factor.
                        tem=1
                        for j in range(sequence_length[0]-1):
                            tem*=prob_old[0][j][input[0][j+1]]
                        tem*=prob_old[0][j+1][config.dict_size+1]
                        prob_old_prob=tem
                        if sim!=None:
                            similarity_old=similarity(input[0], input_original, sta_vec)
                            prob_old_prob*=similarity_old
                        else:
                            similarity_old=-1
                        # Propose replacements at position ind from the
                        # forward*backward distribution.
                        input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point(input, sequence_length, ind, mode=action)
                        prob_forward=run_epoch(session, mtest_forward, input_forward, sequence_length_forward, mode='use')[0, ind%(sequence_length[0]-1),:]
                        prob_backward=run_epoch(session, mtest_backward, input_backward, sequence_length_backward, mode='use')[0, sequence_length[0]-1-ind%(sequence_length[0]-1),:]
                        prob_mul=(prob_forward*prob_backward)
                        input_candidate, sequence_length_candidate=generate_candidate_input(input, sequence_length, ind, prob_mul, config.search_size, mode=action)
                        prob_candidate_pre=run_epoch(session, mtest_forward, input_candidate, sequence_length_candidate, mode='use')
                        if config.double_LM==True:
                            input_candidate_backward, _, _ =reverse_seq(input_candidate, sequence_length_candidate, input_candidate)
                            prob_candidate_pre=(prob_candidate_pre+run_epoch(session, mtest_backward, input_candidate_backward, sequence_length_candidate, mode='use'))*0.5
                        prob_candidate=[]
                        for i in range(config.search_size):
                            tem=1
                            for j in range(sequence_length[0]-1):
                                tem*=prob_candidate_pre[i][j][input_candidate[i][j+1]]
                            tem*=prob_candidate_pre[i][j+1][config.dict_size+1]
                            prob_candidate.append(tem)
                        prob_candidate=np.array(prob_candidate)
                        #similarity_candidate=np.array([similarity(x, input_original) for x in input_candidate])
                        if sim!=None:
                            similarity_candidate=similarity_batch(input_candidate, input_original,sta_vec)
                            prob_candidate=prob_candidate*similarity_candidate
                        prob_candidate_norm=normalize(prob_candidate)
                        prob_candidate_ind=sample_from_candidate(prob_candidate_norm)
                        prob_candidate_prob=prob_candidate[prob_candidate_ind]
                        # Accept if the sampled candidate is a real word and
                        # beats the threshold (or a forced acceptance fires).
                        if input_candidate[prob_candidate_ind][ind+1]<config.dict_size and ( prob_candidate_prob>prob_old_prob*config.threshold or just_acc()==0):
                            input=input_candidate[prob_candidate_ind:prob_candidate_ind+1]
                            pos+=1
                            #old_place=len(prob_mul)-list(np.argsort(prob_mul)).index(input[0][ind+1])
                            #write_log('step:'+str(iter)+'action:0 prob_old:'+str(prob_old_prob)+' prob_new:'+str(prob_candidate_prob)+' '+str(old_place)+' '+str(sta_vec.index(1))+' '+str(ind), config.use_log_path)
                        print('action:0', 1, prob_old_prob, prob_candidate_prob, prob_candidate_norm[prob_candidate_ind], similarity_old)
                    #add word
                    if action==1:
                        if sequence_length[0]>=config.num_steps:
                            # Sentence already at maximum length.
                            action=3
                        else:
                            input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point(input, sequence_length, ind, mode=action)
                            prob_forward=run_epoch(session, mtest_forward, input_forward, sequence_length_forward, mode='use')[0, ind%(sequence_length[0]-1),:]
                            prob_backward=run_epoch(session, mtest_backward, input_backward, sequence_length_backward, mode='use')[0, sequence_length[0]-1-ind%(sequence_length[0]-1),:]
                            prob_mul=(prob_forward*prob_backward)
                            input_candidate, sequence_length_candidate=generate_candidate_input(input, sequence_length, ind, prob_mul, config.search_size, mode=action)
                            prob_candidate_pre=run_epoch(session, mtest_forward, input_candidate, sequence_length_candidate, mode='use')
                            if config.double_LM==True:
                                input_candidate_backward, _, _ =reverse_seq(input_candidate, sequence_length_candidate, input_candidate)
                                prob_candidate_pre=(prob_candidate_pre+run_epoch(session, mtest_backward, input_candidate_backward, sequence_length_candidate, mode='use'))*0.5
                            prob_candidate=[]
                            for i in range(config.search_size):
                                tem=1
                                # Candidates are one token longer here, so
                                # the candidate sequence length is used.
                                for j in range(sequence_length_candidate[0]-1):
                                    tem*=prob_candidate_pre[i][j][input_candidate[i][j+1]]
                                tem*=prob_candidate_pre[i][j+1][config.dict_size+1]
                                prob_candidate.append(tem)
                            prob_candidate=np.array(prob_candidate)
                            #similarity_candidate=np.array([similarity(x, input_original) for x in input_candidate])
                            if sim!=None:
                                similarity_candidate=similarity_batch(input_candidate, input_original,sta_vec)
                                prob_candidate=prob_candidate*similarity_candidate
                            prob_candidate_norm=normalize(prob_candidate)
                            prob_candidate_ind=sample_from_candidate(prob_candidate_norm)
                            prob_candidate_prob=prob_candidate[prob_candidate_ind]
                            prob_old=run_epoch(session, mtest_forward, input, sequence_length, mode='use')
                            if config.double_LM==True:
                                input_backward, _, _ =reverse_seq(input, sequence_length, input)
                                prob_old=(prob_old+run_epoch(session, mtest_backward, input_backward, sequence_length, mode='use'))*0.5
                            tem=1
                            for j in range(sequence_length[0]-1):
                                tem*=prob_old[0][j][input[0][j+1]]
                            tem*=prob_old[0][j+1][config.dict_size+1]
                            prob_old_prob=tem
                            if sim!=None:
                                similarity_old=similarity(input[0], input_original,sta_vec)
                                prob_old_prob=prob_old_prob*similarity_old
                            else:
                                similarity_old=-1
                            # MH acceptance ratio for an insertion proposal.
                            alpha=min(1, prob_candidate_prob*config.action_prob[2]/(prob_old_prob*config.action_prob[1]*prob_candidate_norm[prob_candidate_ind]))
                            #alpha=min(1, prob_candidate_prob*config.action_prob[2]/(prob_old_prob*config.action_prob[1]))
                            print ('action:1',alpha, prob_old_prob,prob_candidate_prob, prob_candidate_norm[prob_candidate_ind], similarity_old)
                            if choose_action([alpha, 1-alpha])==0 and input_candidate[prob_candidate_ind][ind]<config.dict_size and (prob_candidate_prob>prob_old_prob*config.threshold or just_acc()==0):
                                #write_log('step:'+str(iter)+'action:1 prob_old:'+str(prob_old_prob)+' prob_new:'+str(prob_candidate_prob)+' '+str(sta_vec.index(1))+' '+str(ind), config.use_log_path)
                                input=input_candidate[prob_candidate_ind:prob_candidate_ind+1]
                                sequence_length+=1
                                pos+=2
                                # Keep the keyword-flag vector aligned with
                                # the (now longer) sentence.
                                sta_vec.insert(ind, 0.0)
                                del(sta_vec[-1])
                            else:
                                action=3
                    #delete word
                    if action==2:
                        if sequence_length[0]<=2:
                            # Too short to delete from.
                            action=3
                        else:
                            prob_old=run_epoch(session, mtest_forward, input, sequence_length, mode='use')
                            if config.double_LM==True:
                                input_backward, _, _ =reverse_seq(input, sequence_length, input)
                                prob_old=(prob_old+run_epoch(session, mtest_backward, input_backward, sequence_length, mode='use'))*0.5
                            tem=1
                            for j in range(sequence_length[0]-1):
                                tem*=prob_old[0][j][input[0][j+1]]
                            tem*=prob_old[0][j+1][config.dict_size+1]
                            prob_old_prob=tem
                            if sim!=None:
                                similarity_old=similarity(input[0], input_original,sta_vec)
                                prob_old_prob=prob_old_prob*similarity_old
                            else:
                                similarity_old=-1
                            # The single deletion candidate (mode=2).
                            input_candidate, sequence_length_candidate=generate_candidate_input(input, sequence_length, ind, None , config.search_size, mode=2)
                            prob_new=run_epoch(session, mtest_forward, input_candidate, sequence_length_candidate, mode='use')
                            tem=1
                            for j in range(sequence_length_candidate[0]-1):
                                tem*=prob_new[0][j][input_candidate[0][j+1]]
                            tem*=prob_new[0][j+1][config.dict_size+1]
                            prob_new_prob=tem
                            if sim!=None:
                                # NOTE(review): similarity_batch presumably
                                # returns an array; prob_new_prob may become
                                # an array here -- confirm intended.
                                similarity_new=similarity_batch(input_candidate, input_original,sta_vec)
                                prob_new_prob=prob_new_prob*similarity_new
                            # Reverse proposal: what replacement candidates
                            # would regenerate the deleted word (mode=0)?
                            input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point(input, sequence_length, ind, mode=0)
                            prob_forward=run_epoch(session, mtest_forward, input_forward, sequence_length_forward, mode='use')[0, ind%(sequence_length[0]-1),:]
                            prob_backward=run_epoch(session, mtest_backward, input_backward, sequence_length_backward, mode='use')[0, sequence_length[0]-1-ind%(sequence_length[0]-1),:]
                            prob_mul=(prob_forward*prob_backward)
                            input_candidate, sequence_length_candidate=generate_candidate_input(input, sequence_length, ind, prob_mul, config.search_size, mode=0)
                            prob_candidate_pre=run_epoch(session, mtest_forward, input_candidate, sequence_length_candidate, mode='use')
                            if config.double_LM==True:
                                input_candidate_backward, _, _ =reverse_seq(input_candidate, sequence_length_candidate, input_candidate)
                                prob_candidate_pre=(prob_candidate_pre+run_epoch(session, mtest_backward, input_candidate_backward, sequence_length_candidate, mode='use'))*0.5
                            prob_candidate=[]
                            for i in range(config.search_size):
                                tem=1
                                for j in range(sequence_length[0]-1):
                                    tem*=prob_candidate_pre[i][j][input_candidate[i][j+1]]
                                tem*=prob_candidate_pre[i][j+1][config.dict_size+1]
                                prob_candidate.append(tem)
                            prob_candidate=np.array(prob_candidate)
                            #similarity_candidate=np.array([similarity(x, input_original) for x in input_candidate])
                            if sim!=None:
                                similarity_candidate=similarity_batch(input_candidate, input_original,sta_vec)
                                prob_candidate=prob_candidate*similarity_candidate
                            #####There is a unsolved problem
                            prob_candidate_norm=normalize(prob_candidate)
                            if input[0] in input_candidate:
                                # Locate the current sentence among the
                                # reverse-proposal candidates.
                                for candidate_ind in range(len(input_candidate)):
                                    if input[0] in input_candidate[candidate_ind: candidate_ind+1]:
                                        break
                                    pass
                                alpha=min(prob_candidate_norm[candidate_ind]*prob_new_prob*config.action_prob[1]/(config.action_prob[2]*prob_old_prob), 1)
                            else:
                                pass
                                alpha=0
                            #alpha=min(prob_new_prob*config.action_prob[1]/(config.action_prob[2]*prob_old_prob), 1)
                            # NOTE(review): if the else-branch ran,
                            # candidate_ind is stale or undefined in this
                            # print -- confirm.
                            print('action:2', alpha, prob_old_prob, prob_new_prob, prob_candidate_norm[candidate_ind], similarity_old)
                            if choose_action([alpha, 1-alpha])==0 and (prob_new_prob> prob_old_prob*config.threshold or just_acc()==0):
                                #write_log('step:'+str(iter)+'action:2 prob_old:'+str(prob_old_prob)+' prob_new:'+str(prob_new_prob)+' '+str(sta_vec.index(1))+' '+str(ind), config.use_log_path)
                                # Splice the word out and pad with the
                                # dict_size+1 filler token.
                                input=np.concatenate([input[:,:ind+1], input[:,ind+2:], input[:,:1]*0+config.dict_size+1], axis=1)
                                sequence_length-=1
                                pos+=0
                                del(sta_vec[ind])
                                sta_vec.append(0)
                            else:
                                action=3
                    #do nothing
                    if action==3:
                        #write_log('step:'+str(iter)+'action:3', config.use_log_path)
                        pos+=1
#!/usr/bin/env python from __future__ import print_function from reader import read_data from functools import reduce import numpy as np with open('data.txt','r') as f: data = read_data(f.readlines()) print(data) sleepiest_guard = max(data.values(), key=lambda g: g.total_sleeping_time) print('sleepiest_guard:', sleepiest_guard) #sleepiest_guard: Datum( # id=1601, # schedule=Schedule(schedule=array([ 1, 2, 3, 4, 4, 4, 6, 6, 6, 6, 6, 5, 5, 6, 7, 7, 6, 6, 6, 7, 9, 8, 7, 8, 9, 9, 10, 10, 9, 9, 9, 9, 9, 9, 11, 10, 10, 11, 11, 10, 11, 12, 12, 12, 12, 13, 14, 13, 12, 11, 11, 10, 10, 9, 9, 8, 8, 3, 3, 0]), # max=14, # argmax=46), # log=[(6, 16), (53, 59), (1, 11), (20, 57), (38, 48), (0, 35), (39, 54), (8, 21), (40, 53), (26, 59), (19, 39), (6, 8), (23, 49), (55, 57), (2, 22), (46, 57), (14, 40), (13, 39), (47, 55), (34, 47), (40, 47), (3, 28), (37, 53), (45, 55), (34, 38), (41, 51), (54, 57), (20, 57), (24, 59)], # total_sleeping_time=483, # longest_nap=37) answer = sleepiest_guard.id * sleepiest_guard.schedule.argmax print('Answer:', answer) #73646
from mpl_toolkits.mplot3d import Axes3D from sklearn.preprocessing import normalize import pylab as pl import numpy as np import gradient as gr import ml import reader xs, ys = reader.read_data("prices.txt") ys = ys / np.linalg.norm(ys) xs = normalize(xs, axis=0, norm="l1") x1s = xs[:, 0] x2s = xs[:, 1] fig = pl.figure() ax = fig.add_subplot(111, projection='3d') # tgt function is y = w0 + w * x + e fds = ml.folds(xs, ys, 10) fold = fds[0] alpha = 0.2 w = gr.gradient_method(fold["train_p"], fold["train_c"], alpha) print w X = np.arange(0, 0.05, 0.001) Y = np.arange(0, 0.03, 0.001) X, Y = np.meshgrid(X, Y)
def quartic_kernel(u):
    """Biweight (quartic) kernel value 15/16 * (1 - u^2)^2.

    NOTE(review): unlike quartic_kernel2 below, |u| > 1 is not clamped to
    zero here -- callers presumably pass normalized distances; confirm.
    """
    return 15. / 16. * (1 - u**2)**2


def quartic_kernel2(u):
    """Kernel 3/4 * (1 - u^2), with u^2 clamped so |u| >= 1 yields 0."""
    return 3. / 4. * (1 - min(u**2, 1.0))


def minkowski(a, b, p):
    """Minkowski distance of order p: (sum_i |a_i - b_i|^p)^(1/p).

    BUG FIX: the original wrote `np.sum(...)**1. / p`, which Python parses
    as `(sum ** 1.0) / p` -- dividing the p-th power sum by p instead of
    taking the p-th root.  Identical for p == 1 (the only order used
    below), wrong for every other p.
    """
    return np.sum(np.abs(a - b)**p)**(1. / p)


# Grid search over (kernel, bandwidth k) pairs, scoring each by MSE of the
# smoothed values against the observed y.
kernels = [gaussian_kernel, quartic_kernel, quartic_kernel2]
x, y = reader.read_data('non-parametric.csv')
metric = (lambda __x1, __x2: minkowski(__x1, __x2, 1))
min_mse = 99999999
min_a = []
for i in range(len(kernels)):
    kernel = kernels[i]
    for k in np.arange(0.05, 4., 0.05):  # np.arange(4, 20):
        xs = np.array(x)  # np.arange(min(x), max(x), 0.01)
        ys = []
        for pt in xs:
            ys.append(kernel_smoothing.smooth(x, y, pt, metric, kernel, k))
        mse = ml.mse(y, ys)
        if mse < min_mse:
            min_mse = mse
def train_ngram(data_path, list_file, out_file):
    """Write each token sequence from the reader as one space-joined line.

    data_path -- directory containing the corpus
    list_file -- name of the file list inside data_path
    out_file  -- destination text file (overwritten)
    """
    list_path = os.path.join(data_path, list_file)
    with open(out_file, "w") as sink:
        sink.writelines(" ".join(tokens) + "\n"
                        for tokens in reader.read_data(data_path, list_path))
args = parser.parse_args()

# Resume next to the checkpoint if one was given.
if args.checkpoint is not None:
    args.output_dir = os.path.dirname(args.checkpoint)
try:
    os.mkdir(args.output_dir)
except FileExistsError:
    assert os.path.isdir(args.output_dir), 'output_dir should be a directory'

# Append to the log when resuming, otherwise start it fresh.
logging.basicConfig(filename=os.path.join(args.output_dir, 'train.log'),
                    format='[%(asctime)s] %(message)s',
                    filemode='w' if args.checkpoint is None else 'a',
                    level=logging.INFO)

data = read_data()

# Deterministic 90/10 split: only the first fold is used.
kfolds = model_selection.KFold(n_splits=10, shuffle=True, random_state=args.seed)
train_index, val_index = next(kfolds.split(data))
train_data = [data[i] for i in train_index]
val_data = [data[i] for i in val_index]

# Drop empty items; validation is additionally sorted by length.
train_data = [item for item in train_data if item.length > 0]
val_data = sorted((item for item in val_data if item.length > 0),
                  key=attrgetter('length'))

if args.action is None:
    args.action = 'test' if args.checkpoint is not None else 'train'
sys.path.insert(0, '../utils')
import reader
from config import config
config = config()
from model import LangModel
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--backward', dest='backward', action='store_true',
                    help='train the backward model (default is forward)')
parser.add_argument('-e', '--epoch', type=int, default=config.max_epoch,
                    help="maximum number of epochs to run; default = {}".format(config.max_epoch))
parser.add_argument('-b', '--batch', type=int, default=config.batch_size,
                    help="batch size; default = {}".format(config.batch_size))
args = parser.parse_args()

from utils import *
import numpy as np
import tensorflow as tf

# Define model and restore checkpoint if created
model = LangModel(config.backward_save_path if args.backward else config.forward_save_path)
model.restore()

# Train chosen model
print('Training {} language model'.format('backward' if args.backward else 'forward'))
# BUG FIX: is_backward was hard-coded to True, so the forward model was fed
# backward-ordered data even though the save path and banner above both
# branch on args.backward.  Read the data in the direction being trained.
train_data, train_sequence_length, test_data, test_sequence_length = reader.read_data(
    config.data_path, config.num_steps, is_backward=args.backward)
model.compile()
model.run(train_data, test_data, args.epoch, args.batch)
#!/usr/bin/env python
import reader
import sys
import pypsignifit as pf
import pypsignifit.psignipriors as pfp
import pylab as pl
import numpy as np
import swignifit.swignifit_raw as sfr
import integrate as ig
# import pypsignifit.psigniplot as pp

# Dataset comes from the file named on the command line; the reader's second
# return value is unused in this part of the script.
d, s = reader.read_data(sys.argv[1])
d = np.array(d)

# Hard-coded example dataset, kept for reference:
# stimulus_intensities = [0.0,2.0,4.0,6.0,8.0,10.0]
# number_of_correct = [34,32,40,48,50,48]
# number_of_trials = [50]*len(stimulus_intensities)
# data = zip(stimulus_intensities,number_of_correct,number_of_trials)
# d = np.array ( data )

# Psychometric model: nafc=1, logistic sigmoid, 'mw0.1' core.
model = {'nafc': 1, 'sigmoid': "logistic", 'core': 'mw0.1'}

m, w = 4.0, 4.0
l, g = 0.05, 0.02  # defined but not used by the explicit Beta priors below
priors = ["Gauss(%f,%f)" % (m, m),
          "Gauss(%f,%f)" % (w, w * 2),
          "Beta(2,50)",
          "Beta(1,50)"]
# priors = (pfp.default_mid(d[:,0])[0],"Gamma(2,4)",pfp.default_lapse(),pfp.default_lapse())
from reader import read_data

ulamki = read_data()


def nwd(a: int, b: int) -> int:
    """Return the greatest common divisor of a and b (Euclid's algorithm).

    BUG FIX: the return annotation said `-> bool`, but the function returns
    the integer GCD, not a truth value.
    """
    while b > 0:
        a, b = b, a % b
    return a


def wzglednie_pierwsze(a: int, b: int) -> bool:
    """Return True when a and b are coprime (GCD equal to 1)."""
    return nwd(a, b) == 1


# Count the irreducible fractions: a fraction is irreducible exactly when
# its numerator and denominator are coprime.
count = 0
for ulamek in ulamki:
    licznik = int(ulamek[0])
    mianownik = int(ulamek[1])
    if wzglednie_pierwsze(licznik, mianownik):
        count += 1
print(f"{count=}")
def main(_): if os.path.exists(config.forward_log_path) and config.mode == 'forward': os.system('rm ' + config.forward_log_path) if os.path.exists(config.backward_log_path) and config.mode == 'backward': os.system('rm ' + config.backward_log_path) if os.path.exists(config.use_output_path): os.system('rm ' + config.use_output_path) if os.path.exists(config.use_output_path): os.system('rm ' + config.use_output_path) if os.path.exists(config.use_log_path): os.system('rm ' + config.use_log_path) if config.mode == 'forward' or config.mode == 'use': with tf.name_scope("forward_train"): with tf.variable_scope("forward", reuse=None): m_forward = PTBModel(is_training=True) with tf.name_scope("forward_test"): with tf.variable_scope("forward", reuse=True): mtest_forward = PTBModel(is_training=False) var = tf.trainable_variables() var_forward = [x for x in var if x.name.startswith('forward')] saver_forward = tf.train.Saver(var_forward, max_to_keep=1) if config.mode == 'backward' or config.mode == 'use': with tf.name_scope("backward_train"): with tf.variable_scope("backward", reuse=None): m_backward = PTBModel(is_training=True) with tf.name_scope("backward_test"): with tf.variable_scope("backward", reuse=True): mtest_backward = PTBModel(is_training=False) var = tf.trainable_variables() var_backward = [x for x in var if x.name.startswith('backward')] saver_backward = tf.train.Saver(var_backward, max_to_keep=1) init = tf.global_variables_initializer() configs = tf.ConfigProto() configs.gpu_options.allow_growth = True with tf.Session(config=configs) as session: session.run(init) if config.mode == 'forward': #train forward language model train_data, test_data = reader.read_data(config.data_path, config.num_steps) test_mean_old = 15.0 for epoch in range(config.max_epoch): train_ppl_list = [] test_ppl_list = [] for i in range(train_data.length // config.batch_size): input, sequence_length, target = train_data( m_forward.batch_size, i) train_perplexity = run_epoch(session, m_forward, 
input, sequence_length, target, mode='train') train_ppl_list.append(train_perplexity) print("Epoch:%d, Iter: %d Train NLL: %.3f" % (epoch, i + 1, train_perplexity)) for i in range(test_data.length // config.batch_size): input, sequence_length, target = test_data( mtest_forward.batch_size, i) test_perplexity = run_epoch(session, mtest_forward, input, sequence_length, target, mode='test') test_ppl_list.append(test_perplexity) print("Epoch:%d, Iter: %d Test NLL: %.3f" % (epoch, i + 1, test_perplexity)) test_mean = np.mean(test_ppl_list) if test_mean < test_mean_old: test_mean_old = test_mean saver_forward.save(session, config.forward_save_path) write_log( 'train ppl:' + str(np.mean(train_ppl_list)) + '\t' + 'test ppl:' + str(test_mean), config.forward_log_path) if config.mode == 'backward': #train backward language model train_data, test_data = reader.read_data(config.data_path, config.num_steps) test_mean_old = 15.0 for epoch in range(config.max_epoch): train_ppl_list = [] test_ppl_list = [] for i in range(train_data.length // config.batch_size): input, sequence_length, target = train_data( m_backward.batch_size, i) input, sequence_length, target = reverse_seq( input, sequence_length, target) train_perplexity = run_epoch(session, m_backward, input, sequence_length, target, mode='train') train_ppl_list.append(train_perplexity) print("Epoch:%d, Iter: %d Train NLL: %.3f" % (epoch, i + 1, train_perplexity)) for i in range(test_data.length // config.batch_size): input, sequence_length, target = test_data( mtest_backward.batch_size, i) input, sequence_length, target = reverse_seq( input, sequence_length, target) test_perplexity = run_epoch(session, mtest_backward, input, sequence_length, target, mode='test') test_ppl_list.append(test_perplexity) print("Epoch:%d, Iter: %d Test NLL: %.3f" % (epoch, i + 1, test_perplexity)) test_mean = np.mean(test_ppl_list) if test_mean < test_mean_old: test_mean_old = test_mean saver_backward.save(session, config.backward_save_path) 
write_log( 'train ppl:' + str(np.mean(train_ppl_list)) + '\t' + 'test ppl:' + str(test_mean), config.backward_log_path) if config.mode == 'use': #CGMH sampling for key_gen sim = config.sim saver_forward.restore(session, config.forward_save_path) saver_backward.restore(session, config.backward_save_path) config.shuffle = False #keyword input if config.keyboard_input == True: #input from keyboard if key_input is not empty key_input = raw_input('please input a sentence\n') if key_input == '': use_data = reader.read_data_use(config.use_data_path, config.num_steps) else: key_input = key_input.split() key_input = sen2id(key_input) sta_vec = list(np.zeros([config.num_steps - 1])) for i in range(len(key_input)): sta_vec[i] = 1 use_data = reader.array_data([key_input], config.num_steps, config.dict_size) else: #load keywords from file use_data, sta_vec_list = reader.read_data_use( config.use_data_path, config.num_steps) config.batch_size = 1 for sen_id in range(use_data.length): #generate for each sequence of keywords if config.keyboard_input == False: sta_vec = sta_vec_list[sen_id % (config.num_steps - 1)] print(sta_vec) input, sequence_length, _ = use_data(1, sen_id) input_original = input[0] pos = 0 outputs = [] output_p = [] for iter in range(config.sample_time): #ind is the index of the selected word, regardless of the beginning token. 
#sample config.sample_time times for each set of keywords config.sample_prior = [1, 10.0 / sequence_length[0], 1, 1] if iter % 20 < 10: config.threshold = 0 else: config.threshold = 0.5 ind = pos % (sequence_length[0]) action = choose_action(config.action_prob) print(' '.join(id2sen(input[0]))) if sta_vec[ind] == 1 and action in [0, 2]: #skip words that we do not change(original keywords) action = 3 #word replacement (action: 0) if action == 0 and ind < sequence_length[0] - 1: prob_old = run_epoch(session, mtest_forward, input, sequence_length, mode='use') if config.double_LM == True: input_backward, _, _ = reverse_seq( input, sequence_length, input) prob_old = (prob_old + run_epoch(session, mtest_backward, input_backward, sequence_length, mode='use')) * 0.5 tem = 1 for j in range(sequence_length[0] - 1): tem *= prob_old[0][j][input[0][j + 1]] tem *= prob_old[0][j + 1][config.dict_size + 1] prob_old_prob = tem if sim != None: similarity_old = similarity( input[0], input_original, sta_vec) prob_old_prob *= similarity_old else: similarity_old = -1 input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point( input, sequence_length, ind, mode=action) prob_forward = run_epoch( session, mtest_forward, input_forward, sequence_length_forward, mode='use')[0, ind % (sequence_length[0] - 1), :] prob_backward = run_epoch( session, mtest_backward, input_backward, sequence_length_backward, mode='use')[0, sequence_length[0] - 1 - ind % (sequence_length[0] - 1), :] prob_mul = (prob_forward * prob_backward) input_candidate, sequence_length_candidate = generate_candidate_input( input, sequence_length, ind, prob_mul, config.search_size, mode=action) prob_candidate_pre = run_epoch( session, mtest_forward, input_candidate, sequence_length_candidate, mode='use') if config.double_LM == True: input_candidate_backward, _, _ = reverse_seq( input_candidate, sequence_length_candidate, input_candidate) prob_candidate_pre = ( prob_candidate_pre + 
run_epoch(session, mtest_backward, input_candidate_backward, sequence_length_candidate, mode='use')) * 0.5 prob_candidate = [] for i in range(config.search_size): tem = 1 for j in range(sequence_length[0] - 1): tem *= prob_candidate_pre[i][j][ input_candidate[i][j + 1]] tem *= prob_candidate_pre[i][j + 1][config.dict_size + 1] prob_candidate.append(tem) prob_candidate = np.array(prob_candidate) if sim != None: similarity_candidate = similarity_batch( input_candidate, input_original, sta_vec) prob_candidate = prob_candidate * similarity_candidate prob_candidate_norm = normalize(prob_candidate) prob_candidate_ind = sample_from_candidate( prob_candidate_norm) prob_candidate_prob = prob_candidate[ prob_candidate_ind] if input_candidate[prob_candidate_ind][ ind + 1] < config.dict_size and ( prob_candidate_prob > prob_old_prob * config.threshold or just_acc() == 0): input = input_candidate[ prob_candidate_ind:prob_candidate_ind + 1] pos += 1 print('action:0', 1, prob_old_prob, prob_candidate_prob, prob_candidate_norm[prob_candidate_ind], similarity_old) if ' '.join(id2sen(input[0])) not in output_p: outputs.append( [' '.join(id2sen(input[0])), prob_old_prob]) #word insertion(action:1) if action == 1: if sequence_length[0] >= config.num_steps: action = 3 else: input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point( input, sequence_length, ind, mode=action) prob_forward = run_epoch( session, mtest_forward, input_forward, sequence_length_forward, mode='use')[0, ind % (sequence_length[0] - 1), :] prob_backward = run_epoch( session, mtest_backward, input_backward, sequence_length_backward, mode='use')[0, sequence_length[0] - 1 - ind % (sequence_length[0] - 1), :] prob_mul = (prob_forward * prob_backward) input_candidate, sequence_length_candidate = generate_candidate_input( input, sequence_length, ind, prob_mul, config.search_size, mode=action) prob_candidate_pre = run_epoch( session, mtest_forward, input_candidate, 
sequence_length_candidate, mode='use') if config.double_LM == True: input_candidate_backward, _, _ = reverse_seq( input_candidate, sequence_length_candidate, input_candidate) prob_candidate_pre = ( prob_candidate_pre + run_epoch(session, mtest_backward, input_candidate_backward, sequence_length_candidate, mode='use')) * 0.5 prob_candidate = [] for i in range(config.search_size): tem = 1 for j in range(sequence_length_candidate[0] - 1): tem *= prob_candidate_pre[i][j][ input_candidate[i][j + 1]] tem *= prob_candidate_pre[i][j + 1][ config.dict_size + 1] prob_candidate.append(tem) prob_candidate = np.array( prob_candidate) * config.sample_prior[1] if sim != None: similarity_candidate = similarity_batch( input_candidate, input_original, sta_vec) prob_candidate = prob_candidate * similarity_candidate prob_candidate_norm = normalize(prob_candidate) prob_candidate_ind = sample_from_candidate( prob_candidate_norm) prob_candidate_prob = prob_candidate[ prob_candidate_ind] prob_old = run_epoch(session, mtest_forward, input, sequence_length, mode='use') if config.double_LM == True: input_backward, _, _ = reverse_seq( input, sequence_length, input) prob_old = (prob_old + run_epoch(session, mtest_backward, input_backward, sequence_length, mode='use')) * 0.5 tem = 1 for j in range(sequence_length[0] - 1): tem *= prob_old[0][j][input[0][j + 1]] tem *= prob_old[0][j + 1][config.dict_size + 1] prob_old_prob = tem if sim != None: similarity_old = similarity( input[0], input_original, sta_vec) prob_old_prob = prob_old_prob * similarity_old else: similarity_old = -1 #alpha is acceptance ratio of current proposal alpha = min( 1, prob_candidate_prob * config.action_prob[2] / (prob_old_prob * config.action_prob[1] * prob_candidate_norm[prob_candidate_ind])) print('action:1', alpha, prob_old_prob, prob_candidate_prob, prob_candidate_norm[prob_candidate_ind], similarity_old) if ' '.join(id2sen(input[0])) not in output_p: outputs.append([ ' '.join(id2sen(input[0])), prob_old_prob ]) if 
choose_action([ alpha, 1 - alpha ]) == 0 and input_candidate[prob_candidate_ind][ ind + 1] < config.dict_size and ( prob_candidate_prob > prob_old_prob * config.threshold or just_acc() == 0): input = input_candidate[ prob_candidate_ind:prob_candidate_ind + 1] sequence_length += 1 pos += 2 sta_vec.insert(ind, 0.0) del (sta_vec[-1]) else: action = 3 #word deletion(action: 2) if action == 2 and ind < sequence_length[0] - 1: if sequence_length[0] <= 2: action = 3 else: prob_old = run_epoch(session, mtest_forward, input, sequence_length, mode='use') if config.double_LM == True: input_backward, _, _ = reverse_seq( input, sequence_length, input) prob_old = (prob_old + run_epoch(session, mtest_backward, input_backward, sequence_length, mode='use')) * 0.5 tem = 1 for j in range(sequence_length[0] - 1): tem *= prob_old[0][j][input[0][j + 1]] tem *= prob_old[0][j + 1][config.dict_size + 1] prob_old_prob = tem if sim != None: similarity_old = similarity( input[0], input_original, sta_vec) prob_old_prob = prob_old_prob * similarity_old else: similarity_old = -1 input_candidate, sequence_length_candidate = generate_candidate_input( input, sequence_length, ind, None, config.search_size, mode=2) prob_new = run_epoch(session, mtest_forward, input_candidate, sequence_length_candidate, mode='use') tem = 1 for j in range(sequence_length_candidate[0] - 1): tem *= prob_new[0][j][input_candidate[0][j + 1]] tem *= prob_new[0][j + 1][config.dict_size + 1] prob_new_prob = tem if sim != None: similarity_new = similarity_batch( input_candidate, input_original, sta_vec) prob_new_prob = prob_new_prob * similarity_new input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point( input, sequence_length, ind, mode=0) prob_forward = run_epoch( session, mtest_forward, input_forward, sequence_length_forward, mode='use')[0, ind % (sequence_length[0] - 1), :] prob_backward = run_epoch( session, mtest_backward, input_backward, sequence_length_backward, mode='use')[0, 
sequence_length[0] - 1 - ind % (sequence_length[0] - 1), :] prob_mul = (prob_forward * prob_backward) input_candidate, sequence_length_candidate = generate_candidate_input( input, sequence_length, ind, prob_mul, config.search_size, mode=0) prob_candidate_pre = run_epoch( session, mtest_forward, input_candidate, sequence_length_candidate, mode='use') if config.double_LM == True: input_candidate_backward, _, _ = reverse_seq( input_candidate, sequence_length_candidate, input_candidate) prob_candidate_pre = ( prob_candidate_pre + run_epoch(session, mtest_backward, input_candidate_backward, sequence_length_candidate, mode='use')) * 0.5 prob_candidate = [] for i in range(config.search_size): tem = 1 for j in range(sequence_length[0] - 1): tem *= prob_candidate_pre[i][j][ input_candidate[i][j + 1]] tem *= prob_candidate_pre[i][j + 1][ config.dict_size + 1] prob_candidate.append(tem) prob_candidate = np.array(prob_candidate) if sim != None: similarity_candidate = similarity_batch( input_candidate, input_original, sta_vec) prob_candidate = prob_candidate * similarity_candidate #alpha is acceptance ratio of current proposal prob_candidate_norm = normalize(prob_candidate) if input[0] in input_candidate: for candidate_ind in range( len(input_candidate)): if input[0] in input_candidate[ candidate_ind:candidate_ind + 1]: break pass alpha = min( prob_candidate_norm[candidate_ind] * prob_new_prob * config.action_prob[1] / (config.action_prob[2] * prob_old_prob), 1) else: pass alpha = 0 print('action:2', alpha, prob_old_prob, prob_new_prob, prob_candidate_norm[candidate_ind], similarity_old) if ' '.join(id2sen(input[0])) not in output_p: outputs.append([ ' '.join(id2sen(input[0])), prob_old_prob ]) if choose_action([ alpha, 1 - alpha ]) == 0 and (prob_new_prob > prob_old_prob * config.threshold or just_acc() == 0): input = np.concatenate([ input[:, :ind + 1], input[:, ind + 2:], input[:, :1] * 0 + config.dict_size + 1 ], axis=1) sequence_length -= 1 pos += 0 del (sta_vec[ind]) 
sta_vec.append(0) else: action = 3 #skip word (action: 3) if action == 3: #write_log('step:'+str(iter)+'action:3', config.use_log_path) pos += 1 print(outputs) if outputs != []: output_p.append(outputs[-1][0]) #choose output from samples for num in range(config.min_length, 0, -1): outputss = [x for x in outputs if len(x[0].split()) >= num] print(num, outputss) if outputss != []: break if outputss == []: outputss.append([' '.join(id2sen(input[0])), 1]) outputss = sorted(outputss, key=lambda x: x[1])[::-1] with open(config.use_output_path, 'a') as g: g.write(outputss[0][0] + '\n')
# Minimal demo: load the bundled example dataset via the project's reader
# module and dump the parsed result to stdout.
import reader

data = reader.read_data('example.data')
print(data)