def _pattern_data_sizes(width, height):
    """Return the size bookkeeping for a ``width`` x ``height`` pattern.

    The result is the 3-tuple ``(row_nibbles, row_pad_bits,
    initial_padding)`` where

    * ``row_nibbles`` is ``ceil(width / 4)``, the number of groups of four
      needed to cover one row,
    * ``row_pad_bits`` is ``util.padding(width, 4)``,
    * ``initial_padding`` is ``util.padding(row_nibbles * height, 2)``.

    NOTE(review): ``util.padding(n, m)`` presumably yields the amount needed
    to round ``n`` up to a multiple of ``m`` -- confirm against ``util``.
    """
    nibbles_per_row = int(math.ceil(width / 4.0))
    return (
        nibbles_per_row,
        util.padding(width, 4),
        util.padding(nibbles_per_row * height, 2),
    )
def RUN_SVC(data):
    """Fit an SVC on the training split and print its test accuracy.

    ``data`` is a pair ``((train_data, train_labels), (test_data,
    test_labels))``. Both feature sets are passed through ``util.padding``
    before being handed to the classifier.
    """
    print('SVC')
    train_split, test_split = data
    train_data, train_labels = train_split
    test_data, test_labels = test_split

    classifier = SVC(C=0.1, gamma='auto')
    classifier.fit(util.padding(train_data), train_labels)

    predictions = classifier.predict(util.padding(test_data))
    score = accuracy_score(test_labels, predictions)
    print('Accuracy: {}'.format(score))
def evalution(sess, transition_params, dataset, x_, y_, output_keep_prob,
              test_unary_scores, test_seq_len):
    """Report token-level accuracy of the CRF tagger using Viterbi decoding.

    Args:
        sess: session used to run the graph.
        transition_params: tensor holding the CRF transition matrix.
        dataset: object exposing ``numbers()`` and ``next_batch(batch_size)``.
        x_, y_: input / label placeholders.
        output_keep_prob: dropout keep-probability placeholder; fed with 1 so
            dropout is disabled while evaluating.
        test_unary_scores: tensor with the per-token unary scores.
        test_seq_len: tensor with the length of every sentence in the batch.

    Returns:
        The fraction of correctly tagged tokens as a float, or ``0.0`` when
        no token was evaluated.
    """
    tokens = 0
    corrects = 0
    # The transition matrix is not updated during evaluation, so fetch it
    # once instead of once per batch.
    transMatrix = sess.run(transition_params)
    for _ in range(dataset.numbers() // FLAGS.batch_size):
        batch_x, batch_y, _mask = util.padding(
            *dataset.next_batch(FLAGS.batch_size))
        batch_y = batch_y.reshape([FLAGS.batch_size, -1])
        feed_dict = {x_: batch_x, y_: batch_y, output_keep_prob: 1}
        unary_scores, sequence_length = sess.run(
            [test_unary_scores, test_seq_len], feed_dict=feed_dict)
        for sent_unary_scores, y, sent_length in zip(unary_scores, batch_y,
                                                     sequence_length):
            if sent_length == 0:
                continue
            # Drop the padded tail before decoding and scoring.
            sent_unary_scores = sent_unary_scores[:sent_length]
            y = y[:sent_length]
            viterbi_sequence, _ = tf.contrib.crf.viterbi_decode(
                sent_unary_scores, transMatrix)
            corrects += np.sum(np.equal(viterbi_sequence, y))
            tokens += sent_length
    # Force true division: both counters are integers, and under Python 2
    # ``/`` would floor the ratio to 0. Also avoid dividing by zero.
    accuracy = float(corrects) / float(tokens) if tokens else 0.0
    print('%s %s %s' % (corrects, tokens, accuracy))
    return accuracy
def dynamic_rnn():
    """Train a single-layer LSTM tagger and evaluate it on the dev set.

    Reads ``penn.train.pos`` / ``penn.devel.pos`` from ``FLAGS.data_path``,
    builds an embedding -> LSTM -> linear graph, trains it with Adam and
    calls ``evalution`` on the dev data every ``FLAGS.epoch_size`` steps.

    The dropout keep probability is taken from the ``output_keep_prob``
    placeholder, so the value fed at evaluation time (1) really disables
    dropout; previously the wrapper was built with a constant and the
    placeholder was ignored.
    """
    # load data
    train_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.train.pos'))
    dev_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.devel.pos'))

    embedding = tf.get_variable("embedding",
                                [FLAGS.emb_size, FLAGS.word_dim], tf.float32)
    with tf.name_scope('placeholder'):
        x_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
        y_ = tf.placeholder(tf.int32, [None])
        mask = tf.placeholder(tf.int32, [None])
        output_keep_prob = tf.placeholder(tf.float32)

    # x:[batch_size,n_steps,n_input]
    x = tf.nn.embedding_lookup(embedding, x_)

    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden,
                                             state_is_tuple=True,
                                             activation=tf.nn.relu)
    # Wire the placeholder in so train and eval can use different values.
    lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
        lstm_cell, output_keep_prob=output_keep_prob)

    # Get lstm cell output
    outputs, _ = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)
    outputs = tf.reshape(outputs, [-1, FLAGS.n_hidden])

    # define weights and biases of logistic layer
    with tf.variable_scope('linear'):
        weights = tf.get_variable("weight",
                                  [FLAGS.n_hidden, FLAGS.n_classes],
                                  tf.float32)
        biases = tf.get_variable("biases", [FLAGS.n_classes], tf.float32)
    logits = tf.matmul(outputs, weights) + biases

    # The loss is intentionally left unmasked here (the masked variant was
    # disabled by the original author); only the accuracy count is masked.
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y_))
    train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)

    y = tf.cast(tf.nn.in_top_k(logits, y_, 1), tf.int32) * mask
    correct = tf.reduce_sum(y)

    with tf.Session(config=util.gpu_config()) as sess:
        sess.run(tf.initialize_all_variables())
        FLAGS.epoch_size = train_data.numbers() // FLAGS.batch_size
        for step in range(FLAGS.epoch_size * FLAGS.epoch_step):
            batch_x, batch_y, mask_feed = util.padding(
                *train_data.next_batch(FLAGS.batch_size))
            sess.run(train_op,
                     feed_dict={
                         x_: batch_x,
                         y_: batch_y,
                         output_keep_prob: 1 - FLAGS.dropout,
                         mask: mask_feed
                     })
            if step % FLAGS.epoch_size == 0:
                evalution(sess, correct, x_, y_, mask, output_keep_prob,
                          dev_data)
def __init__(self, x_, lbl_, dtype = [torch.LongTensor, torch.LongTensor]):
    """Wrap padded features and labels as autograd ``Variable`` objects.

    ``x_`` is passed through ``util.padding(x_, 256)`` and converted with
    ``dtype[0]``; ``lbl_`` is converted with ``dtype[1]``. The results are
    stored on ``self.X`` and ``self.Y``.
    """
    padded = util.padding(x_, 256)
    # Build both tensors first, then publish them on the instance.
    self.X, self.Y = Variable(dtype[0](padded)), Variable(dtype[1](lbl_))
def gen_FeatureMatrix(news_file, price_file, stopWords_file, output, wordDict,
                      dim_wordVec, sentense_len, term_type, mtype):
    """Write one padded word-vector feature row plus label per top story.

    Args:
        news_file: comma-separated news file; rows that do not split into
            exactly six fields are skipped.
        price_file: JSON file; ``json.load(...)[term_type]`` must map
            ticker -> day -> label value.
        stopWords_file: one stop word per line (loaded, but stop-word
            filtering is currently disabled).
        output: output path prefix; rows are appended to ``output + mtype``.
        wordDict: mapping from token to its word vector.
        dim_wordVec: dimensionality of the word vectors.
        sentense_len: length passed to ``util.padding`` for every sample.
        term_type: key selecting the price table inside ``price_file``.
        mtype: ``"train"`` or ``"test"``; selects which days are kept,
            based on ``util.dateGenerator(300)``.
    """
    with open(price_file) as price_fh:
        print("Loading price info ...")
        priceDt = json.load(price_fh)[term_type]
    cnt = 0
    testDates = util.dateGenerator(300)

    # Start from a clean output file. Removing it through the os module
    # avoids building a shell command from the path, which broke on paths
    # with spaces or shell metacharacters and on platforms without ``rm``.
    out_path = output + mtype
    if os.path.isfile(out_path):
        os.remove(out_path)

    # Still read the stop-word file so a missing file is reported, even
    # though the filter in the token loop is switched off.
    with open(stopWords_file) as stop_fh:
        stopWords = {word.strip() for word in stop_fh}

    with open(news_file) as f:
        for line in f:
            line = line.strip().split(',')
            if len(line) != 6:
                continue
            # newsType: [topStory, normal]
            ticker, name, day, headline, body, newsType = line
            if newsType != 'topStory':
                continue  # skip normal news
            if ticker not in priceDt:
                continue  # skip if no corresponding company found
            if day not in priceDt[ticker]:
                continue  # skip if no corresponding date found
            cnt += 1
            if cnt % 1000 == 0:
                print("%sing samples %d" % (mtype, cnt))
            if mtype == "test" and day not in testDates:
                continue
            if mtype == "train" and day in testDates:
                continue

            # Tokenize the headline and normalize every token.
            tokens = nltk.word_tokenize(headline)
            # Keep only the tokens that have a known word vector.
            rows = [np.matrix(wordDict[t])
                    for t in map(util.unify_word, tokens)
                    if t in wordDict]
            if not rows:
                continue  # feature is empty, continue
            # Stack all rows at once; the empty (0, dim) seed keeps the
            # same result type as stacking one row at a time.
            feature = np.vstack([np.zeros([0, dim_wordVec])] + rows)
            feature = util.padding(feature, sentense_len)
            label = round(priceDt[ticker][day], 6)

            with open(out_path, 'a+') as out_fh:
                np.savetxt(out_fh, np.hstack((feature, np.matrix(label))),
                           fmt='%.5f')
def __init__(self, path='../data/rt-polarity.all'):
    """Load, shuffle, vectorize and split the corpus stored at ``path``.

    The lines are shuffled with a fixed NumPy seed (0), parsed with
    ``load_data``, tokenized, stripped of stop words and turned into vectors
    by ``LM.pretrained_2().preprocess``. 10% of the samples become the test
    split and 10% of the remainder the dev split. Every split is padded with
    ``util.padding`` to 256 items using a 300-dimensional zero vector.

    Sets:
        self.data: ``((train_x, train_y), (dev_x, dev_y), (test_x, test_y))``
        self.data_type: tensor constructors for the train / val / test
            splits, each ``[torch.FloatTensor, torch.LongTensor]``.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(path, encoding='utf-8', errors='ignore') as corpus:
        data = corpus.readlines()
    np.random.seed(0)
    np.random.shuffle(data)

    wv = LM.pretrained_2()
    x, y = load_data(data)
    x = map(tokenizer.tokenize, x)
    x = map(tokenizer.remove_stopwords, x)
    x = wv.preprocess(list(x))

    train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.1)
    train_x, dev_x, train_y, dev_y = train_test_split(train_x, train_y,
                                                      test_size=0.1)

    max_len = 256
    padding_item = [300 * [0]]
    train_x = util.padding(train_x, max_len=max_len,
                           padding_item=padding_item)
    dev_x = util.padding(dev_x, max_len=max_len, padding_item=padding_item)
    test_x = util.padding(test_x, max_len=max_len, padding_item=padding_item)
    print(np.array(train_x).shape)
    print(np.array(dev_x).shape)
    print(np.array(test_x).shape)

    self.data = (train_x, train_y), (dev_x, dev_y), (test_x, test_y)
    train_type = [torch.FloatTensor, torch.LongTensor]
    val_type = [torch.FloatTensor, torch.LongTensor]
    test_type = [torch.FloatTensor, torch.LongTensor]
    self.data_type = [train_type, val_type, test_type]
def collate_fn(self, samples):
    """Merge a list of sample dicts into one batch dict.

    ``id`` and ``len_text`` are gathered into plain lists. ``text``,
    ``summary`` and ``attention_mask`` are padded with ``self.padding`` to
    the longest entry of the batch and converted to a tensor. A key is
    left out of the batch unless every sample provides it.
    """
    batch = {}
    for key in ('id', 'len_text'):
        if all(key in sample for sample in samples):
            batch[key] = [sample[key] for sample in samples]

    for key in ('text', 'summary', 'attention_mask'):
        if not all(key in sample for sample in samples):
            continue
        sequences = [sample[key] for sample in samples]
        to_len = max(len(seq) for seq in sequences)
        batch[key] = torch.tensor(padding(sequences, to_len, self.padding))
    return batch
def collate_fn(self, samples):
    """Merge a list of sample dicts into one batch dict.

    ``id``, ``sent_range``, ``text_w``, ``sent_range_w`` and ``summary`` are
    gathered into plain lists. ``text`` and ``label`` are padded to the
    longest entry of the batch (at least length 1) and converted to a
    tensor; ``text`` is padded with ``self.padding`` and ``label`` with
    ``-100``. A key is left out of the batch unless every sample has it.
    """
    batch = {}
    for key in ('id', 'sent_range', 'text_w', 'sent_range_w', 'summary'):
        if all(key in sample for sample in samples):
            batch[key] = [sample[key] for sample in samples]

    for key in ('text', 'label'):
        if not all(key in sample for sample in samples):
            continue
        sequences = [sample[key] for sample in samples]
        # Never pad to length zero.
        to_len = max(len(seq) for seq in sequences) or 1
        pad_value = -100 if key == 'label' else self.padding
        batch[key] = torch.tensor(padding(sequences, to_len, pad_value))
    return batch
def load_data(self, data, data_type=[torch.LongTensor, torch.LongTensor]):
    """Turn an ``(x, y)`` pair into a ``DataLoader``.

    ``x`` is passed through ``util.padding`` and converted with
    ``data_type[0]``; ``y`` is converted with ``data_type[1]``. The loader
    uses ``self.batch_size`` as its batch size. The dataset size is printed
    as a side effect.
    """
    inputs, labels = data
    inputs = util.padding(inputs)

    features = Variable(data_type[0](inputs))
    targets = Variable(data_type[1](labels))
    torch_dataset = Data.TensorDataset(features, targets)
    print('data size:\t{}'.format(len(torch_dataset)))

    return Data.DataLoader(dataset=torch_dataset, batch_size=self.batch_size)
def feed_all(x, y, train=False, Pad=False):
    """Run one pass over the first ``len(y) / REDUCE_DATA_COUNT_RATIO`` samples.

    Args:
        x: input samples, sliced into batches of ``BATCH``.
        y: labels aligned with ``x``.
        train: when true, ``train_step`` is run too; the flag is also fed to
            the ``D`` placeholder.
        Pad: when true, each batch goes through ``util.padding``; otherwise
            it is reshaped to ``[-1, h, w, 1]``.

    Returns:
        ``(acc_sum, ML)``: the batch accuracies averaged over the pass (a
        1-element float array) and the loss of the last batch. ``ML`` is NaN
        when there was no batch to run.
    """
    m = int(len(y) / REDUCE_DATA_COUNT_RATIO)
    n_batches = int((m - 1) / BATCH + 1)
    # ``np.float`` was only an alias of the builtin float and has been
    # removed from NumPy; float64 is the identical dtype.
    acc_sum = np.zeros((1), np.float64)
    ML = np.nan  # stays NaN when the loop body never runs
    for i in range(n_batches):
        start = i * BATCH
        end = min(start + BATCH, m)
        batch_x = x[start:end]
        if Pad:
            batch_x = util.padding(batch_x)
        else:
            batch_x = np.reshape(batch_x, [-1, h, w, 1])
        feed = {X: batch_x, Y: y[start:end], D: train}
        if train:
            _, ML, acc = sess.run([train_step, loss, accuracy], feed)
        else:
            ML, acc = sess.run([loss, accuracy], feed)
        acc_sum += acc / n_batches
    return acc_sum, ML
def load(self, x_, lbl_, dtype=[torch.LongTensor, torch.LongTensor]):
    """Build a ``DataLoader`` over padded features and their labels.

    ``x_`` is passed through ``util.padding(x_, 256)`` so that all samples
    share one length, then converted with ``dtype[0]``; ``lbl_`` is
    converted with ``dtype[1]``. The loader batches by ``self.n_batch``.
    The dataset size is printed as a side effect.
    """
    # Bring every sample to the same length so it can be tensorized.
    padded = util.padding(x_, 256)

    # Tensorize features and labels.
    features = Variable(dtype[0](padded))
    labels = Variable(dtype[1](lbl_))

    tensor_dataset = Data.TensorDataset(features, labels)
    print("Data size: ", len(tensor_dataset))

    # The DataLoader takes care of feeding the data batch by batch.
    return Data.DataLoader(tensor_dataset, self.n_batch)
def evalution(sess, correct, x_pl, y_pl, mask_pl, output_keep_prob_pl,
              dataset):
    """Print and return the masked token accuracy over ``dataset``.

    Args:
        sess: session used to run the graph.
        correct: tensor yielding the number of correct tokens in a batch.
        x_pl, y_pl, mask_pl: input, label and mask placeholders.
        output_keep_prob_pl: dropout keep-probability placeholder; fed
            with 1 while evaluating.
        dataset: object exposing ``numbers()`` and ``next_batch(batch_size)``.

    Returns:
        ``corrects / tokens`` as a float, or ``0.0`` when the mask selected
        no token at all.
    """
    n_batches = dataset.numbers() // FLAGS.batch_size
    tokens = 0
    corrects = 0
    for _ in range(n_batches):
        batch_x, batch_y, mask_seed = util.padding(
            *dataset.next_batch(FLAGS.batch_size))
        tokens += mask_seed.sum()
        corrects += sess.run(correct,
                             feed_dict={
                                 x_pl: batch_x,
                                 y_pl: batch_y,
                                 output_keep_prob_pl: 1,
                                 mask_pl: mask_seed
                             })
    # Both counters are integers: force true division (Python 2 would floor
    # the ratio to 0) and avoid dividing by zero on an empty evaluation.
    accuracy = float(corrects) / float(tokens) if tokens else 0.0
    print('%s %s %s' % (corrects, tokens, accuracy))
    return accuracy
def evalution(sess, correct, x_pl, y_pl, mask_pl, output_keep_prob_pl,
              seq_len_pl, dataset):
    """Print and return the masked token accuracy over ``dataset``.

    Args:
        sess: session used to run the graph.
        correct: tensor yielding the number of correct tokens in a batch.
        x_pl, y_pl, mask_pl: input, label and mask placeholders.
        output_keep_prob_pl: dropout keep-probability placeholder; fed
            with 1 while evaluating.
        seq_len_pl: sequence-length placeholder; every sentence is fed with
            the padded width of its batch.
        dataset: object exposing ``numbers()`` and ``next_batch(batch_size)``.

    Returns:
        ``corrects / tokens`` as a float, or ``0.0`` when the mask selected
        no token at all.
    """
    n_batches = dataset.numbers() // FLAGS.batch_size
    tokens = 0
    corrects = 0
    for _ in range(n_batches):
        batch_x, batch_y, mask_seed = util.padding(
            *dataset.next_batch(FLAGS.batch_size))
        # Every row is reported as having the full padded length.
        sequence_length = batch_x.shape[1] * np.ones([FLAGS.batch_size],
                                                     np.int32)
        tokens += mask_seed.sum()
        corrects += sess.run(correct,
                             feed_dict={
                                 x_pl: batch_x,
                                 y_pl: batch_y,
                                 output_keep_prob_pl: 1,
                                 mask_pl: mask_seed,
                                 seq_len_pl: sequence_length
                             })
    # Both counters are integers: force true division (Python 2 would floor
    # the ratio to 0) and avoid dividing by zero on an empty evaluation.
    accuracy = float(corrects) / float(tokens) if tokens else 0.0
    print('%s %s %s' % (corrects, tokens, accuracy))
    return accuracy
def feed_all(x, y, train=False, Pad=False):
    """Run one pass over the first ``len(y) / REDUCE_DATA_COUNT_RATIO`` samples.

    Args:
        x: input samples, sliced into batches of ``BATCH``.
        y: labels aligned with ``x``.
        train: when true, ``train_step`` is run along with the metrics.
        Pad: when true, each batch goes through ``util.padding``; otherwise
            it is reshaped to ``[-1, h, w, 1]``.

    Returns:
        ``(acc_sum, ML, RL)``: the batch accuracies averaged over the pass
        (a 1-element float array) plus the margin loss and reconstruction
        loss of the last batch. Both losses are NaN when there was no batch
        to run.
    """
    m = int(len(y) / REDUCE_DATA_COUNT_RATIO)
    n_batches = int((m - 1) / BATCH + 1)
    # ``np.float`` was only an alias of the builtin float and has been
    # removed from NumPy; float64 is the identical dtype.
    acc_sum = np.zeros((1), np.float64)
    ML = RL = np.nan  # stay NaN when the loop body never runs
    for i in range(n_batches):
        start = i * BATCH
        end = min(start + BATCH, m)
        batch_x = x[start:end]
        if Pad:
            batch_x = util.padding(batch_x)
        else:
            batch_x = np.reshape(batch_x, [-1, h, w, 1])
        feed = {X: batch_x, Y: y[start:end]}
        if train:
            _, ML, RL, acc = sess.run(
                [train_step, margin_loss, restruc_loss, accuracy], feed)
        else:
            ML, RL, acc = sess.run([margin_loss, restruc_loss, accuracy],
                                   feed)
        acc_sum += acc / n_batches
    return acc_sum, ML, RL
def batch_padding(batch_data, pad_toks):
    """Pad every component of a batch and convert it to a ``LongTensor``.

    Args:
        batch_data: list of components; each component is a sequence holding
            one entry per sample. Entries are either integer scalars or
            sequences.
        pad_toks: one padding token per component (same length as
            ``batch_data``).

    Returns:
        A list with one ``torch.LongTensor`` per component. Components of
        integer scalars (Python ``int`` or any NumPy integer type) and empty
        components are converted as they are; components of sequences are
        first padded with ``padding`` to their longest sample.
    """
    assert len(batch_data) == len(pad_toks)
    batch_data_padded = []
    for comp, pad_tok in zip(batch_data, pad_toks):
        # As before, the first sample decides whether the component holds
        # scalars. Accept every integer flavour (np.int32 etc. used to fall
        # through to ``len()`` and crash) and skip empty components.
        if len(comp) > 0 and not isinstance(comp[0], (int, np.integer)):
            max_len = max(len(sample) for sample in comp)
            comp = [padding(sample, max_len, pad_tok) for sample in comp]
        batch_data_padded.append(torch.LongTensor(comp))
    return batch_data_padded
def main(arg=None):
    """Train and evaluate the capsule network on pairs of overlapped digits.

    Consecutive samples of a batch (even / odd positions) are resized to
    28x28, added and clipped into one overlapped image; the network has to
    predict both digits. MNIST train is used for training, MNIST test for
    validation and affNIST for testing. The model is checkpointed every five
    minutes and at the last epoch; afterwards ten reconstructions are written
    to ``./reconstruct/``.

    ``arg`` is unused.
    """
    affNIST_in, affNIST_out = affNIST.load_affNIST()
    mnist = input_data.read_data_sets('/mnist')
    print('affNIST min', np.min(affNIST_in[0]), np.max(affNIST_in[0]))
    print(' MNIST min', np.min(mnist.train.images[0]),
          np.max(mnist.train.images[0]))

    trainIn, trainOut = util.skip_no_equal_neighbor(mnist.train.images,
                                                    mnist.train.labels)
    validIn, validOut = util.skip_no_equal_neighbor(mnist.test.images,
                                                    mnist.test.labels)
    affNIST_in, affNIST_out = util.skip_no_equal_neighbor(
        affNIST_in, affNIST_out)

    h = w = 28
    if AFFIN:
        h = w = 40

    X = tf.placeholder(tf.float32, [None, None, None, 1])
    Y = tf.placeholder(tf.float32, [None])
    y_int = tf.cast(Y, tf.int32)
    Y_ONE_HOT = tf.one_hot(y_int, 10)

    x_resize = tf.image.resize_bilinear(X, [28, 28])
    # Overlap neighbouring samples: even positions on top of odd positions.
    x_overlap = tf.clip_by_value(x_resize[0::2] + x_resize[1::2], 0, 1)
    y_0 = Y_ONE_HOT[0::2]
    y_1 = Y_ONE_HOT[1::2]
    y_overlap = y_0 + y_1
    y_overlap = tf.clip_by_value(y_overlap, 0, 1)

    DigitCaps = CapsuleLayer.capsnet_forward(x_overlap)
    hyperthesis = tf.norm(DigitCaps, ord=2, axis=-1)
    recon_x_0 = CapsuleLayer.reconstruct(DigitCaps, y_0)
    recon_x_1 = CapsuleLayer.reconstruct(DigitCaps, y_1)
    recon_x = tf.clip_by_value(recon_x_0 + recon_x_1, 0, 1)

    margin_loss = CapsuleLayer.margin_loss(y_overlap, hyperthesis)
    restruc_loss = tf.reduce_mean(
        tf.reduce_sum(tf.square(x_overlap - recon_x), axis=[1, 2]))
    loss = margin_loss
    if RECONSTRUCT:
        loss += 5e-5 * restruc_loss
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    # A prediction counts per digit: compare the sorted top-2 classes with
    # the sorted pair of ground-truth labels.
    top_values, top_predict = tf.nn.top_k(hyperthesis, 2)
    y_gt = tf.stack([y_int[0::2], y_int[1::2]], 1)
    predict_sort = tf.py_func(np.sort, [top_predict], tf.int32)
    y_gt_sort = tf.py_func(np.sort, [y_gt], tf.int32)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(predict_sort, y_gt_sort), tf.float32))

    sess = tf.Session()
    saver = tf.train.Saver()
    if isNewTrain:
        sess.run(tf.global_variables_initializer())
        print('Initialized!')
    else:
        saver.restore(sess, modelName)
        print("Model restored")

    start_sec = time.time()
    # MNIST test is the validation set and affNIST the test set; the two
    # counts used to be printed under each other's label.
    print(' train:%d, valid:%d, test:%d, REDUCE_DATA_COUNT_RATIO:%d' %
          (len(mnist.train.images), len(mnist.test.images),
           len(affNIST_in), REDUCE_DATA_COUNT_RATIO))

    def feed_all(x, y, train=False, Pad=False):
        """Run one pass; return (mean accuracy, last margin loss, last recon loss)."""
        m = int(len(y) / REDUCE_DATA_COUNT_RATIO)
        n_batches = int((m - 1) / BATCH + 1)
        # np.float was an alias of float and has been removed from NumPy.
        acc_sum = np.zeros((1), np.float64)
        ML = RL = np.nan  # stay NaN when there is no batch to run
        for i in range(n_batches):
            start = i * BATCH
            end = min(start + BATCH, m)
            batch_x = x[start:end]
            if Pad:
                batch_x = util.padding(batch_x)
            else:
                batch_x = np.reshape(batch_x, [-1, h, w, 1])
            feed = {X: batch_x, Y: y[start:end]}
            if train:
                _, ML, RL, acc = sess.run(
                    [train_step, margin_loss, restruc_loss, accuracy], feed)
            else:
                ML, RL, acc = sess.run(
                    [margin_loss, restruc_loss, accuracy], feed)
            acc_sum += acc / n_batches
        return acc_sum, ML, RL

    for i in range(epoch):
        train_accuracy, ML_tr, RL_tr = feed_all(trainIn, trainOut,
                                                train=True, Pad=True)
        if i < 10 or i % FREQ == 0:
            valid_accuracy, ML_v, RL_v = feed_all(validIn, validOut,
                                                  train=False, Pad=True)
            test_accuracy, ML_te, RL_te = feed_all(affNIST_in, affNIST_out,
                                                   train=False, Pad=False)
            now = strftime("%H:%M:%S", localtime())
            print(
                'step %d/%d, accuracy train:%.3f valid:%.3f test:%.3f loss:(%.7f, %.4f) %s'
                % (i, epoch, train_accuracy, valid_accuracy, test_accuracy,
                   ML_tr, RL_tr, now))
        this_sec = time.time()
        # Save at the last epoch (the old ``epoch - 0`` test could never be
        # true inside ``range(epoch)``) or when five minutes have passed.
        if i == epoch - 1 or this_sec - start_sec > 60 * 5:
            start_sec = this_sec
            save_path = saver.save(sess, modelName)
            # Take a fresh timestamp instead of the last evaluation's one.
            now = strftime("%H:%M:%S", localtime())
            print("Model Saved, time:%s, %s" % (now, save_path))

    # Dump ten input / original / reconstruction triples for inspection.
    for i in range(10):
        start = i
        end = start + 2
        batch_x = mnist.train.images[start:end]
        batch_x = util.padding(batch_x)
        batch_y = mnist.train.labels[start:end]
        feed = {X: batch_x, Y: batch_y}
        acc, x_overlap_in, recon_0, recon_1, ori_arr, y_gt_out, predict2 = sess.run(
            [
                accuracy, x_overlap, recon_x_0, recon_x_1, x_resize, y_gt,
                top_predict
            ], feed)
        print('ori_arr', ori_arr.shape)
        print('recon_0', recon_0.shape)
        print('y_gt_out', y_gt_out)
        in_rgb = np.stack([x_overlap_in[0], x_overlap_in[0], x_overlap_in[0]],
                          2)
        # The first digit goes to the red channel, the second to green.
        r = ori_arr[0]
        g = ori_arr[1]
        b = np.zeros_like(r)
        ori_rgb = np.stack([r, g, b], 2)
        r = recon_0[0]
        g = recon_1[0]
        recon_rgb = np.stack([r, g, b], 2)
        dual_image = np.stack([in_rgb, ori_rgb, recon_rgb])
        print('dual_image ', dual_image.shape)
        recon_image = np.reshape(dual_image, [28 * 3, 28, 3])
        util.save(recon_image, y_gt_out, './reconstruct/', predict2)
    save_path = saver.save(sess, modelName)
def main(arg=None):
    """Train and evaluate the capsule network on single digits.

    Inputs are resized to 28x28 before entering the network. MNIST train is
    used for training, MNIST test for validation and affNIST for testing.
    The model is checkpointed every five minutes and at the last epoch;
    afterwards ten original / reconstruction pairs are written to
    ``./reconstruct/``.

    ``arg`` is unused.
    """
    affNIST_in, affNIST_out = affNIST.load_affNIST()
    mnist = input_data.read_data_sets('/mnist')
    print('affNIST min', np.min(affNIST_in[0]), np.max(affNIST_in[0]))
    print(' MNIST min', np.min(mnist.train.images[0]),
          np.max(mnist.train.images[0]))

    h = w = 28
    if AFFINE:
        h = w = 40

    X = tf.placeholder(tf.float32, [None, None, None, 1])
    Y = tf.placeholder(tf.float32, [None])
    y_int = tf.cast(Y, tf.int32)
    Y_ONE_HOT = tf.one_hot(y_int, 10)

    x_4d = tf.image.resize_bilinear(X, [28, 28])
    DigitCaps = CapsuleLayer.capsnet_forward(x_4d)
    hyperthesis = tf.norm(DigitCaps, ord=2, axis=-1)  # (?, 10)
    recon_x = CapsuleLayer.reconstruct(DigitCaps, Y_ONE_HOT)

    margin_loss = CapsuleLayer.margin_loss(Y_ONE_HOT, hyperthesis)
    restruc_loss = tf.reduce_mean(
        tf.reduce_sum(tf.square(x_4d - recon_x), axis=[1, 2]))
    loss = margin_loss
    if RECONSTRUCT:
        loss += 5e-5 * restruc_loss
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    predict = tf.cast(tf.argmax(hyperthesis, 1), tf.int32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predict, y_int), tf.float32))

    sess = tf.Session()
    saver = tf.train.Saver()
    if isNewTrain:
        sess.run(tf.global_variables_initializer())
        print('Initialized!')
    else:
        saver.restore(sess, modelName)
        print("Model restored")

    start_sec = time.time()
    # MNIST test is the validation set and affNIST the test set; the two
    # counts used to be printed under each other's label.
    print(' train:%d, valid:%d, test:%d, REDUCE_DATA_COUNT_RATIO:%d' %
          (len(mnist.train.images), len(mnist.test.images),
           len(affNIST_in), REDUCE_DATA_COUNT_RATIO))

    def feed_all(x, y, train=False, Pad=False):
        """Run one pass; return (mean accuracy, last margin loss, last recon loss)."""
        m = int(len(y) / REDUCE_DATA_COUNT_RATIO)
        n_batches = int((m - 1) / BATCH + 1)
        # np.float was an alias of float and has been removed from NumPy.
        acc_sum = np.zeros((1), np.float64)
        ML = RL = np.nan  # stay NaN when there is no batch to run
        for i in range(n_batches):
            start = i * BATCH
            end = min(start + BATCH, m)
            batch_x = x[start:end]
            if Pad:
                batch_x = util.padding(batch_x)
            else:
                batch_x = np.reshape(batch_x, [-1, h, w, 1])
            feed = {X: batch_x, Y: y[start:end]}
            if train:
                _, ML, RL, acc = sess.run(
                    [train_step, margin_loss, restruc_loss, accuracy], feed)
            else:
                ML, RL, acc = sess.run(
                    [margin_loss, restruc_loss, accuracy], feed)
            acc_sum += acc / n_batches
        return acc_sum, ML, RL

    for i in range(epoch):
        train_accuracy, ML_tr, RL_tr = feed_all(mnist.train.images,
                                                mnist.train.labels,
                                                train=True, Pad=True)
        if i < 10 or i % FREQ == 0:
            valid_accuracy, ML_v, RL_v = feed_all(mnist.test.images,
                                                  mnist.test.labels,
                                                  train=False, Pad=True)
            test_accuracy, ML_te, RL_te = feed_all(affNIST_in, affNIST_out,
                                                   train=False, Pad=False)
            now = strftime("%H:%M:%S", localtime())
            print(
                'step %d/%d, accuracy train:%.3f valid:%.3f test:%.3f loss:(%.7f, %.4f) %s'
                % (i, epoch, train_accuracy, valid_accuracy, test_accuracy,
                   ML_tr, RL_tr, now))
        this_sec = time.time()
        # Save at the last epoch (the old ``epoch-0`` test could never be
        # true inside ``range(epoch)``) or when five minutes have passed.
        if i == epoch - 1 or this_sec - start_sec > 60 * 5:
            start_sec = this_sec
            save_path = saver.save(sess, modelName)
            # Take a fresh timestamp instead of the last evaluation's one.
            now = strftime("%H:%M:%S", localtime())
            print("Model Saved, time:%s, %s" % (now, save_path))

    # Dump ten original / reconstruction pairs for inspection.
    for i in range(10):
        start = i
        end = start + 1
        batch_x = mnist.train.images[start:end]
        batch_x = util.padding(batch_x)
        batch_y = mnist.train.labels[start:end]
        feed = {X: batch_x, Y: batch_y}
        acc, recon_arr, ori_arr = sess.run([accuracy, recon_x, x_4d], feed)
        dual_image = np.stack([ori_arr, recon_arr])
        recon_image = np.reshape(dual_image, [28 * 2, 28])
        util.save(recon_image, batch_y, './reconstruct/', i)
    save_path = saver.save(sess, modelName)
def bi_lstm():
    """Train a bidirectional LSTM tagger and evaluate it on the dev set.

    Reads ``penn.train.pos`` / ``penn.devel.pos`` from ``FLAGS.data_path``,
    builds an embedding -> BiLSTM -> linear graph with a masked
    cross-entropy loss, trains it with Adam and calls ``evalution`` on the
    dev data every 100 steps.

    The dropout keep probability is taken from the ``output_keep_prob``
    placeholder, so the value fed at evaluation time (1) really disables
    dropout; previously both wrappers were built with a constant and the
    placeholder was ignored.
    """
    tf.set_random_seed(1)
    # load data
    train_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.train.pos'))
    dev_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.devel.pos'))

    with tf.device('/cpu:0'):
        embedding = tf.get_variable("embedding",
                                    [FLAGS.emb_size, FLAGS.word_dim],
                                    tf.float32)
    with tf.name_scope('placeholder'):
        x_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
        y_ = tf.placeholder(tf.int32, [None])
        mask = tf.placeholder(tf.int32, [None])
        output_keep_prob = tf.placeholder(tf.float32)
        seq_len = tf.placeholder(tf.int32, [None])

    # x:[batch_size,n_steps,n_input]
    x = tf.nn.embedding_lookup(embedding, x_)

    with tf.device('/gpu:2'):
        # lstm cell
        lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden)
        lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden)
        # dropout: wire the placeholder in so that training and evaluation
        # can use different keep probabilities.
        lstm_cell_fw = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell_fw, output_keep_prob=output_keep_prob)
        lstm_cell_bw = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell_bw, output_keep_prob=output_keep_prob)

        # Get lstm cell output
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(lstm_cell_fw,
                                                     lstm_cell_bw,
                                                     x,
                                                     sequence_length=seq_len,
                                                     dtype=tf.float32)
        outputs = tf.concat(2, outputs)
        outputs = tf.reshape(outputs, [-1, 2 * FLAGS.n_hidden])

        # define weights and biases of logistic layer
        with tf.variable_scope('linear'):
            weights = tf.get_variable("weight",
                                      [2 * FLAGS.n_hidden, FLAGS.n_classes],
                                      tf.float32)
            biases = tf.get_variable("biases", [FLAGS.n_classes], tf.float32)
        logits = tf.matmul(outputs, weights) + biases

        # Masked positions contribute zero loss.
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y_) *
            tf.cast(mask, tf.float32))
        train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)

        y = tf.cast(tf.nn.in_top_k(logits, y_, 1), tf.int32) * mask
        correct = tf.reduce_sum(y)

    with tf.Session(config=util.gpu_config()) as sess:
        sess.run(tf.global_variables_initializer())
        FLAGS.epoch_size = train_data.numbers() // FLAGS.batch_size
        for step in range(FLAGS.epoch_size * FLAGS.epoch_step):
            batch_x, batch_y, mask_feed = util.padding(
                *train_data.next_batch(FLAGS.batch_size))
            # Every row is reported as having the full padded length.
            sequence_length = batch_x.shape[1] * np.ones(
                [FLAGS.batch_size], np.int32)
            sess.run(train_op,
                     feed_dict={
                         x_: batch_x,
                         y_: batch_y,
                         output_keep_prob: 1 - FLAGS.dropout,
                         mask: mask_feed,
                         seq_len: sequence_length
                     })
            if step % 100 == 0:
                evalution(sess, correct, x_, y_, mask, output_keep_prob,
                          seq_len, dev_data)
def bi_lstm_crf():
    """Build a BiLSTM-CRF tagger and, when training is enabled, train it.

    Reads ``penn.train.pos`` / ``penn.devel.pos`` from ``FLAGS.data_path``,
    builds an embedding -> BiLSTM -> dropout -> linear graph whose outputs
    are used as unary scores of a CRF, and minimizes the negative CRF
    log-likelihood plus an L2 penalty on the output weights with Adam.

    Training only runs when ``FLAGS.is_training == 1``; every 100 steps the
    model is scored on the dev data with ``evalution`` and the best accuracy
    seen so far is printed.
    """
    # load data
    print 'start read dataset'
    train_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.train.pos'))
    dev_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.devel.pos'))
    dev_data.fake_data(FLAGS.batch_size)
    print 'stop read dataset'
    tf.set_random_seed(1)
    # Keeping the word embeddings on the CPU saves GPU memory.
    with tf.device('/cpu:0'):
        with tf.variable_scope('embedding') as scope:
            random_embedding = tf.get_variable(
                name="random_embedding",
                shape=[FLAGS.emb_size, FLAGS.word_dim],
                dtype=tf.float32)
    with tf.name_scope('placeholder'):
        x_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
        y_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
        output_keep_prob = tf.placeholder(tf.float32)
    # Per-row sum of sign(token id); presumably id 0 is the padding id, so
    # this is the unpadded sentence length -- TODO confirm against util.padding.
    sequence_length = tf.reduce_sum(tf.sign(x_), reduction_indices=1)
    sequence_length = tf.cast(sequence_length, tf.int32)
    with tf.device('/gpu:2'):
        with tf.variable_scope('input_layer'):
            # x:[batch_size,n_steps,n_input]
            x = tf.nn.embedding_lookup(random_embedding, x_)
        # lstm cell
        with tf.name_scope('bi_lstm_layer'):
            lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden)
            lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden)
            # Get lstm cell output
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=lstm_cell_fw,
                cell_bw=lstm_cell_bw,
                inputs=x,
                sequence_length=sequence_length,
                dtype=tf.float32)
            # Join forward and backward outputs, then flatten to one row per
            # token for the linear layer below.
            outputs = tf.concat(2, outputs)
            outputs = tf.reshape(outputs, [-1, 2 * FLAGS.n_hidden])
            outputs = tf.nn.dropout(outputs, keep_prob=output_keep_prob)
        with tf.variable_scope('Softmax'):
            weights = tf.get_variable(
                name="weights",
                shape=[2 * FLAGS.n_hidden, FLAGS.n_classes],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=0.01))
            biases = tf.get_variable(name="biases",
                                     shape=[FLAGS.n_classes],
                                     dtype=tf.float32)
        matricized_unary_scores = tf.matmul(outputs, weights) + biases
        # Back to [batch, steps, classes], the layout handed to the CRF.
        unary_scores = tf.reshape(matricized_unary_scores,
                                  [FLAGS.batch_size, -1, FLAGS.n_classes])
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            unary_scores, y_, sequence_length)
        l2_loss = tf.nn.l2_loss(weights) * FLAGS.beta
        loss = tf.reduce_mean(-log_likelihood) + l2_loss
        train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)
    saver = tf.train.Saver()
    best_acc = 0
    if FLAGS.is_training == 1:
        with tf.Session(config=util.gpu_config()) as sess:
            sess.run(tf.global_variables_initializer())
            epoch_size = train_data.numbers() // FLAGS.batch_size
            for step in range(epoch_size * FLAGS.epoch_step):
                batch_x, batch_y, _ = util.padding(
                    *train_data.next_batch(FLAGS.batch_size))
                # Labels are reshaped to [batch, steps] to match y_; keep
                # probability is 1 - dropout while training.
                sess.run(
                    [l2_loss, loss, train_op],
                    feed_dict={
                        x_: batch_x,
                        y_: batch_y.reshape([FLAGS.batch_size, -1]),
                        output_keep_prob: 1 - FLAGS.dropout
                    })
                if step % 100 == 0:
                    cur_acc = evalution(sess, transition_params, dev_data, x_,
                                        y_, output_keep_prob, unary_scores,
                                        sequence_length)
                    if cur_acc > best_acc:
                        best_acc = cur_acc
                        # Checkpointing of the best model is disabled:
                        #saver.save(sess,'best.model')
                    print 'best_acc: ' + str(best_acc)
    else:
        pass
def __init__(
        self,
        dataset,
        test_file_id=1,  # from 1 to 10
        path='./MitchellEtAI/',
        max_len=41,
        batch_size=32,
        max_char_len=10,
        max_target_len=3,
        window_size=5):
    """Load one train/test fold and turn it into padded index arrays.

    Reads ``<path><dataset>/10-fold/train.<test_file_id>`` and the matching
    ``test.<test_file_id>`` file, builds the char / word / POS dictionaries
    from the training split, builds an embedding matrix from pretrained
    word vectors (and pickles it under ``./embeddings/``), then converts
    text, characters, POS tags and label sequences to padded ``int32``
    arrays stored on ``self.train_*`` / ``self.test_*``.

    Args:
        dataset: dataset sub-directory name; ``'en'`` and ``'es'`` get
            special handling for the embedding source and dump path.
        test_file_id: index of the fold to load.
        path: root directory of the folds.
        max_len: length every token-level sequence is padded to.
        batch_size: batch size used to derive the number of batches.
        max_char_len: character length passed to ``util.char_padding``.
        max_target_len: not used in this method.
        window_size: window passed to ``util.context_window``.
    """
    # data = [read_file(path + dataset + '/10-fold/test.' + str(i+1)) for i in range(10)]
    # test_data = data[test_file_id]
    # train_data= reduce(lambda x1,x2:x1+x2, data[:test_file_id] + data[test_file_id+1:])
    train_data = read_file(path + dataset + '/10-fold/train.' +
                           str(test_file_id))
    test_data = read_file(path + dataset + '/10-fold/test.' +
                          str(test_file_id))
    # Each record is indexed as (text, seq label, seq sentiment, pos, chars).
    test_text = [x[0] for x in test_data]
    test_seq_label = [x[1] for x in test_data]
    test_seq_senti = [x[2] for x in test_data]
    test_pos = [x[3] for x in test_data]
    test_char = [x[4] for x in test_data]
    # Keep the raw test text before it is replaced by indices below.
    self.test_raw_text = test_text
    train_text = [x[0] for x in train_data]
    train_seq_label = [x[1] for x in train_data]
    train_seq_senti = [x[2] for x in train_data]
    train_pos = [x[3] for x in train_data]
    train_char = [x[4] for x in train_data]
    # Concatenate the per-sample character lists into one list.
    tmp_train_char = reduce(lambda x1, x2: x1 + x2, [x for x in train_char])
    # All dictionaries are built from the training split only.
    self.char_dict = util.get_char_dict(tmp_train_char)
    self.word_dict = util.get_dict(train_text)
    self.pos_dict = util.get_dict(train_pos)
    self.seq_label_dict = 5  # 0~4 for O, B-person, I-Person, B-organization, I-organization
    self.senti_label_size = 7  # 0~6 for None, B_Negative,I_Negative, B_Neutral, I_Neutral B_Positive , I_Positive
    print(' seq label size: {}'.format(self.seq_label_dict))
    word2vec_path = '~/glove/GoogleNews-vectors-negative300.bin'
    binary = True
    emb_dim = 300
    if dataset == 'es':
        # NOTE(review): the first two assignments are overwritten a few
        # lines below, so the 200-dim text file is what is loaded -- confirm
        # that this whole group is meant to apply to 'es' only.
        word2vec_path = '~/glove/SBW-vectors-300-min5.bin'
        emb_dim = 300  # 300 dim
        # word2vec_path = '~/glove/fasttext-sbwc.3.6.e20-es.bin'
        # word2vec_path = '~/glove/glove-sbwc.i25-es.bin'
        word2vec_path = '/home/mdh/glove/embedding_file'
        binary = False
        emb_dim = 200  # 200 dim
    word_vectors = KeyedVectors.load_word2vec_format(word2vec_path,
                                                     binary=binary)
    # Random initialization; rows of words found in the pretrained vectors
    # are overwritten with the pretrained values.
    self.embedding = np.random.uniform(
        -0.1, 0.1, (len(self.word_dict), emb_dim)).astype('float32')
    for k in self.word_dict:
        if k in word_vectors:
            self.embedding[self.word_dict[k]][:] = word_vectors[k]
    dump_path = ''
    if dataset == 'en':
        dump_path = './embeddings/acl2015-en.pkl'
    else:
        dump_path = './embeddings/acl2015-fast-es.pkl'
    # Cache the embedding matrix on disk.
    with open(dump_path, 'wb') as f:
        pkl.dump(self.embedding, f)
    dump_path = ''
    # The string below is disabled code that would load the cached
    # embedding instead of rebuilding it.
    ''' if dataset == 'en': dump_path = './embeddings/acl2015-en.pkl' else: dump_path = './embeddings/acl2015-es.pkl' with open(dump_path, 'rb') as f: self.embedding = pkl.load(f) '''
    self.char_size = len(self.char_dict)
    self.vocab_size = len(self.word_dict)
    self.label_size = 3  # no use
    self.pos_size = len(self.pos_dict)
    self.ner_size = 10  # no use
    self.max_len = max_len
    self.batch_size = batch_size
    # reverse_seq_label_dict is a module-level name defined outside this method.
    self.term_label_size = len(reverse_seq_label_dict)
    self.window_size = window_size
    # Map characters and words to indices.
    train_char = util.char2idx(train_char, self.char_dict)
    test_char = util.char2idx(test_char, self.char_dict)
    train_text = util.word2idx(train_text, self.word_dict)
    test_text = util.word2idx(test_text, self.word_dict)
    # util.padding returns (padded sequences, lengths, mask) here.
    train_text, train_seq_len, train_mask = util.padding(
        train_text, max_len)
    test_text, test_seq_len, test_mask = util.padding(test_text, max_len)
    train_context = util.context_window(np.asarray(train_text),
                                        self.window_size)
    test_context = util.context_window(np.asarray(test_text),
                                       self.window_size)
    train_char, train_char_len = util.char_padding(train_char, max_len,
                                                   max_char_len)
    test_char, test_char_len = util.char_padding(test_char, max_len,
                                                 max_char_len)
    # Label sequences are padded like the text; lengths and masks are dropped.
    train_seq_label, _, _ = util.padding(train_seq_label, max_len)
    test_seq_label, _, _ = util.padding(test_seq_label, max_len)
    train_seq_senti, _, _ = util.padding(train_seq_senti, max_len)
    test_seq_senti, _, _ = util.padding(test_seq_senti, max_len)
    train_pos = util.word2idx(train_pos, self.pos_dict)
    test_pos = util.word2idx(test_pos, self.pos_dict)
    train_pos, _, _ = util.padding(train_pos, max_len)
    test_pos, _, _ = util.padding(test_pos, max_len)
    # Publish the training arrays (masks as float32, everything else int32).
    self.train_pos = np.asarray(train_pos[:]).astype('int32')
    self.train_text = np.asarray(train_text[:]).astype('int32')
    self.train_mask = np.asarray(train_mask[:]).astype('float32')
    self.train_char = np.asarray(train_char[:]).astype('int32')
    self.train_context = np.asarray(train_context[:]).astype('int32')
    self.train_seq_len = np.asarray(train_seq_len[:]).astype('int32')
    self.train_char_len = np.asarray(train_char_len[:]).astype('int32')
    self.train_seq_label = np.asarray(train_seq_label[:]).astype('int32')
    self.train_seq_senti = np.asarray(train_seq_senti[:]).astype('int32')
    print('Train pos shape : {}'.format(self.train_pos.shape))
    print('Train text shape : {}'.format(self.train_text.shape))
    print('Train mask shape : {}'.format(self.train_mask.shape))
    print('Train char shape : {}'.format(self.train_char.shape))
    print('Train context shape : {}'.format(self.train_context.shape))
    print('Train text len shape : {}'.format(self.train_seq_len.shape))
    print('Train char len shape : {}'.format(self.train_char_len.shape))
    print('Train seq label shape: {}'.format(self.train_seq_label.shape))
    print('Train seq senti shape: {}\n'.format(self.train_seq_senti.shape))
    # Publish the test arrays with the same dtypes.
    self.test_pos = np.asarray(test_pos).astype('int32')
    self.test_text = np.asarray(test_text).astype('int32')
    self.test_mask = np.asarray(test_mask).astype('float32')
    self.test_char = np.asarray(test_char).astype('int32')
    self.test_context = np.asarray(test_context).astype('int32')
    self.test_seq_len = np.asarray(test_seq_len).astype('int32')
    self.test_char_len = np.asarray(test_char_len).astype('int32')
    self.test_seq_label = np.asarray(test_seq_label).astype('int32')
    self.test_seq_senti = np.asarray(test_seq_senti).astype('int32')
    print('Test pos shape : {}'.format(self.test_pos.shape))
    print('Test text shape : {}'.format(self.test_text.shape))
    print('Test mask shape : {}'.format(self.test_mask.shape))
    print('Test char shape : {}'.format(self.test_char.shape))
    print('Test context shape : {}'.format(self.test_context.shape))
    print('Test text len shape : {}'.format(self.test_seq_len.shape))
    print('Test char len shape : {}'.format(self.test_char_len.shape))
    print('Test seq label shape : {}'.format(self.test_seq_label.shape))
    print('Test seq senti shape : {}\n'.format(self.test_seq_senti.shape))
    # Number of batches per pass, rounded up to cover a partial last batch.
    self.train_epoch = self.train_text.shape[0] // self.batch_size
    if self.train_text.shape[0] % self.batch_size:
        self.train_epoch += 1
    self.test_epoch = self.test_text.shape[0] // self.batch_size
    if self.test_text.shape[0] % self.batch_size:
        self.test_epoch += 1
    print('Train epoch: {}'.format(self.train_epoch))
    # print 'Valid epoch: {}'.format(self.valid_epoch)
    print('Test epoch: {}'.format(self.test_epoch))
    self.shuffle()
    # match, seq_label_dict and BI_dict are module-level names defined
    # outside this method; the loops below print their contents.
    match(seq_label_dict)
    s = sorted(seq_label_dict.items(), key=lambda x: x[1], reverse=True)
    for x in s:
        print(x)
    for k in BI_dict:
        print(k, ' : ', BI_dict[k])
    # Inverse of word_dict: index -> word.
    self.text_dict_reverse = {}
    for k in self.word_dict:
        self.text_dict_reverse[self.word_dict[k]] = k