def get_batches_test():
    print("Loading test data...")
    df = data_helpers.read_data("/home/sahil/ML-bucket/test.csv")
    lexical_features = lexical_level_features(df)
    batch_iterator = data_helpers.batch_iter(
        lexical_features, FLAGS.batch_size, 1, shuffle=False)
    return batch_iterator
def get_validation_data():
    df = data_helpers.read_data("/home/sahil/ML-bucket/data/validation.csv")
    lexical_features = lexical_level_features(df)
    X_val = []
    Y_val = []
    # Renamed the loop variable from `iter`, which shadows the built-in.
    for item in lexical_features:
        X_val.append(item[0])
        Y_val.append(item[1])
    return np.asarray(X_val), np.asarray(Y_val)
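For orientation, a minimal sketch of how these two helpers might be consumed downstream; the `zip(*batch)` unpacking assumes `batch_iter` yields batches of (features, label) pairs, and `sess`, `accuracy_op`, `input_x`, and `input_y` are hypothetical names not defined in this fragment.

# Hypothetical consumer of the helpers above; `sess`, `accuracy_op`,
# `input_x`, and `input_y` are stand-in names, not from the original code.
X_val, Y_val = get_validation_data()
print("Validation set:", X_val.shape, Y_val.shape)

for batch in get_batches_test():
    x_batch, y_batch = zip(*batch)  # assumes (features, label) pairs
    # acc = sess.run(accuracy_op, {input_x: x_batch, input_y: y_batch})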
def get_acis_stn_latlon(acis_station_url=ACIS_STATION_URL, stationtype=1):
    stations = [(key, val[0]) for (key, val) in ws.stations.items()
                if val[stationtype] == 1]
    station_names = [item[0] for item in stations]
    station_ids = [item[1] for item in stations]
    # Get location data from ACIS.
    logging.info("Getting latlon data for stations")
    acis_params = {
        'sids': ','.join(station_ids),
        'meta': 'll',
    }
    acis_station_data = dh.read_data(acis_station_url, params=acis_params)
    station_meta = {
        name: {'lon': item['ll'][0], 'lat': item['ll'][1]}
        for (name, item) in zip(station_names, acis_station_data['meta'])
    }
    return station_meta
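The lookup above goes through `dh.read_data`, but underneath it is an HTTP request to the ACIS StnMeta web service. A standalone sketch with `requests`; the public endpoint URL and the two sample GHCN station ids are assumptions for illustration, since `ACIS_STATION_URL` is defined outside this fragment.

import requests

# Standalone sketch of the ACIS metadata lookup; the endpoint URL and the
# sample station ids are assumptions, not from the original code.
resp = requests.get("https://data.rcc-acis.org/StnMeta",
                    params={"sids": "USW00094728,USW00014732", "meta": "ll"})
for item in resp.json()["meta"]:
    lon, lat = item["ll"]  # ACIS 'll' is [longitude, latitude]
    print(lon, lat)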
import tensorflow as tf
import numpy as np
import os
import time
import gc

filename = '../data/wiki_cut.txt'
batch_size = 64
num_epochs = 20
window_size = 7
min_time = 5
embedding_size = 100
num_sampled = 16
start_lr = 1.5 / batch_size

datas, words = read_data(filename)
(dataset, word_dictionary, character_dict, wordID_charID,
 total_instance, dictionary_pro) = build_dataset(datas, words)
reverse_dictionary = dict(zip(word_dictionary.values(), word_dictionary.keys()))
del datas, words
gc.collect()

# Probe words for nearest-neighbor validation:
# street, professor, doctor, mile, computer, tiger.
valid_size = 6
valid_examples = np.array([word_dictionary['街道'], word_dictionary['教授'],
                           word_dictionary['医生'], word_dictionary['英里'],
                           word_dictionary['计算机'], word_dictionary['老虎']])
model = WordCharModel(word_size=len(word_dictionary),
                      character_size=len(character_dict),
                      embedding_size=embedding_size,
                      num_sampled=num_sampled,
                      valid_examples=valid_examples,
df = data_helpers.read_data()
np.random.seed(42)

# Position-embedding table: one row per relative distance in
# [-sequence_length, sequence_length], plus one extra slot.
pivot = 2 * FLAGS.sequence_length + 1
pos_vec = np.random.uniform(-1, 1, (pivot + 1, FLAGS.distance_dim))
# pos_vec_entities = np.random.uniform(-1, 1, (4, FLAGS.distance_dim))

# Beginning- and end-of-sentence embeddings.
beg_emb = np.random.uniform(-1, 1, FLAGS.embedding_size)
end_emb = np.random.uniform(-1, 1, FLAGS.embedding_size)
extra_emb = np.random.uniform(-1, 1, FLAGS.embedding_size)

# sequence_length = 0
# ain = ""
# Find the max length between entities.
# for index, row in df.iterrows():
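Nothing in this fragment shows how a relative distance is turned into a row of `pos_vec`; one common convention, sketched purely as an assumption consistent with the table's `pivot + 1` rows, is:

def distance_to_index(d, sequence_length):
    # Assumed mapping, not from the original code: shift distances in
    # [-sequence_length, sequence_length] to [1, 2 * sequence_length + 1]
    # and reserve row 0 for out-of-range/padding positions.
    if d < -sequence_length or d > sequence_length:
        return 0
    return d + sequence_length + 1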
# Commented-out (token, POS tag, chunk label) triples for a sample test
# sentence, kept as reference output for evaluate():
# (u'mục đích', 'N', 'B-NP'), (u'liệt kê', 'V', 'O'), (u'và', 'C', 'O'),
# (u'nêu', 'V', 'B-VP'), (u'ra', 'R', 'O'), (u'ý nghĩa', 'A', 'B-AP'),
# (u'của', 'E', 'I-VP'), (u'các', 'L', 'I-NP'), (u'nhãn chunking', 'N', 'B-NP'),
# (u'trong', 'E', 'B-PP'), (u'các', 'L', 'B-NP'), (u'corpus', 'Nb', 'B-NP'),
# (u'tiếng', 'N', 'I-NP'), (u'Việt', 'Np', 'B-NP'), (u'.', '.', 'O'),
# (u'Sau', 'E', 'B-PP'), (u'đó', 'P', 'B-NP'), (u'đưa', 'V', 'B-VP'),
# (u'ra', 'R', 'O'), (u'ánh', 'Nc', 'B-NP'), (u'xạ', 'N', 'B-NP'),
# (u'chung', 'A', 'B-AP'), (u'và', 'C', 'O'), (u'chuẩn hóa', 'V', 'B-VP'),
# (u'các', 'L', 'B-NP'), (u'nhãn chunking', 'N', 'I-PP'), (u'.', '.', 'I-NP')]
# print(evaluate(pred_sentence, test_sentence))

# args = parseArgument()
# MODEL_NAME = args.model
MODEL_NAME = "chunk.pkl"

# Transform text data to features.
test_sents = read_data(ROOT_DIR + "/data/ner/vlsp2016/corpus/test.txt")
X_test = [
    chunking_sent2features(sent=sent, mode='test') for sent in test_sents
]
y_test = [sent2label(sent) for sent in test_sents]
print(list(set([x for sent in y_test for x in sent])))

# Load the trained model.
print("=======================")
print("Load trained model ...")
with open("./models/" + MODEL_NAME, "rb") as f:
    model = pickle.load(f)
print("Done!!!")
predict = model.predict(X_test)
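The fragment stops after computing `predict`; the project's own `evaluate()` is not shown, but a plain token-level accuracy over the flattened sequences would look like this sketch:

# Token-level accuracy sketch; a stand-in for the evaluate() referenced
# in the commented code above, not the project's actual metric.
total = correct = 0
for pred_seq, gold_seq in zip(predict, y_test):
    for p, g in zip(pred_seq, gold_seq):
        total += 1
        correct += int(p == g)
print("Token accuracy: {:.4f}".format(correct / float(total)))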
graph = tf.Graph()
with graph.as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # Load the saved meta graph and restore variables.
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        # Recover input/output tensors from the restored graph by name.
        X = graph.get_operation_by_name("X").outputs[0]
        ground_truth_shadow_masks = graph.get_operation_by_name("y").outputs[0]
        g_tanh = graph.get_operation_by_name(
            "generator/deconv_1/tanh").outputs[0]

        for i, batch in enumerate(read_data(train=False)):
            s1, s2, s3, shadow = (np.array([batch[0]]), np.array(batch[1]),
                                  np.array(batch[2]), np.array(batch[3]))
            print(s1.shape, s2.shape, s3.shape)
            orig_w, orig_h = shadow.shape
            denominator = 25 + 5 * s2.shape[0] + s3.shape[0]

            s1_shadow_map = np.array(sess.run(g_tanh, feed_dict={X: s1}))
            # s2_shadow_map = np.array(sess.run(g_tanh, feed_dict={X: s2}))
            # s3_shadow_map = np.array(sess.run(g_tanh, feed_dict={X: s3}))

            s1_shadow_map_resized = 25. * np.array(
                resize(s1_shadow_map, h=orig_h, w=orig_w))
            # s2_shadow_map_resized = 5. * np.array(
            #     [resize(s2_shadow_map[k], h=orig_h, w=orig_w)
            #      for k in range(s2_shadow_map.shape[0])])
            # s3_shadow_map_resized = np.array(
            #     [resize(s3_shadow_map[k], h=orig_h, w=orig_w)
            #      for k in range(s3_shadow_map.shape[0])])
            weighted_matrix = s1_shadow_map_resized
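The loop above depends on a `resize` helper that never appears in this fragment. A plausible stand-in with OpenCV, assuming the intent is to squeeze the network output to 2-D and rescale it to the original frame size:

import cv2
import numpy as np

def resize(shadow_map, h, w):
    # Hypothetical implementation of the undefined helper used above;
    # cv2.resize takes its target size as (width, height).
    img = np.squeeze(shadow_map).astype(np.float32)
    return cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)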
from __future__ import print_function

from data_helpers import read_data, sent2label
from src.CONSTANT import ROOT_DIR
from src.features.features import chunking_sent2features

MODEL_NAME = "chunk.pkl"

# Read data.
train_sents = read_data(ROOT_DIR + "/data/ner/vlsp2016/corpus/train.txt")
dev_sents = read_data(ROOT_DIR + "/data/ner/vlsp2016/corpus/dev.txt")
test_sents = read_data(ROOT_DIR + "/data/ner/vlsp2016/corpus/test.txt")

# Transform text data to features.
X_train = [
    chunking_sent2features(sent=sent, mode='train') for sent in train_sents
]
y_train = [sent2label(sent) for sent in train_sents]
X_dev = [chunking_sent2features(sent=sent, mode='dev') for sent in dev_sents]
y_dev = [sent2label(sent) for sent in dev_sents]
X_test = [
    chunking_sent2features(sent=sent, mode='test') for sent in test_sents
]
y_test = [sent2label(sent) for sent in test_sents]

# Collect POS-tag -> chunk-label transitions across all splits.
transition = {}
for dataset in (train_sents, dev_sents, test_sents):
    for sent in dataset:
        for word in sent:
            tmp = word[1] + "->" + word[2]
"Evaluate model on dev set after this many steps (default: 100)") tf.app.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)") tf.app.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store (default: 5)") #flags.DEFINE_string("inputFile", "final.csv", "Input file to build vocabulary from") tf.app.flags.DEFINE_string("inputFile", "train_data_new2.csv", "Input file to build vocabulary from") tf.app.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement") tf.app.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices") FLAGS = tf.app.flags.FLAGS words, count_words = data_helpers.read_data(FLAGS.inputFile) #words , count_words = data_helpers.read_data(tf.app.flags.FLAGS.inputFile) x_, y = data_helpers.get_data() data = [len(x.split(" ")) for x in x_] for i in range(0, len(data)): if (data[i] > 200): print(i) max_document_length = 128 vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length) x = np.array(list(vocab_processor.fit_transform(x_))) vocab_dict = vocab_processor.vocabulary_._mapping sorted_vocab = sorted(vocab_dict.items(), key=lambda x: x[1]) vocabulary = list(list(zip(*sorted_vocab))[0]) file = open("vocab_classifier1.txt", "w")
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import math
import random

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf

import data_helpers

words = data_helpers.read_data()
print('Data size', len(words))

# Step 2: Build the dictionary and replace rare words with UNK token.
vocabulary_size = 15000

def build_dataset(words):
    count = [['UNK', -1]]
    count.extend(collections.Counter(words).most_common(vocabulary_size - 1))
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    data = list()
    unk_count = 0
    for word in words:
        if word in dictionary:
            index = dictionary[word]
        else:
            index = 0  # dictionary['UNK']
            unk_count += 1
        data.append(index)
    # Completion of the cut-off function, following the standard
    # TensorFlow word2vec build_dataset this snippet mirrors.
    count[0][1] = unk_count
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return data, count, dictionary, reverse_dictionary
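With the completion above, the function matches the standard word2vec recipe, so the usual invocation applies:

# Standard usage from the word2vec tutorial this snippet follows.
data, count, dictionary, reverse_dictionary = build_dataset(words)
print('Most common words (+UNK):', count[:5])
print('Sample data:', data[:10], [reverse_dictionary[i] for i in data[:10]])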
try:
    print("Loading model from {}".format(checkpoint_file))
    saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
    saver.restore(sess, checkpoint_file)
    print("Model successfully loaded...")
except Exception as err:
    # Surface the underlying error instead of discarding it.
    print("Error loading {}: {}".format(checkpoint_file, err))

# Recover tensors and training ops from the restored graph by name.
X = graph.get_operation_by_name("X").outputs[0]
ground_truth_shadow_masks = graph.get_operation_by_name("y").outputs[0]
g_tanh = graph.get_operation_by_name("generator/deconv_1/tanh").outputs[0]
d_sigmoid = graph.get_operation_by_name("discriminator/fc/sigmoid").outputs[0]
global_step = graph.get_operation_by_name("global_step").outputs[0]
d_optimizer = graph.get_operation_by_name("train/d_optimizer").outputs[0]
g_optimizer = graph.get_operation_by_name("train/g_optimizer").outputs[0]

for batch, e, num in read_data(epochs=1000):
    x, y = zip(*batch)
    x = np.array(x)
    y = np.array(y)
    step, summary, d_loss_value = sess.run(
        [d_optimizer, merged_summary, d_loss],
        feed_dict={
            X: x,
            ground_truth_shadow_masks: y
        })
init = tf.global_variables_initializer()
sess.run(init)

# Output directories for checkpoints and summaries.
timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join("../", "Models", timestamp))
checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
checkpoint_prefix = os.path.join(checkpoint_dir, "model")
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
train_summary_dir = os.path.join(out_dir, "summaries", "train")

merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter(out_dir + "/summaries")
writer.add_graph(sess.graph)
# tf.all_variables() is deprecated; tf.global_variables() is its replacement.
saver = tf.train.Saver(tf.global_variables())

for batch, e, num in read_data():
    x, y = zip(*batch)
    x = np.array(x)
    y = np.array(y)
    # One discriminator update, then two generator updates per batch.
    step, d_loss_value, g_x = sess.run([d_train_step, d_loss, gx],
                                       feed_dict={
                                           X: x,
                                           ground_truth_shadow_masks: y
                                       })
    step, g_loss_value = sess.run([g_train_step, g_loss],
                                  feed_dict={
                                      X: x,
                                      ground_truth_shadow_masks: y
                                  })
    step, g_loss_value = sess.run([g_train_step, g_loss],
                                  feed_dict={
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
train_summary_dir = os.path.join(out_dir, "summaries", "train")

# Separate summary ops for the generator and discriminator steps.
g_summary = tf.summary.merge([
    input_image, generator_image, shadow_image, g_loss_summary, d_fake_hist
])
d_summary = tf.summary.merge([d_loss_summary, d_real_hist])
# merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter(out_dir + "/summaries")
writer.add_graph(sess.graph)
# tf.all_variables() is deprecated; tf.global_variables() is its replacement.
saver = tf.train.Saver(tf.global_variables())

cnt = 0
for batch, e, num in read_data(
        # data_path="/home/sahil/Desktop/Projects/Shadow_Detection_DL/Data/Videos/aton_campus/data",
        batch_size=batch_size,
        epochs=1000):
    x, y = zip(*batch)
    x = np.array(x)
    y = np.array(y)
    step, summary, d_loss_value, d1, d2 = sess.run(
        [d_train_step, d_summary, d_loss, dx_real, dx_fake],
        feed_dict={
            X: x,
            ground_truth_shadow_masks: y
        })
    writer.add_summary(summary, cnt)
    step, summary, g_loss_value, d1, d2 = sess.run(
        [g_train_step, g_summary, g_loss, dx_real, dx_fake],
        feed_dict={
            X: x,