Example no. 1
def get_batches_test():
    print("Loading test data...")
    df = data_helpers.read_data("/home/sahil/ML-bucket/test.csv")
    lexical_features = lexical_level_features(df)
    batch_iterator = data_helpers.batch_iter(
        lexical_features, FLAGS.batch_size, 1, shuffle=False)
    return batch_iterator
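
For context, the iterator returned above is consumed batch by batch at inference time. A minimal usage sketch, assuming each batch yielded by data_helpers.batch_iter is a list of (features, label) pairs; the unpacking and the model call are illustrative, not part of the original:

test_batches = get_batches_test()
for batch in test_batches:
    x_batch, y_batch = zip(*batch)
    # x_batch would then be fed to the restored model, e.g. via a sess.run feed_dict.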
Example no. 2
def get_validation_data():
    df = data_helpers.read_data("/home/sahil/ML-bucket/data/validation.csv")
    lexical_features = lexical_level_features(df)
    X_val = list()
    Y_val = list()
    for item in lexical_features:
        X_val.append(item[0])
        Y_val.append(item[1])
    return np.asarray(X_val), np.asarray(Y_val)
Example no. 3
def get_acis_stn_latlon(acis_station_url=ACIS_STATION_URL, stationtype=1):
    stations = [(key, val[0]) for (key, val) in ws.stations.items()
                if val[stationtype] == 1]
    stationnames = [item[0] for item in stations]
    stationIDs = [item[1] for item in stations]
    # Get location data from ACIS
    logging.info("Getting latlon data for stations")
    acis_params = {}
    acis_params['sids'] = ','.join(stationIDs)
    acis_params['meta'] = 'll'
    acis_station_data = dh.read_data(acis_station_url, params=acis_params)
    station_meta = {
        name: {
            'lon': item['ll'][0],
            'lat': item['ll'][1]
        }
        for (name, item) in zip(stationnames, acis_station_data['meta'])
    }
    return station_meta
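
A short usage sketch for get_acis_stn_latlon; the printing loop below is illustrative only, not from the original:

station_meta = get_acis_stn_latlon()
for name, loc in station_meta.items():
    print("{}: lat={}, lon={}".format(name, loc['lat'], loc['lon']))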
Example no. 4
import tensorflow as tf
import numpy as np
import os
import time
import gc

filename = '../data/wiki_cut.txt'
batch_size = 64
num_epochs = 20
window_size = 7
min_time = 5
embedding_size = 100
num_sampled = 16
start_lr = 1.5 / batch_size

datas, words = read_data(filename)
dataset, word_dictionary, character_dict, wordID_charID, total_instance, dictionary_pro = build_dataset(datas, words)
reverse_dictionary = dict(zip(word_dictionary.values(), word_dictionary.keys()))

del datas, words
gc.collect()

valid_size = 6
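# Validation words: 街道 (street), 教授 (professor), 医生 (doctor), 英里 (mile), 计算机 (computer), 老虎 (tiger)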
valid_examples = np.array([word_dictionary['街道'], word_dictionary['教授'], word_dictionary['医生'],
                           word_dictionary['英里'], word_dictionary['计算机'], word_dictionary['老虎']])

model = WordCharModel(word_size=len(word_dictionary),
                      character_size=len(character_dict),
                      embedding_size=embedding_size,
                      num_sampled=num_sampled,
                      valid_examples=valid_examples,
Example no. 5
                                             shuffle=False)
    return batch_iterator


def get_validation_data():
    df = data_helpers.read_data("/home/sahil/ML-bucket/data/validation.csv")
    lexical_features = lexical_level_features(df)
    X_val = list()
    Y_val = list()
    for item in lexical_features:
        X_val.append(item[0])
        Y_val.append(item[1])
    return np.asarray(X_val), np.asarray(Y_val)


df = data_helpers.read_data()

np.random.seed(42)
pivot = 2 * FLAGS.sequence_length + 1
pos_vec = np.random.uniform(-1, 1, (pivot + 1, FLAGS.distance_dim))
# pos_vec_entities = np.random.uniform(-1, 1, (4, FLAGS.distance_dim))

# beginning and end of sentence embeddings
beg_emb = np.random.uniform(-1, 1, FLAGS.embedding_size)
end_emb = np.random.uniform(-1, 1, FLAGS.embedding_size)
extra_emb = np.random.uniform(-1, 1, FLAGS.embedding_size)

# sequence_length = 0
# ain = ""
'''Find the max length between entities'''
# for index, row in df.iterrows():
Example no. 6
    #                  (u'm\u1ee5c \u0111\xedch', 'N', 'B-NP'), (u'li\u1ec7t k\xea', 'V', 'O'), (u'v\xe0', 'C', 'O'),
    #                  (u'n\xeau', 'V', 'B-VP'), (u'ra', 'R', 'O'), (u'\xfd ngh\u0129a', 'A', 'B-AP'),
    #                  (u'c\u1ee7a', 'E', 'I-VP'), (u'c\xe1c', 'L', 'I-NP'), (u'nh\xe3n chunking', 'N', 'B-NP'),
    #                  (u'trong', 'E', 'B-PP'), (u'c\xe1c', 'L', 'B-NP'), (u'corpus', 'Nb', 'B-NP'),
    #                  (u'ti\u1ebfng', 'N', 'I-NP'), (u'Vi\u1ec7t', 'Np', 'B-NP'), (u'.', '.', 'O'),
    #                  (u'Sau', 'E', 'B-PP'), (u'\u0111\xf3', 'P', 'B-NP'), (u'\u0111\u01b0a', 'V', 'B-VP'),
    #                  (u'ra', 'R', 'O'), (u'\xe1nh', 'Nc', 'B-NP'), (u'x\u1ea1', 'N', 'B-NP'), (u'chung', 'A', 'B-AP'),
    #                  (u'v\xe0', 'C', 'O'), (u'chu\u1ea9n h\xf3a', 'V', 'B-VP'), (u'c\xe1c', 'L', 'B-NP'),
    #                  (u'nh\xe3n chunking', 'N', 'I-PP'), (u'.', '.', 'I-NP')]
    # print(evaluate(pred_sentence, test_sentence))
    # args = parseArgument()
    # MODEL_NAME = args.model
    MODEL_NAME = "chunk.pkl"

    # Transform text data to feature
    test_sents = read_data(ROOT_DIR + "/data/ner/vlsp2016/corpus/test.txt")

    X_test = [
        chunking_sent2features(sent=sent, mode='test') for sent in test_sents
    ]
    y_test = [sent2label(sent) for sent in test_sents]

    print(list(set([x for sent in y_test for x in sent])))
    # Load trained model
    print("=======================")
    print("Load trained model ...")
    with open("./models/" + MODEL_NAME, "rb") as model_file:
        model = pickle.load(model_file)
    print("Done!!!")

    predict = model.predict(X_test)
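
    # Hedged continuation sketch (not in the original): token-level accuracy of the
    # predicted chunk labels against y_test; the flattening below assumes model.predict
    # returns one list of labels per sentence.
    flat_pred = [tag for sent in predict for tag in sent]
    flat_true = [tag for sent in y_test for tag in sent]
    accuracy = sum(p == t for p, t in zip(flat_pred, flat_true)) / float(len(flat_true))
    print("Token-level accuracy: {:.4f}".format(accuracy))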
Example no. 7
graph = tf.Graph()
with graph.as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # Load the saved meta graph and restore variables
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        X = graph.get_operation_by_name("X").outputs[0]
        ground_truth_shadow_masks = graph.get_operation_by_name("y").outputs[0]

        g_tanh = graph.get_operation_by_name(
            "generator/deconv_1/tanh").outputs[0]

        for i, batch in enumerate(read_data(train=False)):
            s1, s2, s3, shadow = np.array([batch[0]]), np.array(
                batch[1]), np.array(batch[2]), np.array(batch[3])
            print(s1.shape, s2.shape, s3.shape)
            orig_w, orig_h = shadow.shape
            denominator = 25 + 5 * s2.shape[0] + s3.shape[0]
            s1_shadow_map = np.array(sess.run(g_tanh, feed_dict={X: s1}))
            # s2_shadow_map = np.array(sess.run(g_tanh, feed_dict={X: s2}))
            # s3_shadow_map = np.array(sess.run(g_tanh, feed_dict={X: s3}))
            s1_shadow_map_resized = 25. * np.array(
                resize(s1_shadow_map, h=orig_h, w=orig_w))
            # s2_shadow_map_resized = 5. * np.array(
            #     [resize(s2_shadow_map[k], h=orig_h, w=orig_w) for k in range(s2_shadow_map.shape[0])])
            # s3_shadow_map_resized = np.array([resize(s3_shadow_map[k], h=orig_h, w=orig_w) for k in
            #                                   range(s3_shadow_map.shape[0])])
            weighted_matrix = s1_shadow_map_resized
Example no. 8
from __future__ import print_function

from data_helpers import read_data, sent2label
from src.CONSTANT import ROOT_DIR
from src.features.features import chunking_sent2features

MODEL_NAME = "chunk.pkl"
# Read data
train_sents = read_data(ROOT_DIR + "/data/ner/vlsp2016/corpus/train.txt")
dev_sents = read_data(ROOT_DIR + "/data/ner/vlsp2016/corpus/dev.txt")
test_sents = read_data(ROOT_DIR + "/data/ner/vlsp2016/corpus/test.txt")

# Transform text data to feature
X_train = [
    chunking_sent2features(sent=sent, mode='train') for sent in train_sents
]
y_train = [sent2label(sent) for sent in train_sents]

X_dev = [chunking_sent2features(sent=sent, mode='dev') for sent in dev_sents]
y_dev = [sent2label(sent) for sent in dev_sents]

X_test = [
    chunking_sent2features(sent=sent, mode='test') for sent in test_sents
]
y_test = [sent2label(sent) for sent in test_sents]

transition = {}
for dataset in (train_sents, dev_sents, test_sents):
    for sent in dataset:
        for word in sent:
            tmp = word[1] + "->" + word[2]
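            # Hedged continuation (an assumption, not from the original source):
            # accumulate counts of each POS->chunk transition string.
            transition[tmp] = transition.get(tmp, 0) + 1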
Example no. 9
    "Evaluate model on dev set after this many steps (default: 100)")
tf.app.flags.DEFINE_integer("checkpoint_every", 100,
                            "Save model after this many steps (default: 100)")
tf.app.flags.DEFINE_integer("num_checkpoints", 5,
                            "Number of checkpoints to store (default: 5)")
#flags.DEFINE_string("inputFile", "final.csv", "Input file to build vocabulary from")

tf.app.flags.DEFINE_string("inputFile", "train_data_new2.csv",
                           "Input file to build vocabulary from")
tf.app.flags.DEFINE_boolean("allow_soft_placement", True,
                            "Allow device soft device placement")
tf.app.flags.DEFINE_boolean("log_device_placement", False,
                            "Log placement of ops on devices")
FLAGS = tf.app.flags.FLAGS

words, count_words = data_helpers.read_data(FLAGS.inputFile)

#words , count_words = data_helpers.read_data(tf.app.flags.FLAGS.inputFile)
x_, y = data_helpers.get_data()
data = [len(x.split(" ")) for x in x_]

for i, length in enumerate(data):
    if length > 200:
        print(i)
max_document_length = 128
vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
x = np.array(list(vocab_processor.fit_transform(x_)))
vocab_dict = vocab_processor.vocabulary_._mapping
sorted_vocab = sorted(vocab_dict.items(), key=lambda x: x[1])
vocabulary = list(list(zip(*sorted_vocab))[0])
file = open("vocab_classifier1.txt", "w")
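# Hedged continuation (the write format is an assumption, not from the original):
# dump the vocabulary one token per line, then close the handle.
for token in vocabulary:
    file.write(token + "\n")
file.close()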
Example no. 10
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import math
import random

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf
import data_helpers

words = data_helpers.read_data()
print('Data size', len(words))
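
# For reference, a minimal sketch of the kind of helper data_helpers.read_data is
# assumed to be here: read a plain-text corpus and return a flat list of word tokens.
# The filename and whitespace tokenization are assumptions, not the project's actual code.
#
# def read_data(path="corpus.txt"):
#     with open(path, "r", encoding="utf-8") as f:
#         return f.read().split()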

# Step 2: Build the dictionary and replace rare words with UNK token.
vocabulary_size = 15000


def build_dataset(words):
    count = [['UNK', -1]]
    count.extend(collections.Counter(words).most_common(vocabulary_size - 1))
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    data = list()
    unk_count = 0
    for word in words:
        if word in dictionary:
            index = dictionary[word]
Example no. 11
        try:
            print("Loading model from {}".format(checkpoint_file))
            saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            print("Model successfully loaded...")
        except Exception as err:
            print("Error loading {}: {}".format(checkpoint_file, err))
        X = graph.get_operation_by_name("X").outputs[0]

        ground_truth_shadow_masks = graph.get_operation_by_name("y").outputs[0]
        g_tanh = graph.get_operation_by_name(
            "generator/deconv_1/tanh").outputs[0]
        d_sigmoid = graph.get_operation_by_name(
            "discriminator/fc/sigmoid").outputs[0]
        global_step = graph.get_operation_by_name("global_step").outputs[0]
        d_optimizer = graph.get_operation_by_name(
            "train/d_optimizer").outputs[0]
        g_optimizer = graph.get_operation_by_name(
            "train/g_optimizer").outputs[0]

        for batch, e, num in read_data(epochs=1000):
            x, y = zip(*batch)
            x = np.array(x)
            y = np.array(y)
            step, summary, d_loss_value = sess.run(
                [d_optimizer, merged_summary, d_loss],
                feed_dict={
                    X: x,
                    ground_truth_shadow_masks: y
                })
Example no. 12
        init = tf.global_variables_initializer()
        sess.run(init)

        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join("../", "Models", timestamp))
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        merged_summary = tf.summary.merge_all()
        writer = tf.summary.FileWriter(out_dir + "/summaries")
        writer.add_graph(sess.graph)
        saver = tf.train.Saver(tf.global_variables())

        for batch, e, num in read_data():
            x, y = zip(*batch)
            x = np.array(x)
            y = np.array(y)
            step, d_loss_value, g_x = sess.run([d_train_step, d_loss, gx],
                                               feed_dict={
                                                   X: x,
                                                   ground_truth_shadow_masks: y
                                               })
            step, g_loss_value = sess.run([g_train_step, g_loss],
                                          feed_dict={
                                              X: x,
                                              ground_truth_shadow_masks: y
                                          })
            step, g_loss_value = sess.run([g_train_step, g_loss],
                                          feed_dict={
Example no. 13
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        g_summary = tf.summary.merge([
            input_image, generator_image, shadow_image, g_loss_summary,
            d_fake_hist
        ])
        d_summary = tf.summary.merge([d_loss_summary, d_real_hist])
        # merged_summary = tf.summary.merge_all()
        writer = tf.summary.FileWriter(out_dir + "/summaries")
        writer.add_graph(sess.graph)
        saver = tf.train.Saver(tf.global_variables())

        cnt = 0
        for batch, e, num in read_data(
                # data_path="/home/sahil/Desktop/Projects/Shadow_Detection_DL/Data/Videos/aton_campus/data",
                batch_size=batch_size,
                epochs=1000):
            x, y = zip(*batch)
            x = np.array(x)
            y = np.array(y)
            step, summary, d_loss_value, d1, d2 = sess.run(
                [d_train_step, d_summary, d_loss, dx_real, dx_fake],
                feed_dict={
                    X: x,
                    ground_truth_shadow_masks: y
                })
            writer.add_summary(summary, cnt)
            step, summary, g_loss_value, d1, d2 = sess.run(
                [g_train_step, g_summary, g_loss, dx_real, dx_fake],
                feed_dict={
                    X: x,