Example #1
def train():
    train_x, train_y, words_dict, labels_dict, seqlen_all = data_helper.load("train.txt", 10000, 35)
    test_x, test_y, seqlen_test = data_helper.load_test_data("test_filter_2.txt", seqlen, words_dict, labels_dict)
    model = bilstm_text(voc_size,batch_size,seqlen,n_class,embedding_size,learn_rate)
    op_pred = model.pred
    op_loss = model.loss
    op_train = model.train_op
    op_acc = model.acc
    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)
    epochs = 50
    cnt = 0

    for epoch in range(epochs):
        batches = data_helper.get_batch(64, train_x, train_y, seqlen_all)
        for batch_x, batch_y, batch_len in batches:
            _, train_acc = sess.run([op_train, op_acc], feed_dict={model.inputs: batch_x, model.outputs: batch_y, model.seqlen_hdr: batch_len})
            print("{0} epoch {1} iters acc = {2}".format(epoch, cnt, train_acc))
            if cnt % 50 == 0:
                tmp_pred = sess.run(op_pred,feed_dict={model.inputs:batch_x,model.outputs:batch_y,model.seqlen_hdr:batch_len})
                print(tmp_pred)
                test(model, test_x, test_y, seqlen_test)
            cnt += 1
        print("---------test----------------")
        test(model,test_x, test_y, seqlen_test)
datefmt = "%a %d %b %Y %H:%M:%S"
formatter = logging.Formatter(fmt, datefmt)

fh.setFormatter(formatter)
logger.addHandler(fh)

#----------------------------- define a logger end ----------------------------------

#------------------------------------load data -------------------------------
embedding, word2idx, idx2word = load_embedding(FLAGS.embedding_file,
                                               FLAGS.embedding_size)

ori_quests, cand_quests, neg_quests, cat_ids = load_train_data(
    FLAGS.train_file, word2idx, FLAGS.num_unroll_steps)

test_ori_quests, test_cand_quests, labels, results, test_cat_ids = load_test_data(
    FLAGS.test_file, word2idx, FLAGS.num_unroll_steps)

for_test_ori_quests, for_test_cand_quests, for_labels, for_results, for_test_cat_ids = load_test_data(
    FLAGS.train_LONG, word2idx, FLAGS.num_unroll_steps)
#test_like_train_ori_quests, test_like_train_cand_quests,test_like_train_neg_quests,test_like_train_cat_ids = load_train_data(FLAGS.test_TRAIN, word2idx, FLAGS.num_unroll_steps)

#----------------------------------- load data end ----------------------


def onehot_encoder(cat_ids_batch):
    return np.eye(CAT_NUMBER)[cat_ids_batch]
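# Usage sketch (assuming CAT_NUMBER = 3): np.eye(3) is the 3x3 identity matrix, so
# onehot_encoder([0, 2]) indexes rows 0 and 2 and returns [[1., 0., 0.], [0., 0., 1.]]
# -- one one-hot row per category id in the batch.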


#----------------------------------- execute train model ---------------------------------
def run_step(sess,
             ori_batch,
Example #3
datefmt = "%a %d %b %Y %H:%M:%S"
formatter = logging.Formatter(fmt, datefmt)

fh.setFormatter(formatter)
logger.addHandler(fh)

#----------------------------- define a logger end ----------------------------------

#------------------------------------load data -------------------------------
embedding, word2idx, idx2word = load_embedding(FLAGS.embedding_file,
                                               FLAGS.embedding_size)
ori_quests, cand_quests = load_train_data(FLAGS.train_file, word2idx,
                                          FLAGS.sequence_len)
#train_quests, valid_quests = create_valid(zip(ori_quests, cand_quests))

test_ori_quests, test_cand_quests, labels, results = load_test_data(
    FLAGS.test_file, word2idx, FLAGS.sequence_len)
#----------------------------------- load data end ----------------------

#----------------------------------- build model --------------------------------------
filter_sizes = [
    int(filter_size.strip())
    for filter_size in FLAGS.filter_sizes.strip().split(",")
]
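# Sketch: with FLAGS.filter_sizes set to e.g. "2,3,4" (a hypothetical value), this
# yields filter_sizes = [2, 3, 4] -- one convolution filter height per entry.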

#----------------------------------- build model end ----------------------------------


#----------------------------------- execute train model ---------------------------------
def run_step(sess,
             ori_batch,
             cand_batch,
model.add(Flatten())
model.add(Dense(500))
model.add(Activation("relu"))
# softmax classifier
model.add(Dense(nb_classes))
model.add(Activation("softmax"))

model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=0.01),
              metrics=['accuracy'])
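# Note: categorical_crossentropy expects one-hot label matrices, so Y_train / Y_test
# are assumed to be one-hot encoded (e.g. via keras.utils.np_utils.to_categorical).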

model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          nb_epoch=nb_epoch,
          verbose=1,
          validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

X_val = load_test_data("test.csv")
X_val = X_val.reshape(X_val.shape[0], 1, img_rows, img_cols)
X_val = X_val.astype('float32')
X_val /= 255
print(X_val.shape, 'Validation test samples')

preds = model.predict_classes(X_val, batch_size=batch_size, verbose=1)

save_preds(preds, "lenet_submission.csv")
Example #5
# fully connected layer
h_drop = tf.nn.dropout(pool_flat, keep_prob)

full_W = tf.Variable(tf.truncated_normal([4, n_class], stddev=0.1, dtype=tf.float32))
full_B = tf.Variable(tf.constant(0.1, dtype=tf.float32))

logits = tf.matmul(h_drop, full_W) + full_B
outputs = tf.nn.softmax(logits)
pred = tf.argmax(outputs, 1)
# softmax_cross_entropy_with_logits expects raw logits, not softmax probabilities
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
acc = tf.reduce_mean(tf.cast(tf.equal(pred, tf.argmax(labels, 1)), tf.float32))
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

train_x, train_y, words_dict, labels_dict, all_len = data_helper.load("data/train.txt", 1000, s_limit_len)
test_x, test_y, testlen = data_helper.load_test_data("data/test_filter_2.txt", s_limit_len, words_dict, labels_dict)

def test(sess, acc, pred, test_x, test_y):
    y_pred, acc_test = sess.run([pred, acc], feed_dict={inputs: test_x, labels: test_y, keep_prob: 1.0})
    y_true = sess.run(tf.argmax(test_y, 1))

    print(metrics.classification_report(y_true, y_pred))



for epoch in range(1000):
    iter = 0
    test(sess, acc, pred, test_x, test_y)
    batches = data_helper.get_batch(64, train_x, train_y, all_len)
    for batch_x, batch_y, batch_len in batches:
        _, loss_, acc_, pred_list = sess.run([train_op, loss, acc, pred], feed_dict={inputs: batch_x, labels: batch_y, keep_prob: 0.5})
Example #6
fh = logging.FileHandler("./run.log", mode="w")
fh.setLevel(logging.INFO)

fmt = "%(asctime)-15s %(levelname)s %(filename)s %(lineno)d %(process)d %(message)s"
datefmt = "%a %d %b %Y %H:%M:%S"
formatter = logging.Formatter(fmt, datefmt)

fh.setFormatter(formatter)
logger.addHandler(fh)
#----------------------------- define a logger end ----------------------------------

#------------------------------------load data -------------------------------
embedding, word2idx, idx2word = load_embedding(FLAGS.embedding_file, FLAGS.embedding_size)
ori_quests, cand_quests = load_train_data(FLAGS.train_file, word2idx, FLAGS.quest_len, FLAGS.answer_len)

test_ori_quests, test_cand_quests, labels, results = load_test_data(FLAGS.test_file, word2idx, FLAGS.quest_len, FLAGS.answer_len)
valid_ori_quests, valid_cand_quests, valid_labels, valid_results = load_test_data(FLAGS.valid_file, word2idx, FLAGS.quest_len, FLAGS.answer_len)
#----------------------------------- load data end ----------------------

#----------------------------------- execute train model ---------------------------------
def run_step(sess, ori_batch, cand_batch, neg_batch, lstm, dropout=1.):
    start_time = time.time()
    feed_dict = {
        lstm.ori_input_quests : ori_batch,
        lstm.cand_input_quests : cand_batch, 
        lstm.neg_input_quests : neg_batch,
        lstm.keep_prob : dropout
    }
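    # ori/cand/neg appear to be (question, positive answer, negative answer) triplets;
    # lstm.ori_cand and lstm.ori_neg below look like their similarity scores, which a
    # pairwise ranking loss would compare (an assumption from the names, not shown here).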

    _, step, ori_cand_score, ori_neg_score, cur_loss, cur_acc = sess.run([train_op, global_step, lstm.ori_cand, lstm.ori_neg, lstm.loss, lstm.acc], feed_dict)
    time_str = datetime.datetime.now().isoformat()
fh.setFormatter(formatter)
logger.addHandler(fh)

#----------------------------- define a logger end ----------------------------------

#------------------------------------load data -------------------------------
embedding, word2idx, idx2word = load_embedding(FLAGS.embedding_file,
                                               FLAGS.embedding_size)

ori_quests, cand_quests, neg_quests, cat_ids = load_train_data(
    FLAGS.train_file, word2idx, FLAGS.sequence_len)

#train_quests, valid_quests = create_valid(zip(ori_quests, cand_quests))

test_ori_quests, test_cand_quests, labels, results, test_cat_ids = load_test_data(
    FLAGS.test_file, word2idx, FLAGS.sequence_len)

# for_test_ori_quests, for_test_cand_quests, for_labels, for_results , for_test_cat_ids = load_test_data(FLAGS.train_for_test, word2idx, FLAGS.sequence_len)
# test_like_train_ori_quests, test_like_train_cand_quests,test_like_train_neg_quests,test_like_train_cat_ids = load_train_data(FLAGS.test_file_like_train, word2idx, FLAGS.sequence_len)

for_test_ori_quests, for_test_cand_quests, for_labels, for_results, for_test_cat_ids = load_test_data(
    FLAGS.train_LONG, word2idx, FLAGS.sequence_len)
#test_like_train_ori_quests, test_like_train_cand_quests,test_like_train_neg_quests,test_like_train_cat_ids = load_train_data(FLAGS.test_SHORT, word2idx, FLAGS.sequence_len)

#----------------------------------- load data end ----------------------

#----------------------------------- build model --------------------------------------
filter_sizes = [
    int(filter_size.strip())
    for filter_size in FLAGS.filter_sizes.strip().split(",")
]
tf.flags.DEFINE_string("test_file", "twitter-datasets/test_data.txt", "Path and name of test file")
tf.flags.DEFINE_string("submission_filename", "submission_predictions" + str(int(time.time())), "Path and name of submission file")

# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")


test_data = data_helper.load_test_data(FLAGS.test_file)

# Map data into vocabulary
vocab_path = os.path.join(os.path.curdir, "vocabulary", "vocab")
vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
x_test = np.array(list(vocab_processor.transform(test_data)))
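# VocabularyProcessor.transform maps each tweet to a fixed-length array of word ids,
# padded/truncated to the max_document_length the processor was originally fitted with.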

print("\nEvaluating...\n")

# Evaluation
# ==================================================
checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
graph = tf.Graph()
with graph.as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=FLAGS.allow_soft_placement,
Example #9
fmt = "%(asctime)-15s %(levelname)s %(filename)s %(lineno)d %(process)d %(message)s"
datefmt = "%a %d %b %Y %H:%M:%S"
formatter = logging.Formatter(fmt, datefmt)

fh.setFormatter(formatter)
logger.addHandler(fh)
#----------------------------- define a logger end ----------------------------------

#------------------------------------load data -------------------------------
embedding, word2idx, idx2word = load_embedding(FLAGS.embedding_file,
                                               FLAGS.embedding_size)
ori_quests, cand_quests = load_train_data(FLAGS.train_file, word2idx,
                                          FLAGS.num_unroll_steps)

test_ori_quests, test_cand_quests, labels, results = load_test_data(
    FLAGS.test_file, word2idx, FLAGS.num_unroll_steps)
valid_ori_quests, valid_cand_quests, valid_labels, valid_results = load_test_data(
    FLAGS.valid_file, word2idx, FLAGS.num_unroll_steps)

#----------------------------------- load data end ----------------------


#----------------------------------- execute train model ---------------------------------
def run_step(sess, ori_batch, cand_batch, neg_batch, lstm, dropout=1.):
    start_time = time.time()
    feed_dict = {
        lstm.ori_input_quests: ori_batch,
        lstm.cand_input_quests: cand_batch,
        lstm.neg_input_quests: neg_batch,
        lstm.keep_prob: dropout
    }
Example #10
                                    })
        return prediction

    def prob(self, x):
        prob = self._sess.run([self._prob],
                              feed_dict={
                                  self._input_x: x,
                                  self._drop_keep_prob: 1
                              })
        return prob

    def score(self, x, label):
        acc = self._sess.run([self._acc],
                             feed_dict={
                                 self._input_x: x,
                                 self._input_y: label,
                                 self._drop_keep_prob: 1
                             })
        return acc


if __name__ == "main":
    x_text = load_test_data('twitter-datasets/test_data.txt')
    vocab = learn.preprocessing.VocabularyProcessor.restore('vocabulary/vocab')
    x_data = np.array(list(vocab.transform(x_text)))

    ckpt_path = 'run/1526114630/checkpoint/model-3000.meta'
    tmodel = test_model(ckpt_path=ckpt_path)
    result = tmodel.predict(x_data)
    create_submission_file(result[0], 'submission.txt')
Example #11
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) 
# set of FC => RELU layers
model.add(Flatten())
model.add(Dense(500))
model.add(Activation("relu"))
# softmax classifier
model.add(Dense(nb_classes))
model.add(Activation("softmax"))

model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=0.01),
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
          verbose=1, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

X_val = load_test_data("test.csv")
X_val = X_val.reshape(X_val.shape[0], 1, img_rows, img_cols)
X_val = X_val.astype('float32')
X_val /= 255
print(X_val.shape, 'Validation test samples')

preds = model.predict_classes(X_val, batch_size=batch_size, verbose=1)
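# predict_classes is the old Keras Sequential API and returns the argmax class index per
# sample; on newer Keras the equivalent is np.argmax(model.predict(X_val), axis=1).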

save_preds(preds, "lenet_submission.csv")

                       "Path and name of submission file")

# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True,
                        "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

test_data = data_helper.load_test_data(FLAGS.test_file)

# Map data into vocabulary
vocab_path = os.path.join(os.path.curdir, "vocabulary", "vocab")
vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
x_test = np.array(list(vocab_processor.transform(test_data)))

print("\nEvaluating...\n")

# Evaluation
# ==================================================
checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
graph = tf.Graph()
with graph.as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
Example #13
import tensorflow as tf
import numpy as np
import data_helper
from tensorflow.contrib.rnn import LSTMCell
from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn as birnn
voc_size = 10000
batch_size = 64
seqlen = 35
learn_rate = 0.05
n_class = 2
embedding_size = 100
train_x, train_y, words_dict, labels_dict, seqlen_all = data_helper.load(
    "train.txt", 10000, 35)
one_hot_label = tf.one_hot(train_y, n_class)
test_x, test_y, seqlen_test = data_helper.load_test_data(
    "test_filter_2.txt", seqlen, words_dict, labels_dict)
# seqlen_all = np.array(seqlen_all)*10
inputs = tf.placeholder(tf.int64, [None, seqlen], name="seq_inputs")
outputs = tf.placeholder(tf.int64, [None, 2], name="outputs")
seqlen_hdr = tf.placeholder(tf.int64, [None])
W_embedding = tf.Variable(tf.random_uniform(shape=[voc_size, embedding_size]))
embedding = tf.nn.embedding_lookup(W_embedding, inputs)
# print("embding",embedding)
#embedding shape(35,100)
fwcell = LSTMCell(embedding_size)
bwcell = LSTMCell(embedding_size)
# seqlen here should hold one length per example in the batch, and should be a tensor
out_bilstm, final_state = birnn(fwcell,
                                bwcell,
                                inputs=embedding,
                                sequence_length=seqlen_hdr,