# Note: this model_fn is defined inside a test method, so `self`, `is_training`,
# `use_trt`, `batch_size` and `model_dir` are captured from the enclosing scope.
def _ModelFn(features, labels, mode):
    if is_training:
        logits_out = self._BuildGraph(features)
    else:
        # Import the previously exported GraphDef for inference.
        graph_def = self._GetGraphDef(use_trt, batch_size, model_dir)
        logits_out = importer.import_graph_def(
            graph_def,
            input_map={INPUT_NODE_NAME: features},
            return_elements=[OUTPUT_NODE_NAME + ':0'],
            name='')[0]

    loss = losses.sparse_softmax_cross_entropy(labels=labels, logits=logits_out)
    summary.scalar('loss', loss)

    classes_out = math_ops.argmax(logits_out, axis=1, name='classes_out')
    accuracy = metrics.accuracy(
        labels=labels, predictions=classes_out, name='acc_op')
    summary.scalar('accuracy', accuracy[1])

    if mode == ModeKeys.EVAL:
        return EstimatorSpec(
            mode, loss=loss, eval_metric_ops={'accuracy': accuracy})
    elif mode == ModeKeys.TRAIN:
        optimizer = AdamOptimizer(learning_rate=1e-2)
        train_op = optimizer.minimize(loss, global_step=get_global_step())
        return EstimatorSpec(mode, loss=loss, train_op=train_op)
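# --- Usage sketch (added, not part of the original test) ----------------------
# _ModelFn above follows the tf.estimator model_fn contract, so it is
# presumably handed to an Estimator by the surrounding test harness. The
# snippet below is a minimal, hypothetical illustration of that wiring;
# `Estimator`, `_InputFn`, `model_dir` and `num_steps` are assumptions, not
# names taken from the original file.
estimator = Estimator(model_fn=_ModelFn, model_dir=model_dir)
if is_training:
    estimator.train(input_fn=_InputFn, steps=num_steps)
else:
    results = estimator.evaluate(input_fn=_InputFn)
    print('accuracy:', results['accuracy'])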
def __init__(self, inputs, network, check_point="dqn.ckpt"):
    self.saver = tf.train.Saver()
    self.summary_writer = tf.summary.FileWriter("/tmp/dqn")
    self.inputs = inputs
    self.network = network
    self.targets = tf.placeholder(tf.float32,
                                  shape=(None, self.output_shape[1]))

    summary_names = ["actions", "loss", "exploration_rate",
                     "fruits_eaten", "timesteps_survived"]
    self.summary_placeholders = {name: tf.placeholder(dtype=tf.float32)
                                 for name in summary_names}
    # self.summary_placeholders = [tf.placeholder(dtype=summary_variables[i].dtype)
    #                              for i in range(len(summary_names))]
    # summary_ops = [tf.assign(summary_variables[i], self.summary_placeholders[i])
    #                for i in range(len(summary_names))]
    summary = [tf.summary.histogram(summary_names[i],
                                    self.summary_placeholders[summary_names[i]])
               for i in range(1)]
    summary += [tf.summary.scalar(summary_names[i],
                                  self.summary_placeholders[summary_names[i]])
                for i in range(1, len(summary_names))]
    self.summary_ops = tf.summary.merge_all()

    self.loss = tf.losses.mean_squared_error(self.network, self.targets)
    optimizer = AdamOptimizer()
    self.train_step = optimizer.minimize(loss=self.loss)
    # with tf.colocate_with(global_step):
    #     self.update_op = tf.assign_add(global_step, 1)

    self.sess = tf.Session()
    self.summary_writer.add_graph(tf.get_default_graph())
    with self.sess.as_default():
        tf.global_variables_initializer().run()
    if os.path.exists(check_point):
        self.saver.restore(self.sess, check_point)
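# --- Usage sketch (added, not part of the original class) ---------------------
# One plausible way the per-episode statistics could be pushed through the
# summary placeholders defined above and written to the FileWriter. `agent`,
# `episode_stats`, `episode` and the example values are hypothetical names.
episode_stats = {"actions": 2.0, "loss": 0.03, "exploration_rate": 0.1,
                 "fruits_eaten": 4.0, "timesteps_survived": 120.0}
feed = {agent.summary_placeholders[name]: value
        for name, value in episode_stats.items()}
summary_str = agent.sess.run(agent.summary_ops, feed_dict=feed)
agent.summary_writer.add_summary(summary_str, global_step=episode)
agent.summary_writer.flush()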
class CNN_prior:

    def get_attention(self):
        self.pos_attention = tf.reduce_sum(
            tf.gradients(self.pre_max[:, 1], self.vectors)[0] * self.vectors, axis=2)
        self.pos_attention = softmax_padding(self.pos_attention, self.padding, axis=1)
        self.neg_attention = tf.reduce_sum(
            tf.gradients(self.pre_max[:, 0], self.vectors)[0] * self.vectors, axis=2)
        self.neg_attention = softmax_padding(self.neg_attention, self.padding, axis=1)

    def forward(self, v):
        vectors2d = tf.expand_dims(v, 1)  # None x 1 x 200 x 300 ... NHWC
        conv1 = tf.nn.conv2d(input=vectors2d, filter=self.f3,
                             strides=[1, 1, 1, 1], padding="VALID")  # None x 1 x words x 50
        A1 = tf.nn.leaky_relu(conv1 + self.b3)
        self.a1 = A1
        conv2 = tf.nn.conv2d(input=vectors2d, filter=self.f4,
                             strides=[1, 1, 1, 1], padding="VALID")  # None x 1 x words x 50
        A2 = tf.nn.leaky_relu(conv2 + self.b4)
        self.a2 = A2
        conv3 = tf.nn.conv2d(input=vectors2d, filter=self.f5,
                             strides=[1, 1, 1, 1], padding="VALID")  # None x 1 x words x 50
        A3 = tf.nn.leaky_relu(conv3 + self.b5)

        max_A1_train = tf.reshape(tf.squeeze(tf.reduce_max(A1, 2)), [-1, 50])  # None x 50
        max_A2_train = tf.reshape(tf.squeeze(tf.reduce_max(A2, 2)), [-1, 50])  # None x 50
        max_A3_train = tf.reshape(tf.squeeze(tf.reduce_max(A3, 2)), [-1, 50])  # None x 50
        concat = tf.concat([max_A1_train, max_A2_train, max_A3_train], axis=1)
        concat_drop = tf.nn.dropout(concat, keep_prob=self.dropout_rate)
        pre_max_true_drop = tf.matmul(concat_drop, self.relevance_weight) + self.relevance_bias
        rel = tf.nn.softmax(pre_max_true_drop, axis=1)

        sum_A1_train = tf.reshape(tf.squeeze(tf.reduce_sum(A1, 2)), [-1, 50])  # None x 50
        sum_A2_train = tf.reshape(tf.squeeze(tf.reduce_sum(A2, 2)), [-1, 50])  # None x 50
        sum_A3_train = tf.reshape(tf.squeeze(tf.reduce_sum(A3, 2)), [-1, 50])  # None x 50
        concat_sums = tf.concat([sum_A1_train, sum_A2_train, sum_A3_train], axis=1)
        pre_max_sum = tf.matmul(concat_sums, self.relevance_weight) + self.relevance_bias
        return rel, pre_max_true_drop, pre_max_sum

    def groupby(self, att):
        return ndmatmul(self.group_by, att)

    def __init__(self, word_vector_size):
        tf.reset_default_graph()
        self.vector_size = word_vector_size
        self.vectors = tf.placeholder(tf.float32, shape=(None, None, word_vector_size))
        self.user_terms = tf.placeholder(tf.float32, shape=(None, None))
        self.ut2 = tf.placeholder(tf.float32, shape=(None, None))
        self.group_by = tf.placeholder(tf.float32, shape=(None, None, None))
        self.padding = tf.placeholder(tf.float32, shape=(None, None))
        self.output = tf.placeholder(tf.float32, shape=(None, 1))
        self.dropout_rate = tf.placeholder(tf.float32)

        xavier = tf.contrib.layers.xavier_initializer()
        # 50 bi-gram, 50 tri-gram and 50 5-gram filters (widths 2, 3 and 5)
        filter_tri = tf.Variable(xavier((1, 2, word_vector_size, 50)), name="weight")
        bias_tri = tf.Variable(tf.zeros((1, 50)), name="bias")
        self.f3 = filter_tri
        self.b3 = bias_tri
        filter_4 = tf.Variable(xavier((1, 3, word_vector_size, 50)), name="weight")
        bias_4 = tf.Variable(tf.zeros((1, 50)), name="bias")
        self.f4 = filter_4
        self.b4 = bias_4
        filter_5 = tf.Variable(xavier((1, 5, word_vector_size, 50)), name="weight")
        bias_5 = tf.Variable(tf.zeros((1, 50)), name="bias")
        self.f5 = filter_5
        self.b5 = bias_5

        with tf.name_scope("relevance"):
            hidden = 150
            self.relevance_weight = tf.Variable(0.01 * xavier((hidden, 2)))
            self.relevance_bias = tf.Variable(0.0 * xavier((1, 2)))
            self.relevance_attention_weight = tf.Variable(0.01 * xavier((100, 2)))
            self.relevance_attention_bias = tf.Variable(0.0 * xavier((1, 2)))

        rel, pre_max_true_dropped, pre_max_sum = self.forward(self.vectors)
        self.relevance = rel[:, 1]
        ut = tf.expand_dims(self.ut2, 2)  # NWC
        rel_masked, pre_max_true_masked_dropped, _ = self.forward(self.vectors * ut)
        self.rel_masked = rel_masked
        self.pre_max = pre_max_sum
        self.get_attention()

        # true_attention_error = 0.0
        att_reg = 0.0
        prediction_error = -tf.reduce_sum(
            (self.output * tf.log(rel[:, 1] + 10 ** -5, name="log2rel") +
             (1 - self.output) * tf.log(rel[:, 0] + 10 ** -5, name="log3rel")))

        # N, num_unique, text_length ; N, text_length
        pos_attention = tf.squeeze(tf.matmul(self.group_by, tf.expand_dims(self.pos_attention, -1)),
                                   squeeze_dims=-1)
        neg_attention = tf.squeeze(tf.matmul(self.group_by, tf.expand_dims(self.neg_attention, -1)),
                                   squeeze_dims=-1)
        self.pos_att_grouped = pos_attention
        self.neg_att_grouped = neg_attention
        pos_heads = tf.reduce_sum(tf.multiply(pos_attention, self.user_terms), axis=1)
        neg_heads = tf.reduce_sum(tf.multiply(neg_attention, self.user_terms), axis=1)
        self.pos_heads = pos_heads

        attention_error = 0.0
        occlusion_error = 0.0
        if use_attention:
            attention_error += tf.reduce_sum(self.output * (pos_heads - 0.5) ** 2)
            att_reg = tf.reduce_sum(
                self.output * tf.nn.relu(self.pos_attention - att_max_value) +
                (1 - self.output) * tf.nn.relu(self.neg_attention - att_max_value))
            occlusion_error = -tf.reduce_sum(
                (self.output * tf.log(rel_masked[:, 1] + 10 ** -5, name="log2rel2") +
                 (1 - self.output) * tf.log(rel_masked[:, 0] + 10 ** -5, name="log3rel2")))
        self.att = attention_error
        self.error = (prediction_error +
                      tf.sign(tf.reduce_sum(self.user_terms)) * attention_error +
                      tf.sign(tf.reduce_sum(self.user_terms)) * occlusion_error +
                      tf.sign(tf.reduce_sum(self.user_terms)) * att_reg)
        self.a = (tf.check_numerics(attention_error, message="att") +
                  tf.check_numerics(pos_heads, message="pos-heads") +
                  tf.check_numerics(neg_heads, message="neg-heads"))

        self.opt = AdamOptimizer()
        self.optimizer = self.opt.minimize(self.error)
        self.uncertainty = 1
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
        self.n_trained = 0
        self.training = False

    def get_feed_dict(self, doc):
        return {self.vectors: np.array(doc.vectors, dtype=np.float32).reshape([1, -1, self.vector_size]),
                self.output: [[doc.class_ * 1]],
                self.user_terms: np.array(doc.user_terms, dtype=np.float32).reshape([1, -1]),
                self.padding: np.array([1 for i in doc.words]).reshape([1, -1])}

    def blow_up(self, mat, num_rows, num_cols):
        blowed_mat = [i + [0] * (num_cols - len(i)) for i in mat]
        x = ([0] * num_cols) * (num_rows - len(blowed_mat))
        if x:
            blowed_mat.append(x)
        return blowed_mat

    def get_feed_dict_multiple(self, docs):
        dp = 0.7 if self.training else 1
        maximum = max([len(doc.vectors) for doc in docs])
        maximum = max([maximum, 7])
        max_terms = max([len(doc.user_terms) for doc in docs])
        return {self.vectors: np.array(
                    [doc.vectors[:maximum] + [[0] * (self.vector_size)] * (maximum - len(doc.vectors[:maximum]))
                     for doc in docs]).reshape([-1, maximum, self.vector_size]),
                self.group_by: np.array([self.blow_up(doc.gb, max_terms, maximum) for doc in docs]),
                self.ut2: np.array(
                    [doc.ut2[:maximum] + [0] * (maximum - len(doc.ut2[:maximum]))
                     for doc in docs]).reshape([-1, maximum]),
                self.output: [[doc.class_ * 1] for doc in docs],
                self.user_terms: np.array(
                    [doc.user_terms[:max_terms] + [0] * (max_terms - len(doc.user_terms[:max_terms]))
                     for doc in docs]).reshape([-1, max_terms]),
                self.padding: np.array(
                    [[1] * len(doc.vectors[:maximum]) + [0] * (maximum - len(doc.vectors[:maximum]))
                     for doc in docs]).reshape([-1, maximum]),
                self.dropout_rate: dp}

    def load(self, filename):
        saver = tf.train.Saver()
        saver.restore(self.sess, filename)

    def train(self, docs, train_full=False):
        self.training = True
        self.sess.run(tf.global_variables_initializer())
        sess = self.sess
        print("====23")
        n = len(docs)
        epochs = 200
        if train_full:
            epochs = 10
        self.n_trained = n
        import random
        random.shuffle(docs)
        last_10 = [100] * 10
        prev_error = None
        for epoch in range(epochs):
            total_error = 0
            for doc_s in [docs[i:i + 1] for i in range(0, len(docs), 1)]:
                fd = self.get_feed_dict_multiple(doc_s)
                try:
                    sess.run(self.a, feed_dict=fd)
                except Exception as e:
                    print("check")
                _, error = sess.run([self.optimizer, self.error], feed_dict=fd)
                # print(x, y)
                # if epoch > 50 and x >= 0.5:
                #     print("ch")
                # print(error, error - x, x)
                total_error += error
            total_error = total_error / len(docs)
            # print(total_error)
            if train_full:
                saver = tf.train.Saver()
                saver.save(sess, "./{}.pkl".format(epoch))
            # print(total_error)
            if epoch > 10 and total_error > 4:
                # Training has not converged; restart from scratch.
                self.train(docs)
                return
            last_10.pop(0)
            last_10.append(total_error)
            if max(last_10) < 0.05:
                print("breaking")
                break
            print(total_error)
        self.training = False

    def run(self, docs):
        sess = self.sess
        for doc_s in [docs[i:i + 1] for i in range(0, len(docs), 1)]:
            fd = self.get_feed_dict_multiple(doc_s)
            try:
                l1 = sess.run([self.relevance, self.pos_att_grouped,
                               self.neg_att_grouped, self.pos_heads], feed_dict=fd)
            except Exception as e:
                print("here")
            for ind, doc in enumerate(doc_s):
                d = {
                    "rel": l1[0][ind],
                    "pos_att": l1[1][ind],
                    "neg_att": l1[2][ind],
                    "pos_heads": l1[3][ind]
                }
                doc.pred_class = 0 if d["rel"] < 0.5 else 1
                doc.parameters = d
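# --- Usage sketch (added, not part of the original code) ----------------------
# Rough illustration of how this CNN_prior variant appears to be driven:
# construct it with the word-vector dimensionality, then call train() and
# run() on document objects. `train_docs`, `test_docs` and the Doc-like
# attributes they carry (vectors, class_, user_terms, ut2, gb, words) are
# hypothetical; the real document class is defined elsewhere in the project.
model = CNN_prior(word_vector_size=300)
model.train(train_docs)   # list of Doc-like objects
model.run(test_docs)      # fills doc.pred_class and doc.parameters
for doc in test_docs:
    print(doc.pred_class, doc.parameters["rel"])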
class CNN_prior:

    def get_attribution(self):
        self.pos_attribution = tf.reduce_sum(
            tf.gradients(self.pre_max_sum[:, 1], self.vectors)[0] * self.vectors, axis=2)
        self.pos_attribution = softmax_padding(self.pos_attribution, self.padding, axis=1)
        self.neg_attribution = tf.reduce_sum(
            tf.gradients(self.pre_max_sum[:, 0], self.vectors)[0] * self.vectors, axis=2)
        self.neg_attribution = softmax_padding(self.neg_attribution, self.padding, axis=1)

    def forward(self, v):
        vectors2d = tf.expand_dims(v, 1)  # None x 1 x 200 x 300 ... NHWC
        conv1 = tf.nn.conv2d(input=vectors2d, filter=self.f3,
                             strides=[1, 1, 1, 1], padding="VALID")  # None x 1 x words x 50
        A1 = tf.nn.leaky_relu(conv1 + self.b3)
        self.a1 = A1
        conv2 = tf.nn.conv2d(input=vectors2d, filter=self.f4,
                             strides=[1, 1, 1, 1], padding="VALID")  # None x 1 x words x 50
        A2 = tf.nn.leaky_relu(conv2 + self.b4)
        self.a2 = A2
        conv3 = tf.nn.conv2d(input=vectors2d, filter=self.f5,
                             strides=[1, 1, 1, 1], padding="VALID")  # None x 1 x words x 50
        A3 = tf.nn.leaky_relu(conv3 + self.b5)

        max_A1_train = tf.reshape(tf.squeeze(tf.reduce_max(A1, 2)), [-1, 50])  # None x 50
        max_A2_train = tf.reshape(tf.squeeze(tf.reduce_max(A2, 2)), [-1, 50])  # None x 50
        max_A3_train = tf.reshape(tf.squeeze(tf.reduce_max(A3, 2)), [-1, 50])  # None x 50
        concat = tf.concat([max_A1_train, max_A2_train, max_A3_train], axis=1)
        concat_drop = tf.nn.dropout(concat, keep_prob=self.dropout_rate)
        pre_max_true_drop = tf.matmul(concat_drop, self.relevance_weight) + self.relevance_bias
        rel = tf.nn.softmax(pre_max_true_drop, axis=1)

        sum_A1_train = tf.reshape(tf.squeeze(tf.reduce_sum(A1, 2)), [-1, 50])  # None x 50
        sum_A2_train = tf.reshape(tf.squeeze(tf.reduce_sum(A2, 2)), [-1, 50])  # None x 50
        sum_A3_train = tf.reshape(tf.squeeze(tf.reduce_sum(A3, 2)), [-1, 50])  # None x 50
        concat_sums = tf.concat([sum_A1_train, sum_A2_train, sum_A3_train], axis=1)
        pre_max_sum = tf.matmul(concat_sums, self.relevance_weight) + self.relevance_bias
        return rel, pre_max_true_drop, pre_max_sum

    def __init__(self, word_vector_size):
        tf.reset_default_graph()
        self.vector_size = word_vector_size
        self.vectors = tf.placeholder(tf.float32, shape=(None, None, word_vector_size))
        self.user_terms = tf.placeholder(tf.float32, shape=(None, None))
        self.padding = tf.placeholder(tf.float32, shape=(None, None))
        self.output = tf.placeholder(tf.float32, shape=(None, 1))
        self.dropout_rate = tf.placeholder(tf.float32)

        xavier = tf.contrib.layers.xavier_initializer()
        # 50 tri-gram, 50 4-gram and 50 5-gram filters
        filter_tri = tf.Variable(xavier((1, 3, word_vector_size, 50)), name="weight")
        bias_tri = tf.Variable(tf.zeros((1, 50)), name="bias")
        self.f3 = filter_tri
        self.b3 = bias_tri
        filter_4 = tf.Variable(xavier((1, 4, word_vector_size, 50)), name="weight")
        bias_4 = tf.Variable(tf.zeros((1, 50)), name="bias")
        self.f4 = filter_4
        self.b4 = bias_4
        filter_5 = tf.Variable(xavier((1, 5, word_vector_size, 50)), name="weight")
        bias_5 = tf.Variable(tf.zeros((1, 50)), name="bias")
        self.f5 = filter_5
        self.b5 = bias_5

        with tf.name_scope("relevance"):
            hidden = 150
            self.relevance_weight = tf.Variable(0.01 * xavier((hidden, 2)))
            self.relevance_bias = tf.Variable(0.0 * xavier((1, 2)))
            self.relevance_attention_weight = tf.Variable(0.01 * xavier((100, 2)))
            self.relevance_attention_bias = tf.Variable(0.0 * xavier((1, 2)))

        rel, pre_max_true_dropped, pre_max_sum = self.forward(self.vectors)
        self.relevance = rel[:, 1]

        ut = tf.expand_dims(self.user_terms, 2)  # NWC
        rel_masked, pre_max_true_masked_dropped, _ = self.forward(self.vectors * ut)
        self.rel_masked = rel_masked
        self.pre_max_sum = pre_max_sum
        self.get_attribution()

        prediction_error = -tf.reduce_sum(
            (self.output * tf.log(rel[:, 1] + 10 ** -5, name="log2rel") +
             (1 - self.output) * tf.log(rel[:, 0] + 10 ** -5, name="log3rel")))

        pos_heads = tf.reduce_sum(tf.multiply(self.pos_attribution, self.user_terms), axis=1)
        neg_heads = tf.reduce_sum(tf.multiply(self.neg_attribution, self.user_terms), axis=1)

        misattribution_error = 0.0
        corrective_error = 0.0
        att_reg = 0.0
        if use_attribution:
            misattribution_error += tf.reduce_sum(
                self.output * (pos_heads - 0.9) ** 2 +
                (1 - self.output) * (neg_heads - 0.9) ** 2)
            att_reg = tf.reduce_sum(
                self.output * tf.nn.relu(self.pos_attribution - att_max_value) +
                (1 - self.output) * tf.nn.relu(self.neg_attribution - att_max_value))
            corrective_error = -tf.reduce_sum(
                (self.output * tf.log(rel_masked[:, 1] + 10 ** -5, name="log2rel2") +
                 (1 - self.output) * tf.log(rel_masked[:, 0] + 10 ** -5, name="log3rel2")))

        self.error = (prediction_error +
                      tf.sign(tf.reduce_sum(self.user_terms)) *
                      (misattribution_error + corrective_error + att_reg))

        self.opt = AdamOptimizer()
        self.optimizer = self.opt.minimize(self.error)
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
        self.training = False

    def get_feed_dict_multiple(self, docs):
        dp = 0.7 if self.training else 1
        maximum = max([len(doc.vectors) for doc in docs] + [5])
        return {
            self.vectors: np.array([
                doc.vectors[:maximum] +
                [[0] * (self.vector_size)] * (maximum - len(doc.vectors[:maximum]))
                for doc in docs
            ]).reshape([-1, maximum, self.vector_size]),
            self.output: [[doc.class_ * 1] for doc in docs],
            self.user_terms: np.array([
                doc.user_terms[:maximum] +
                [0] * (maximum - len(doc.user_terms[:maximum]))
                for doc in docs
            ]).reshape([-1, maximum]),
            self.padding: np.array([
                [1] * len(doc.vectors[:maximum]) +
                [0] * (maximum - len(doc.vectors[:maximum]))
                for doc in docs
            ]).reshape([-1, maximum]),
            self.dropout_rate: dp
        }

    def train(self, docs):
        self.training = True
        # Re-initialize the machine during every training round
        self.sess.run(tf.global_variables_initializer())
        sess = self.sess
        print("====")
        epochs = 200  # maximum training epochs
        random.shuffle(docs)
        last_10 = [100] * 10
        for epoch in range(epochs):
            total_error = 0
            # Stochastic Gradient Descent (mini-batch size = 1) works best.
            for doc_s in [docs[i:i + 1] for i in range(0, len(docs), 1)]:
                fd = self.get_feed_dict_multiple(doc_s)
                _, error = sess.run([self.optimizer, self.error], feed_dict=fd)
                total_error += error
            total_error = total_error / len(docs)
            if epoch > 10 and total_error > 4:
                self.train(docs)
                return
            last_10.pop(0)
            last_10.append(total_error)
            if max(last_10) < 0.05:
                print("breaking")
                break
            print(total_error)
        self.training = False

    def run(self, docs):
        random.shuffle(docs)
        sess = self.sess
        num_correct = 0
        num_seen = 0
        for doc_s in [docs[i:i + 1] for i in range(0, len(docs), 1)]:
            fd = self.get_feed_dict_multiple(doc_s)
            l1 = sess.run([self.relevance, self.pos_attribution,
                           self.neg_attribution], feed_dict=fd)
            for ind, doc in enumerate(doc_s):
                d = {
                    "rel": l1[0][ind],
                    "pos_att": l1[1][ind],
                    "neg_att": l1[2][ind]
                }
                doc.pred_class = 0 if d["rel"] < 0.5 else 1
                doc.parameters = d
                num_correct += 1 * (doc.pred_class == doc.class_)
                num_seen += 1
                if num_seen % 1000 == 0:
                    print(num_correct / num_seen * 100)
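# --- Helper sketch (added; the real implementation lives elsewhere) -----------
# Both CNN_prior variants call softmax_padding() but its definition is not in
# these snippets. Given the (values, padding_mask, axis) call signature, one
# plausible reading is a softmax that ignores padded positions; the version
# below is an assumption, not the original code.
def softmax_padding(values, padding, axis=1):
    # padding is 1.0 for real tokens and 0.0 for padded positions.
    exp = tf.exp(values - tf.reduce_max(values, axis=axis, keepdims=True))
    exp = exp * padding
    return exp / (tf.reduce_sum(exp, axis=axis, keepdims=True) + 1e-9)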
class DBQA(DependencyParserBase):
    available_data_formats = {
        "word-based": NLPCC16DBQA,
        "character-based": NLPCC16DBQACharacterBased
    }
    default_data_format_name = "word-based"

    @classmethod
    def add_parser_arguments(cls, arg_parser):
        super(DBQA, cls).add_parser_arguments(arg_parser)
        group = arg_parser.add_argument_group(DBQA.__name__)
        group.add_argument("--external-embedding")
        group.add_argument("--batch-size", type=int, default=4096)
        group.add_argument("--embed-size", type=int, default=100)
        group.add_argument("--lstm-size", type=int, default=256)
        group.add_argument("--n-recur", type=int, default=2)
        group.add_argument("--use-bigram", type=int, default=1)
        group.add_argument("--input-keep-prob", type=int, default=1)
        group.add_argument("--recurrent-keep-prob", type=int, default=1)
        group.add_argument("--seed", type=int, default=42)
        group.add_argument("--steps", type=int, default=50000)
        group.add_argument("--merger-type", choices=["rnn", "cnn"], default="rnn")

    def __init__(self, options, data_train, session=None):
        self.statistics = DBQAStatistics.from_data(data_train)
        self.options = options
        self.optimizer = AdamOptimizer()
        self.global_step = tf.train.get_or_create_global_step()
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

        self.question_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.question_bigram_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.answer_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.answer_bigram_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.wrong_answer_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.wrong_answer_bigram_2d_pl = tf.placeholder(tf.int32, (None, None))

        self.network = PairwiseSimilarity(options, self.statistics)
        self.loss, self.accuracy = self.network.get_loss(
            self.question_2d_pl, self.question_bigram_2d_pl,
            self.answer_2d_pl, self.answer_bigram_2d_pl,
            self.wrong_answer_2d_pl, self.wrong_answer_bigram_2d_pl,
        )
        self.similarity = self.network.get_similarity(
            self.question_2d_pl, self.question_bigram_2d_pl,
            self.answer_2d_pl, self.answer_bigram_2d_pl)
        self.optimize_op = self.optimizer.minimize(
            self.loss, global_step=self.global_step)

        if session is None:
            self.session = self.create_session()
            self.session.run(tf.global_variables_initializer())
        else:
            self.session = session
        self.random = Random(42)

    def create_session(self):
        config_proto = tf.ConfigProto()
        # config_proto.gpu_options.per_process_gpu_memory_fraction = self.options.per_process_gpu_memory_fraction
        return tf.Session(config=config_proto)

    def train(self, data_train):
        for questions_np, questions_bigram_np, \
                corrects_np, corrects_bigram_np, \
                wrongs_np, wrongs_bigram_np in generate_train_batches(
                    data_train, self.options.batch_size, self.random):
            step, loss, accuracy, _ = self.session.run(
                [self.global_step, self.loss, self.accuracy, self.optimize_op],
                {
                    self.question_2d_pl: questions_np,
                    self.question_bigram_2d_pl: questions_bigram_np,
                    self.answer_2d_pl: corrects_np,
                    self.answer_bigram_2d_pl: corrects_bigram_np,
                    self.wrong_answer_2d_pl: wrongs_np,
                    self.wrong_answer_bigram_2d_pl: wrongs_bigram_np
                })
            logger.info("Train: Step {}, loss {}, accuracy {}".format(
                step, loss, accuracy))

    @classmethod
    def repeat_train_and_validate(cls, data_train, data_devs, data_test, options):
        tf.set_random_seed(options.seed)
        parser = cls(options, data_train)
        for question in data_train:
            question.fill_ids(parser.statistics)
        for file_name, data_dev in data_devs.items():
            for question in data_dev:
                question.fill_ids(parser.statistics)
        while True:
            step = parser.session.run(parser.global_step)
            if step > options.steps:
                break
            parser.random.shuffle(data_train)
            parser.train(data_train)
            for file_name, data_dev in data_devs.items():
                try:
                    prefix, suffix = os.path.basename(file_name).rsplit(".", 1)
                except ValueError:
                    prefix = os.path.basename(file_name)
                    suffix = ""
                dev_output = os.path.join(
                    options.output, '{}_step_{}.{}'.format(prefix, step, suffix))
                scores = list(parser.predict(data_dev))
                with open(dev_output, "w") as f_output:
                    for score in scores:
                        f_output.write("{}\n".format(score))

    @classmethod
    def load(cls, prefix, new_options=None):
        pass

    def predict(self, data_dev):
        for questions_np, questions_bigram_np, \
                answer_np, answer_bigram_np in generate_predict_batches(
                    data_dev, self.options.batch_size):
            similarities = self.session.run(
                self.similarity, {
                    self.question_2d_pl: questions_np,
                    self.question_bigram_2d_pl: questions_bigram_np,
                    self.answer_2d_pl: answer_np,
                    self.answer_bigram_2d_pl: answer_bigram_np
                })
            for similarity in similarities:
                yield similarity

    def save(self, prefix):
        pass
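# --- Driver sketch (added, not part of the original module) -------------------
# A hypothetical illustration of how the options declared in
# add_parser_arguments might be assembled and handed to
# repeat_train_and_validate. The data-loading helper, file names and the
# output directory (read above as options.output, presumably declared by
# DependencyParserBase) are assumptions, not code from the original project.
import argparse

arg_parser = argparse.ArgumentParser()
DBQA.add_parser_arguments(arg_parser)
options = arg_parser.parse_args(
    ["--batch-size", "1024", "--steps", "20000", "--seed", "42"])
options.output = "outputs/"                      # hypothetical output directory
data_train = load_nlpcc16("dbqa.train")          # hypothetical loader
data_devs = {"dbqa.dev": load_nlpcc16("dbqa.dev")}
DBQA.repeat_train_and_validate(data_train, data_devs, None, options)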