def __init__(self, data_dir, frword2vec, fqembed, frembed,
             qmax_length=20, rmax_length=30, ref_method='max_min',
             gru_units=128, mlp_units=None):
    """Build the hybrid metric from a referenced and an unreferenced scorer.

    Args:
        data_dir: directory containing the embedding files.
        frword2vec: word2vec file for the referenced metric.
        fqembed: query embedding file name (relative to data_dir).
        frembed: reply embedding file name (relative to data_dir).
        qmax_length: maximum query length in tokens.
        rmax_length: maximum reply length in tokens.
        ref_method: pooling method for the referenced metric.
        gru_units: GRU hidden size for the unreferenced model.
        mlp_units: MLP layer sizes; defaults to [256, 512, 128].
    """
    # Fixed: the default was a mutable list literal shared across calls;
    # use a None sentinel instead.
    if mlp_units is None:
        mlp_units = [256, 512, 128]
    self.ref = Referenced(data_dir, frword2vec, ref_method)
    # NOTE(review): `train_dir` is not a parameter of this method -- it
    # appears to rely on a module-level global; confirm it is defined
    # before this constructor runs.
    self.unref = Unreferenced(qmax_length, rmax_length,
                              os.path.join(data_dir, fqembed),
                              os.path.join(data_dir, frembed),
                              gru_units, mlp_units,
                              train_dir=train_dir)
def __init__(self, word2vec_file, query_w2v_file, reply_w2v_file,
             train_dir, query_max_len=20, reply_max_len=30,
             pooling_type='max_min', gru_units=128, mlp_units=None):
    """Construct the two sub-models that make up the hybrid metric.

    `mlp_units` defaults to [256, 512, 128]; a None sentinel is used so
    the default list is never shared between instances.
    """
    layers = [256, 512, 128] if mlp_units is None else mlp_units
    logger.info('creating ref model')
    self.ref = Referenced(word2vec_file, pooling_type)
    logger.info('creating unref model')
    self.unref = Unreferenced(
        query_max_len,
        reply_max_len,
        query_w2v_file,
        reply_w2v_file,
        gru_units,
        layers,
        train_dir=train_dir,
    )
def __init__(
        self,
        data_dir,
        frword2vec,
        fqembed,
        frembed,
        qmax_length=20,
        rmax_length=30,
        ref_method='max_min',
        gru_num_units=512,
        mlp_units=None,
        init_learning_rate=0.001,
        margin=0.5,
        batch_norm=False,
        is_training=True,
        train_dataset='',
        log_dir="training",
        scramble=False,
        additional_negative_samples='',
):
    """Build the hybrid metric: a referenced scorer plus a trainable
    unreferenced scorer.

    Args:
        data_dir: directory containing embedding/vocab files.
        frword2vec: word2vec file for the referenced metric.
        fqembed / frembed: query / reply embedding files under data_dir.
        qmax_length / rmax_length: max token lengths.
        ref_method: pooling method for the referenced metric.
        gru_num_units: GRU hidden size for the unreferenced model.
        mlp_units: MLP layer sizes; defaults to [256, 512, 128].
        init_learning_rate, margin, batch_norm, is_training,
        train_dataset, log_dir, scramble, additional_negative_samples:
            forwarded verbatim to Unreferenced.
    """
    # Fixed: mutable default argument replaced with a None sentinel.
    if mlp_units is None:
        mlp_units = [256, 512, 128]
    print("Initializing referenced model")
    self.ref = Referenced(data_dir, frword2vec, ref_method)
    print("Initializing unreferenced model with log_dir " + log_dir +
          " and ref method " + ref_method)
    self.unref = Unreferenced(
        qmax_length,
        rmax_length,
        os.path.join(data_dir, fqembed),
        os.path.join(data_dir, frembed),
        gru_num_units=gru_num_units,
        mlp_units=mlp_units,
        init_learning_rate=init_learning_rate,
        margin=margin,
        is_training=is_training,
        batch_norm=batch_norm,
        train_dataset=train_dataset,
        log_dir=log_dir,
        scramble=scramble,
        additional_negative_samples=additional_negative_samples)
def __init__(self, data_dir, frword2vec, fqembed, frembed,
             qmax_length=20, rmax_length=30, ref_method='max_min',
             gru_units=128, mlp_units=None, is_training=True):
    """Build the hybrid metric from a referenced and an unreferenced scorer.

    Args:
        data_dir: directory containing the embedding files.
        frword2vec: word2vec file for the referenced metric.
        fqembed / frembed: query / reply embedding files under data_dir.
        qmax_length / rmax_length: max token lengths.
        ref_method: pooling method for the referenced metric.
        gru_units: GRU hidden size for the unreferenced model.
        mlp_units: MLP layer sizes; defaults to [256, 512, 128].
        is_training: forwarded to Unreferenced.
    """
    # Fixed: mutable default argument replaced with a None sentinel.
    if mlp_units is None:
        mlp_units = [256, 512, 128]
    print("Initializing referenced model")
    self.ref = Referenced(data_dir, frword2vec, ref_method)
    print("Initializing unreferenced model")
    # NOTE(review): `train_dir` is not a parameter here -- it appears to
    # rely on a module-level global; confirm it is defined before use.
    self.unref = Unreferenced(qmax_length, rmax_length,
                              os.path.join(data_dir, fqembed),
                              os.path.join(data_dir, frembed),
                              gru_units, mlp_units,
                              train_dir=train_dir,
                              is_training=is_training)
class Hybrid(object):
    """RUBER-style hybrid metric.

    Combines a referenced score (generated reply vs. ground truth) with an
    unreferenced score (generated reply vs. query) by taking the per-item
    minimum of the two min-max-normalized score lists.
    """

    def __init__(self, word2vec_file, query_w2v_file, reply_w2v_file,
                 train_dir, query_max_len=20, reply_max_len=30,
                 pooling_type='max_min', gru_units=128, mlp_units=None):
        """Create the referenced and unreferenced sub-models."""
        if mlp_units is None:
            mlp_units = [256, 512, 128]
        logger.info('creating ref model')
        self.ref = Referenced(word2vec_file, pooling_type)
        logger.info('creating unref model')
        self.unref = Unreferenced(query_max_len, reply_max_len,
                                  query_w2v_file, reply_w2v_file,
                                  gru_units, mlp_units,
                                  train_dir=train_dir)

    def train_unref(self, query_file, reply_file):
        """Train the unreferenced sub-model on (query, reply) pairs."""
        logger.info('training unref model')
        self.unref.train(query_file, reply_file)

    def _normalize(self, scores):
        """Min-max normalize `scores` to [0, 1].

        Fixed: when all scores are equal the original divided by zero;
        now a constant list maps to all zeros.
        """
        smin = min(scores)
        smax = max(scores)
        diff = smax - smin
        if diff == 0:
            return [0.0 for _ in scores]
        return [(s - smin) / diff for s in scores]

    def get_ref_scores(self, reply_file, generated_file):
        """Return normalized referenced scores for a generated-reply file."""
        logger.info('computing ref_scores')
        ref_scores = self.ref.get_scores(reply_file, generated_file)
        return self._normalize(ref_scores)

    def get_unref_scores(self, generated_file, query_file,
                         query_vocab_file, reply_vocab_file):
        """Return normalized unreferenced scores for a generated-reply file."""
        logger.info('computing unref_scores')
        unref_scores = self.unref.get_scores(query_file, generated_file,
                                             query_vocab_file,
                                             reply_vocab_file)
        return self._normalize(unref_scores)

    def get_scores(self, query_file, reply_file, generated_file,
                   query_vocab_file, reply_vocab_file):
        """Return the hybrid score per example: min() of the two metrics."""
        ref_scores = self.get_ref_scores(reply_file, generated_file)
        unref_scores = self.get_unref_scores(generated_file, query_file,
                                             query_vocab_file,
                                             reply_vocab_file)
        # min() combiner.
        return [min(a, b) for a, b in zip(ref_scores, unref_scores)]
class Hybrid():
    """RUBER-style hybrid metric: min() of normalized ref and unref scores."""

    def __init__(self, data_dir, frword2vec, fqembed, frembed,
                 qmax_length=20, rmax_length=30, ref_method='max_min',
                 gru_units=128, mlp_units=None):
        """Create the referenced and unreferenced sub-models.

        mlp_units defaults to [256, 512, 128] (fixed: was a mutable
        default argument).
        """
        if mlp_units is None:
            mlp_units = [256, 512, 128]
        self.ref = Referenced(data_dir, frword2vec, ref_method)
        # NOTE(review): `train_dir` is not a parameter -- it appears to
        # rely on a module-level global; confirm it is defined.
        self.unref = Unreferenced(qmax_length, rmax_length,
                                  os.path.join(data_dir, fqembed),
                                  os.path.join(data_dir, frembed),
                                  gru_units, mlp_units,
                                  train_dir=train_dir)

    def train_unref(self, data_dir, fquery, freply):
        """Train the unreferenced sub-model."""
        self.unref.train(data_dir, fquery, freply)

    def normalize(self, scores):
        """Min-max normalize `scores` to [0, 1].

        Fixed: a constant list no longer raises ZeroDivisionError; it
        maps to all zeros.
        """
        smin = min(scores)
        smax = max(scores)
        diff = smax - smin
        if diff == 0:
            return [0.0 for _ in scores]
        return [(s - smin) / diff for s in scores]

    def scores(self, data_dir, fquery, freply, fgenerated, fqvocab, frvocab):
        """Return per-example hybrid scores (min of normalized ref/unref).

        Fixed: the original assigned the normalized unref scores to a
        misspelled variable (`unref_socres`) and then combined the RAW
        unref scores; the normalized values are now actually used.
        """
        ref_scores = self.ref.scores(data_dir, freply, fgenerated)
        ref_scores = self.normalize(ref_scores)
        unref_scores = self.unref.scores(data_dir, fquery, fgenerated,
                                         fqvocab, frvocab)
        unref_scores = self.normalize(unref_scores)
        return [min(a, b) for a, b in zip(ref_scores, unref_scores)]
class Hybrid():
    """RUBER-style hybrid metric with validation-to-CSV support.

    Combines a referenced score and a trainable unreferenced score; the
    blended score is the per-example mean of the two lists, each
    normalized into [1, 5] to match human rating scales.
    """

    def __init__(
            self,
            data_dir,
            frword2vec,
            fqembed,
            frembed,
            qmax_length=20,
            rmax_length=30,
            ref_method='max_min',
            gru_num_units=512,
            mlp_units=None,
            init_learning_rate=0.001,
            margin=0.5,
            batch_norm=False,
            is_training=True,
            train_dataset='',
            log_dir="training",
            scramble=False,
            additional_negative_samples='',
    ):
        """Create the referenced and unreferenced sub-models.

        mlp_units defaults to [256, 512, 128] (fixed: was a mutable
        default argument). All remaining keyword arguments are forwarded
        to Unreferenced.
        """
        if mlp_units is None:
            mlp_units = [256, 512, 128]
        print("Initializing referenced model")
        self.ref = Referenced(data_dir, frword2vec, ref_method)
        print("Initializing unreferenced model with log_dir " + log_dir +
              " and ref method " + ref_method)
        self.unref = Unreferenced(
            qmax_length,
            rmax_length,
            os.path.join(data_dir, fqembed),
            os.path.join(data_dir, frembed),
            gru_num_units=gru_num_units,
            mlp_units=mlp_units,
            init_learning_rate=init_learning_rate,
            margin=margin,
            is_training=is_training,
            batch_norm=batch_norm,
            train_dataset=train_dataset,
            log_dir=log_dir,
            scramble=scramble,
            additional_negative_samples=additional_negative_samples)

    def train_unref(self, data_dir, fquery, freply, validation_fquery,
                    validation_freply_true):
        """Train the unreferenced sub-model."""
        print("training unreferenced metric")
        self.unref.train(data_dir, fquery, freply, validation_fquery,
                         validation_freply_true)

    def normalize(self, scores, smin=None, smax=None, coefficient=None,
                  smallest_value=0):
        """Min-max normalize `scores`.

        With coefficient=4, smallest_value=1 the output range is [1, 5],
        matching RUBER-style human scores.

        Fixed: the original tested `if not smin and not smax`, so an
        explicit smin=0 was treated as "not given"; the bounds are now
        checked against None individually. Also fixed: a constant score
        list no longer raises ZeroDivisionError (all items map to
        smallest_value).
        """
        if smin is None:
            smin = min(scores)
        if smax is None:
            smax = max(scores)
        diff = smax - smin
        if diff == 0:
            return [smallest_value for _ in scores]
        if coefficient:
            return [smallest_value + (coefficient * (s - smin) / diff)
                    for s in scores]
        return [smallest_value + ((s - smin) / diff) for s in scores]

    def scores(self, data_dir, fquery, freply, fgenerated, fqvocab,
               frvocab, checkpoint_dir):
        """Score a generated-reply file.

        Returns a 5-tuple: (blended scores, raw ref scores, normalized
        ref scores, raw unref scores, normalized unref scores).
        """
        ref_scores = self.ref.scores(data_dir, freply, fgenerated)
        norm_ref_scores = self.normalize(ref_scores, coefficient=4,
                                         smallest_value=1)
        unref_scores = self.unref.scores(data_dir, fquery, fgenerated,
                                         fqvocab, frvocab, checkpoint_dir,
                                         init=False)
        norm_unref_scores = self.normalize(unref_scores, coefficient=4,
                                           smallest_value=1)
        blended = [np.mean([a, b])
                   for a, b in zip(norm_ref_scores, norm_unref_scores)]
        return (blended, ref_scores, norm_ref_scores, unref_scores,
                norm_unref_scores)

    def validate_to_csv(self, checkpoint_dir, data_dir, validation_fquery,
                        validation_freply_generated, validation_freply_true,
                        training_fquery, qmax_length, training_freply,
                        rmax_length, train_dataset, validation_dataset):
        """Score a validation set and write per-example results to a CSV."""
        print("Starting validation")
        scores, ref_scores, norm_ref_scores, unref_scores, norm_unref_scores = \
            self.scores(data_dir, validation_fquery, validation_freply_true,
                        validation_freply_generated,
                        '%s.vocab%d' % (training_fquery, qmax_length),
                        '%s.vocab%d' % (training_freply, rmax_length),
                        checkpoint_dir)
        csv_dir = os.path.join('./results', checkpoint_dir, validation_dataset)
        print(csv_dir)
        reply_file = os.path.basename(validation_freply_generated)
        print(reply_file)
        # Fixed: rstrip(".txt") strips any trailing '.', 't', 'x'
        # characters (e.g. "chat.txt" -> "cha"); remove the exact suffix
        # instead.
        if reply_file.endswith(".txt"):
            reply_file = reply_file[:-len(".txt")]
        csv_title = os.path.join(csv_dir, reply_file + ".csv")
        print("Csv title: ")
        print(csv_title)
        if not os.path.exists(csv_dir):
            os.makedirs(csv_dir)
        # Write results to CSV.
        with open(csv_title, 'w+') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            column_titles = [
                "Query", "Scored reply", "Ground truth reply", "Score",
                "Ref score", "Normed ref score", "Unref score",
                "Normed unref score"
            ]
            writer.writerow(column_titles)
            with open(os.path.join(data_dir, validation_fquery), "r") as queries, \
                    open(os.path.join(data_dir, validation_freply_generated), "r") as scored_replies, \
                    open(os.path.join(data_dir, validation_freply_true), "r") as true_replies:
                for (query, scored_reply, true_reply, score, ref_score,
                     norm_ref_score, unref_score, norm_unref_score) in zip(
                        queries, scored_replies, true_replies, scores,
                        ref_scores, norm_ref_scores, unref_scores,
                        norm_unref_scores):
                    writer.writerow([
                        query.rstrip(), scored_reply.rstrip(),
                        true_reply.rstrip(), score, ref_score,
                        norm_ref_score, unref_score, norm_unref_score
                    ])
        # Fixed: the original called .format() on print()'s return value
        # (None), raising AttributeError; format the string itself.
        print(
            "max score: {}, min score: {}, median score: {}, mean score: {}, median norm ref: {}, min unnorm ref: {}, max unnorm ref: {}, median norm unref: {}, min unnorm unref: {}, max unnorm unref: {}"
            .format(max(scores), min(scores), median(scores), mean(scores),
                    median(norm_ref_scores), min(ref_scores), max(ref_scores),
                    median(norm_unref_scores), min(unref_scores),
                    max(unref_scores)))
        print("Wrote model results to " + csv_title)
class Hybrid():
    """RUBER-style hybrid metric.

    Blends a referenced and an unreferenced score as the per-example mean
    of the two lists, each normalized into [1, 5].
    """

    def __init__(self, data_dir, frword2vec, fqembed, frembed,
                 qmax_length=20, rmax_length=30, ref_method='max_min',
                 gru_units=128, mlp_units=None, is_training=True):
        """Create the referenced and unreferenced sub-models.

        mlp_units defaults to [256, 512, 128] (fixed: was a mutable
        default argument).
        """
        if mlp_units is None:
            mlp_units = [256, 512, 128]
        print("Initializing referenced model")
        self.ref = Referenced(data_dir, frword2vec, ref_method)
        print("Initializing unreferenced model")
        # NOTE(review): `train_dir` is not a parameter -- it appears to
        # rely on a module-level global; confirm it is defined.
        self.unref = Unreferenced(qmax_length, rmax_length,
                                  os.path.join(data_dir, fqembed),
                                  os.path.join(data_dir, frembed),
                                  gru_units, mlp_units,
                                  train_dir=train_dir,
                                  is_training=is_training)

    def train_unref(self, data_dir, fquery, freply):
        """Train the unreferenced sub-model."""
        print("training unreferenced metric")
        self.unref.train(data_dir, fquery, freply)

    def normalize(self, scores, smin=None, smax=None, coefficient=None,
                  smallest_value=0):
        """Min-max normalize `scores`.

        With coefficient=4, smallest_value=1 the output range is [1, 5],
        matching RUBER-style human scores.

        Fixed: the original's `if not smin and not smax` treated an
        explicit smin=0 as missing; bounds are now checked against None
        individually. Also fixed: a constant score list no longer raises
        ZeroDivisionError (all items map to smallest_value).
        """
        if smin is None:
            smin = min(scores)
        if smax is None:
            smax = max(scores)
        diff = smax - smin
        if diff == 0:
            return [smallest_value for _ in scores]
        if coefficient:
            return [smallest_value + (coefficient * (s - smin) / diff)
                    for s in scores]
        return [smallest_value + ((s - smin) / diff) for s in scores]

    def scores(self, data_dir, fquery, freply, fgenerated, fqvocab, frvocab):
        """Score a generated-reply file.

        Returns a 5-tuple: (blended scores, raw ref scores, normalized
        ref scores, raw unref scores, normalized unref scores).
        """
        print("training dir is ")
        # NOTE(review): `train_dir` is read from module scope here too.
        print(train_dir)
        ref_scores = self.ref.scores(data_dir, freply, fgenerated,
                                     train_dir=train_dir)
        norm_ref_scores = self.normalize(ref_scores, coefficient=4,
                                         smallest_value=1)
        unref_scores = self.unref.scores(data_dir, fquery, fgenerated,
                                         fqvocab, frvocab, init=False,
                                         train_dir=train_dir)
        norm_unref_scores = self.normalize(unref_scores, coefficient=4,
                                           smallest_value=1)
        blended = [np.mean([a, b])
                   for a, b in zip(norm_ref_scores, norm_unref_scores)]
        return (blended, ref_scores, norm_ref_scores, unref_scores,
                norm_unref_scores)