def main(_):
  #----------- init global resource
  melt.apps.train.init()

  FLAGS.vocab = FLAGS.vocab or os.path.join(os.path.dirname(FLAGS.model_dir), 'vocab.txt')

  image_util.init()
  vocabulary.init()
  text2ids.init()

  ## TODO FIXME if the evaluator inits before the main graph (assistant predictor with image model), finetuning later will break:
  ## the image scope is not yet defined; set reuse=None? Even though the assistant is in a different scope/graph, would the scope still be reused?
  ## So for now let the evaluator init lazily, when first used after the main graph is built.
  # try:
  #   evaluator.init()
  # except Exception:
  #   print(traceback.format_exc(), file=sys.stderr)
  #   print('evaluator init fail will not do metric eval')
  #   FLAGS.metric_eval = False

  logging.info('algo:{}'.format(FLAGS.algo))
  logging.info('monitor_level:{}'.format(FLAGS.monitor_level))

  global sess
  sess = melt.get_session(log_device_placement=FLAGS.log_device_placement)

  train()
def __init__(self, is_training=True, is_predict=False):
  super(DecomposableNLI, self).__init__()

  self.is_training = is_training
  self.is_predict = is_predict

  #TODO move to melt.EmbeddingTrainerBase
  emb_dim = FLAGS.emb_dim
  init_width = 0.5 / emb_dim
  vocabulary.init()
  vocab_size = vocabulary.get_vocab_size()
  self.vocab_size = vocab_size

  # keep the embedding on cpu for the adagrad optimizer
  self.emb = embedding.get_or_restore_embedding_cpu()

  melt.visualize_embedding(self.emb, FLAGS.vocab)
  if is_training and FLAGS.monitor_level > 0:
    melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

  self._attention_output_size = 256
  self._comparison_output_size = 128

  self.scope = 'decomposable_nli'

  self.build_train_graph = self.build_graph
def __init__(self, is_training=True, is_predict=False):
  super(RnnEncoder, self).__init__()
  self.is_training = is_training
  self.is_predict = is_predict

  vocabulary.init()
  if FLAGS.encoder_end_mark == '</S>':
    self.end_id = vocabulary.end_id()
  else:
    self.end_id = vocabulary.go_id()  #NOTICE NUM_RESERVED_IDS must be >= 3 TODO
  assert self.end_id != vocabulary.vocab.unk_id(), 'input vocab generated without end id'

  create_rnn_cell = functools.partial(
      melt.create_rnn_cell,
      num_units=FLAGS.rnn_hidden_size,
      is_training=is_training,
      keep_prob=FLAGS.keep_prob,
      num_layers=FLAGS.num_layers,
      cell_type=FLAGS.cell)

  #follow models/textsum
  self.cell = create_rnn_cell(initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123))
  if FLAGS.rnn_method == melt.rnn.EncodeMethod.bidirectional:
    self.bwcell = create_rnn_cell(initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113))
  else:
    self.bwcell = None
def __init__(self, is_training=True, is_predict=False):
  super(DiscriminantTrainer, self).__init__()
  self.is_training = is_training
  self.is_predict = is_predict
  self.gen_text_feature = None

  emb_dim = FLAGS.emb_dim
  init_width = 0.5 / emb_dim
  vocabulary.init()
  vocab_size = vocabulary.get_vocab_size()
  self.vocab_size = vocab_size
  #if not on cpu, running on gpu with adagrad will fail, TODO check why
  #also this is safer, since the embedding is large and might exceed gpu mem
  #with tf.device('/cpu:0'):
  #  #NOTICE if using a bidirectional rnn the effective emb_dim is emb_dim / 2, because the fw and bw output vectors are depth-concatenated at the last step
  #  self.emb = melt.variable.get_weights_uniform('emb', [vocab_size, emb_dim], -init_width, init_width)
  self.emb = embedding.get_embedding_cpu('emb')
  melt.visualize_embedding(self.emb, FLAGS.vocab)
  if is_training and FLAGS.monitor_level > 0:
    melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

  self.activation = melt.activations[FLAGS.activation]
  self.weights_initializer = tf.random_uniform_initializer(-FLAGS.initializer_scale, FLAGS.initializer_scale)
  self.biases_initialzier = melt.slim.init_ops.zeros_initializer if FLAGS.bias else None

  if not FLAGS.pre_calc_image_feature:
    assert melt.apps.image_processing.image_processing_fn is not None, 'forget melt.apps.image_processing.init()'
    self.image_process_fn = functools.partial(
        melt.apps.image_processing.image_processing_fn,
        height=FLAGS.image_height,
        width=FLAGS.image_width)
def __init__(self, is_training=True, is_predict=False):
  self.is_training = is_training
  self.is_predict = is_predict

  vocabulary.init()
  vocab_size = vocabulary.get_vocab_size()
  self.vocab_size = vocab_size

  self.end_id = vocabulary.end_id()
  self.get_start_id()
  assert self.end_id != vocabulary.vocab.unk_id(), 'input vocab generated without end id'

  self.emb_dim = emb_dim = FLAGS.emb_dim

  #--- for a perf problem, exchange w_t and w here, see https://github.com/tensorflow/tensorflow/issues/4138
  self.num_units = num_units = FLAGS.rnn_hidden_size
  with tf.variable_scope('output_projection'):
    self.w_t = melt.variable.get_weights_truncated('w', [vocab_size, num_units], stddev=FLAGS.weight_stddev)
    self.w = tf.transpose(self.w_t)
    self.v = melt.variable.get_weights_truncated('v', [vocab_size], stddev=FLAGS.weight_stddev)

  self.cell = melt.create_rnn_cell(
      num_units=num_units,
      is_training=is_training,
      keep_prob=FLAGS.keep_prob,
      num_layers=FLAGS.num_layers,
      cell_type=FLAGS.cell)

  num_sampled = FLAGS.num_sampled if not (is_predict and FLAGS.predict_no_sample) else 0
  self.softmax_loss_function = self.gen_sampled_softmax_loss_function(num_sampled)
def init(vocab_path=None):
  global vocab, Segmentor
  if vocab is None:
    vocabulary.init(vocab_path)
    print('ENCODE_UNK', ENCODE_UNK, file=sys.stderr)
    vocab = vocabulary.get_vocab()
    Segmentor = gezi.Segmentor()
def init():
  #the evaluator is also used for evaluation without training, so only set the log path in train.py
  #logging.set_logging_path(FLAGS.model_dir)
  test_dir = FLAGS.valid_resource_dir
  global all_distinct_texts, all_distinct_text_strs
  global vocab, vocab_size
  if all_distinct_texts is None:
    print('loading valid resource from:', test_dir)
    vocabulary.init()
    vocab = vocabulary.vocab
    vocab_size = vocabulary.vocab_size

    if os.path.exists(test_dir + '/distinct_texts.npy'):
      all_distinct_texts = np.load(test_dir + '/distinct_texts.npy')
    else:
      all_distinct_texts = []

    #to avoid running out of gpu mem
    all_distinct_texts = all_distinct_texts[:FLAGS.max_texts]
    print('all_distinct_texts len:', len(all_distinct_texts), file=sys.stderr)

    #--pad it, as the test data set might be smaller in shape[1]
    all_distinct_texts = np.array([gezi.nppad(text, TEXT_MAX_WORDS) for text in all_distinct_texts])
    if FLAGS.feed_dict:
      all_distinct_texts = texts2ids(evaluator.all_distinct_text_strs)
    if os.path.exists(test_dir + '/distinct_text_strs.npy'):
      all_distinct_text_strs = np.load(test_dir + '/distinct_text_strs.npy')
    else:
      all_distinct_text_strs = []
def __init__(self, encoder_type='bow', is_training=True, is_predict=False):
  super(DiscriminantTrainer, self).__init__()
  self.is_training = is_training
  self.is_predict = is_predict

  logging.info('emb_dim:{}'.format(FLAGS.emb_dim))
  logging.info('margin:{}'.format(FLAGS.margin))

  self.encoder = encoder_factory.get_encoder(encoder_type, is_training, is_predict)
  self.encoder_type = encoder_type

  emb_dim = FLAGS.emb_dim
  init_width = 0.5 / emb_dim
  vocabulary.init()
  vocab_size = vocabulary.get_vocab_size()
  self.vocab_size = vocab_size

  self.emb = embedding.get_or_restore_embedding_cpu()

  melt.visualize_embedding(self.emb, vocabulary.vocab_path)
  if is_training and FLAGS.monitor_level > 0:
    melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

  self.activation = melt.activations[FLAGS.activation]

  #TODO could consider a global initializer like
  #  with tf.variable_scope("Model", reuse=None, initializer=initializer)
  #https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py
  self.weights_initializer = tf.random_uniform_initializer(-FLAGS.initializer_scale, FLAGS.initializer_scale)
  self.biases_initializer = melt.slim2.init_ops.zeros_initializer if FLAGS.bias else None

  self.image_process_fn = lambda x: x
  if not FLAGS.pre_calc_image_feature:
    assert melt.apps.image_processing.image_processing_fn is not None, 'forget melt.apps.image_processing.init()'
    self.image_process_fn = functools.partial(
        melt.apps.image_processing.image_processing_fn,
        height=FLAGS.image_height,
        width=FLAGS.image_width,
        trainable=FLAGS.finetune_image_model,
        is_training=is_training,
        random_crop=FLAGS.random_crop_image,
        finetune_end_point=FLAGS.finetune_end_point,
        distort=FLAGS.distort_image,
        feature_name=FLAGS.image_endpoint_feature_name)

  self.image_mlp_dims = [int(x) for x in FLAGS.image_mlp_dims.split(',')] if FLAGS.image_mlp_dims != '0' else None
  self.text_mlp_dims = [int(x) for x in FLAGS.text_mlp_dims.split(',')] if FLAGS.text_mlp_dims != '0' else None

  self.scope = 'image_text_sim'
def main(_):
  logging.init(logtostderr=True, logtofile=False)
  global_scope = ''

  InputApp.init()
  vocabulary.init()
  text2ids.init()

  if FLAGS.add_global_scope:
    global_scope = FLAGS.global_scope if FLAGS.global_scope else FLAGS.algo
  with tf.variable_scope(global_scope):
    test()
def train():
  global vocab_size
  vocabulary.init()
  vocab_size = vocabulary.get_vocab_size()

  def seq2seq_criterion(vocabSize):
    # zero weight for index 0 (presumably the pad id) so it does not contribute to the loss
    weight = torch.ones(vocabSize)
    weight[0] = 0
    crit = nn.NLLLoss(weight, size_average=False)
    if torch.cuda.is_available():
      crit.cuda()
    return crit

  global criterion
  criterion = seq2seq_criterion(vocab_size)

  model = seq2seq.Seq2Seq(vocab_size, FLAGS.emb_dim, FLAGS.rnn_hidden_size, FLAGS.batch_size)

  if torch.cuda.is_available():
    model.cuda()

  init_range = 0.08
  model.init_weights(init_range)
  optimizer = optim.Adagrad(model.parameters(), lr=FLAGS.learning_rate)

  inputs, decode = input.get_decodes(FLAGS.shuffle_then_decode, FLAGS.dynamic_batch_length)
  inputs = functools.partial(
      inputs,
      decode=decode,
      num_epochs=FLAGS.num_epochs,
      num_threads=FLAGS.num_threads,
      batch_join=FLAGS.batch_join,
      shuffle_batch=FLAGS.shuffle_batch,
      shuffle=FLAGS.shuffle,
      allow_smaller_final_batch=True,
  )

  ops = inputs(FLAGS.input, batch_size=FLAGS.batch_size)
  print(ops)

  eval_ops = None
  if FLAGS.valid_input:
    #eval_ops = inputs(FLAGS.valid_input, batch_size=FLAGS.batch_size*10)
    eval_ops = inputs(FLAGS.valid_input, batch_size=FLAGS.batch_size)

  timer = Timer()
  tf_flow(lambda sess, step: process_once(sess, step, ops, eval_ops, model, optimizer))
  print(timer.elapsed())
def get_embedding(name='emb'):
  emb_dim = FLAGS.emb_dim
  vocabulary.init()
  vocab_size = vocabulary.get_vocab_size()

  ##NOTICE if using a bidirectional rnn the effective emb_dim is emb_dim / 2, because the fw and bw output vectors are depth-concatenated at the last step
  init_width = 0.5 / emb_dim
  emb = melt.variable.get_weights_uniform(name, [vocab_size, emb_dim], -init_width, init_width)
  #go back to the code above if this does not work better
  #emb = melt.variable.get_weights_truncated(name, [vocab_size, emb_dim], stddev=FLAGS.weight_stddev)
  return emb
def get_embedding(name='emb', height=None, emb_dim=None, trainable=True):
  emb_dim = emb_dim or FLAGS.emb_dim
  if height is None:
    vocabulary.init()
    height = vocabulary.get_vocab_size()

  init_width = 0.5 / emb_dim
  emb = melt.variable.get_weights_uniform(name, [height, emb_dim], -init_width, init_width, trainable=trainable)
  #go back to the code above if this does not work better
  #emb = melt.variable.get_weights_truncated(name, [vocab_size, emb_dim], stddev=FLAGS.weight_stddev)
  return emb
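# Hedged usage sketch for get_embedding above (illustrative only; assumes
# FLAGS.emb_dim is set and vocabulary.init() can find the configured vocab;
# TEXT_MAX_WORDS is the constant already used by the other snippets here):
#
#   word_emb = get_embedding('emb')                              # [vocab_size, emb_dim]
#   pos_emb = get_embedding('pos_emb', height=TEXT_MAX_WORDS)    # fixed-height table, e.g. position embedding
#
# The +/- 0.5 / emb_dim uniform range appears to follow the word2vec-style
# initialization used for the other embedding tables in this package.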
def __init__(self, is_training=True, is_predict=False):
  self.scope = 'rnn'
  self.is_training = is_training
  self.is_predict = is_predict

  assert not (FLAGS.decode_copy and FLAGS.decode_use_alignment)

  vocabulary.init()
  vocab_size = vocabulary.get_vocab_size()
  self.vocab_size = vocab_size

  self.end_id = vocabulary.end_id()
  self.get_start_id()
  assert self.end_id != vocabulary.vocab.unk_id(), 'input vocab generated without end id'

  self.emb_dim = emb_dim = FLAGS.emb_dim

  #--- for a perf problem, exchange w_t and w here, see https://github.com/tensorflow/tensorflow/issues/4138
  self.num_units = num_units = FLAGS.rnn_hidden_size
  with tf.variable_scope('output_projection'):
    #weights
    self.w_t = melt.variable.get_weights_truncated('w_t', [vocab_size, num_units], stddev=FLAGS.weight_stddev)
    self.w = tf.transpose(self.w_t)
    #biases
    self.v = melt.variable.get_weights_truncated('v', [vocab_size], stddev=FLAGS.weight_stddev)

  #TODO https://github.com/tensorflow/tensorflow/issues/6761 tf 1.0 will fail if scope is not 'rnn', the same as when using self.cell...
  self.cell = melt.create_rnn_cell(
      num_units=num_units,
      is_training=is_training,
      keep_prob=FLAGS.keep_prob,
      num_layers=FLAGS.num_layers,
      cell_type=FLAGS.cell)

  num_sampled = FLAGS.num_sampled if not (is_predict and FLAGS.predict_no_sample) else 0
  self.softmax_loss_function = melt.seq2seq.gen_sampled_softmax_loss_function(
      num_sampled,
      self.vocab_size,
      self.w_t,
      self.v,
      FLAGS.log_uniform_sample,
      is_predict=self.is_predict,
      sample_seed=FLAGS.predict_sample_seed,
      vocabulary=vocabulary)
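# Why w_t is kept in [vocab_size, num_units] layout while w = tf.transpose(w_t)
# is only used for the full output projection: tf.nn.sampled_softmax_loss expects
# class weights shaped [num_classes, dim], so storing the transposed matrix avoids
# a per-step transpose during training (the perf issue referenced above,
# tensorflow#4138). The helper below is a minimal sketch of that standard pattern,
# assuming `import tensorflow as tf` (TF 1.x); it is only assumed to be roughly
# what melt.seq2seq.gen_sampled_softmax_loss_function wraps, not the project's
# actual implementation.
def _example_sampled_softmax_loss(labels, inputs, w_t, v, num_sampled, vocab_size):
  # labels: [batch_size] target word ids, inputs: [batch_size, num_units] rnn outputs
  labels = tf.reshape(labels, [-1, 1])
  return tf.nn.sampled_softmax_loss(
      weights=w_t,            # [vocab_size, num_units]
      biases=v,               # [vocab_size]
      labels=labels,          # [batch_size, 1]
      inputs=inputs,          # [batch_size, num_units]
      num_sampled=num_sampled,
      num_classes=vocab_size)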
def get_decodes(shuffle_then_decode, dynamic_batch_length):
  vocabulary.init()

  global encoder_end_id
  if FLAGS.encoder_end_mark == '</S>':
    encoder_end_id = vocabulary.end_id()
  else:
    encoder_end_id = vocabulary.go_id()  #NOTICE NUM_RESERVED_IDS must be >= 3 TODO
  assert encoder_end_id != vocabulary.vocab.unk_id(), 'input vocab generated without end id'

  if shuffle_then_decode:
    inputs = melt.shuffle_then_decode.inputs
    decode = lambda x: decode_examples(x, dynamic_batch_length)
  else:
    inputs = melt.decode_then_shuffle.inputs
    decode = lambda x: decode_example(x, dynamic_batch_length)

  return inputs, decode
def main(_):
  #----------- init global resource
  logging.set_logging_path(gezi.get_dir(FLAGS.model_dir))

  vocabulary.init()
  text2ids.init()
  #evaluator.init()

  logging.info('algo:{}'.format(FLAGS.algo))
  logging.info('monitor_level:{}'.format(FLAGS.monitor_level))

  global_scope = ''
  if FLAGS.add_global_scope:
    global_scope = FLAGS.global_scope if FLAGS.global_scope else FLAGS.algo

  global sess
  sess = melt.get_session(log_device_placement=FLAGS.log_device_placement)
  with tf.variable_scope(global_scope):
    train()
def main(_):
  #----------- init global resource
  logging.set_logging_path(gezi.get_dir(FLAGS.model_dir))

  melt.apps.train.init()

  has_image_model = FLAGS.image_checkpoint_file and os.path.exists(FLAGS.image_checkpoint_file)
  if has_image_model:
    print('image_endpoint_feature_name:', FLAGS.image_endpoint_feature_name)
    melt.apps.image_processing.init(FLAGS.image_model_name, feature_name=FLAGS.image_endpoint_feature_name)

  FLAGS.pre_calc_image_feature = FLAGS.pre_calc_image_feature or (not has_image_model)

  vocabulary.init()
  text2ids.init()

  ## TODO FIXME if the evaluator inits before the main graph (assistant predictor with image model), finetuning later will break:
  ## the image scope is not yet defined; set reuse=None? Even though the assistant is in a different scope/graph, would the scope still be reused?
  ## So for now let the evaluator init lazily, when first used after the main graph is built.
  # try:
  #   evaluator.init()
  # except Exception:
  #   print(traceback.format_exc(), file=sys.stderr)
  #   print('evaluator init fail will not do metric eval')
  #   FLAGS.metric_eval = False

  logging.info('algo:{}'.format(FLAGS.algo))
  logging.info('monitor_level:{}'.format(FLAGS.monitor_level))

  global sess
  sess = melt.get_session(log_device_placement=FLAGS.log_device_placement)

  global_scope = melt.apps.train.get_global_scope()
  with tf.variable_scope(global_scope):
    train()
def __init__(self, encoder_type='bow', is_training=True, is_predict=False):
  super(DualTextsim, self).__init__()

  self.is_training = is_training
  self.is_predict = is_predict

  self.encoder = encoder_factory.get_encoder(encoder_type, is_training, is_predict)
  self.encoder_type = encoder_type

  emb_dim = FLAGS.emb_dim
  init_width = 0.5 / emb_dim
  vocabulary.init()
  vocab_size = vocabulary.get_vocab_size()
  self.vocab_size = vocab_size

  # keep the embedding on cpu for the adagrad optimizer
  self.emb = embedding.get_or_restore_embedding_cpu()
  self.pos_emb = embedding.get_position_embedding_cpu()

  melt.visualize_embedding(self.emb, FLAGS.vocab)
  if is_training and FLAGS.monitor_level > 0:
    melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

  self.activation = melt.activations[FLAGS.activation]

  # TODO could consider a global initializer like
  #   with tf.variable_scope("Model", reuse=None, initializer=initializer)
  # https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py
  self.weights_initializer = tf.random_uniform_initializer(-FLAGS.initializer_scale, FLAGS.initializer_scale)
  self.biases_initialzier = melt.slim.init_ops.zeros_initializer if FLAGS.bias else None

  self.mlp_dims = [int(x) for x in FLAGS.mlp_dims.split(',')] if FLAGS.mlp_dims != '0' else None

  # needed when building the graph from PairwiseGraph
  self.scope = 'dual_textsim'

  self.build_train_graph = self.build_graph
def main(_):
  #----------- init global resource
  logging.set_logging_path(gezi.get_dir(FLAGS.model_dir))

  if not FLAGS.pre_calc_image_feature:
    melt.apps.image_processing.init()

  InputApp.init()
  vocabulary.init()
  text2ids.init()
  evaluator.init()

  logging.info('algo:{}'.format(FLAGS.algo))
  logging.info('monitor_level:{}'.format(FLAGS.monitor_level))

  global sess
  sess = melt.get_session(log_device_placement=FLAGS.log_device_placement)

  global_scope = ''
  if FLAGS.add_global_scope:
    global_scope = FLAGS.global_scope if FLAGS.global_scope else FLAGS.algo
  with tf.variable_scope(global_scope):
    train()
def __init__(self, encoder_type='bow', is_training=True, is_predict=False):
  super(DiscriminantTrainer, self).__init__()
  self.is_training = is_training
  self.is_predict = is_predict

  logging.info('emb_dim:{}'.format(FLAGS.emb_dim))
  logging.info('margin:{}'.format(FLAGS.margin))

  self.encoder = encoder_factory.get_encoder(encoder_type, is_training, is_predict)
  self.encoder_type = encoder_type

  emb_dim = FLAGS.emb_dim
  init_width = 0.5 / emb_dim
  vocabulary.init()
  vocab_size = vocabulary.get_vocab_size()
  self.vocab_size = vocab_size
  #if not on cpu, running on gpu with adagrad will fail, TODO check why
  #also this is safer, since the embedding is large and might exceed gpu mem
  #with tf.device('/cpu:0'):
  #  self.emb = melt.variable.get_weights_uniform('emb', [vocab_size, emb_dim], -init_width, init_width)
  if (not FLAGS.word_embedding_file) or glob.glob(FLAGS.model_dir + '/model.ckpt*'):
    logging.info('Word embedding random init or from model_dir :{} and finetune=:{}'.format(
        FLAGS.model_dir, FLAGS.finetune_word_embedding))
    self.emb = embedding.get_embedding_cpu(name='emb', trainable=FLAGS.finetune_word_embedding)
  else:
    #https://github.com/tensorflow/tensorflow/issues/1570
    #still, adagrad must be on cpu..
    #if not finetuning the emb this is ok; if finetuning and restarting, is it still ok? should the word embedding file then not be used? judge via os.path.exists(FLAGS.model_dir)?
    #or will it still try to load from the checkpoint? TODO to be safe you could re-run with word_embedding_file set to None or ''
    logging.info('Loading word embedding from :{} and finetune=:{}'.format(
        FLAGS.word_embedding_file, FLAGS.finetune_word_embedding))
    self.emb = melt.load_constant_cpu(FLAGS.word_embedding_file, name='emb', trainable=FLAGS.finetune_word_embedding)

  melt.visualize_embedding(self.emb, FLAGS.vocab)
  if is_training and FLAGS.monitor_level > 0:
    melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

  self.activation = melt.activations[FLAGS.activation]
  #TODO could consider a global initializer like
  #  with tf.variable_scope("Model", reuse=None, initializer=initializer)
  #https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py
  self.weights_initializer = tf.random_uniform_initializer(-FLAGS.initializer_scale, FLAGS.initializer_scale)
  self.biases_initialzier = melt.slim.init_ops.zeros_initializer if FLAGS.bias else None

  if not FLAGS.pre_calc_image_feature:
    assert melt.apps.image_processing.image_processing_fn is not None, 'forget melt.apps.image_processing.init()'
    self.image_process_fn = functools.partial(
        melt.apps.image_processing.image_processing_fn,
        height=FLAGS.image_height,
        width=FLAGS.image_width)

  self.image_mlp_dims = [int(x) for x in FLAGS.image_mlp_dims.split(',')] if FLAGS.image_mlp_dims != '0' else None
  self.text_mlp_dims = [int(x) for x in FLAGS.text_mlp_dims.split(',')] if FLAGS.text_mlp_dims != '0' else None

  self.scope = 'image_text_sim'
def __init__(self, is_training=True, is_predict=False):
  super(MilTrainer, self).__init__()
  self.is_training = is_training
  self.is_predict = is_predict

  logging.info('emb_dim:{}'.format(FLAGS.emb_dim))
  logging.info('margin:{}'.format(FLAGS.margin))

  vocabulary.init()
  vocab_size = vocabulary.get_vocab_size()
  self.vocab_size = vocab_size

  self.emb = embedding.get_or_restore_embedding_cpu()

  melt.visualize_embedding(self.emb, FLAGS.vocab)
  if is_training and FLAGS.monitor_level > 0:
    melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

  self.image_process_fn = lambda x: x
  if not FLAGS.pre_calc_image_feature:
    assert melt.apps.image_processing.image_processing_fn is not None, 'forget melt.apps.image_processing.init()'
    self.image_process_fn = functools.partial(
        melt.apps.image_processing.image_processing_fn,
        height=FLAGS.image_height,
        width=FLAGS.image_width,
        trainable=FLAGS.finetune_image_model,
        is_training=is_training,
        random_crop=FLAGS.random_crop_image,
        finetune_end_point=FLAGS.finetune_end_point,
        distort=FLAGS.distort_image,
        feature_name=FLAGS.image_endpoint_feature_name)

  ImageEncoder = deepiu.seq2seq.image_encoder.Encoders[FLAGS.image_encoder]
  self.image_encoder = ImageEncoder(is_training, is_predict, FLAGS.emb_dim)

  self.using_attention = FLAGS.image_encoder != 'ShowAndTell'
  assert self.using_attention

  with tf.variable_scope('text_encoder'):
    if FLAGS.text_encoder:
      self.text_encoder = encoder_factory.get_encoder(FLAGS.text_encoder, is_training, is_predict)
    else:
      self.text_encoder = None

  self.weights_initializer = tf.random_uniform_initializer(-FLAGS.initializer_scale, FLAGS.initializer_scale)
  self.activation = melt.activations[FLAGS.activation]
  self.text_mlp_dims = [int(x) for x in FLAGS.text_mlp_dims.split(',')] if FLAGS.text_mlp_dims != '0' else None
  self.biases_initializer = melt.slim2.init_ops.zeros_initializer if FLAGS.bias else None

  logging.info('mil text_encoder:{}'.format(self.text_encoder))

  if FLAGS.use_idf_weights:
    self.idf_weights = tf.constant(idf.get_idf())
  else:
    self.idf_weights = tf.constant([0.] * NUM_RESERVED_IDS + [1.0 for id in range(NUM_RESERVED_IDS, vocab_size)])

  self.scope = FLAGS.trainer_scope or 'image_text_sim'
def __init__(self, encoder_type='bow', is_training=True, is_predict=False):
  super(DualTextsim, self).__init__()

  self.is_training = is_training
  self.is_predict = is_predict

  self.encoder = encoder_factory.get_encoder(encoder_type, is_training, is_predict)
  self.encoder_type = encoder_type

  emb_dim = FLAGS.emb_dim
  init_width = 0.5 / emb_dim
  vocabulary.init()
  vocab_size = vocabulary.get_vocab_size()
  self.vocab_size = vocab_size

  # keep the embedding on cpu for the adagrad optimizer
  if (not FLAGS.word_embedding_file) or glob.glob(FLAGS.model_dir + '/model.ckpt*'):
    logging.info('Word embedding random init or from model_dir :{} and finetune=:{}'.format(
        FLAGS.model_dir, FLAGS.finetune_word_embedding))
    self.emb = embedding.get_embedding_cpu(name='emb', trainable=FLAGS.finetune_word_embedding)
  else:
    # https://github.com/tensorflow/tensorflow/issues/1570
    # still, adagrad must be on cpu..
    # if not finetuning the emb this is ok; if finetuning and restarting, is it still ok? should the word embedding file then not be used? judge via os.path.exists(FLAGS.model_dir)?
    # or will it still try to load from the checkpoint? TODO to be safe you could re-run with word_embedding_file set to None or ''
    logging.info('Loading word embedding from :{} and finetune=:{}'.format(
        FLAGS.word_embedding_file, FLAGS.finetune_word_embedding))
    self.emb = melt.load_constant_cpu(FLAGS.word_embedding_file, name='emb', trainable=FLAGS.finetune_word_embedding)

  if FLAGS.position_embedding:
    logging.info('Using position embedding')
    self.pos_emb = embedding.get_embedding_cpu(name='pos_emb', height=TEXT_MAX_WORDS)
  else:
    self.pos_emb = None

  melt.visualize_embedding(self.emb, FLAGS.vocab)
  if is_training and FLAGS.monitor_level > 0:
    melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

  self.activation = melt.activations[FLAGS.activation]

  # TODO could consider a global initializer like
  #   with tf.variable_scope("Model", reuse=None, initializer=initializer)
  # https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py
  self.weights_initializer = tf.random_uniform_initializer(-FLAGS.initializer_scale, FLAGS.initializer_scale)
  self.biases_initialzier = melt.slim.init_ops.zeros_initializer if FLAGS.bias else None

  self.mlp_dims = [int(x) for x in FLAGS.mlp_dims.split(',')] if FLAGS.mlp_dims != '0' else None

  self.scope = 'dual_textsim'

  self.build_train_graph = self.build_graph
def __init__(self, is_training=True, is_predict=False):
  self.scope = 'rnn'
  self.is_training = is_training
  self.is_predict = is_predict

  assert not (FLAGS.decode_copy and FLAGS.decode_use_alignment)

  vocabulary.init()
  vocab_size = vocabulary.get_vocab_size()
  self.vocab_size = vocab_size

  self.end_id = vocabulary.end_id()
  self.start_id = None
  self.get_start_id()
  assert self.end_id != vocabulary.vocab.unk_id(), 'input vocab generated without end id'

  self.emb_dim = emb_dim = FLAGS.emb_dim

  #--- for a perf problem, exchange w_t and w here, see https://github.com/tensorflow/tensorflow/issues/4138
  self.num_units = num_units = FLAGS.rnn_hidden_size
  with tf.variable_scope('output_projection'):
    #weights
    self.w_t = melt.variable.get_weights_truncated('w_t', [vocab_size, num_units], stddev=FLAGS.weight_stddev)
    self.w = tf.transpose(self.w_t)
    #biases
    self.v = melt.variable.get_weights_truncated('v', [vocab_size], stddev=FLAGS.weight_stddev)

  #TODO https://github.com/tensorflow/tensorflow/issues/6761 tf 1.0 will fail if scope is not 'rnn', the same as when using self.cell...
  self.cell = melt.create_rnn_cell(
      num_units=num_units,
      is_training=is_training,
      keep_prob=FLAGS.keep_prob,
      num_layers=FLAGS.num_layers,
      cell_type=FLAGS.cell)

  self.num_sampled = num_sampled = FLAGS.num_sampled if not (is_predict and FLAGS.predict_no_sample) else 0
  #self.softmax_loss_function being None means sampling is not needed
  self.softmax_loss_function = None
  if FLAGS.gen_only:
    self.softmax_loss_function = melt.seq2seq.gen_sampled_softmax_loss_function(
        num_sampled,
        self.vocab_size,
        weights=self.w_t,
        biases=self.v,
        log_uniform_sample=FLAGS.log_uniform_sample,
        is_predict=self.is_predict,
        sample_seed=FLAGS.predict_sample_seed,
        vocabulary=vocabulary)

  if FLAGS.use_attention:
    print('----attention_option:', FLAGS.attention_option)

  if FLAGS.gen_copy_switch or FLAGS.gen_copy or FLAGS.copy_only:
    assert FLAGS.use_attention is True, 'must use attention if not gen_only mode seq2seq'
    FLAGS.gen_only = False
    if FLAGS.gen_copy_switch:
      print('-------gen copy switch mode!')
      FLAGS.gen_copy = False
      FLAGS.copy_only = False
    elif FLAGS.gen_copy:
      print('-------gen copy mode !')
      FLAGS.copy_only = False
    else:
      print('-------copy only mode !')
  else:
    print('--------gen only mode')

  #if using a copy mode, use the score as alignment (no softmax)
  self.score_as_alignment = False if FLAGS.gen_only else True

  #gen only output_fn
  self.output_fn = lambda cell_output: melt.dense(cell_output, self.w, self.v)

  def copy_output(indices, batch_size, cell_output, cell_state):
    alignments = cell_state.alignments
    updates = alignments
    return tf.scatter_nd(indices, updates, shape=[batch_size, self.vocab_size])

  self.copy_output_fn = copy_output

  #one problem is large memory usage for a big vocabulary
  def gen_copy_output(indices, batch_size, cell_output, cell_state):
    gen_logits = self.output_fn(cell_output)
    copy_logits = copy_output(indices, batch_size, cell_output, cell_state)

    if FLAGS.gen_copy_switch:
      gen_probability = cell_state.gen_probability
      #[batch_size, 1] * [batch_size, vocab_size]
      return gen_probability * tf.nn.softmax(gen_logits) + (1 - gen_probability) * tf.nn.softmax(copy_logits)
    else:
      return gen_logits + copy_logits

  self.gen_copy_output_fn = gen_copy_output

  def gen_copy_output_train(time, indices, targets, sampled_values, batch_size, cell_output, cell_state):
    if self.softmax_loss_function is not None:
      labels = tf.slice(targets, [0, time], [-1, 1])

      sampled, true_expected_count, sampled_expected_count = sampled_values
      sampled_values = \
        sampled, tf.slice(tf.reshape(true_expected_count, [batch_size, -1]), [0, time], [-1, 1]), sampled_expected_count

      sampled_ids, sampled_logits = melt.nn.compute_sampled_ids_and_logits(
          weights=self.w_t,
          biases=self.v,
          labels=labels,
          inputs=cell_output,
          num_sampled=self.num_sampled,
          num_classes=self.vocab_size,
          sampled_values=sampled_values,
          remove_accidental_hits=False)
      gen_indices = melt.batch_values_to_indices(tf.to_int32(sampled_ids))
      gen_logits = tf.scatter_nd(gen_indices, sampled_logits, shape=[batch_size, self.vocab_size])
    else:
      gen_logits = self.output_fn(cell_output)

    copy_logits = copy_output(indices, batch_size, cell_output, cell_state)

    if FLAGS.gen_copy_switch:
      #gen_copy_switch == True
      gen_probability = cell_state.gen_probability
      return gen_probability * tf.nn.softmax(gen_logits) + (1 - gen_probability) * tf.nn.softmax(copy_logits)
    else:
      return gen_logits + copy_logits

  self.gen_copy_output_train_fn = gen_copy_output_train
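# Note on gen_copy_output / gen_copy_output_train above: in gen_copy_switch mode the
# return value is a pointer-generator style mixture,
#   probs = p_gen * softmax(gen_logits) + (1 - p_gen) * softmax(copy_logits),
# i.e. already a probability distribution rather than logits, so any downstream loss
# must not apply another softmax in that mode. In the other modes the functions return
# raw (gen + copy) logits instead.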
import sys, os

from deepiu.util.sim_predictor import SimPredictor
from deepiu.util import vocabulary

import melt

image_dir = '/home/gezi/data2/data/ai_challenger/image_caption/pic/'
image_file = '6275b5349168ac3fab6a493c509301d023cf39d3.jpg'
if len(sys.argv) > 1:
  image_file = sys.argv[1]
image_path = os.path.join(image_dir, image_file)

image_model_checkpoint_path = '/home/gezi/data/image_model_check_point/inception_resnet_v2_2016_08_30.ckpt'
model_dir = '/home/gezi/new/temp/image-caption/ai-challenger/model/bow/'
vocab_path = '/home/gezi/new/temp/image-caption/ai-challenger/tfrecord/seq-basic/vocab.txt'

vocabulary.init(vocab_path)
vocab = vocabulary.vocab

predictor = SimPredictor(model_dir, image_model_checkpoint_path, image_model_name='InceptionResnetV2')

scores, word_ids = predictor.top_words([melt.read_image(image_path)], 50)
scores = scores[0]
word_ids = word_ids[0]

for word_id, score in zip(word_ids, scores):
  print(vocab.key(int(word_id)), score)