def forward_fixed_text(self, text_npy):
  #text = tf.constant(text_npy)  # fine if the array is smaller than 2GB,
  # but load_constant is safer across more applications
  text = melt.load_constant(text_npy, self.sess)
  with tf.variable_scope(self.scope):
    text_feature = self.forward_text(text)
  return text_feature
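# melt.load_constant is not defined in these snippets. A minimal sketch of the
# standard TF1 pattern it likely wraps (load_constant_sketch is a hypothetical
# name): tf.constant serializes the array into the GraphDef, which is capped
# at 2GB, so large arrays are fed through a placeholder into a variable instead.
import numpy as np
import tensorflow as tf

def load_constant_sketch(data_npy, sess, name=None, trainable=False):
  if isinstance(data_npy, str):
    data_npy = np.load(data_npy)
  placeholder = tf.placeholder(data_npy.dtype, shape=data_npy.shape)
  # collections=[] keeps the variable out of GLOBAL_VARIABLES so a later
  # global initializer does not try to re-run it without the feed
  var = tf.Variable(placeholder, trainable=trainable, collections=[], name=name)
  sess.run(var.initializer, feed_dict={placeholder: data_npy})
  return var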
def __init__(self, emb=None, fixed_emb=None, name=None, fixed_name=None,
             model_dir=None, model_name=None, sess=None):
  self._sess = sess or tf.InteractiveSession()
  if isinstance(emb, str) and os.path.isdir(emb):
    model_dir = emb
  if model_dir is None:
    if isinstance(emb, str):
      emb = np.load(emb)
    emb = melt.load_constant(emb, name=name)
    if isinstance(fixed_emb, str):
      # fixed_emb holds the corpus embeddings, already summed and normalized
      fixed_emb = melt.load_constant(fixed_emb, name=fixed_name)
  else:
    model_path = melt.get_model_path(model_dir, model_name)
    emb = tf.Variable(0., name=name, validate_shape=False)
    #emb = tf.Variable(0., name=name)
    # like word2vec, the variable name is 'w_in'
    embedding_saver = tf.train.Saver({name: emb})
    embedding_saver.restore(self._sess, model_path)
  # assume index 0 is unused (reserved for PAD), so zero out the first row
  mask = zero_first_row(emb)
  emb = tf.multiply(emb, mask)
  self._emb = emb
  self._fixed_emb = fixed_emb
  self._normed_emb = None
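# zero_first_row is called above but not defined in these snippets. A minimal
# sketch under the assumption that it returns a multiplicative [vocab_size, 1]
# mask with row 0 (the PAD row) zeroed, broadcasting against [vocab_size, emb_dim]:
import tensorflow as tf

def zero_first_row(emb):
  vocab_size = tf.shape(emb)[0]
  # 0 for the PAD row (index 0), 1 for every other row
  return tf.concat([tf.zeros([1, 1]), tf.ones([vocab_size - 1, 1])], axis=0)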
def init_evaluate_constant_text(self, text_npy):
  #self.text = tf.constant(text_npy)
  if self.text is None:
    #if self.encoder_type == 'cnn':
    #  import numpy as np
    #  #text_npy = np.load(text_npy)
    #  text_npy = text_npy[:5000]
    #  #self.text = melt.load_constant_cpu(text_npy, self.sess)
    ##else:
    self.text = melt.load_constant(text_npy, self.sess)
def get_or_restore_embedding(name='emb', embedding_file=None, trainable=None,
                             height=None, emb_dim=None, type='word'):
  # keep on cpu for the adagrad optimizer
  #if (not FLAGS.word_embedding_file) or glob.glob(FLAGS.model_dir + '/model*ckpt*'):
  #  logging.info('Word embedding random init or from model_dir:{} and trainable=:{}'.format(
  #      FLAGS.model_dir, FLAGS.finetune_word_embedding))
  # TODO verify the logic below is ok; the version above works but is a bit
  # complex. Assumption: if the variable is in the checkpoint, the later
  # restore will overwrite the initial constant value.
  embedding_file_ = None
  trainable_ = None
  if type == 'word':
    embedding_file_ = FLAGS.word_embedding_file
    trainable_ = FLAGS.finetune_word_embedding
  elif type == 'char':
    embedding_file_ = FLAGS.char_embedding_file
    trainable_ = FLAGS.finetune_char_embedding
  elif type == 'ngram':
    embedding_file_ = FLAGS.ngram_embedding_file
    trainable_ = FLAGS.finetune_ngram_embedding
  elif type == 'pinyin':
    embedding_file_ = FLAGS.pinyin_embedding_file
    trainable_ = FLAGS.finetune_pinyin_embedding
  else:
    raise ValueError(type)
  embedding_file = embedding_file if embedding_file is not None else embedding_file_
  trainable = trainable if trainable is not None else trainable_

  if (not embedding_file) or melt.exists_model(FLAGS.model_dir):
    logging.info('{} random init or from model_dir and trainable=:{}'.format(
        name, trainable))
    emb = get_embedding(name=name, trainable=trainable, height=height,
                        emb_dim=emb_dim)
    #melt.try_add_to_collection('word_embedding', emb)
  else:
    # https://github.com/tensorflow/tensorflow/issues/1570
    # adagrad still requires the embedding on cpu..
    # If the embedding is not finetuned this is fine, but is restarting a
    # finetune run still ok? Must word_embedding_file be unset, or should we
    # check os.path.exists(FLAGS.model_dir)? Will it still try to load from
    # the checkpoint? TODO: to be safe, rerun with word_embedding_file set to
    # None or ''.
    logging.info('Loading {} from:{} and trainable=:{}'.format(
        name, embedding_file, trainable))
    timer = gezi.Timer('load constant')
    emb = melt.load_constant(embedding_file, name=name, trainable=trainable)
    timer.print_elapsed()
  return emb
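# get_embedding is referenced above but not defined in these snippets. A minimal
# sketch of the random-init path; FLAGS.vocab_size and FLAGS.emb_dim are assumed
# flag names, and the init range is an assumption:
import tensorflow as tf

def get_embedding(name='emb', trainable=True, height=None, emb_dim=None):
  height = height or FLAGS.vocab_size
  emb_dim = emb_dim or FLAGS.emb_dim
  # pin to cpu: adagrad's sparse updates are the issue noted in the comments above
  with tf.device('/cpu:0'):
    return tf.get_variable(
        name, shape=[height, emb_dim],
        initializer=tf.random_uniform_initializer(-0.05, 0.05),
        trainable=trainable)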
def build_fixed_text_feature_graph(self, text_feature_npy):
  """Load precomputed text features directly into the graph.

  @NOTICE all vectors in text_feature_npy must have the same length.
  Used in evaluate.py for both fixed texts and fixed words.
  @FIXME dumping text features should change the api
  """
  with tf.variable_scope("image_text_sim"):
    image_feature = self.forward_image_feature(self.image_feature_feed)
    text_feature = melt.load_constant(self.sess, text_feature_npy)
    score = melt.cosine(image_feature, text_feature, nonorm=True)
    return score
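# melt.cosine with nonorm=True is assumed to skip re-normalization, i.e. the
# image and text features are already L2-normalized and cosine similarity
# reduces to a dot product. A hypothetical sketch (cosine_sketch):
import tensorflow as tf

def cosine_sketch(u, v, nonorm=False):
  if not nonorm:
    u = tf.nn.l2_normalize(u, axis=-1)
    v = tf.nn.l2_normalize(v, axis=-1)
  # [num_u, dim] x [num_v, dim]^T -> [num_u, num_v] score matrix
  return tf.matmul(u, v, transpose_b=True)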
def get_or_restore_embedding(name='emb'):
  # keep on cpu for the adagrad optimizer
  if (not FLAGS.word_embedding_file) or glob.glob(FLAGS.model_dir + '/model.ckpt*'):
    logging.info('Word embedding random init or from model_dir:{} and trainable=:{}'.format(
        FLAGS.model_dir, FLAGS.finetune_word_embedding))
    emb = get_embedding(name=name, trainable=FLAGS.finetune_word_embedding)
    melt.try_add_to_collection('word_embedding', emb)
  else:
    # https://github.com/tensorflow/tensorflow/issues/1570
    # adagrad still requires the embedding on cpu..
    # If the embedding is not finetuned this is fine, but is restarting a
    # finetune run still ok? TODO: to be safe, rerun with word_embedding_file
    # set to None or ''.
    logging.info('Loading word embedding from:{} and trainable=:{}'.format(
        FLAGS.word_embedding_file, FLAGS.finetune_word_embedding))
    emb = melt.load_constant(FLAGS.word_embedding_file, name=name,
                             trainable=FLAGS.finetune_word_embedding)
  return emb
def init_evaluate_constant_text(self, text_npy):
  #self.text = tf.constant(text_npy)
  if self.text is None:
    self.text = melt.load_constant(self.sess, text_npy)