class FastZeroTagModel(AttalosModel):
    """
    Create a tensorflow graph that finds the principal direction of the target word embeddings
    (with negative sampling), using the loss function from "Fast Zero-Shot Image Tagging".
    """
    def __init__(self, wv_model, datasets, **kwargs):
        self.wv_model = wv_model
        self.one_hot = OneHot(datasets, valid_vocab=wv_model.vocab)
        word_counts = NegativeSampler.get_wordcount_from_datasets(datasets, self.one_hot)
        self.negsampler = NegativeSampler(word_counts)
        self.w = construct_W(wv_model, self.one_hot.get_key_ordering()).T

        self.learning_rate = kwargs.get("learning_rate", 0.0001)
        self.optim_words = kwargs.get("optim_words", True)
        self.hidden_units = kwargs.get("hidden_units", "200")
        self.use_batch_norm = kwargs.get("use_batch_norm", False)
        self.opt_type = kwargs.get("opt_type", "adam")
        if self.hidden_units == '0':
            self.hidden_units = []
        else:
            self.hidden_units = [int(x) for x in self.hidden_units.split(',')]

        self.model_info = dict()

        # Placeholders for data
        self.model_info['input'] = tf.placeholder(shape=(None, datasets[0].img_feat_size), dtype=tf.float32)
        self.model_info['pos_ids'] = tf.placeholder(dtype=tf.int32)
        self.model_info['neg_ids'] = tf.placeholder(dtype=tf.int32)
        self.model_info['w2v'] = tf.Variable(self.w, dtype=tf.float32)
        self.model_info['pos_vecs'] = tf.transpose(tf.nn.embedding_lookup(self.model_info['w2v'],
                                                                          self.model_info['pos_ids']),
                                                   perm=[1, 0, 2])
        self.model_info['neg_vecs'] = tf.transpose(tf.nn.embedding_lookup(self.model_info['w2v'],
                                                                          self.model_info['neg_ids']),
                                                   perm=[1, 0, 2])

        # Construct fully connected layers
        layer = self.model_info['input']
        layers = []
        for i, hidden_size in enumerate(self.hidden_units):
            layer = tf.contrib.layers.relu(layer, hidden_size)
            layers.append(layer)
            if self.use_batch_norm:
                layer = tf.contrib.layers.batch_norm(layer)
                layers.append(layer)
                logger.info("Using batch normalization")

        # Output layer should always be linear
        layer = tf.contrib.layers.linear(layer, self.w.shape[1])
        layers.append(layer)

        self.model_info['layers'] = layers
        self.model_info['prediction'] = layer

        def fztloss(f, pVecs, nVecs):
            """
            Tensorized cost function from Fast Zero-Shot Learning paper

            Args:
                f: The output from the network, a tensor of shape (# images, word embedding size)
                pVecs: The vector embeddings of the ground truth tags, a tensor
                    of shape (# images, # positive tags, word embedding size)
                nVecs: The vector embeddings of negatively sampled tags, a tensor
                    of shape (# images, # negative samples, word embedding size)

            Returns:
                Scalar tensor representing the batch cost
            """
            posmul = tf.mul(pVecs, f)
            negmul = tf.mul(nVecs, f)

            tfpos = tf.reduce_sum(posmul, reduction_indices=2)
            tfneg = tf.reduce_sum(negmul, reduction_indices=2)

            tfpos = tf.transpose(tfpos, [1, 0])
            tfneg = tf.transpose(tfneg, [1, 0])

            negexpan = tf.tile(tf.expand_dims(tfneg, -1), [1, 1, tf.shape(tfpos)[1]])
            posexpan = tf.tile(tf.transpose(tf.expand_dims(tfpos, -1), [0, 2, 1]), [1, tf.shape(tfneg)[1], 1])

            differences = tf.sub(negexpan, posexpan)

            return tf.reduce_sum(tf.reduce_sum(tf.log(1 + tf.exp(differences)), reduction_indices=[1, 2]))

        loss = fztloss(self.model_info['prediction'], self.model_info['pos_vecs'], self.model_info['neg_vecs'])
        self.model_info['loss'] = loss

        if self.opt_type == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer
        else:
            optimizer = tf.train.AdamOptimizer
        self.model_info['optimizer'] = optimizer(learning_rate=self.learning_rate).minimize(loss)

    def predict_feats(self, sess, x):
        return sess.run(self.model_info['prediction'], feed_dict={self.model_info['input']: x})

    def _get_ids(self, tag_ids, numSamps=[5, 10], uniform_sampling=False):
        """
        Takes a batch worth of text tags and returns positive/negative ids
        """
        pos_word_ids = np.ones((len(tag_ids), numSamps[0]), dtype=np.int32)
        pos_word_ids.fill(-1)
        for ind, tags in enumerate(tag_ids):
            if len(tags) > 0:
                pos_word_ids[ind] = np.random.choice(tags, size=numSamps[0])

        neg_word_ids = None
        if uniform_sampling:
            neg_word_ids = np.random.randint(0, self.one_hot.vocab_size, size=(len(tag_ids), numSamps[1]))
        else:
            neg_word_ids = np.ones((len(tag_ids), numSamps[1]), dtype=np.int32)
            neg_word_ids.fill(-1)
            for ind in range(pos_word_ids.shape[0]):
                # TODO: Check to see if this benefits from the same bug as negsampling code
                neg_word_ids[ind] = self.negsampler.negsamp_ind(pos_word_ids[ind], numSamps[1])

        return pos_word_ids, neg_word_ids

    def prep_fit(self, data):
        img_feats, text_feats_list = data

        text_feat_ids = []
        for tags in text_feats_list:
            text_feat_ids.append([self.one_hot.get_index(tag) for tag in tags if tag in self.one_hot])

        pos_ids, neg_ids = self._get_ids(text_feat_ids)
        self.pos_ids = pos_ids
        self.neg_ids = neg_ids

        fetches = [self.model_info["optimizer"], self.model_info["loss"]]
        feed_dict = {
            self.model_info["input"]: img_feats,
            self.model_info["pos_ids"]: pos_ids,
            self.model_info["neg_ids"]: neg_ids
        }
        return fetches, feed_dict

    def prep_predict(self, dataset, cross_eval=False):
        if cross_eval:
            self.test_one_hot = OneHot([dataset], valid_vocab=self.wv_model.vocab)
            self.test_w = construct_W(self.wv_model, self.test_one_hot.get_key_ordering()).T
        else:
            self.test_one_hot = self.one_hot
            self.test_w = self.w

        x = []
        y = []
        for idx in dataset:
            image_feats, text_feats = dataset.get_index(idx)
            text_feats = self.one_hot.get_multiple(text_feats)
            x.append(image_feats)
            y.append(text_feats)
        x = np.asarray(x)
        y = np.asarray(y)

        fetches = [self.model_info["prediction"], ]
        feed_dict = {
            self.model_info["input"]: x
        }
        truth = y
        return fetches, feed_dict, truth

    def post_predict(self, predict_fetches, cross_eval=False):
        predictions = predict_fetches[0]
        if cross_eval and self.test_w is None:
            raise Exception("test_w is not set. Did you call prep_predict?")
        predictions = np.dot(predictions, self.test_w.T)
        return predictions

    def get_training_loss(self, fit_fetches):
        return fit_fetches[1]
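

# The tensorized fztloss graph above is dense; the helper below is an illustrative
# NumPy re-statement of the same ranking objective, added here only for exposition
# and not used by the model. For each image it sums log(1 + exp(s_neg - s_pos)) over
# every (negative, positive) tag pair, where s is the dot product between the
# predicted embedding and a tag's word vector. The name _fztloss_numpy_sketch and
# its looped form are assumptions made for readability, not part of the original API.
def _fztloss_numpy_sketch(f, p_vecs, n_vecs):
    """Reference (assumed-equivalent) form of the Fast Zero-Shot ranking loss.

    f:      (n_images, emb_size) predicted embeddings
    p_vecs: (n_images, n_pos, emb_size) positive tag vectors
    n_vecs: (n_images, n_neg, emb_size) negative tag vectors
    """
    total = 0.0
    for i in range(f.shape[0]):
        pos_scores = p_vecs[i].dot(f[i])                   # (n_pos,)
        neg_scores = n_vecs[i].dot(f[i])                   # (n_neg,)
        diffs = neg_scores[:, None] - pos_scores[None, :]  # (n_neg, n_pos)
        total += np.log1p(np.exp(diffs)).sum()             # log(1 + exp(.))
    return total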


class NegSamplingModel(AttalosModel):
    """
    This model performs negative sampling.
    """
    def _construct_model_info(self, input_size, output_size, learning_rate, wv_arr,
                              hidden_units=[200], optim_words=True, opt_type='adam',
                              use_batch_norm=True, weight_decay=0.0):
        model_info = {}
        model_info["input"] = tf.placeholder(shape=(None, input_size), dtype=tf.float32)

        if optim_words:
            model_info["pos_vecs"] = tf.placeholder(dtype=tf.float32)
            model_info["neg_vecs"] = tf.placeholder(dtype=tf.float32)
            logger.info("Optimization on GPU, word vectors are stored separately.")
        else:
            model_info["w2v"] = tf.Variable(wv_arr, dtype=tf.float32)
            model_info["pos_ids"] = tf.placeholder(dtype=tf.int32)
            model_info["neg_ids"] = tf.placeholder(dtype=tf.int32)
            model_info["pos_vecs"] = tf.transpose(tf.nn.embedding_lookup(model_info["w2v"], model_info["pos_ids"]),
                                                  perm=[1, 0, 2])
            model_info["neg_vecs"] = tf.transpose(tf.nn.embedding_lookup(model_info["w2v"], model_info["neg_ids"]),
                                                  perm=[1, 0, 2])
            logger.info("Not optimizing word vectors.")

        # Construct fully connected layers
        layers = []
        layer = model_info["input"]
        for i, hidden_size in enumerate(hidden_units):
            layer = tf.contrib.layers.relu(layer, hidden_size)
            layers.append(layer)
            if use_batch_norm:
                layer = tf.contrib.layers.batch_norm(layer)
                layers.append(layer)

        # Output layer should always be linear
        layer = tf.contrib.layers.linear(layer, wv_arr.shape[1])
        layers.append(layer)

        model_info["layers"] = layers
        model_info["prediction"] = layer

        def meanlogsig(predictions, truth):
            reduction_indices = 2
            return tf.reduce_mean(tf.log(tf.sigmoid(tf.reduce_sum(predictions * truth,
                                                                  reduction_indices=reduction_indices))))

        pos_loss = meanlogsig(model_info["prediction"], model_info["pos_vecs"])
        neg_loss = meanlogsig(-model_info["prediction"], model_info["neg_vecs"])
        model_info["loss"] = -(pos_loss + neg_loss)

        # Decide whether to use the SGD or Adam optimizer
        if opt_type == 'sgd':
            logger.info("Optimization uses SGD with non-variable rate")
            optimizer = tf.train.GradientDescentOptimizer
        else:
            logger.info("Optimization uses Adam")
            optimizer = tf.train.AdamOptimizer

        # Are we manually decaying the learning rate? Use a placeholder in that case so it can be fed each step.
        if weight_decay:
            logger.info("Learning rate is manually adaptive, dropping every fifteen (hard coded) epochs")
            model_info['learning_rate'] = tf.placeholder(tf.float32, shape=[])
        else:
            model_info['learning_rate'] = learning_rate

        model_info["optimizer"] = optimizer(learning_rate=model_info['learning_rate']).minimize(model_info["loss"])
        #model_info["init_op"] = tf.initialize_all_variables()
        #model_info["saver"] = tf.train.Saver()
        return model_info

    def __init__(self, wv_model, datasets, **kwargs):
        self.wv_model = wv_model
        self.one_hot = OneHot(datasets, valid_vocab=wv_model.vocab)
        word_counts = NegativeSampler.get_wordcount_from_datasets(datasets, self.one_hot)
        self.negsampler = NegativeSampler(word_counts)
        train_dataset = datasets[0]  # train_dataset should always be first in datasets
        self.w = construct_W(wv_model, self.one_hot.get_key_ordering()).T

        scale_words = kwargs.get("scale_words", 1.0)
        if scale_words == 0.0:
            self.w = (self.w.T / np.linalg.norm(self.w, axis=1)).T
        else:
            self.w *= scale_words

        # Optimization parameters
        # Starting learning rate, currently defaults to 0.0001. This will change iteratively if decay is on.
        self.learning_rate = kwargs.get("learning_rate", 0.0001)
        self.weight_decay = kwargs.get("weight_decay", 0.0)
        self.optim_words = kwargs.get("optim_words", True)
        self.epoch_num = 0

        # Sampling methods
        self.ignore_posbatch = kwargs.get("ignore_posbatch", False)
        self.joint_factor = kwargs.get("joint_factor", 1.0)

        self.hidden_units = kwargs.get("hidden_units", "200")
        if self.hidden_units == '0':
            self.hidden_units = []
        else:
            self.hidden_units = [int(x) for x in self.hidden_units.split(",")]
        self.opt_type = kwargs.get("opt_type", "adam")
        self.use_batch_norm = kwargs.get('use_batch_norm', False)

        self.model_info = self._construct_model_info(
            input_size=train_dataset.img_feat_size,
            output_size=self.one_hot.vocab_size,
            hidden_units=self.hidden_units,
            learning_rate=self.learning_rate,
            optim_words=self.optim_words,
            opt_type=self.opt_type,
            use_batch_norm=self.use_batch_norm,
            wv_arr=self.w,
            weight_decay=self.weight_decay
        )
        self.test_one_hot = None
        self.test_w = None
        super(NegSamplingModel, self).__init__()

    def iter_batches(self, dataset, batch_size):
        for x, y in super(NegSamplingModel, self).iter_batches(dataset, batch_size):
            yield x, y

        # Decay the learning rate every fifteen epochs (hard coded for now).
        if self.weight_decay:
            if self.epoch_num and self.epoch_num % 15 == 0 and self.learning_rate > 1e-6:
                self.learning_rate *= self.weight_decay
                logger.info('Learning rate dropped to {}'.format(self.learning_rate))
        self.epoch_num += 1

    def _get_ids(self, tag_ids, numSamps=[5, 10], uniform_sampling=False):
        """
        Takes a batch worth of text tags and returns positive/negative ids
        """
        pos_word_ids = np.ones((len(tag_ids), numSamps[0]), dtype=np.int32)
        pos_word_ids.fill(-1)
        for ind, tags in enumerate(tag_ids):
            if len(tags) > 0:
                pos_word_ids[ind] = np.random.choice(tags, size=numSamps[0])

        neg_word_ids = None
        if uniform_sampling:
            neg_word_ids = np.random.randint(0, self.one_hot.vocab_size, size=(len(tag_ids), numSamps[1]))
        else:
            neg_word_ids = np.ones((len(tag_ids), numSamps[1]), dtype=np.int32)
            neg_word_ids.fill(-1)
            for ind in range(pos_word_ids.shape[0]):
                if self.ignore_posbatch:
                    # NOTE: This function call should definitely be pos_word_ids[ind]
                    # but that results in significantly worse performance
                    # I wish I understood why.
                    # I think this means we won't sample any tags that appear in the batch
                    neg_word_ids[ind] = self.negsampler.negsamp_ind(pos_word_ids, numSamps[1])
                else:
                    neg_word_ids[ind] = self.negsampler.negsamp_ind(pos_word_ids[ind], numSamps[1])

        return pos_word_ids, neg_word_ids

    def prep_fit(self, data):
        img_feats, text_feats_list = data

        text_feat_ids = []
        for tags in text_feats_list:
            text_feat_ids.append([self.one_hot.get_index(tag) for tag in tags if tag in self.one_hot])

        pos_ids, neg_ids = self._get_ids(text_feat_ids)
        self.pos_ids = pos_ids
        self.neg_ids = neg_ids

        if not self.optim_words:
            fetches = [self.model_info["optimizer"], self.model_info["loss"]]
            feed_dict = {
                self.model_info["input"]: img_feats,
                self.model_info["pos_ids"]: pos_ids,
                self.model_info["neg_ids"]: neg_ids
            }
        else:
            pvecs = np.zeros((pos_ids.shape[0], pos_ids.shape[1], self.w.shape[1]))
            nvecs = np.zeros((neg_ids.shape[0], neg_ids.shape[1], self.w.shape[1]))
            for i, ids in enumerate(pos_ids):
                pvecs[i] = self.w[ids]
            for i, ids in enumerate(neg_ids):
                nvecs[i] = self.w[ids]
            pvecs = pvecs.transpose((1, 0, 2))
            nvecs = nvecs.transpose((1, 0, 2))

            fetches = [self.model_info["optimizer"], self.model_info["loss"], self.model_info["prediction"]]
            feed_dict = {
                self.model_info["input"]: img_feats,
                self.model_info["pos_vecs"]: pvecs,
                self.model_info["neg_vecs"]: nvecs
            }

        if self.weight_decay:
            feed_dict[self.model_info['learning_rate']] = self.learning_rate

        return fetches, feed_dict

    def _updatewords(self, vpindex, vnindex, vin):
        for i, (vpi, vni) in enumerate(zip(vpindex, vnindex)):
            self.w[vpi] += self.joint_factor * self.learning_rate * np.outer(1 - sigmoid(self.w[vpi].dot(vin[i])), vin[i])
            self.w[vni] -= self.joint_factor * self.learning_rate * np.outer(sigmoid(self.w[vni].dot(vin[i])), vin[i])

    def fit(self, sess, fetches, feed_dict):
        fit_fetches = super(NegSamplingModel, self).fit(sess, fetches, feed_dict)
        if self.optim_words:
            if self.pos_ids is None or self.neg_ids is None:
                raise Exception("pos_ids or neg_ids is not set; cannot update word vectors. Did you run prep_fit()?")
            _, _, prediction = fit_fetches
            self._updatewords(self.pos_ids, self.neg_ids, prediction)
        return fit_fetches

    def prep_predict(self, dataset, cross_eval=False):
        if cross_eval:
            self.test_one_hot = OneHot([dataset], valid_vocab=self.wv_model.vocab)
            self.test_w = construct_W(self.wv_model, self.test_one_hot.get_key_ordering()).T
        else:
            self.test_one_hot = self.one_hot
            self.test_w = self.w

        x = []
        y = []
        for idx in dataset:
            image_feats, text_feats = dataset.get_index(idx)
            text_feats = self.one_hot.get_multiple(text_feats)
            x.append(image_feats)
            y.append(text_feats)
        x = np.asarray(x)
        y = np.asarray(y)

        fetches = [self.model_info["prediction"], ]
        feed_dict = {
            self.model_info["input"]: x
        }
        truth = y
        return fetches, feed_dict, truth

    def post_predict(self, predict_fetches, cross_eval=False):
        predictions = predict_fetches[0]
        if cross_eval and self.test_w is None:
            raise Exception("test_w is not set. Did you call prep_predict?")
        predictions = np.dot(predictions, self.test_w.T)
        return predictions

    def get_training_loss(self, fit_fetches):
        return fit_fetches[1]
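

# For reference, a minimal NumPy sketch (assumed equivalent, not used by the model)
# of the objective NegSamplingModel minimizes and of the manual word-vector step in
# _updatewords above: word2vec-style negative sampling where the projected image
# feature f plays the role of the context vector. _negsamp_step_sketch is a
# hypothetical helper added only for exposition.
def _negsamp_step_sketch(w, f, pos_ids, neg_ids, lr=0.0001):
    """w: (vocab_size, emb_size) word vectors; f: (emb_size,) prediction for one image."""
    def _sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    # loss = -(mean log sigmoid(w_pos . f) + mean log sigmoid(-w_neg . f))
    loss = -(np.log(_sigmoid(w[pos_ids].dot(f))).mean() +
             np.log(_sigmoid(-w[neg_ids].dot(f))).mean())

    # Gradient step on the word vectors, mirroring _updatewords (joint_factor omitted)
    w[pos_ids] += lr * np.outer(1 - _sigmoid(w[pos_ids].dot(f)), f)
    w[neg_ids] -= lr * np.outer(_sigmoid(w[neg_ids].dot(f)), f)
    return loss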


class NaiveSumModel(AttalosModel):
    """
    This model performs linear regression via NN using the naive sum of word vectors as targets.
    """
    def _construct_model_info(self, input_size, output_size, learning_rate, hidden_units=[200]):
        logger.info("Input size: %s" % input_size)
        logger.info("Output size: %s" % output_size)

        model_info = {}
        model_info["input"] = tf.placeholder(shape=(None, input_size), dtype=tf.float32)
        model_info["y_truth"] = tf.placeholder(shape=(None, output_size), dtype=tf.float32)

        layers = []
        layer = model_info["input"]
        for i, hidden_size in enumerate(hidden_units):
            layer = tf.contrib.layers.relu(layer, hidden_size)
            layers.append(layer)

        model_info["predictions"] = tf.contrib.layers.fully_connected(layer, output_size, activation_fn=None)
        model_info["loss"] = tf.reduce_sum(tf.square(model_info["predictions"] - model_info["y_truth"]))
        model_info["optimizer"] = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(model_info["loss"])
        return model_info

    def __init__(self, wv_model, datasets, **kwargs):
        self.wv_model = wv_model
        #self.cross_eval = kwargs.get("cross_eval", False)
        #self.one_hot = OneHot([train_dataset] if self.cross_eval else [train_dataset, test_dataset],
        #                      valid_vocab=wv_model.vocab)
        self.one_hot = OneHot(datasets, valid_vocab=wv_model.vocab)
        self.wv_transformer = NaiveW2V.create_from_vocab(wv_model, self.one_hot, vocab=self.one_hot.get_key_ordering())
        train_dataset = datasets[0]  # train_dataset should always be first in datasets iterable
        self.learning_rate = kwargs.get("learning_rate", 0.0001)
        self.hidden_units = kwargs.get("hidden_units", "200")
        if self.hidden_units == '0':
            self.hidden_units = []
        else:
            self.hidden_units = [int(x) for x in self.hidden_units.split(",")]
        self.model_info = self._construct_model_info(
            input_size=train_dataset.img_feat_size,
            output_size=self.wv_model.get_word_vector_shape()[0],
            learning_rate=self.learning_rate,
            hidden_units=self.hidden_units,
        )
        self.test_dataset = None
        self.test_one_hot = None
        super(NaiveSumModel, self).__init__()

    def prep_fit(self, data):
        img_feats_list, text_feats_list = data

        img_feats = np.array(img_feats_list)
        text_feats = [self.one_hot.get_multiple(text_feats) for text_feats in text_feats_list]
        text_feats = np.array(text_feats)
        text_feats = self.wv_transformer.transform(text_feats)

        fetches = [self.model_info["optimizer"], self.model_info["loss"]]
        feed_dict = {
            self.model_info["input"]: img_feats,
            self.model_info["y_truth"]: text_feats
        }
        return fetches, feed_dict

    def prep_predict(self, dataset, cross_eval=False):
        # Rebuild the test vocabulary and projection matrix only when the dataset changes.
        if self.test_one_hot is None or self.test_dataset is not dataset:
            self.test_dataset = dataset
            self.test_one_hot = OneHot([dataset], valid_vocab=self.wv_model.vocab)
            self.test_w = construct_W(self.wv_model, self.test_one_hot.get_key_ordering()).T

        x = []
        y = []
        for idx in dataset:
            image_feats, text_feats = dataset.get_index(idx)
            text_feats = self.test_one_hot.get_multiple(text_feats)
            x.append(image_feats)
            y.append(text_feats)
        x = np.asarray(x)
        truth = np.asarray(y)

        fetches = [self.model_info["predictions"], ]
        feed_dict = {
            self.model_info["input"]: x
        }
        return fetches, feed_dict, truth

    def post_predict(self, predict_fetches, cross_eval=False):
        if self.test_one_hot is None:
            raise Exception("test_one_hot is not set. Did you call prep_predict to initialize it?")
        predictions = predict_fetches[0]
        predictions = np.dot(predictions, construct_W(self.wv_model, self.test_one_hot.get_key_ordering()))
        return predictions

    def get_training_loss(self, fit_fetches):
        _, loss = fit_fetches
        return loss
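

# NaiveSumModel regresses image features onto the sum of the word vectors of an
# image's tags. The helper below is a minimal sketch of that target construction,
# assuming a (vocab_size, emb_size) embedding matrix and a multi-hot tag indicator;
# it illustrates what NaiveW2V.transform is expected to produce and is not called
# by the model.
def _naive_sum_target_sketch(w, multi_hot_tags):
    """w: (vocab_size, emb_size) word vectors; multi_hot_tags: (vocab_size,) 0/1 tag indicator."""
    return multi_hot_tags.dot(w)  # (emb_size,) sum of the word vectors of the active tags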


class WDVModel(AttalosModel):
    """
    This model performs logistic regression via NN using word distribution vectors (correlation vectors) as targets.
    """
    def _construct_model_info(self, input_size, output_size, learning_rate, hidden_units=[200]):
        logger.info("Input size: %s" % input_size)
        logger.info("Output size: %s" % output_size)

        model_info = {}
        model_info["input"] = tf.placeholder(shape=(None, input_size), dtype=tf.float32)
        model_info["y_truth"] = tf.placeholder(shape=(None, output_size), dtype=tf.float32)

        layers = []
        layer = model_info["input"]
        for i, hidden_size in enumerate(hidden_units):
            layer = tf.contrib.layers.relu(layer, hidden_size)
            layers.append(layer)

        model_info["predictions"] = tf.contrib.layers.fully_connected(layer, output_size, activation_fn=tf.sigmoid)
        model_info["loss"] = tf.reduce_sum(tf.square(model_info["predictions"] - model_info["y_truth"]))
        model_info["optimizer"] = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(model_info["loss"])
        return model_info

    def __init__(self, wv_model, datasets, **kwargs):  # train_dataset, test_dataset, **kwargs):
        self.wv_model = wv_model
        #self.cross_eval = kwargs.get("cross_eval", False)
        #self.one_hot = OneHot([train_dataset] if self.cross_eval else [train_dataset, test_dataset],
        #                      valid_vocab=wv_model.vocab)
        self.one_hot = OneHot(datasets, valid_vocab=self.wv_model.vocab)
        train_dataset = datasets[0]  # train_dataset should always be first in datasets iterable
        self.wv_transformer = WDV.create_from_vocab(wv_model, vocab1=self.one_hot.get_key_ordering(),
                                                    preprocess_fn=WDV.preprocess)
        self.learning_rate = kwargs.get("learning_rate", 0.0001)
        self.hidden_units = kwargs.get("hidden_units", "200")
        if self.hidden_units == '0':
            self.hidden_units = []
        else:
            self.hidden_units = [int(x) for x in self.hidden_units.split(",")]
        self.model_info = self._construct_model_info(
            input_size=train_dataset.img_feat_size,
            output_size=self.one_hot.vocab_size,  # self.wv_model.get_word_vector_shape()[0],
            learning_rate=self.learning_rate,
            hidden_units=self.hidden_units,
        )
        self.transform_cache = {}
        self.test_one_hot = None
        self.test_wv_transformer = None
        super(WDVModel, self).__init__()

    def generate_key(self, word_list):
        return " ".join(sorted(word_list))

    def add_cache(self, word_list, val):
        key = self.generate_key(word_list)
        self.transform_cache[key] = val

    def get_cache(self, word_list):
        key = self.generate_key(word_list)
        if key in self.transform_cache:
            return self.transform_cache[key]
        else:
            return None

    def prep_fit(self, data):
        img_feats_list, text_feats_list = data

        img_feats = np.array(img_feats_list)
        # normalize img_feats
        # new_img_feats = (new_img_feats.T / np.linalg.norm(new_img_feats, axis=1)).T

        new_text_feats_list = []
        for text_feats in text_feats_list:
            new_text_feats = self.get_cache(text_feats)
            if new_text_feats is None:
                new_text_feats = [self.one_hot.get_multiple(text_feats)]
                new_text_feats = np.array(new_text_feats)
                new_text_feats = self.wv_transformer.transform(new_text_feats, postprocess_fn=WDV.postprocess)
                new_text_feats = new_text_feats[0]  # new_text_feats is a list; get first element
                self.add_cache(text_feats, new_text_feats)
            new_text_feats_list.append(new_text_feats)
        text_feats = np.array(new_text_feats_list)

        fetches = [self.model_info["optimizer"], self.model_info["loss"]]
        feed_dict = {
            self.model_info["input"]: img_feats,
            self.model_info["y_truth"]: text_feats
        }
        return fetches, feed_dict

    def prep_predict(self, dataset, cross_eval=False):
        x = []
        y = []
        if cross_eval:
            self.test_one_hot = OneHot([dataset], valid_vocab=self.wv_model.vocab)
            self.test_wv_transformer = WDV.create_from_vocab(self.wv_model,
                                                             vocab1=self.one_hot.get_key_ordering(),
                                                             vocab2=self.test_one_hot.get_key_ordering(),
                                                             preprocess_fn=scale3)
        else:
            self.test_one_hot = self.one_hot
            self.test_wv_transformer = None

        for idx in dataset:
            image_feats, text_feats = dataset.get_index(idx)
            text_feats = self.test_one_hot.get_multiple(text_feats)
            x.append(image_feats)
            y.append(text_feats)
        x = np.asarray(x)
        y = np.asarray(y)

        fetches = [self.model_info["predictions"], ]
        feed_dict = {
            self.model_info["input"]: x
        }
        truth = y
        return fetches, feed_dict, truth

    def post_predict(self, predict_fetches, cross_eval=False):
        predictions = predict_fetches[0]
        if cross_eval:
            if self.test_wv_transformer is None:
                raise Exception("test_wv_transformer is not set. Did you call prep_predict?")
            predictions = np.dot(predictions, self.test_wv_transformer.wdv_arr)
        return predictions

    def get_training_loss(self, fit_fetches):
        _, loss = fit_fetches
        return loss
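

# The models above all follow the same AttalosModel protocol: prep_fit builds the
# (fetches, feed_dict) pair for one training batch, fit runs them in a session, and
# prep_predict/post_predict turn network outputs into per-tag scores. The function
# below is a hypothetical driver sketch added purely for illustration (the real
# training loop lives elsewhere in attalos); it assumes an already-constructed
# model, an open tf.Session, and dataset objects with the interface used above.
def _example_driver_sketch(model, sess, train_dataset, test_dataset, batch_size=128):
    sess.run(tf.initialize_all_variables())
    for x, y in model.iter_batches(train_dataset, batch_size):
        fetches, feed_dict = model.prep_fit((x, y))
        fit_fetches = model.fit(sess, fetches, feed_dict)
        logger.info("batch loss: {}".format(model.get_training_loss(fit_fetches)))
    fetches, feed_dict, truth = model.prep_predict(test_dataset, cross_eval=True)
    predictions = model.post_predict(sess.run(fetches, feed_dict=feed_dict), cross_eval=True)
    return predictions, truth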