import re
from collections import defaultdict

import numpy as np

# WORD_INDEX, MIN_WORD_COUNT, Bird, and Scene are assumed to be defined
# elsewhere in this module. The layer wrappers used below (NumpyData,
# Wordvec, Concat, LstmUnit, InnerProduct, Softmax, DummyData, ReLU) are
# assumed to be imported from apollocaffe.layers.


def load_birds():
    birds = []
    # Running sums used for one-pass feature standardization below.
    feats = np.zeros(4096)
    feats_sq = np.zeros(4096)
    word_counter = defaultdict(lambda: 0)

    # First pass: count word frequencies so that only sufficiently frequent
    # words enter the vocabulary.
    with open("data/birds/cub_0917_5cap.tsv") as caption_f:
        for line in caption_f:
            parts = line.strip().split("\t")
            caption = parts[-1]
            caption = (caption.lower()
                              .replace(".", "")
                              .replace(",", " , "))
            words = ["<s>"] + caption.split() + ["</s>"]
            for word in words:
                word_counter[word] += 1
    for word, count in word_counter.items():
        if count >= MIN_WORD_COUNT:
            WORD_INDEX.index(word)

    # Second pass: build one Bird per caption. Note that this pass skips the
    # file's first line, while the counting pass above does not.
    with open("data/birds/cub_0917_5cap.tsv") as caption_f:
        caption_f.readline()
        for line in caption_f:
            parts = line.strip().split("\t")
            caption = parts[-1]
            image_path = parts[-2]
            image_id = image_path.split("/")[-1]
            caption = (caption.lower()
                              .replace(".", "")
                              .replace(",", " , "))
            words = ["<s>"] + caption.split() + ["</s>"]
            word_ids = [WORD_INDEX[w] for w in words]
            with np.load("data/birds/embeddings/%s.npz" % image_id) \
                    as feature_f:
                features = feature_f[feature_f.keys()[0]]
            birds.append(Bird(image_id, word_ids, features))
            feats += features
            feats_sq += features ** 2

    # Standardize features using the identity Var[X] = E[X^2] - (E[X])^2;
    # the small constant guards against division by zero for dimensions
    # with (near-)zero variance.
    mean_feats = feats / len(birds)
    mean_feats_sq = feats_sq / len(birds)
    var_feats = mean_feats_sq - (mean_feats ** 2)
    std_feats = np.sqrt(var_feats)
    std_feats += 0.0001
    for bird in birds:
        bird.features[...] -= mean_feats
        bird.features[...] /= std_feats

    train_birds = birds[:-1100]
    val_birds = birds[-1100:-100]
    test_birds = birds[-100:]
    return train_birds, val_birds, test_birds
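# A minimal, self-contained check (not part of the original pipeline) of the
# one-pass standardization identity used in load_birds, namely
# Var[X] = E[X^2] - (E[X])^2. The function name is hypothetical.
def _check_streaming_standardization():
    rows = np.random.randn(50, 8)
    # Accumulate the same running sums load_birds keeps per feature.
    feats = rows.sum(axis=0)
    feats_sq = (rows ** 2).sum(axis=0)
    mean = feats / len(rows)
    var = feats_sq / len(rows) - mean ** 2
    # Should agree with numpy's direct (population) statistics.
    assert np.allclose(mean, rows.mean(axis=0))
    assert np.allclose(np.sqrt(var), rows.std(axis=0))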
def load_scenes(scene_props):
    scenes = []
    word_counter = defaultdict(lambda: 0)

    # First pass: count word frequencies across both sentence files.
    for sent_file_id in range(1, 3):
        with open("data/abstract/SimpleSentences/SimpleSentences%d_10020.txt"
                  % sent_file_id) as sent_f:
            for sent_line in sent_f:
                sent_parts = sent_line.strip().split("\t")
                sent = sent_parts[2]
                sent = sent.replace('"', ' " ')
                sent = sent.replace("'", " ' ")
                sent = re.sub(r"[.?!]", "", sent)
                words = sent.lower().split()
                words = ["<s>"] + words + ["</s>"]
                for word in words:
                    word_counter[word] += 1
    for word, count in word_counter.items():
        if count >= MIN_WORD_COUNT:
            WORD_INDEX.index(word)

    # Second pass: build one Scene per sentence. Note that quotes are
    # tokenized differently here than in the counting pass above (both kinds
    # of quote are dropped rather than split off into tokens), so the two
    # passes do not see exactly the same token stream.
    for sent_file_id in range(1, 3):
        with open("data/abstract/SimpleSentences/SimpleSentences%d_10020.txt"
                  % sent_file_id) as sent_f:
            for sent_line in sent_f:
                sent_parts = sent_line.strip().split("\t")
                scene_id = int(sent_parts[0])
                props = scene_props[scene_id]
                sent_id = int(sent_parts[1])
                image_id = scene_id // 10
                image_subid = scene_id % 10
                image_strid = "%d_%d" % (image_id, image_subid)
                sent = sent_parts[2]
                sent = sent.replace('"', "")
                sent = re.sub(r"[.?!']", "", sent)
                words = sent.lower().split()
                words = ["<s>"] + words + ["</s>"]
                # Words missing from WORD_INDEX fall back to id 0.
                word_ids = [WORD_INDEX[w] or 0 for w in words]
                with np.load("data/abstract/EmbeddedScenes/Scene%s.png.npz"
                             % image_strid) as feature_f:
                    features = feature_f[feature_f.keys()[0]]
                scenes.append(Scene(image_strid, props, word_ids, features))
    return scenes
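# For reference, a minimal sketch of the WORD_INDEX interface the loaders
# above assume: index(word) adds a word and returns its id, lookup by word
# returns None if absent (hence the "or 0" fallback above), and get(id) maps
# back to the word. This is an illustrative stand-in, not the project's
# actual implementation.
class _IndexSketch(object):
    def __init__(self):
        self.word_to_id = {}
        self.id_to_word = []

    def index(self, word):
        # Add the word if new; either way, return its id.
        if word not in self.word_to_id:
            self.word_to_id[word] = len(self.id_to_word)
            self.id_to_word.append(word)
        return self.word_to_id[word]

    def __getitem__(self, word):
        # None for out-of-vocabulary words.
        return self.word_to_id.get(word)

    def get(self, word_id):
        return self.id_to_word[word_id]

    def __len__(self):
        return len(self.id_to_word)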
def run_experiment(name, cname, rname, models, data):
    data_by_image = defaultdict(list)
    for datum in data:
        data_by_image[datum.image_id].append(datum)

    with open("experiments/%s/%s.ids.txt" % (name, cname)) as id_f, \
            open("experiments/%s/%s.results.%s.txt" % (name, cname, rname),
                 "w") as results_f:
        print >>results_f, "id,target,distractor,similarity,model_name,speaker_score,listener_score,description"
        counter = 0
        for line in id_f:
            img1, img2, similarity = line.strip().split(",")
            assert img1 in data_by_image and img2 in data_by_image
            d1 = data_by_image[img1][0]
            d2 = data_by_image[img2][0]
            for model_name, model in models.items():
                # Draw 10 samples per model for each (target, distractor)
                # pair.
                for i_sample in range(10):
                    speaker_scores, listener_scores, samples = \
                        model.sample([d1], [[d2]], dropout=False,
                                     viterbi=False)
                    parts = [
                        counter, img1, img2, similarity, model_name,
                        speaker_scores[0], listener_scores[0],
                        " ".join([WORD_INDEX.get(i)
                                  for i in samples[0][1:-1]])
                    ]
                    # Note: the description itself may contain "," tokens,
                    # so this naive join can produce rows with extra fields.
                    print >>results_f, ",".join([str(s) for s in parts])
                    counter += 1
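# Because sampled descriptions can contain "," tokens, a more robust variant
# would quote fields with the standard csv module. A minimal sketch (the
# function name is hypothetical, not part of the original code):
def _write_result_row(results_f, parts):
    import csv
    writer = csv.writer(results_f)
    # csv.writer quotes any field containing a comma, so a description with
    # "," tokens stays in a single column.
    writer.writerow([str(s) for s in parts])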
def demo(scenes, model, apollo_net, config):
    data = scenes[:config.batch_size]
    # Draw random distractor scenes for each alternative slot.
    alt_indices = [np.random.choice(len(scenes), size=config.batch_size)
                   for i_alt in range(config.alternatives)]
    alt_data = [[scenes[i] for i in alt] for alt in alt_indices]
    _, samples = model.sample(data, alt_data, dropout=False)
    for i in range(10):
        sample = samples[i]
        print data[i].image_id
        print " ".join([WORD_INDEX.get(w) for w in sample])
        print
def sample(self, prefix, encoding, viterbi):
    net = self.apollo_net
    batch_size = net.blobs[encoding].shape[0]
    max_words = 20

    # Layer-name templates, instantiated once per decoding step. The seed
    # name is formatted immediately since it is used only once. (The
    # original left it unformatted, so different decoder instances would
    # have shared one seed blob name.)
    l_seed = "LstmStringDecoder_%s_%s_seed" % (self.name, prefix)
    l_prev_word = "LstmStringDecoder_%s_%s_word_%d"
    l_word_vec = "LstmStringDecoder_%s_%s_word_vec_%d"
    l_concat = "LstmStringDecoder_%s_%s_concat_%d"
    l_lstm = "LstmStringDecoder_%s_%s_lstm_%d"
    l_hidden = "LstmStringDecoder_%s_%s_hidden_%d"
    l_mem = "LstmStringDecoder_%s_%s_mem_%d"
    l_output = "LstmStringDecoder_%s_%s_output_%d"
    l_softmax = "LstmStringDecoder_%s_%s_softmax_%d"

    # Parameter names are shared across steps so weights are tied in time.
    p_word_vec = ["LstmStringDecoder_%s_word_vec" % self.name]
    p_lstm = [
        "LstmStringDecoder_%s_lstm_iv" % self.name,
        "LstmStringDecoder_%s_lstm_ig" % self.name,
        "LstmStringDecoder_%s_lstm_fg" % self.name,
        "LstmStringDecoder_%s_lstm_og" % self.name
    ]
    p_output = [
        "LstmStringDecoder_%s_output_weight" % self.name,
        "LstmStringDecoder_%s_output_bias" % self.name
    ]

    samples = np.zeros((batch_size, max_words), dtype=int)
    samples[:, 0] = WORD_INDEX["<s>"]
    # The seed blob doubles as the initial hidden and memory state.
    net.f(NumpyData(
        l_seed, np.zeros((batch_size, self.config.hidden_size))))

    for i_step in range(1, max_words):
        l_prev_word_i = l_prev_word % (self.name, prefix, i_step)
        l_word_vec_i = l_word_vec % (self.name, prefix, i_step)
        l_concat_i = l_concat % (self.name, prefix, i_step)
        l_lstm_i = l_lstm % (self.name, prefix, i_step)
        l_hidden_i = l_hidden % (self.name, prefix, i_step)
        l_mem_i = l_mem % (self.name, prefix, i_step)
        l_output_i = l_output % (self.name, prefix, i_step)
        l_softmax_i = l_softmax % (self.name, prefix, i_step)

        if i_step == 1:
            prev_hidden = l_seed
            prev_mem = l_seed
        else:
            prev_hidden = l_hidden % (self.name, prefix, i_step - 1)
            prev_mem = l_mem % (self.name, prefix, i_step - 1)

        # Embed the previous word, concatenate it with the previous hidden
        # state and the image encoding, and advance the LSTM one step.
        net.f(NumpyData(l_prev_word_i, samples[:, i_step - 1]))
        net.f(Wordvec(
            l_word_vec_i, self.config.word_embedding_size,
            len(WORD_INDEX), bottoms=[l_prev_word_i],
            param_names=p_word_vec))
        net.f(Concat(
            l_concat_i, bottoms=[prev_hidden, l_word_vec_i, encoding]))
        net.f(LstmUnit(
            l_lstm_i, bottoms=[l_concat_i, prev_mem],
            param_names=p_lstm, tops=[l_hidden_i, l_mem_i],
            num_cells=self.config.hidden_size))
        net.f(InnerProduct(
            l_output_i, len(WORD_INDEX), bottoms=[l_hidden_i],
            param_names=p_output))
        net.f(Softmax(l_softmax_i, bottoms=[l_output_i]))

        # Pick the next word for each batch element: argmax for viterbi
        # decoding, otherwise a draw from the softmax distribution.
        choices = []
        for i in range(batch_size):
            probs = net.blobs[l_softmax_i].data[i, :].astype(np.float64)
            probs /= probs.sum()
            if viterbi:
                choices.append(np.argmax(probs))
            else:
                choices.append(np.random.choice(len(WORD_INDEX), p=probs))
        samples[:, i_step] = choices

    # Truncate each sample at its end-of-sentence marker.
    out_samples = []
    for i in range(samples.shape[0]):
        this_sample = []
        for j in range(samples.shape[1]):
            word = WORD_INDEX.get(samples[i, j])
            this_sample.append(samples[i, j])
            if word == "</s>":
                break
        out_samples.append(this_sample)
    return out_samples
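# The per-step choice above implements two decoding modes. A standalone
# sketch of the same rule (illustrative only; the function name is
# hypothetical):
def _choose_next_word(probs, viterbi):
    probs = np.asarray(probs, dtype=np.float64)
    probs /= probs.sum()  # renormalize to guard against float32 drift
    if viterbi:
        # Greedy ("viterbi") decoding: take the most probable word.
        return int(np.argmax(probs))
    # Otherwise sample ancestrally from the softmax distribution.
    return int(np.random.choice(len(probs), p=probs))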
def sample(self, prefix, encoding, viterbi):
    net = self.apollo_net
    max_words = 20
    batch_size = net.blobs[encoding].shape[0]

    out_logprobs = np.zeros((batch_size,))
    samples = np.zeros((batch_size, max_words), dtype=int)
    # Bag-of-words features: everything generated so far, plus the single
    # most recent word.
    history_features = np.zeros((batch_size, len(WORD_INDEX)))
    last_features = np.zeros((batch_size, len(WORD_INDEX)))
    samples[:, 0] = WORD_INDEX["<s>"]
    last_features[:, WORD_INDEX["<s>"]] += 1

    l_history_data = "MlpStringDecoder_%s_%s_history_data_%d"
    l_last_data = "MlpStringDecoder_%s_%s_last_data_%d"
    l_cat_features = "MlpStringDecoder_%s_%s_cat_features_%d"
    l_ip1 = "MlpStringDecoder_%s_%s_ip1_%d"
    l_cat = "MlpStringDecoder_%s_%s_cat_%d"
    l_relu1 = "MlpStringDecoder_%s_%s_relu1_%d"
    l_ip2 = "MlpStringDecoder_%s_%s_ip2_%d"
    l_softmax = "MlpStringDecoder_%s_%s_softmax_%d"

    p_ip1 = [
        "MlpStringDecoder_%s_ip1_weight" % self.name,
        "MlpStringDecoder_%s_ip1_bias" % self.name
    ]
    p_ip2 = [
        "MlpStringDecoder_%s_ip2_weight" % self.name,
        "MlpStringDecoder_%s_ip2_bias" % self.name
    ]

    for i_step in range(1, max_words):
        l_history_data_i = l_history_data % (self.name, prefix, i_step)
        l_last_data_i = l_last_data % (self.name, prefix, i_step)
        l_cat_features_i = l_cat_features % (self.name, prefix, i_step)
        l_ip1_i = l_ip1 % (self.name, prefix, i_step)
        l_cat_i = l_cat % (self.name, prefix, i_step)
        l_relu1_i = l_relu1 % (self.name, prefix, i_step)
        l_ip2_i = l_ip2 % (self.name, prefix, i_step)
        l_softmax_i = l_softmax % (self.name, prefix, i_step)

        # DummyData layers are created with a placeholder shape, then
        # reshaped and filled with the current feature matrices.
        net.f(DummyData(l_history_data_i, (1, 1, 1, 1)))
        net.blobs[l_history_data_i].reshape(history_features.shape)
        net.f(DummyData(l_last_data_i, (1, 1, 1, 1)))
        net.blobs[l_last_data_i].reshape(last_features.shape)
        net.blobs[l_history_data_i].data[...] = history_features
        net.blobs[l_last_data_i].data[...] = last_features

        net.f(Concat(
            l_cat_features_i, bottoms=[l_history_data_i, l_last_data_i]))
        net.f(InnerProduct(
            l_ip1_i, self.config.hidden_size,
            bottoms=[l_cat_features_i], param_names=p_ip1))
        net.f(Concat(l_cat_i, bottoms=[l_ip1_i, encoding]))
        net.f(ReLU(l_relu1_i, bottoms=[l_cat_i]))
        net.f(InnerProduct(
            l_ip2_i, len(WORD_INDEX), bottoms=[l_relu1_i],
            param_names=p_ip2))
        net.f(Softmax(l_softmax_i, bottoms=[l_ip2_i]))

        probs = net.blobs[l_softmax_i].data
        history_features += last_features
        last_features[...] = 0
        for i_datum in range(batch_size):
            d_probs = probs[i_datum, :].astype(float)
            d_probs /= d_probs.sum()
            if viterbi:
                choice = d_probs.argmax()
            else:
                # Equivalent to np.random.choice(len(d_probs), p=d_probs).
                choice = np.random.multinomial(1, d_probs).argmax()
            samples[i_datum, i_step] = choice
            last_features[i_datum, choice] += 1
            out_logprobs[i_datum] += np.log(d_probs[choice])

    # Truncate each sample at its end-of-sentence marker, appending one if
    # the decoder never produced it.
    out_samples = []
    for i in range(samples.shape[0]):
        this_sample = []
        for j in range(samples.shape[1]):
            word = WORD_INDEX.get(samples[i, j])
            this_sample.append(samples[i, j])
            if word == "</s>":
                break
        if this_sample[-1] != WORD_INDEX["</s>"]:
            this_sample.append(WORD_INDEX["</s>"])
        out_samples.append(this_sample)
    return out_logprobs, out_samples