def test(model, path, num_samples=25):
    """
    Evaluate the model on the first few triplets stored at `path`.

    Parameters
    ----------
    model : callable
        Maps a batch of image descriptors to embeddings; its output is
        normalized here row by row before scoring.
    path
        Passed straight to `load_resnet`, which is expected to return a
        sequence of triplets indexed as (caption, good image, bad image).
    num_samples : int, optional
        How many leading triplets to evaluate (default 25, the value that was
        previously hard-coded).

    Returns
    -------
    The value produced by `accuracy` for the evaluated triplets (also printed).
    """
    triplets = load_resnet(path)
    subset = triplets[:num_samples]

    caption_batch = np.array([val[0] for val in subset])
    good_pic_batch = np.array([val[1] for val in subset])
    bad_pic_batch = np.array([val[2] for val in subset])
    print(good_pic_batch.shape, caption_batch.shape)

    good_pic_pred = model(good_pic_batch)
    bad_pic_pred = model(bad_pic_batch)

    # Normalize each embedding (row) to unit length. Summing without an axis
    # would divide every row by the norm of the whole batch, which is not what
    # the training loop in this file does.
    good_pic_pred = good_pic_pred / mg.sqrt(
        mg.sum(mg.power(good_pic_pred, 2), axis=-1, keepdims=True))
    bad_pic_pred = bad_pic_pred / mg.sqrt(
        mg.sum(mg.power(bad_pic_pred, 2), axis=-1, keepdims=True))
    print(good_pic_pred.shape)

    # Row-wise dot product between each image embedding and its caption.
    Sgood = (good_pic_pred * caption_batch).sum(axis=-1)
    Sbad = (bad_pic_pred * caption_batch).sum(axis=-1)
    print(Sgood.shape, Sbad.shape)

    acc = accuracy(Sgood.flatten(), Sbad.flatten())
    print(acc)
    return acc
def normalize(arr):
    """
    Scale every row of `arr` to unit Euclidean length.

    The magnitude is computed along the last axis only, so each row of a
    2-D input is divided by its own norm (a 1-D input is treated as a
    single vector).

    :param arr: [np.ndarray] shape = (M, 50)
    :return: array of the same shape, shape = (M, 50), whose rows are unit vectors
    """
    # keepdims=True leaves a trailing axis of length 1 so the per-row norms
    # broadcast against the rows they belong to.
    return arr / mg.sqrt(mg.sum(arr ** 2, axis=-1, keepdims=True))
def __call__(self, x):
    """
    Normalize each row of `x` to zero mean / unit variance, then apply the
    layer's `gamma` and `beta`.

    Statistics are taken along axis 1, i.e. separately for every row.

    Parameters
    ----------
    x : array-like or tensor with at least two axes

    Returns
    -------
    The result of ``matmul(xhat, self.gamma.T) + self.beta`` where `xhat` is
    the normalized input.
    """
    # per-row mean, kept as a column so it broadcasts against x
    mu = mg.expand_dims(x.mean(axis=1), axis=1)

    # per-row variance: the mean of the squared deviations from the row mean
    # (not a sum of the raw values), averaged over the axis being reduced
    sqr_mu = (x - mu) ** 2
    var = mg.expand_dims(mg.mean(sqr_mu, axis=1), axis=1)

    # eps guards against division by zero for constant rows
    xhat = (x - mu) / mg.sqrt(var + self.eps)

    return mg.matmul(xhat, self.gamma.T) + self.beta
def __call__(self, x):
    """
    Batch-normalize `x` over axis 0, then apply the layer's `gamma` and `beta`.

    Parameters
    ----------
    x : 2-D array-like or tensor, shape = (N, D)
        N samples in the mini-batch, D features each.

    Returns
    -------
    The result of ``matmul(xhat, self.gamma.T) + self.beta`` where `xhat` is
    the normalized input.
    """
    # Unpacking also enforces that x is two-dimensional.
    N, _ = x.shape

    # mini-batch mean of every feature
    mu = (1. / N) * mg.sum(x, axis=0)

    # mini-batch variance of every feature: average of the squared deviations
    # from the mean (not of the raw values)
    sqr_mu = (x - mu) ** 2
    var = (1. / N) * mg.sum(sqr_mu, axis=0)

    # eps guards against division by zero for constant features
    xhat = (x - mu) / mg.sqrt(var + self.eps)

    return mg.matmul(xhat, self.gamma.T) + self.beta
def cos_sim(v1, v2):
    """
    Calculates the row-wise cosine similarity between two sets of vectors.

    Each row is scaled by its own magnitude, so besides the single-row case
    the function also handles batches of rows (and a single row broadcast
    against a batch).

    Parameters
    ----------
    v1 : array or tensor of shape (1, M) or (N, M)

    v2 : array or tensor of shape (1, M) or (N, M)

    Returns
    -------
    One similarity per row: shape (1,) for two (1, M) inputs, shape (N,)
    when either input has N rows.
    """
    # keepdims=True keeps the norms as columns so every row is divided by
    # its own magnitude rather than by the norm of the whole array.
    v1_mag = mg.sqrt(mg.sum(v1 ** 2, axis=1, keepdims=True))
    v2_mag = mg.sqrt(mg.sum(v2 ** 2, axis=1, keepdims=True))

    return mg.sum((v1 / v1_mag) * (v2 / v2_mag), axis=1)
def search_phrase(query, annotations, image_id2id, glove, vectors_512,
                  inds_img, weight, bias, loaded_file):
    """
    Show the four images whose embeddings best match a text query.

    The query is lower-cased, stripped of punctuation and split on
    whitespace. Every word found in `glove` contributes its vector weighted
    by an inverse document frequency computed over `annotations`; the mean of
    those weighted vectors is the query embedding. Image descriptors are
    projected with `weight` and `bias`, normalized per row, and ranked by dot
    product with the normalized query embedding.

    Parameters
    ----------
    query : str
    annotations : sized iterable of str
        Captions used for the document-frequency counts.
    image_id2id : mapping
        Translates an entry of `inds_img` into an index of
        ``loaded_file["images"]``.
    glove : mapping word -> vector (must support ``in``)
    vectors_512 : image descriptors, one row per image
    inds_img : numpy.ndarray
        Image ids aligned with the rows of `vectors_512` (indexed with an
        integer array).
    weight, bias : projection applied as ``vectors_512 @ weight + bias``
    loaded_file : mapping
        ``loaded_file["images"][i]['coco_url']`` gives the image URL.

    Raises
    ------
    ValueError
        If none of the query words is present in `glove`.
    """
    punc_regex = re.compile('[{}]'.format(re.escape(string.punctuation)))
    query = punc_regex.sub('', query.lower())

    known_words = [word for word in query.split() if word in glove]
    if not known_words:
        # Without this guard np.vstack fails on an empty list with a message
        # that says nothing about the query.
        raise ValueError(
            "none of the words in the query have an embedding in glove")

    # Document frequency of each query word, counted in a single pass over
    # the captions. Counts start at 1 (add-one offset) so the ratio below is
    # always defined.
    N = len(annotations)
    doc_counts = dict.fromkeys(known_words, 1)
    for caption in annotations:
        tokens = set(caption.split())
        for word in doc_counts:
            if word in tokens:
                doc_counts[word] += 1

    queries_in_dict = [
        glove[word] * np.log10(N / doc_counts[word]) for word in known_words
    ]
    print("Got the idf")
    final_embedding = np.mean(np.vstack(queries_in_dict), axis=0)

    # Project the image descriptors and scale every row to unit length.
    vectors_50 = mg.matmul(vectors_512, weight) + bias
    print("shape1", vectors_50.shape)
    vectors_50 = vectors_50 / mg.sqrt(
        mg.sum(vectors_50 ** 2, axis=1, keepdims=True))
    print("Got sum 1", vectors_50.shape)

    final_embedding = final_embedding / np.sqrt(np.sum(final_embedding ** 2))

    print("Printing images")
    # Both sides are unit length, so the dot product is the cosine similarity.
    cos = np.dot(vectors_50.data, final_embedding)

    # argsort is ascending: the last k entries are the k best matches, with
    # the single best match drawn last.
    k = 4
    max_vals = np.argsort(cos)[-k:]
    fig, ax = plt.subplots(2, 2)
    for ind, ima in enumerate(inds_img[max_vals]):
        url = loaded_file["images"][image_id2id[ima]]['coco_url']
        img = get_image(url)
        ax[ind // 2, ind % 2].imshow(img)
def simple_batchnorm(x, gamma, beta, eps):
    """
    Reference batch normalization that treats axis 1 as the channel axis.

    Mean and variance are reduced over every axis except axis 1. When given,
    `gamma` scales and `beta` shifts the normalized values; both are reshaped
    so that they line up with axis 1 and broadcast over the other axes.

    Parameters
    ----------
    x : array or tensor with at least two axes
    gamma : optional per-channel scale (``None`` to skip)
    beta : optional per-channel shift (``None`` to skip)
    eps : value added to the variance before taking the square root

    Returns
    -------
    The normalized (and optionally scaled / shifted) data, same shape as `x`.
    """
    # all axes but the channel axis
    reduce_axes = list(range(x.ndim))
    del reduce_axes[1]
    reduce_axes = tuple(reduce_axes)

    # shape that is 1 everywhere except along the channel axis
    channel_shape = tuple(
        size if position == 1 else 1 for position, size in enumerate(x.shape)
    )

    batch_mean = mg.mean(x, axis=reduce_axes, keepdims=True)
    batch_var = mg.var(x, axis=reduce_axes, keepdims=True)
    norm = (x - batch_mean) / mg.sqrt(batch_var + eps)

    if gamma is not None:
        norm *= gamma.reshape(channel_shape)

    if beta is not None:
        norm += beta.reshape(channel_shape)

    return norm
def __call__(self, x):
    """
    Forward pass: project the input with the dense layer and scale every
    row of the result to unit length.

    Parameters
    ----------
    x : numpy.ndarray, shape = (M,512)
        M is the number of rows

    Returns
    -------
    encoded : shape = (M,50)
        Row-normalized output of `self.dense1`.
    """
    projected = self.dense1(x)

    # Euclidean length of every row (square root of the sum of squares),
    # kept as a column so it broadcasts in the division below.
    row_lengths = mg.sqrt(mg.sum(projected ** 2, axis=1, keepdims=True))

    return projected / row_lengths
def train(model, num_epochs, margin, triplets, learning_rate=0.1, batch_size=32):
    """
    Train the model by minimizing a margin ranking loss over triplets.

    Parameters
    ----------
    model - Model
        an initialized Model class, with input and output dim matching
        the image descriptor (512) and the caption embedding (50)

    num_epochs - int
        number of passes over the triplets

    margin - float
        margin passed to the margin ranking loss

    triplets
        sequence of (caption, good image, bad image) triplets; if None,
        the triplets are loaded from the default file on disk

    learning_rate(optional) - float
        learning rate of SGD

    batch_size(optional) - int
        number of triplets per batch; a trailing partial batch is skipped

    Returns
    -------
    None; the model's parameters are updated in place.
    """
    optim = SGD(model.parameters, learning_rate=learning_rate)

    # Use the caller's triplets; only fall back to the stored file when none
    # were supplied.
    if triplets is None:
        triplets = load_resnet(r"data\triplets")

    # Indices are drawn from the triplet list itself so that every index is
    # valid and every triplet can be visited.
    num_triplets = len(triplets)

    for _ in range(num_epochs):
        idxs = np.arange(num_triplets)
        np.random.shuffle(idxs)

        for batch_cnt in range(num_triplets // batch_size):
            batch_indices = idxs[batch_cnt * batch_size:(batch_cnt + 1) * batch_size]
            triplets_batch = [triplets[index] for index in batch_indices]

            caption_batch = np.array([val[0] for val in triplets_batch])
            good_pic_batch = np.array([val[1] for val in triplets_batch])
            bad_pic_batch = np.array([val[2] for val in triplets_batch])

            good_pic_pred = model(good_pic_batch)
            bad_pic_pred = model(bad_pic_batch)

            # scale every embedding (row) to unit length
            good_pic_pred = good_pic_pred / mg.sqrt(
                mg.sum(mg.power(good_pic_pred, 2), axis=-1, keepdims=True))
            bad_pic_pred = bad_pic_pred / mg.sqrt(
                mg.sum(mg.power(bad_pic_pred, 2), axis=-1, keepdims=True))

            # row-wise dot product between image embedding and caption
            Sgood = (good_pic_pred * caption_batch).sum(axis=-1)
            Sbad = (bad_pic_pred * caption_batch).sum(axis=-1)

            loss = margin_ranking_loss(Sgood, Sbad, 1, margin)

            # accuracy is only needed for the periodic progress report
            if batch_cnt % 10 == 0:
                acc = accuracy(Sgood.flatten(), Sbad.flatten())
                print(loss, acc)

            loss.backward()
            optim.step()
            loss.null_gradients()
def train(
        model,
        triples: List[Tuple[np.ndarray, np.ndarray, np.ndarray]],  # caption embeds, good_images, bad_images
        optim,
        plotter,
        batch_size: int = 150,
        epoch_cnt: int = 1000,
        margin: float = 0.1):
    """
    Train `model` with a margin ranking loss and report progress to `plotter`.

    Parameters
    ----------
    model : callable
        Maps a batch of image descriptors to embeddings.
    triples : list of (caption embedding, good image, bad image)
    optim : optimizer exposing ``step()``
    plotter : object exposing ``set_train_batch(metrics, batch_size=...)``
        and ``set_test_epoch()``
    batch_size : int
        Triples per batch; a trailing partial batch is skipped.
    epoch_cnt : int
        Number of passes over the triples.
    margin : float
        Margin of the ranking loss, also used as the threshold for counting
        a triple as correctly ranked.
    """
    # The triples do not change between epochs, so split and convert once.
    query_embeds, good_images, bad_images = unzip(triples)
    query_embeds = np.array(query_embeds)
    good_images = np.array(good_images)
    bad_images = np.array(bad_images)

    num_triples = len(triples)

    for epoch in range(epoch_cnt):
        idxs = np.arange(num_triples)
        np.random.shuffle(idxs)

        # running totals for the accuracy over the current epoch
        num_correct = 0
        num_seen = 0

        for batch_cnt in range(num_triples // batch_size):
            batch_indices = idxs[batch_cnt * batch_size:(batch_cnt + 1) * batch_size]

            # flatten every entry to one row; the feature width is inferred
            batch_query = query_embeds[batch_indices].reshape(batch_size, -1)
            good_batch = good_images[batch_indices].reshape(batch_size, -1)
            bad_batch = bad_images[batch_indices].reshape(batch_size, -1)

            good_image_encode: mg.Tensor = model(good_batch)
            bad_image_encode: mg.Tensor = model(bad_batch)

            # scale every embedding (row) to unit length
            good_image_encode = good_image_encode / mg.sqrt(
                mg.sum(good_image_encode ** 2, axis=1, keepdims=True))
            bad_image_encode = bad_image_encode / mg.sqrt(
                mg.sum(bad_image_encode ** 2, axis=1, keepdims=True))

            # The captions are constants, so they are normalized with plain
            # NumPy instead of dividing an ndarray in place by a tensor.
            batch_query = batch_query / np.sqrt(
                np.sum(batch_query ** 2, axis=1, keepdims=True))

            # row-wise dot products: similarity of each image to its caption
            good_dists = mg.einsum("ij,ij -> i", good_image_encode, batch_query)
            bad_dists = mg.einsum("ij,ij -> i", bad_image_encode, batch_query)

            correct = (good_dists.data - bad_dists.data) > margin
            num_correct += int(np.sum(correct))
            num_seen += batch_size

            loss: mg.Tensor = margin_ranking_loss(good_dists,
                                                  bad_dists,
                                                  1,
                                                  margin=margin)

            loss.backward()
            optim.step()
            loss.null_gradients()

            plotter.set_train_batch(
                {
                    "loss": loss.item(),
                    # fraction of correctly ranked triples so far this epoch
                    "acc": num_correct / num_seen
                },
                batch_size=batch_size)

        # NOTE(review): assumed to run once per epoch; the original
        # indentation was lost, and no test batches are recorded here, so
        # confirm this is the intended plotter call.
        plotter.set_test_epoch()