Example #1
def test(model, path):
    triplets = load_resnet(path)
    #print(triplets[0:3])
    new_triplets = triplets[:25]
    good_pic_batch = np.array([val[1] for val in new_triplets])
    bad_pic_batch = np.array([val[2] for val in new_triplets])
    caption_batch = np.array([val[0] for val in new_triplets])
    print(good_pic_batch.shape, caption_batch.shape)

    good_pic_pred = model(good_pic_batch)
    bad_pic_pred = model(bad_pic_batch)
    # normalize each predicted embedding to a unit vector (per row)
    good_pic_pred = good_pic_pred / mg.sqrt(mg.sum(mg.power(good_pic_pred, 2), axis=-1, keepdims=True))
    bad_pic_pred = bad_pic_pred / mg.sqrt(mg.sum(mg.power(bad_pic_pred, 2), axis=-1, keepdims=True))
    print(good_pic_pred.shape)

    # good_pic_pred = good_pic_pred.reshape(1600, 1, 1)
    # bad_pic_pred = bad_pic_pred.reshape(1600, 1, 1)
    # caption_batch = caption_batch.reshape(1600, 1, 1)

    Sgood = (good_pic_pred * caption_batch).sum(axis=-1)
    Sbad = (bad_pic_pred * caption_batch).sum(axis=-1)
    print(Sgood.shape, Sbad.shape)
    # Sgood = Sgood.reshape(32, 50)
    # Sbad = Sbad.reshape(32, 50)

    #loss = margin_ranking_loss(Sgood, Sbad, 1, 0.1)
    acc = accuracy(Sgood.flatten(), Sbad.flatten())
    print(acc)
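The accuracy helper used above (and again in Example #9) is not defined in these snippets; a minimal sketch consistent with how it is called, namely the fraction of triplets whose matching-image similarity beats the mismatched one, might look like this (the name and signature are assumptions):

import numpy as np

def accuracy(s_good, s_bad):
    # Hypothetical helper: fraction of triplets where the matching image
    # scores higher than the mismatched image.
    return np.mean(np.asarray(s_good) > np.asarray(s_bad))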
Example #2
def normalize(arr):
    """
    Description:
        Takes in an array and normalizes each row by dividing it by its
        magnitude, so that every row of the result is a unit vector.
    :param arr: [np.ndarray] shape = (M, 50)
    :return: [np.ndarray] shape = (M, 50)
    """
    return arr / mg.sqrt(mg.sum(arr**2, axis=1, keepdims=True))
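A quick check of normalize on a small batch (a minimal sketch; mg is mygrad, as in the snippets above):

import numpy as np
import mygrad as mg

arr = mg.Tensor(np.random.rand(4, 50))
unit = normalize(arr)
# every row of the result should have magnitude ~1
print(mg.sqrt(mg.sum(unit ** 2, axis=1)))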
Example #3
 def __call__(self, x):
     # per-row mean (statistics are computed along axis 1)
     mu = mg.expand_dims(x.mean(axis=1), axis=1)
     # per-row variance: the mean of the squared deviations
     sqr_mu = (x - mu)**2
     var = mg.expand_dims(sqr_mu.mean(axis=1), axis=1)
     # normalize, then apply the learned affine transform
     xhat = (x - mu) / mg.sqrt(var + self.eps)
     return mg.matmul(xhat, self.gamma.T) + self.beta
Example #4
 def __call__(self, x):
     N, D = x.shape
     # mini-batch mean (per feature, across the N samples)
     mu = (1. / N) * mg.sum(x, axis=0)
     # mini-batch variance: the mean of the squared deviations
     sqr_mu = (x - mu)**2
     var = (1. / N) * mg.sum(sqr_mu, axis=0)
     # normalize, then apply the learned affine transform
     xhat = (x - mu) / mg.sqrt(var + self.eps)
     return mg.matmul(xhat, self.gamma.T) + self.beta
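Example #4 normalizes each feature across the batch; to sanity-check the corrected variance step, here is a NumPy-only sketch (gamma, beta, and eps are omitted or arbitrary, purely for illustration):

import numpy as np

x = np.random.randn(8, 3) * 5 + 2      # an (N, D) batch
mu = x.mean(axis=0)                    # per-feature mean
var = ((x - mu) ** 2).mean(axis=0)     # i.e. (1 / N) * sum(sqr_mu, axis=0)
xhat = (x - mu) / np.sqrt(var + 1e-8)
# each column of xhat now has ~zero mean and ~unit variance
print(xhat.mean(axis=0), xhat.var(axis=0))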
Example #5
def cos_sim(v1, v2):
    '''
    Calculates the cosine similarity between two vectors.

    Parameters
    ----------
    v1 : array-like, shape=(1, M)
    v2 : array-like, shape=(1, M)

    Returns
    -------
    mygrad.Tensor, shape=(1,)
        The cosine similarity between v1 and v2.
    '''
    v1_sumsq = mg.sum(v1**2)
    v2_sumsq = mg.sum(v2**2)
    v1_mag = mg.sqrt(v1_sumsq)
    v2_mag = mg.sqrt(v2_sumsq)
    v1_norm = v1 / v1_mag
    v2_norm = v2 / v2_mag
    return mg.sum((v1_norm * v2_norm), axis=1)
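Example usage of cos_sim (a minimal sketch with arbitrary vectors):

import mygrad as mg

v1 = mg.Tensor([[1.0, 0.0, 0.0]])
v2 = mg.Tensor([[1.0, 1.0, 0.0]])
print(cos_sim(v1, v2))  # ~0.7071, i.e. cos(45 degrees)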
Example #6
def search_phrase(query, annotations, image_id2id, glove, vectors_512,
                  inds_img, weight, bias, loaded_file):
    query = query.lower()

    punc_regex = re.compile('[{}]'.format(re.escape(string.punctuation)))

    def strip_punc(corpus):
        return punc_regex.sub('', corpus)

    query = strip_punc(query)
    query_components = query.split()
    N = len(annotations)
    queries_in_dict = []
    for word in query_components:
        if word in glove:
            nt = 1
            for caption in annotations:
                nt += word in set(caption.split())
            idf = np.log10(N / nt)
            queries_in_dict.append(glove[word] * idf)
    print("Got the idf")

    final_embedding = np.mean(np.vstack(queries_in_dict), axis=0)
    # Find the cosine distance between the 50D vectors and the embeddings
    #s = np.sum(vectors_50, axis = 1)
    print(type(vectors_512))
    print(type(vectors_512[0]))
    print(type(weight))
    vectors_50 = mg.matmul(vectors_512, weight) + bias
    #print(vectors_50[:k])
    #print(vectors_50[21697])
    print("shape1", vectors_50.shape)
    # normalize each 50-D image embedding to a unit vector
    sum_s = (vectors_50**2).sum(axis=1)
    vectors_50 = vectors_50 / mg.sqrt(sum_s).reshape(vectors_50.shape[0], 1)
    print("Got sum 1", vectors_50.shape)
    #s = np.sum(final_embedding)
    final_embedding = final_embedding / np.sqrt(np.sum(final_embedding**2))
    print("Printing images")
    cos = np.dot(vectors_50.data, final_embedding)
    k = 4
    max_vals = np.argsort(cos)[-k:]

    fig, ax = plt.subplots(2, 2)
    for ind, ima in enumerate(inds_img[max_vals]):

        url = loaded_file["images"][image_id2id[ima]]['coco_url']
        img = get_image(url)
        ax[ind // 2, ind % 2].imshow(img)
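The query embedding above is an IDF-weighted average of the per-word GloVe vectors: idf(w) = log10(N / n_w), where N is the number of captions and n_w counts the captions containing w (the nt = 1 initialization acts as a simple guard against division by zero). A stripped-down sketch of just that step, with a toy glove dict standing in for the real embeddings (all names and values here are assumptions):

import numpy as np

annotations = ["a dog on a beach", "a cat on a couch", "two dogs playing"]
glove = {"dog": np.array([1.0, 0.0]), "beach": np.array([0.0, 1.0])}

def query_embedding(query, annotations, glove):
    N = len(annotations)
    weighted = []
    for word in query.lower().split():
        if word in glove:
            nt = 1 + sum(word in set(c.split()) for c in annotations)
            idf = np.log10(N / nt)
            weighted.append(glove[word] * idf)
    # average the idf-weighted word vectors into a single query embedding
    return np.mean(np.vstack(weighted), axis=0)

print(query_embedding("dog beach", annotations, glove))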
Example #7
def simple_batchnorm(x, gamma, beta, eps):
    axes = [i for i in range(x.ndim)]
    axes.pop(1)  # every axis except 1
    axes = tuple(axes)
    keepdims_shape = tuple(1 if n != 1 else d for n, d in enumerate(x.shape))

    mean = mg.mean(x, axis=axes, keepdims=True)
    var = mg.var(x, axis=axes, keepdims=True)
    norm = (x - mean) / mg.sqrt(var + eps)

    if gamma is not None:
        gamma = gamma.reshape(keepdims_shape)
        norm *= gamma

    if beta is not None:
        beta = beta.reshape(keepdims_shape)
        norm += beta
    return norm
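A quick check of simple_batchnorm on a 4-D activation tensor (a minimal sketch; shapes and values are arbitrary):

import numpy as np
import mygrad as mg

x = mg.Tensor(np.random.randn(2, 3, 4, 4))   # (N, C, H, W)
out = simple_batchnorm(x, gamma=None, beta=None, eps=1e-5)
# statistics are taken over every axis except the channel axis, so each
# channel of the output has ~zero mean and ~unit variance
print(out.data.mean(axis=(0, 2, 3)), out.data.var(axis=(0, 2, 3)))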
Example #8
    def __call__(self, x):
        """ The model's forward pass functionality.

        Parameters
        ----------
        x : numpy.ndarray, shape = (M, 512)
            M is the number of image descriptors in the batch.

        Returns
        -------
        encoded : mygrad.Tensor, shape = (M, 50)
            The embedded descriptors, one unit vector per row.
        """

        unnorm_ans = self.dense1(x)

        # Turn each output row into a unit vector by dividing by its magnitude
        # (the square root of the sum of the squares).
        return unnorm_ans / mg.sqrt(mg.sum(unnorm_ans ** 2, axis=1, keepdims=True))
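A usage sketch for this forward pass, with a tiny hand-rolled stand-in for the model (the class name, weight shapes, and initialization below are assumptions; they only mirror the dense-then-normalize pattern shown above):

import numpy as np
import mygrad as mg

class TinyEncoder:
    """Hypothetical stand-in: one dense layer mapping 512 -> 50, followed by
    the row-wise unit normalization from the __call__ above."""
    def __init__(self):
        self.W = mg.Tensor(np.random.randn(512, 50) * 0.01)
        self.b = mg.Tensor(np.zeros(50))

    def __call__(self, x):
        unnorm = mg.matmul(x, self.W) + self.b
        return unnorm / mg.sqrt(mg.sum(unnorm ** 2, axis=1, keepdims=True))

model = TinyEncoder()
encoded = model(np.random.randn(8, 512))
print(np.linalg.norm(encoded.data, axis=1))  # each row has magnitude ~1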
Example #9
def train(model,
          num_epochs,
          margin,
          triplets,
          learning_rate=0.1,
          batch_size=32):
    """ trains the model 
        
        Parameters
        ----------
        
        model -  Model
            an initizized Model class, with input and output dim matching the image ID(512) and the descriptor (50) 
        
        num_epochs - int
            amount of epochs
            
        margin - int
            marhine for the margine ranking loss
            
        triplets 
            triplets created with the data from all_triplets(path)
        
        learning_rate(optional) - int
            learning rate of SDG
            
        batch_size(optional) - int
            the batch size
            

        Returns
        -------
        it trains the model by minimizing the loss function
        
        """
    optim = SGD(model.parameters, learning_rate=learning_rate)

    for epoch_cnt in range(num_epochs):
        # shuffle the triplets at the start of each epoch
        idxs = np.arange(len(triplets))
        np.random.shuffle(idxs)

        for batch_cnt in range(0, len(triplets) // batch_size):

            batch_indices = idxs[batch_cnt * batch_size:(batch_cnt + 1) *
                                 batch_size]
            triplets_batch = [triplets[index] for index in batch_indices]
            #print(triplets_batch[0])

            good_pic_batch = np.array([val[1] for val in triplets_batch])
            bad_pic_batch = np.array([val[2] for val in triplets_batch])
            caption_batch = np.array([val[0] for val in triplets_batch])

            good_pic_pred = model(good_pic_batch)
            bad_pic_pred = model(bad_pic_batch)
            good_pic_pred = good_pic_pred / mg.sqrt(
                mg.sum(mg.power(good_pic_pred, 2), axis=-1, keepdims=True))
            bad_pic_pred = bad_pic_pred / mg.sqrt(
                (mg.sum(mg.power(bad_pic_pred, 2), axis=-1, keepdims=True)))
            #print(good_pic_pred.shape)

            # good_pic_pred = good_pic_pred.reshape(1600, 1, 1)
            # bad_pic_pred = bad_pic_pred.reshape(1600, 1, 1)
            # caption_batch = caption_batch.reshape(1600, 1, 1)

            Sgood = (good_pic_pred * caption_batch).sum(axis=-1)
            Sbad = (bad_pic_pred * caption_batch).sum(axis=-1)
            #print(Sgood.shape, Sbad.shape)
            # Sgood = Sgood.reshape(32, 50)
            # Sbad = Sbad.reshape(32, 50)

            loss = margin_ranking_loss(Sgood, Sbad, 1, margin)
            acc = accuracy(Sgood.flatten(), Sbad.flatten())
            if batch_cnt % 10 == 0:
                print(loss, acc)

            loss.backward()
            optim.step()
            loss.null_gradients()
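For reference, margin_ranking_loss(Sgood, Sbad, 1, margin) as used here presumably corresponds to the standard margin ranking loss, mean(max(0, margin - y * (Sgood - Sbad))) with y = 1. A NumPy sketch of that formula (for reference only, not the autodiff-capable implementation the training loop imports):

import numpy as np

def margin_ranking_loss_np(s_good, s_bad, y, margin):
    # mean(max(0, margin - y * (s_good - s_bad)))
    diff = np.asarray(s_good) - np.asarray(s_bad)
    return np.mean(np.maximum(0.0, margin - y * diff))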
Example #10
File: trainer.py  Project: mkhan45/CogLens
def train(
        model,
        triples: List[Tuple[np.ndarray, np.ndarray, np.ndarray]],  # (caption embed, good image, bad image)
        optim,
        plotter,
        batch_size: int = 150,
        epoch_cnt: int = 1000,
        margin: float = 0.1):

    for epoch in range(epoch_cnt):
        idxs = np.arange(len(triples))
        np.random.shuffle(idxs)

        query_embeds, good_images, bad_images = unzip(triples)
        query_embeds = np.array(query_embeds)
        good_images = np.array(good_images)
        bad_images = np.array(bad_images)

        correct_list = []

        for batch_cnt in range(0, len(triples) // batch_size):
            batch_indices = idxs[batch_cnt * batch_size:(batch_cnt + 1) *
                                 batch_size]

            batch_query = query_embeds[batch_indices].reshape(batch_size, 50)
            good_batch = good_images[batch_indices].reshape(batch_size, 512)
            bad_batch = bad_images[batch_indices].reshape(batch_size, 512)

            # print(batch_query.shape)
            # print(good_batch.shape)
            # print(bad_batch.shape)
            # print("____")

            good_image_encode: mg.Tensor = model(good_batch)
            bad_image_encode: mg.Tensor = model(bad_batch)

            # print(good_image_encode.shape)
            # print(bad_image_encode.shape)
            # print("____")

            good_image_encode /= mg.sqrt(
                mg.sum(good_image_encode**2, axis=1).reshape(batch_size, 1))
            bad_image_encode /= mg.sqrt(
                mg.sum(bad_image_encode**2, axis=1).reshape(batch_size, 1))
            batch_query /= mg.sqrt(
                mg.sum(batch_query**2, axis=1).reshape(batch_size, 1))

            # print(good_image_encode.shape)
            # print(bad_image_encode.shape)
            # print(batch_query.shape)

            good_dists = mg.einsum("ij,ij -> i", good_image_encode,
                                   batch_query)
            bad_dists = mg.einsum("ij,ij -> i", bad_image_encode, batch_query)

            correct_list.append(good_dists - bad_dists > margin)

            loss: mg.Tensor = margin_ranking_loss(good_dists,
                                                  bad_dists,
                                                  1,
                                                  margin=margin)

            loss.backward()

            optim.step()

            loss.null_gradients()

            plotter.set_train_batch(
                {
                    "loss": loss.item(),
                    "acc": np.mean(np.array(correct_list))
                },
                batch_size=batch_size)

        plotter.set_test_epoch()
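The unzip helper used at the top of the epoch loop is not shown in this snippet; a minimal sketch of the expected behavior (splitting a list of triples into three parallel sequences), written here as an assumption:

def unzip(pairs):
    # [(a0, b0, c0), (a1, b1, c1), ...] -> ((a0, a1, ...), (b0, b1, ...), (c0, c1, ...))
    return tuple(zip(*pairs))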