Example #1
def _extract_feature(vgg_model_path, batch_size=32):
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for split in ['train', 'val', 'test']:
            anno_path = '/home/yifan/PythonProjects/im2txt-att/data/%s/%s.annotations.pkl' % (split, split)
            save_path = '/home/yifan/PythonProjects/im2txt-att/data/%s/%s.features.hkl' % (split, split)
            annotations = load_pickle(anno_path)
            image_path = list(annotations['file_name'].unique())
            n_examples = len(image_path)
            
            all_feats = np.ndarray([n_examples, 196, 512], dtype=np.float32)
            
            for start, end in zip(range(0, n_examples, batch_size), 
                                  range(batch_size, n_examples + batch_size, batch_size)):
                image_batch_file = image_path[start:end]
                image_batch = np.array(list(map(lambda x: ndimage.imread(x, mode='RGB'), image_batch_file))).astype(np.float32)
                feats = sess.run(vggnet.features, feed_dict={vggnet.images: image_batch})
                all_feats[start:end, :] = feats
                print('Processed %d %s features..' % (end, split))
            
            #use hickle to save huge feature vectors
            hickle.dump(all_feats, save_path)
            print('Saved %s..' % (save_path))
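The 196x512 feature shape corresponds to VGG-19's conv5_3 output: a 14x14 spatial map with 512 channels, flattened to 196 locations. A minimal sketch of loading one of the dumped .hkl files back, assuming hickle is installed and the paths above are unchanged (the helper name is illustrative, not part of the repository):

import hickle
import numpy as np

def load_split_features(split, data_root='/home/yifan/PythonProjects/im2txt-att/data'):
    # Load the (n_examples, 196, 512) float32 array written by _extract_feature.
    feats = hickle.load('%s/%s/%s.features.hkl' % (data_root, split, split))
    assert feats.shape[1:] == (196, 512)
    return np.asarray(feats, dtype=np.float32)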
    def __init__(self, vgg19_path=None):
        if vgg19_path is not None:
            self.vggnet = Vgg19(vgg19_path)
            self.vggnet.build()

        self.sess = None
        self.val_data = None
Example #3
    def __init__(self, word_to_idx, dim_feature=[196, 512], dim_embed=512, dim_hidden=1024, n_time_step=16, 
                  prev2out=True, ctx2out=True, alpha_c=0.0, selector=True, dropout=True, use_cnn = None, cnn_model_path = None):
        """
        Args:
            word_to_idx: word-to-index mapping dictionary.
            dim_feature: (optional) Dimension of vggnet19 conv5_3 feature vectors.
            dim_embed: (optional) Dimension of word embedding.
            dim_hidden: (optional) Dimension of all hidden states.
            n_time_step: (optional) Time step size of LSTM.
            prev2out: (optional) Feed the previously generated word into the output layer (see Eq. (7) for explanation).
            ctx2out: (optional) Feed the context vector into the output layer (see Eq. (7) for explanation).
            alpha_c: (optional) Doubly stochastic regularization coefficient (see Section 4.2.1 for explanation).
            selector: (optional) Gating scalar for the context vector (see Section 4.2.1 for explanation).
            dropout: (optional) If True, a dropout layer is added.
            use_cnn: (optional) 'inception' or 'vgg' to build the CNN in-graph; if None, precomputed features are fed through a placeholder.
            cnn_model_path: (optional) Path to the pretrained CNN weights; required when use_cnn is set.
        """

        self.word_to_idx = word_to_idx
        self.idx_to_word = {i: w for w, i in iteritems(word_to_idx)}
        self.prev2out = prev2out
        self.ctx2out = ctx2out
        self.alpha_c = alpha_c
        self.selector = selector
        self.dropout = dropout
        self.use_cnn = use_cnn
        self.V = len(word_to_idx)
        self.L = dim_feature[0]
        self.D = dim_feature[1]
        self.M = dim_embed
        self.H = dim_hidden
        self.T = n_time_step
        self._start = word_to_idx['<START>']
        self._null = word_to_idx['<NULL>']

        self.weight_initializer = tf.contrib.layers.xavier_initializer()
        self.const_initializer = tf.constant_initializer(0.0)
        self.emb_initializer = tf.random_uniform_initializer(minval=-1.0, maxval=1.0)

        if use_cnn is None:
            # Placeholder for features
            self.features = tf.placeholder(tf.float32, [None, self.L, self.D])
        else:
            # build CNN model
            if use_cnn == "inception":
                self.cnn = InceptionV3(cnn_model_path)
            elif use_cnn == "vgg":
                self.cnn = Vgg19(cnn_model_path)
            else:
                raise RuntimeError("Unknown CNN model " + use_cnn)
            self.cnn.build()
            # Placeholder for image input
            self.images = self.cnn.images
            # output features from CNN
            self.features = self.cnn.features
        # Placeholder for captions
        self.captions = tf.placeholder(tf.int32, [None, self.T + 1])
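For context, alpha_c above weights the doubly stochastic attention regularizer from Section 4.2.1 of Show, Attend and Tell, which encourages each of the L feature locations to receive roughly unit total attention over the T decoding steps. A minimal sketch of such a term, assuming alphas is an (N, T, L) tensor of attention weights collected while unrolling the LSTM (the function name and tensor are illustrative, not taken from this repository):

def doubly_stochastic_penalty(alphas, alpha_c):
    # alphas: (N, T, L) attention weights produced by the decoder.
    alphas_sum = tf.reduce_sum(alphas, axis=1)          # (N, L): total attention per location
    penalty = tf.reduce_sum((1.0 - alphas_sum) ** 2)    # sum over batch and locations
    return alpha_c * penalty                            # added to the caption loss when alpha_c > 0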
Example #4
def main():
    # maximum caption length (number of words); captions longer than max_length are discarded.
    max_length = 20
    # words occurring fewer than word_count_threshold times in the training set are mapped to the unknown token.
    word_count_threshold = 1

    # about 500 images and 2500 captions
    test_dataset = _process_test_data(image_dir='image/XingBi_image_resized/')

    print('Finished processing caption data')

    split = 'X_test'
    save_pickle(test_dataset, 'data/X_test/X_test.annotations.pkl')

    annotations = load_pickle('./data/%s/%s.annotations.pkl' % (split, split))

    file_names = np.asarray(annotations['file_name'])
    save_pickle(file_names, './data/%s/%s.file.names.pkl' % (split, split))

    image_idxs = _build_image_idxs(annotations)
    save_pickle(image_idxs, './data/%s/%s.image.idxs.pkl' % (split, split))

    vgg_model_path = './data/imagenet-vgg-verydeep-19.mat'
    # extract conv5_3 feature vectors
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    # batch size for extracting feature vectors from vggnet.
    batch_size = 80
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        anno_path = './data/%s/%s.annotations.pkl' % (split, split)
        annotations = load_pickle(anno_path)

        image_path = list(annotations['file_name'].unique())
        save_path = './data/%s/%s.features.hkl' % (split, split)

        n_examples = len(image_path)
        all_feats = np.ndarray([n_examples, 196, 512], dtype=np.float32)
        for start, end in zip(range(0, n_examples, batch_size),
                              range(batch_size, n_examples + batch_size, batch_size)):
            end = min(end, n_examples)
            image_batch_file = image_path[start:end]
            image_batch = np.array(list(map(lambda x: ndimage.imread(x, mode='RGB'), image_batch_file))).astype(
                np.float32)
            # print(start,end)
            feats = sess.run(vggnet.features, feed_dict={vggnet.images: image_batch})
            all_feats[start:end, :] = feats
            print("Processed %d %s features.." % (end, split))

        # use hickle to save huge feature vectors
        hickle.dump(all_feats, save_path)
        print("Saved %s.." % (save_path))
Example #5
def get_val(filenames):
    # batch size for extracting feature vectors from vggnet.
    batch_size = 2
    # vgg model path
    vgg_model_path = './data/imagenet-vgg-verydeep-19.mat'

    # extract conv5_3 feature vectors
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        image_batch = np.array(
            map(lambda x: ndimage.imread(x, mode='RGB'),
                filenames)).astype(np.float32)
        feats = sess.run(vggnet.features,
                         feed_dict={vggnet.images: image_batch})
    data = {}
    data['filenames'] = filenames
    data['features'] = feats
    return data
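A hedged usage sketch of get_val, assuming two already-resized 224x224 RGB images on disk (the file names below are placeholders). Note that the snippet above is written against the pre-1.0 TensorFlow API (tf.initialize_all_variables) and Python 2 map semantics:

filenames = ['./image/val2014_resized/example_1.jpg',
             './image/val2014_resized/example_2.jpg']
val_data = get_val(filenames)
print(val_data['features'].shape)  # expected: (2, 196, 512) conv5_3 features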
Example #6
def main():
    # batch size for extracting feature vectors from vggnet.
    batch_size = 500
    # maximum caption length (number of words); captions longer than max_length are discarded.
    max_length = 100
    # words occurring fewer than word_count_threshold times in the training set are mapped to the unknown token.
    word_count_threshold = 1
    ## Data Path

    data_path = './data/Sample1/'
    # # vgg model path
    vgg_model_path = './data/imagenet-vgg-verydeep-19.mat'
    #
    caption_file = 'data/annotations/captions_train2014.json'
    image_dir = './image/%2014_resized/'
    #
    train_dataset = _process_caption_data(caption_data=data_path +
                                          'train/train.json',
                                          max_length=max_length)

    test_dataset = _process_caption_data(caption_data=data_path +
                                         'test/test.json',
                                         max_length=max_length)

    print('Finished processing caption data')
    #
    save_pickle(train_dataset, data_path + 'train/train.annotations.pkl')

    save_pickle(test_dataset, data_path + 'test/test.annotations.pkl')

    #
    for split in ['train', 'test']:
        # for split in ['train','val']:
        annotations = load_pickle(data_path + '%s/%s.annotations.pkl' %
                                  (split, split))

        if split == 'train':
            word_to_idx, process_vocab, max_l = _build_vocab(
                annotations=annotations, threshold=word_count_threshold)
            save_pickle(word_to_idx, data_path + '%s/word_to_idx.pkl' % split)
            save_pickle(process_vocab,
                        data_path + '%s/process-vocab.pkl' % split)
        #
        captions = _build_caption_vector(annotations=annotations,
                                         word_to_idx=word_to_idx,
                                         max_length=max_l)
        save_pickle(captions,
                    data_path + '%s/%s.captions.pkl' % (split, split))
        #
        file_names, id_to_idx = _build_file_names(annotations)
        save_pickle(file_names,
                    data_path + '%s/%s.file.names.pkl' % (split, split))

        image_idxs = _build_image_idxs(annotations, id_to_idx)
        save_pickle(image_idxs,
                    data_path + '%s/%s.image.idxs.pkl' % (split, split))
        #
        # prepare reference captions to compute bleu scores later
        image_ids = {}
        feature_to_captions = {}
        i = -1
        for caption, image_id in zip(annotations['caption'],
                                     annotations['report_id']):
            if not image_id in image_ids:
                image_ids[image_id] = 0
                i += 1
                feature_to_captions[i] = []
            feature_to_captions[i].append(caption.lower() + ' .')
        save_pickle(feature_to_captions,
                    data_path + '%s/%s.references.pkl' % (split, split))
        print("Finished building %s caption dataset" % split)

    # extract conv5_3 feature vectors
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    #model=pretrained_model()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        for split in ['train', 'test']:
            anno_path = data_path + '%s/%s.annotations.pkl' % (split, split)
            save_path = data_path + '%s/%s.features.hkl' % (split, split)
            annotations = load_pickle(anno_path)
            image_path = annotations['images']
            n_examples = len(image_path)
            # ndarray to store image features from two images together
            all_feats = np.ndarray([n_examples, 196, 1024], dtype=np.float32)
            #all_feats = np.ndarray([n_examples, 196, 512], dtype=np.float32)
            i = 0

            # for start, end in zip(range(0, n_examples, batch_size),
            for image_record in list(image_path):
                print(type(image_record))
                print(len(image_record))
                j = 0
                comb_map = np.ndarray([len(image_record), 196, 512],
                                      dtype=np.float32)
                for image in image_record:
                    #                     range(batch_size, n_examples + batch_size, batch_size)):
                    # image_batch_file = image_path[start:end]
                    image_batch_file = image
                    print(image_batch_file)
                    # # image_batch = np.array(map(lambda x: ndimage.imread(x+'.png', mode='RGB'), image_batch_file)).astype(np.float32)
                    # # image_batch = np.array(list(map(lambda x: ndimage.imread(x, mode='RGB'), image_batch_file))).astype(np.float32)
                    image_batch = np.expand_dims(np.array(
                        ndimage.imread(image, mode='RGB').astype(np.float32)),
                                                 axis=0)
                    # # print("shape:", image_batch.shape)
                    feats = sess.run(vggnet.features,
                                     feed_dict={vggnet.images: image_batch})
                    #feats = extract_features(model,image_batch_file)
                    #feats=featureVec_image(image_batch_file,model)
                    # feats=Main_hyper(image_batch)
                    # all_feats[start:end, :] = feats
                    comb_map[j, :] = feats
                    j += 1
                new_map = merge_feature_maps(comb_map)
                all_feats[i, :] = new_map
                #all_feats[i, :] = comb_map[0,:]
                i += 1
                # print ("Processed %d %s features.." % (end, split))
                print("Process %d %s features" % (i, split))
                # hickle.dump(all_feats, save_path)

            # use hickle to save huge feature vectors
            hickle.dump(all_feats, save_path)
            print("Saved %s.." % (save_path))
    def train(self):
        # train/val dataset
        n_examples = self.data['captions'].shape[0]
        n_iters_per_epoch = int(np.ceil(float(n_examples)/self.batch_size))
        # features = self.data['features']
        captions = self.data['captions']
        image_idxs = self.data['image_idxs']
        train_file_names=self.data['file_names']
        # val_features = self.val_data['features']
        val_file_names=self.val_data['file_names']
        n_iters_val = int(np.ceil(float(val_file_names.shape[0])/self.batch_size))

        # build graphs for training model and sampling captions
        loss = self.model.build_model()
        # This is my addition: build VGG-19 here so features can be extracted on the fly
        vgg_model_path = '/mnt/zye/show-attend-and-tell/data/imagenet-vgg-verydeep-19.mat'
        vggnet = Vgg19(vgg_model_path)
        vggnet.build()
        ###################

        # train op
        with tf.name_scope('optimizer'):
            optimizer = self.optimizer(learning_rate=self.learning_rate)
            grads = tf.gradients(loss, tf.trainable_variables())
            grads_and_vars = list(zip(grads, tf.trainable_variables()))
            train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

        tf.get_variable_scope().reuse_variables()
        _, _, generated_captions = self.model.build_sampler(max_len=20)
           
        # summary op   
        tf.summary.scalar('batch_loss', loss)
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)
        for grad, var in grads_and_vars:
            tf.summary.histogram(var.op.name+'/gradient', grad)
        
        summary_op = tf.summary.merge_all()

        print "The number of epoch: %d" %self.n_epochs
        print "Data size: %d" %n_examples
        print "Batch size: %d" %self.batch_size
        print "Iterations per epoch: %d" %n_iters_per_epoch
        
        config = tf.ConfigProto(allow_soft_placement = True)
        #config.gpu_options.per_process_gpu_memory_fraction=0.9
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            sess.run(tf.variables_initializer(tf.global_variables()+tf.get_collection('Vgg19')))
            summary_writer = tf.summary.FileWriter(self.log_path, graph=tf.get_default_graph())
            saver = tf.train.Saver(tf.global_variables(),max_to_keep=5)

            if self.pretrained_model is not None:
                print "Start training with pretrained Model.."
                saver.restore(sess, self.pretrained_model)

            prev_loss = -1
            curr_loss = 0
            start_t = time.time()

            for e in range(self.n_epochs):
                rand_idxs = np.random.permutation(n_examples)
                captions = captions[rand_idxs]
                image_idxs = image_idxs[rand_idxs]

                for i in range(n_iters_per_epoch):
                    captions_batch = captions[i*self.batch_size:(i+1)*self.batch_size]
                    image_idxs_batch = image_idxs[i*self.batch_size:(i+1)*self.batch_size]
                    ####### This is my addition ########
                    image_batch_file=list(train_file_names[image_idxs_batch])
                    image_batch = np.array(map(lambda x: ndimage.imread(x, mode='RGB'), image_batch_file)).astype(
                        np.float32)
                    features_batch= sess.run(vggnet.features, feed_dict={vggnet.images: image_batch})
                    #################################

                    # features_batch = features[image_idxs_batch]
                    feed_dict = {self.model.features: features_batch, self.model.captions: captions_batch}
                    _, l = sess.run([train_op, loss], feed_dict)
                    curr_loss += l

                    # write summary for tensorboard visualization
                    if i % 10 == 0:
                        summary = sess.run(summary_op, feed_dict)
                        summary_writer.add_summary(summary, e*n_iters_per_epoch + i)

                    if (i+1) % self.print_every == 0:
                        print "\nTrain loss at epoch %d & iteration %d (mini-batch): %.5f" %(e+1, i+1, l)
                        ground_truths = captions[image_idxs == image_idxs_batch[0]]
                        decoded = decode_captions(ground_truths, self.model.idx_to_word)
                        for j, gt in enumerate(decoded):
                            print "Ground truth %d: %s" %(j+1, gt)                    
                        gen_caps = sess.run(generated_captions, feed_dict)
                        decoded = decode_captions(gen_caps, self.model.idx_to_word)
                        print "Generated caption: %s\n" %decoded[0]

                print "Previous epoch loss: ", prev_loss
                print "Current epoch loss: ", curr_loss
                print "Elapsed time: ", time.time() - start_t
                prev_loss = curr_loss
                curr_loss = 0
                
                # print out BLEU scores and file write
                if self.print_bleu:
                    all_gen_cap = np.ndarray((val_file_names.shape[0], 20))
                    for i in range(n_iters_val):
                        ######## This is my addition ########
                        image_batch_file = list(val_file_names[i*self.batch_size:(i + 1)*self.batch_size])
                        image_batch = np.array(map(lambda x: ndimage.imread(x, mode='RGB'), image_batch_file)).astype(
                            np.float32)
                        features_batch = sess.run(vggnet.features, feed_dict={vggnet.images: image_batch})
                        ###################################
                        # features_batch = val_features[i*self.batch_size:(i+1)*self.batch_size]
                        feed_dict = {self.model.features: features_batch}
                        gen_cap = sess.run(generated_captions, feed_dict=feed_dict)  
                        all_gen_cap[i*self.batch_size:(i+1)*self.batch_size] = gen_cap
                    
                    all_decoded = decode_captions(all_gen_cap, self.model.idx_to_word)
                    save_pickle(all_decoded, "./data/val/val.candidate.captions.pkl")
                    scores = evaluate(data_path='./data', split='val', get_scores=True)
                    write_bleu(scores=scores, path=self.model_path, epoch=e)

                # save model's parameters
                if (e+1) % self.save_every == 0:
                    saver.save(sess, os.path.join(self.model_path, 'model'), global_step=e+1)
                    print "model-%s saved." %(e+1)
    def test(self,split='test', attention_visualization=True, save_sampled_captions=True):
        '''
        Args:
            - data: dictionary with the following keys:
            # - features: Feature vectors of shape (5000, 196, 512)
            - file_names: Image file names of shape (5000, )
            - captions: Captions of shape (24210, 17) 
            - image_idxs: Indices for mapping caption to image of shape (24210, ) 
            - features_to_captions: Mapping feature to captions (5000, 4~5)
            - split: 'train', 'val' or 'test'
            - attention_visualization: If True, visualize attention weights with images for each sampled word (IPython notebook).
            - save_sampled_captions: If True, save sampled captions to pkl file for computing BLEU scores.
        '''

        # features = data['features']

        # build a graph to sample captions
        alphas, betas, sampled_captions = self.model.build_sampler(max_len=20)    # (N, max_len, L), (N, max_len)
        # This is my addition
        vgg_model_path = '/mnt/zye/show-attend-and-tell/data/imagenet-vgg-verydeep-19.mat'
        vggnet = Vgg19(vgg_model_path)
        vggnet.build()
        ###################
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            # sess.run(tf.global_variables_initializer())
            sess.run(tf.variables_initializer(tf.global_variables() + tf.get_collection('Vgg19')))

            saver = tf.train.Saver(tf.global_variables())
            # ckpt=tf.train.get_checkpoint_state(self.test_model)
            # print ckpt
            saver.restore(sess, self.test_model)
            print 'success'

            data_size=self.test_data['file_names'].shape[0]
            mask = np.random.choice(data_size, self.batch_size)
            image_files = self.test_data['file_names'][mask]
            image_batch = np.array(map(lambda x: ndimage.imread(x, mode='RGB'),list(image_files))).astype(
                np.float32)
            features_batch = sess.run(vggnet.features, feed_dict={vggnet.images: image_batch})
            # features_batch, image_files = sample_coco_minibatch(data, self.batch_size)
            feed_dict = { self.model.features: features_batch }
            alps, bts, sam_cap = sess.run([alphas, betas, sampled_captions], feed_dict)  # (N, max_len, L), (N, max_len)
            decoded = decode_captions(sam_cap, self.model.idx_to_word)

            if attention_visualization:
                for n in range(10):
                    print "Sampled Caption: %s" %decoded[n]

                    # Plot original image
                    img = ndimage.imread(image_files[n])
                    plt.subplot(4, 5, 1)
                    plt.imshow(img)
                    plt.axis('off')

                    # Plot images with attention weights 
                    words = decoded[n].split(" ")
                    for t in range(len(words)):
                        if t > 18:
                            break
                        plt.subplot(4, 5, t+2)
                        plt.text(0, 1, '%s(%.2f)'%(words[t], bts[n,t]) , color='black', backgroundcolor='white', fontsize=8)
                        plt.imshow(img)
                        alp_curr = alps[n,t,:].reshape(14,14)
                        alp_img = skimage.transform.pyramid_expand(alp_curr, upscale=16, sigma=20)
                        plt.imshow(alp_img, alpha=0.85)
                        plt.axis('off')
                    plt.show()

            if save_sampled_captions:
                all_sam_cap = np.ndarray((data_size, 20))
                num_iter = int(np.ceil(float(data_size) / self.batch_size))
                for i in range(num_iter):
                    image_batch_file = list(self.test_data['file_names'][i * self.batch_size:(i + 1) * self.batch_size])
                    image_batch = np.array(map(lambda x: ndimage.imread(x, mode='RGB'), image_batch_file)).astype(
                        np.float32)
                    features_batch = sess.run(vggnet.features, feed_dict={vggnet.images: image_batch})
                    # features_batch = features[i*self.batch_size:(i+1)*self.batch_size]
                    feed_dict = { self.model.features: features_batch }
                    all_sam_cap[i*self.batch_size:(i+1)*self.batch_size] = sess.run(sampled_captions, feed_dict)  
                all_decoded = decode_captions(all_sam_cap, self.model.idx_to_word)
                save_pickle(all_decoded, "./data/%s/%s.candidate.captions.pkl" %(split,split))
def main():
    PATH = os.getcwd()
    vgg_model_path = PATH + '/data/imagenet-vgg-verydeep-19.mat'
    num_of_image_per_video = 17
    type = ['train', 'val', 'test']
    # TIME = str(datetime.now())
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        for each in type:

            # settle down the paths
            path = PATH + '/data/data_set/' + each + '/'
            save_path_feats = path + 'features_' + each + '.hkl'
            save_path_labels_all = path + 'labels_all_' + each + '.hkl'

            # load video_filenames and labels
            video_filename = load_pickle(path + 'video_filenames_' + each +
                                         '.pkl')
            labels = load_pickle(path + 'labels_' + each + '.pkl')

            # gather the whole data in the current type
            all_feats = np.ndarray(
                [len(video_filename), num_of_image_per_video, 196, 512],
                dtype=np.float32)
            all_labels = [None] * len(video_filename)

            # feature extraction
            for idx, vf in enumerate(video_filename):
                images_list = sorted(list(os.walk(vf))[0][-1], cmp=comp)
                print('Processed ' + str(idx + 1) + ' videos..')

                # # generate images_path
                cur_images_path = [vf + '/' + image for image in images_list]
                step = int(
                    float(len(images_list)) / float(num_of_image_per_video))
                print(step)

                # Supplement
                if step == 0:
                    cur_images_path += [cur_images_path[-1]] * (
                        num_of_image_per_video - len(cur_images_path))

                # do not jump
                if step == 1:
                    # cut from the middle
                    start_num = np.floor(
                        float(len(images_list) - num_of_image_per_video) / 2)
                    start = 1 if start_num == 0 else start_num
                    cur_images_path = cur_images_path[
                        int(start - 1):int(num_of_image_per_video + start - 1)]

                # jump
                if step > 1:
                    # cut by jumping --  start from the bottom of each partition
                    cur_images_path = cur_images_path[step - 1::step]
                    # cut from the middle again to handle residual frames
                    start_num = np.floor(
                        float(len(cur_images_path) - num_of_image_per_video) /
                        2)
                    start = 1 if start_num == 0 else start_num
                    cur_images_path = cur_images_path[
                        int(start - 1):int(num_of_image_per_video + start - 1)]

                # in case of failure
                if len(cur_images_path) != num_of_image_per_video:
                    print('step: ' + str(step))
                    print('length of original images: ' +
                          str(len(images_list)))
                    print('length of standard: ' + str(num_of_image_per_video))
                    print('length: ' + str(len(cur_images_path)))
                    print('errors occur..')
                    exit()

                cur_labels = labels[idx]

                # read images and extract features
                image_batch = np.array(
                    map(lambda x: ndimage.imread(x, mode='RGB'),
                        cur_images_path)).astype(np.float32)
                feats = sess.run(vggnet.features,
                                 feed_dict={vggnet.images: image_batch})

                all_feats[idx, :] = feats
                all_labels[idx] = [cur_labels] * num_of_image_per_video

            # use hickle to save huge feature vectors
            hickle.dump(all_feats, save_path_feats)
            all_labels = np.array(all_labels)
            hickle.dump(all_labels, save_path_labels_all)
            print("Saved %s.." % save_path_feats)
Example #10
def main():

    start = datetime.now()

    caption_file = 'data/annotations/captions_train2014.json'
    image_dir = 'image/train2014_resized'
    max_length = 15
    word_count_threshold = 1
    vgg_model_path = './data/imagenet-vgg-verydeep-19.mat'
    batch_size = 50

    print '1. Building Top 1K dictionary from Train dataset'

    if not os.path.exists('./data/top1k.pkl'):
        train_dataset = _process_caption_data(caption_file=caption_file,
                                              image_dir=image_dir,
                                              max_length=max_length)
        word_to_idx = _build_vocab(annotations=train_dataset,
                                   threshold=word_count_threshold)
        save_pickle(word_to_idx, './data/word_to_idx.pkl')
        top1k = _build_top1k_noun()
        save_pickle(top1k, './data/top1k.pkl')
    else:
        top1k = load_pickle('./data/top1k.pkl')

    print '2. Download and Process each keywords'

    cur_dir = os.getcwd()
    wnid_idx = 0
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    captions = {}

    if not os.path.exists('./data/imagenet/features/'):
        os.makedirs('./data/imagenet/features/')
    for wnid_idx, (wnid, word) in enumerate(top1k):
        save_path = './data/imagenet/features/%s.list' % wnid
        if not os.path.exists(save_path):
            print ' ----- Processing %s, %s, %d / %d' % (wnid, word, wnid_idx,
                                                         len(top1k))
            print '\tdownloading'
            pre_url = 'http://www.image-net.org/download/synset?wnid='
            post_url = '&username=intuinno&accesskey=6be8155ee3d56b5120241b3bda13412d3cc0cd42&release=latest&src=stanford'
            testfile = urllib.URLopener()
            try:
                testfile.retrieve(pre_url + wnid + post_url, wnid + '.tar')
            except IOError as e:
                print 'Failed to download'
            else:

                original_dir = './data/imagenet/photos/%s/original/' % wnid
                resized_dir = './data/imagenet/photos/%s/resized/' % wnid

                if not os.path.exists(original_dir):
                    os.makedirs(original_dir)
                    os.makedirs(resized_dir)
                    os.rename(wnid + '.tar', original_dir + 'data.tar')
                    os.chdir(original_dir)
                    tar = tarfile.open('data.tar')
                    tar.extractall()
                    tar.close()
                    os.remove('data.tar')
                    os.chdir(cur_dir)
                else:
                    os.remove('%s.tar' % wnid)

                print '\tresizing'
                resized_files = []
                image_files = os.listdir(original_dir)
                for i, image_file in enumerate(image_files):
                    #     from IPython.core.debugger import Tracer; Tracer()()
                    try:
                        image = Image.open(
                            os.path.join(original_dir, image_file))
                    except IOError as e:
                        print 'Error: cannot open %s' % (os.path.join(
                            original_dir, image_file))
                    else:
                        image = resize_image(image)
                        image.save(os.path.join(resized_dir, image_file),
                                   image.format)
                        resized_files.append(image_file)

                image_files = resized_files
                print '\tget vgg19 image features'
                with tf.Session() as sess:
                    tf.initialize_all_variables().run()
                    n_examples = len(image_files)
                    all_feats = np.ndarray([n_examples, 196, 512],
                                           dtype=np.float32)

                    for start, end in zip(
                            range(0, n_examples, batch_size),
                            range(batch_size, n_examples + batch_size,
                                  batch_size)):
                        image_batch_file = image_files[start:end]
                        image_batch = np.array(
                            map(
                                lambda x: ndimage.imread(
                                    os.path.join(resized_dir, x), mode='RGB'),
                                image_batch_file)).astype(np.float32)
                        feats = sess.run(
                            vggnet.features,
                            feed_dict={vggnet.images: image_batch})
                        all_feats[start:end, :] = feats

                save_path = './data/imagenet/features/%s.hkl' % wnid
                hickle.dump(all_feats, save_path)
                save_path = './data/imagenet/features/%s.list' % wnid
                save_pickle(image_files, save_path)

                print "\tSaved %s.." % save_path
Example #11
def main():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # batch size for extracting feature vectors from vggnet.
    batch_size = 100
    # maximum caption length (number of words); captions longer than max_length are discarded.
    max_length = 20
    # words occurring fewer than word_count_threshold times in the training set are mapped to the unknown token.
    word_count_threshold = 1
    # vgg model path 
    vgg_model_path = '/home/jason6582/sfyc/attention-tensorflow/imagenet-vgg-verydeep-19.mat'

    # about 80000 images and 400000 captions for train dataset
    train_dataset = _process_caption_data(caption_file='/home/jason6582/sfyc/attention-tensorflow/nus-wide/lite_train.json',
                                          image_dir='/home/jason6582/sfyc/NUS-WIDE/flickrfeature_resized/',
                                          max_length=max_length)
    # about 40000 images and 200000 captions
    test_dataset = _process_caption_data(caption_file='/home/jason6582/sfyc/attention-tensorflow/nus-wide/lite_test.json',
                                        image_dir='/home/jason6582/sfyc/NUS-WIDE/flickrfeature_resized/',
                                        max_length=max_length)
    # about 4000 images and 20000 captions for val / test dataset
    val_cutoff = int(len(train_dataset)/10)
    val_dataset = train_dataset[:val_cutoff]
    train_dataset = train_dataset[val_cutoff:]
    print 'Finished processing caption data'
    train_cutoff = [0]
    for i in range(15):
        train_cutoff.append(int(len(train_dataset)/16)*(i+1))
    for i in range(15):
        save_pickle(train_dataset[train_cutoff[i]:train_cutoff[i+1]],
                'data/train/train.annotations81_%s.pkl' % str(i))
    save_pickle(train_dataset[train_cutoff[15]:],'data/train/train.annotations81_15.pkl')
    save_pickle(val_dataset, 'data/val/val.annotations81.pkl')
    save_pickle(test_dataset.reset_index(drop=True), 'data/test/test.annotations81.pkl')

    split = 'train'
    word_to_idx = {}
    for part in range(16):
        annotations = load_pickle('./data/%s/%s.annotations81_%s.pkl' % (split, split, str(part)))
        word_to_idx_part = _build_vocab(annotations=annotations, threshold=word_count_threshold)
        for key in word_to_idx_part:
            word_to_idx[key] = 0
    word_list = sorted(word_to_idx.iterkeys())
    for i, word in enumerate(word_list):
        word_to_idx[word] = i
    save_pickle(word_to_idx, './data/%s/word_to_idx81.pkl' % (split))
    for part in range(16):
        annotations = load_pickle('./data/%s/%s.annotations81_%s.pkl' % (split, split, str(part)))
        captions = _build_caption_vector(annotations=annotations, word_to_idx=word_to_idx, max_length=max_length)
        save_pickle(captions, './data/%s/%s.captions81_%s.pkl' % (split, split, str(part)))

        file_names, id_to_idx = _build_file_names(annotations)
        save_pickle(file_names, './data/%s/%s.file.names81_%s.pkl' % (split, split, str(part)))

        image_idxs = _build_image_idxs(annotations, id_to_idx)
        save_pickle(image_idxs, './data/%s/%s.image.idxs81_%s.pkl' % (split, split, str(part)))

        # prepare reference captions to compute bleu scores later
        image_ids = {}
        feature_to_captions = {}
        i = -1
        for caption, image_id in zip(annotations['caption'], annotations['image_id']):
            if not image_id in image_ids:
                image_ids[image_id] = 0
                i += 1
                feature_to_captions[i] = []
            feature_to_captions[i].append(caption.lower() + ' .')
        save_pickle(feature_to_captions, './data/%s/%s.references81_%s.pkl' % (split, split, str(part)))
        print "Finished building %s caption dataset" %split
    
    for split in ['val', 'test']:
        annotations = load_pickle('./data/%s/%s.annotations81.pkl' % (split, split))
        captions = _build_caption_vector(annotations=annotations, word_to_idx=word_to_idx, max_length=max_length)
        save_pickle(captions, './data/%s/%s.captions81.pkl' % (split, split))

        file_names, id_to_idx = _build_file_names(annotations)
        save_pickle(file_names, './data/%s/%s.file.names81.pkl' % (split, split))

        image_idxs = _build_image_idxs(annotations, id_to_idx)
        save_pickle(image_idxs, './data/%s/%s.image.idxs81.pkl' % (split, split))

        # prepare reference captions to compute bleu scores later
        image_ids = {}
        feature_to_captions = {}
        i = -1
        for caption, image_id in zip(annotations['caption'], annotations['image_id']):
            if not image_id in image_ids:
                image_ids[image_id] = 0
                i += 1
                feature_to_captions[i] = []
            feature_to_captions[i].append(caption.lower() + ' .')
        save_pickle(feature_to_captions, './data/%s/%s.references81.pkl' % (split, split))
        print "Finished building %s caption dataset" %split

    # extract conv5_3 feature vectors
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        split = 'train'
        for part in range(16):
            print "part", part, "of %s features" % split
            anno_path = './data/%s/%s.annotations81_%s.pkl' % (split, split, str(part))
            save_path = './data/%s/%s.features81_%s.hkl' % (split, split, str(part))
            annotations = load_pickle(anno_path)
            image_path = list(annotations['file_name'].unique())
            n_examples = len(image_path)

            all_feats = np.ndarray([n_examples, 196, 512], dtype=np.float32)
            # print all_feats.shape
            for start, end in zip(range(0, n_examples, batch_size),
                                range(batch_size, n_examples + batch_size, batch_size)):
                image_batch_file = image_path[start:end]
                image_batch = np.array(map(lambda x: ndimage.imread(x, mode='RGB'), image_batch_file)).astype(np.float32)
                feats = sess.run(vggnet.features, feed_dict={vggnet.images: image_batch})
                all_feats[start:end, :] = feats
                print ("Processed %d %s features.." % (end, split))
            # use hickle to save huge feature vectors
            hickle.dump(all_feats, save_path)
            print ("Saved %s.." % (save_path))
        # for split in ['val', 'test']:
        for split in ['test']:
            anno_path = './data/%s/%s.annotations81.pkl' % (split, split)
            save_path = './data/%s/%s.features81.hkl' % (split, split)
            annotations = load_pickle(anno_path)
            image_path = list(annotations['file_name'].unique())
            n_examples = len(image_path)

            all_feats = np.ndarray([n_examples, 196, 512], dtype=np.float32)
            # print all_feats.shape
            for start, end in zip(range(0, n_examples, batch_size),
                                  range(batch_size, n_examples + batch_size, batch_size)):
                image_batch_file = image_path[start:end]
                image_batch = np.array(map(lambda x: ndimage.imread(x, mode='RGB'), image_batch_file)).astype(
                        np.float32)
                feats = sess.run(vggnet.features, feed_dict={vggnet.images: image_batch})
                all_feats[start:end, :] = feats
                print ("Processed %d %s features.." % (end, split))
                # use hickle to save huge feature vectors
            hickle.dump(all_feats, save_path)
            print ("Saved %s.." % (save_path))
def main():
    PATH = os.getcwd()
    vgg_model_path = PATH + '/data/imagenet-vgg-verydeep-19.mat'
    data_dir = '../Dataset/data/tobii/'
    num_of_image_per_video = 17
    type = ['test']
    # type = ['train', 'val', 'test']
    # TIME = str(datetime.now())
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        for each in type:
            # settle down the paths
            if each == 'train':
                session = '0409-b'
            elif each == 'val':
                session = '0409-c'
            elif each == 'test':
                session = '0409-e'

            img_dir = '%s/frames/' % (data_dir + session)
            label_dir = '%s/label_all.txt' % (data_dir + session)
            path = PATH + '/data/data_set/' + each + '/'

            # # generate images_path
            images_list = natsorted([
                img_dir + file for file in os.listdir(img_dir)
                if file.endswith('.jpg')
            ])
            # cur_images_path = [vf + '/' + image for image in images_list]
            step = int(float(len(images_list)) / float(num_of_image_per_video))
            print(step)
            all_feats = np.ndarray([step, num_of_image_per_video, 196, 512],
                                   dtype=np.float32)

            # read images and extract features
            for i in range(step):
                print('Processing No.' + str(i + 1) + '/%d batch..' % step)
                cur_images_path = images_list[i * 17:i * 17 + 17]
                image_batch = []
                for img_file in cur_images_path:
                    img = image.load_img(img_file, target_size=[224, 224])
                    x = image.img_to_array(img)
                    image_batch.append(x)

                image_batch = np.array(image_batch).astype(np.float32)
                feats = sess.run(vggnet.features,
                                 feed_dict={vggnet.images: image_batch})

                all_feats[i, :] = feats

            label = []
            with open(label_dir, 'r') as f:
                for line in f:
                    line = line.strip().split(',')
                    label.append(line[1])
            label_reshape = np.array(label)
            label_reshape = label_reshape[:step * 17].reshape(step, 17)
            filenames_new = np.array(list(range(step)))
            train_data = {
                'features': all_feats,
                'labels': label_reshape,
                'new_filename': filenames_new
            }
            # use hickle to save huge feature vectors
            with open(each + '_data_vgg' + '.pkl', 'wb') as f:
                pickle.dump(train_data, f)
def main(params):

    batch_size = params['batch_size']
    max_length = params['max_length']
    word_count_threshold = params['word_count_threshold']
    vgg_model_path = params['vgg_model_path']

    splits = ['val', 'test']
    for split in splits:
        annotations = load_pickle('./data/%s/%s.annotations.pkl' %
                                  (split, split))

        if split == 'train':
            word_to_idx = build_word_to_idx(annotations['caption'],
                                            word_count_threshold)
            save_pickle(word_to_idx, './data/%s/word_to_idx.pkl' % split)
            captions = build_caption_vectors(annotations, word_to_idx,
                                             max_length)
            save_pickle(captions, './data/%s/%s.captions.pkl' % (split, split))

        file_names, id_to_idx = get_file_names(annotations)
        save_pickle(file_names, './data/%s/%s.file.names.pkl' % (split, split))

        if split == 'train':
            image_idxs = get_image_idxs(annotations, id_to_idx)
            save_pickle(image_idxs,
                        './data/%s/%s.image.idxs.pkl' % (split, split))

        # Prepare reference captions to compute bleu scores later
        image_ids = {}
        feature_to_captions = {}
        i = -1
        for caption, image_id in zip(annotations['caption'],
                                     annotations['image_id']):
            if not image_id in image_ids:
                image_ids[image_id] = 0
                i += 1
                feature_to_captions[i] = []
            feature_to_captions[i].append(caption.lower() + ' .')
        save_pickle(feature_to_captions,
                    './data/%s/%s.references.pkl' % (split, split))

    # Extract conv5_3 feature vectors
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        for split in splits:
            anno_path = './data/%s/%s.annotations.pkl' % (split, split)
            save_path = './data/%s/%s.features.hkl' % (split, split)
            annotations = load_pickle(anno_path)
            image_list = list(annotations['file_name'].unique())
            if split == 'train':
                image_path = map(
                    lambda x: os.path.join('./image/train2014_resized', str(x)
                                           ), image_list)
            else:
                image_path = map(
                    lambda x: os.path.join('./image/val2014_resized', str(x)),
                    image_list)
            n_examples = len(image_path)

            all_feats = np.ndarray([n_examples, 196, 512], dtype=np.float32)

            for start, end in zip(
                    range(0, n_examples, batch_size),
                    range(batch_size, n_examples + batch_size, batch_size)):
                image_batch_file = image_path[start:end]
                image_batch = np.array(
                    map(lambda x: ndimage.imread(x, mode='RGB'),
                        image_batch_file)).astype(np.float32)

                feats = sess.run(vggnet.features,
                                 feed_dict={vggnet.images: image_batch})
                all_feats[start:end, :] = feats
                print("Processed %d %s features.." % (end, split))

            # Normalize feature vectors
            all_feats = np.reshape(all_feats, [-1, 512])
            mean = np.mean(all_feats, 0)
            var = np.var(all_feats, 0)
            all_feats = (all_feats - mean) / np.sqrt(var)
            all_feats = np.reshape(all_feats, [-1, 196, 512])

            # Use hickle to save huge numpy array
            hickle.dump(all_feats, save_path)
            print("Saved %s.." % (save_path))
Example #14
def main():
    # batch size for extracting feature vectors from vggnet.
    batch_size = 100
    # maximum caption length (number of words); captions longer than max_length are discarded.
    max_length = 15
    # words occurring fewer than word_count_threshold times in the training set are mapped to the unknown token.
    word_count_threshold = 1
    # vgg model path
    vgg_model_path = './data/imagenet-vgg-verydeep-19.mat'

    #path to resized images
    i_fp = './image/2014_resized/'
    #n_images = 67691
    #building dataset
    print 'Start processing caption data'
    train_dataset = get_caption_data(i_fp, max_length)
    print 'Finished processing caption data'

    #train, val, and test --> 70, 15, and 15
    train_cutoff = int(0.70 * len(train_dataset))
    val_cutoff = int(0.85 * len(train_dataset))

    #path to data directory
    d_fp = './data'
    if not os.path.exists(d_fp + '/train'):
        os.makedirs(d_fp + '/train')
    if not os.path.exists(d_fp + '/val'):
        os.makedirs(d_fp + '/val')
    if not os.path.exists(d_fp + '/test'):
        os.makedirs(d_fp + '/test')

    save_pickle(train_dataset[:train_cutoff],
                d_fp + '/train/train.annotations.pkl')
    save_pickle(train_dataset[train_cutoff:val_cutoff].reset_index(drop=True),
                d_fp + '/val/val.annotations.pkl')
    save_pickle(train_dataset[val_cutoff:].reset_index(drop=True),
                d_fp + '/test/test.annotations.pkl')

    ################# train, val, and test data saved #####################

    for split in ['train', 'val', 'test']:
        annotations = load_pickle(d_fp + '/%s/%s.annotations.pkl' %
                                  (split, split))

        if split == 'train':
            word_to_idx = _build_vocab(annotations=annotations,
                                       threshold=word_count_threshold)
            save_pickle(word_to_idx, d_fp + '/%s/word_to_idx.pkl' % split)

        captions = _build_caption_vector(annotations=annotations,
                                         word_to_idx=word_to_idx,
                                         max_length=max_length)
        save_pickle(captions, d_fp + '/%s/%s.captions.pkl' % (split, split))

        file_names, id_to_idx = _build_file_names(annotations)
        save_pickle(file_names,
                    d_fp + '/%s/%s.file.names.pkl' % (split, split))

        image_idxs = _build_image_idxs(annotations, id_to_idx)
        save_pickle(image_idxs,
                    d_fp + '/%s/%s.image.idxs.pkl' % (split, split))

        # prepare reference captions to compute bleu scores later
        image_ids = {}
        feature_to_captions = {}
        i = -1
        for caption, image_id in zip(annotations['caption'],
                                     annotations['image_id']):
            if not image_id in image_ids:
                image_ids[image_id] = 0
                i += 1
                feature_to_captions[i] = []
            feature_to_captions[i].append(caption.lower() + ' .')
        save_pickle(feature_to_captions,
                    d_fp + '/%s/%s.references.pkl' % (split, split))
        print "Finished building %s caption dataset" % split

    #extract conv5_3 feature vectors
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        for split in ['train', 'val', 'test']:
            anno_path = d_fp + '/%s/%s.annotations.pkl' % (split, split)
            save_path = d_fp + '/%s/%s.features.hkl' % (split, split)
            annotations = load_pickle(anno_path)
            image_path = list(annotations['file_name'].unique())
            n_examples = len(image_path)

            all_feats = np.ndarray([n_examples, 196, 512], dtype=np.float32)

            for start, end in zip(
                    range(0, n_examples, batch_size),
                    range(batch_size, n_examples + batch_size, batch_size)):
                print start, '-', end
                image_batch_file = image_path[start:end]
                image_batch = np.array(
                    map(lambda x: ndimage.imread(x, mode='RGB'),
                        image_batch_file)).astype(np.float32)
                feats = sess.run(vggnet.features,
                                 feed_dict={vggnet.images: image_batch})
                all_feats[start:end, :] = feats
                print("Processed %d %s features.." % (end, split))

            # use hickle to save huge feature vectors
            hickle.dump(all_feats, save_path)
            print("Saved %s.." % (save_path))
Example #15
def main():
    # batch size for extracting feature vectors from vggnet.
    batch_size = 100
    # maximum caption length (number of words); captions longer than max_length are discarded.
    max_length = 15
    # words occurring fewer than word_count_threshold times in the training set are mapped to the unknown token.
    word_count_threshold = 1

    train_caption_file = TRAIN_DATA_PATH + '/caption_train_annotations_20170902.json'
    image_dir = TRAIN_DATA_PATH + '/caption_train_images_20170902/'
    val_caption_file = VAL_DATA_PATH + '/caption_validation_annotations_20170910.json'
    val_image_dir = VAL_DATA_PATH + '/caption_validation_images_20170910/'

    train_dataset = _process_caption_data(train_caption_file, image_dir,
                                          max_length)
    val_dataset = _process_caption_data(val_caption_file, val_image_dir,
                                        max_length)
    # init make dirs
    sub_train_split = ['train' + str(i) for i in range(21)]
    split_parts = ['train', 'val', 'test'] + sub_train_split
    for split in split_parts:
        path = 'data/' + split
        if not os.path.exists(path):
            os.makedirs(path)

    save_pickle(train_dataset, 'data/train/train.annotations.pkl')
    save_pickle(val_dataset[:-5 * 4000].reset_index(drop=True),
                'data/val/val.annotations.pkl')
    save_pickle(val_dataset[-5 * 4000:].reset_index(drop=True),
                'data/test/test.annotations.pkl')

    block_size = len(train_dataset) / 21
    for i in range(21):
        save_pickle(
            train_dataset[i * block_size:(i + 1) *
                          block_size].reset_index(drop=True),
            'data/train%d/train%d.annotations.pkl' % (i, i))

    for split in split_parts:
        annotations = load_pickle('./data/%s/%s.annotations.pkl' %
                                  (split, split))

        if split == 'train':
            word_to_idx = _build_vocab(annotations=annotations,
                                       threshold=word_count_threshold)
            save_pickle(word_to_idx, './data/%s/word_to_idx.pkl' % split)

        captions = _build_caption_vector(annotations=annotations,
                                         word_to_idx=word_to_idx,
                                         max_length=max_length)
        save_pickle(captions, './data/%s/%s.captions.pkl' % (split, split))

        file_names, id_to_idx = _build_file_names(annotations)
        save_pickle(file_names, './data/%s/%s.file.names.pkl' % (split, split))

        image_idxs = _build_image_idxs(annotations, id_to_idx)
        save_pickle(image_idxs, './data/%s/%s.image.idxs.pkl' % (split, split))

        # prepare reference captions to compute bleu scores later
        image_ids = set()
        feature_to_captions = {}
        i = -1
        for caption, image_id in zip(annotations['caption'],
                                     annotations['image_id']):
            if not image_id in image_ids:
                image_ids.add(image_id)
                i += 1
                feature_to_captions[i] = []
            feature_to_captions[i].append(caption + ' .')
        save_pickle(feature_to_captions,
                    './data/%s/%s.references.pkl' % (split, split))
        print "Finished building %s caption dataset" % split

    # extract conv5_3 feature vectors
    init_op = tf.initialize_all_variables()
    sess = tf.Session()
    sess.run(init_op)
    tf.reset_default_graph()
    vggnet = Vgg19(VGG_MODEL_PATH)
    vggnet.build()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        for split in split_parts[1:]:
            anno_path = './data/%s/%s.annotations.pkl' % (split, split)
            save_path = './data/%s/%s.features.hkl' % (split, split)
            annotations = load_pickle(anno_path)
            image_path = list(annotations['image_file_name'].unique())
            n_examples = len(image_path)

            all_feats = np.ndarray([n_examples, 196, 512], dtype=np.float32)

            for start, end in zip(
                    range(0, n_examples, batch_size),
                    range(batch_size, n_examples + batch_size, batch_size)):
                image_batch_file = image_path[start:end]
                image_batch = np.array(
                    map(lambda x: np.array(resize_image(Image.open(x))),
                        image_batch_file)).astype(np.float32)
                feats = sess.run(vggnet.features,
                                 feed_dict={vggnet.images: image_batch})
                all_feats[start:end, :] = feats
                print("Processed %d %s features.." % (end, split))

            # use hickle to save huge feature vectors
            hickle.dump(all_feats, save_path)
            print("Saved %s.." % (save_path))
Beispiel #16
0
from core.vggnet import Vgg19
import tensorflow as tf
import numpy as np
import json

import moxel
from moxel.space import Image, String, Array


vgg_model_path = './data/imagenet-vgg-verydeep-19.mat'
vggnet = Vgg19(vgg_model_path)
vggnet.build()


def predict(image):
    image.resize((224, 224))
    image_batch = np.array([image.to_numpy()]).astype(np.float32)
    with tf.Session() as sess:
        tf.initialize_all_variables().run()

        feats = sess.run(vggnet.features, feed_dict={vggnet.images: image_batch})
    return {
        # 'feature': String.from_str(str(feats))
        'feature': Array.from_numpy(feats)
    }
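A minimal sketch (not from the original example; the file name is hypothetical) of preparing the same kind of (1, 224, 224, 3) float32 batch with PIL and NumPy alone, without the moxel Image wrapper:

from PIL import Image
import numpy as np

img = Image.open('example.jpg').convert('RGB').resize((224, 224))  # hypothetical local file
image_batch = np.asarray(img, dtype=np.float32)[None, ...]
print(image_batch.shape)  # (1, 224, 224, 3), matching vggnet.images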


Beispiel #17
0
    def test_custom(self, image_dir, vgg_model_path, attention_visualization=True):
        '''
        Args:
            - image_dir: directory containing the .jpg images to caption.
            - vgg_model_path: path to the pretrained VGG-19 weights (.mat file).
            - attention_visualization: If True, visualize attention weights with images for each sampled word. (IPython notebook)
        '''
        # build a graph to sample captions
        alphas, betas, sampled_captions = self.model.build_sampler(max_len=20)    # (N, max_len, L), (N, max_len)
        
        # image files:
        import glob, os
        image_files = []
        for file in glob.glob(image_dir+"*.jpg"):
            image_files.append(file)
        
        # read in image feature
        image_features = []
        imgs = []
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session() as sess:
                vggnet = Vgg19(vgg_model_path)
                vggnet.build()            
                tf.initialize_all_variables().run()
                for image_file in image_files:
                    with open(image_file, 'r+b') as f:
                        with Image.open(f) as image:
                            img = np.asarray(resize_image(image))
                            imgs.append(img)
                image_batch = np.array(imgs)
                image_features = sess.run(vggnet.features, feed_dict={vggnet.images: image_batch})

        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True            
        with tf.Session(config=config) as sess:
            saver = tf.train.Saver()
            saver.restore(sess, self.test_model)
            feed_dict = { self.model.features: image_features }
            alps, bts, sam_cap = sess.run([alphas, betas, sampled_captions], feed_dict)  # (N, max_len, L), (N, max_len)
            decoded = decode_captions(sam_cap, self.model.idx_to_word)
            for i in range(len(imgs)):
                img = imgs[i]
                print( "Sampled Caption: %s" %decoded[i] )
                # Plot original image
                #plt.figure(figsize=(18,9))
                plt.subplot(4, 5, 1)
                plt.imshow(img)
                plt.axis('off')
                if attention_visualization:
                    # Plot images with attention weights
                    words = decoded[i].split(" ")
                    for t in range(len(words)):
                        if t > 18:
                            break
                        plt.subplot(4, 5, t+2)
                        plt.text(0, 1, '%s(%.2f)'%(words[t], bts[i,t]) , color='black', backgroundcolor='white', fontsize=8)
                        plt.imshow(img)
                        alp_curr = alps[i,t,:].reshape(14,14)
                        alp_img = skimage.transform.pyramid_expand(alp_curr, upscale=16, sigma=20)
                        plt.imshow(alp_img, alpha=0.85)
                        plt.axis('off')
                    plt.show()
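The attention overlay above reshapes each 196-dimensional alpha vector to a 14x14 grid and upsamples it to the image size; a small standalone sketch of that step (the attention vector here is random and purely hypothetical):

import numpy as np
import skimage.transform

alpha = np.random.rand(196)          # hypothetical attention weights over the conv5_3 locations
alpha /= alpha.sum()
alp_img = skimage.transform.pyramid_expand(alpha.reshape(14, 14), upscale=16, sigma=20)
print(alp_img.shape)                 # (224, 224), the size of the resized input image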
Beispiel #18
0
from collections import Counter
from core.vggnet import Vgg19
from core.utils import *

import tensorflow as tf
import numpy as np
import pandas as pd
import hickle
import os
import json
import jieba



batch_size = 50
vggnet = Vgg19('./ai.challenger/data/imagenet-vgg-verydeep-19.mat')
vggnet.build()
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

#with tf.Session(config = config) as sess:
#    init = tf.global_variables_initializer()
#    sess.run(init)
#    for split in ['train']:
#        anno_path = './data/%s/%s.annotations.pkl' % (split, split)
#        
#        annotations = load_pickle(anno_path)
#        image_path = list(annotations['file_name'].unique())
#        n_examples = len(image_path)
#        
Beispiel #19
0
    def __init__(self,
                 sess,
                 word_to_idx,
                 dim_embed=512,
                 dim_hidden=1024,
                 n_time_step=16,
                 prev2out=True,
                 ctx2out=True,
                 emo2out=True,
                 alpha_c=0.0,
                 selector=True,
                 dropout=True,
                 update_rule='adam',
                 learning_rate=None,
                 vgg_model_path='./data/imagenet-vgg-verydeep-19.mat',
                 features_extractor='vgg',
                 pretrained_model=None):
        """
        Args:
            word_to_idx: word-to-index mapping dictionary.
            dim_feature: (optional) Dimension of vggnet19 conv5_3 feature vectors.
            dim_embed: (optional) Dimension of word embedding.
            dim_hidden: (optional) Dimension of all hidden state.
            n_time_step: (optional) Time step size of LSTM.
            prev2out: (optional) previously generated word to hidden state. (see Eq (7) for explanation)
            ctx2out: (optional) context to hidden state (see Eq (7) for explanation)
            alpha_c: (optional) Doubly stochastic regularization coefficient. (see Section (4.2.1) for explanation)
            selector: (optional) gating scalar for context vector. (see Section (4.2.1) for explanation)
            dropout: (optional) If true then dropout layer is added.
        """

        self.word_to_idx = word_to_idx
        self.idx_to_word = {i: w for w, i in word_to_idx.items()}
        self.prev2out = prev2out
        self.ctx2out = ctx2out
        self.emo2out = emo2out
        self.alpha_c = alpha_c
        self.selector = selector
        self.dropout = dropout
        self.V = len(word_to_idx)

        self.M = dim_embed
        self.H = dim_hidden
        self.T = n_time_step + 1
        self._start = word_to_idx['<START>']
        self._null = word_to_idx['<NULL>']

        self.weight_initializer = tf.contrib.layers.xavier_initializer()
        self.const_initializer = tf.constant_initializer(0.0)
        self.emb_initializer = tf.random_uniform_initializer(minval=-1.0,
                                                             maxval=1.0)

        self.features_extractor = features_extractor
        if features_extractor == 'vgg':
            self.vggnet = Vgg19(vgg_model_path)
            dim_feature = [196, 512]
        elif features_extractor == 'resnet':
            self.resnet152 = ResNetFeatureExtractor(
                M.resnet152(pretrained=True).to(device), feat_layer="res5c")
            dim_feature = [49, 2048]

        self.L = dim_feature[0]
        self.D = dim_feature[1]

        # Placeholders
        self.features = tf.placeholder(tf.float32, [None, self.L, self.D])
        self.captions = tf.placeholder(tf.int32, [None, self.T + 1])
        self.emotions = tf.placeholder(tf.float32, [None, 3])
        self.rewards = tf.placeholder(
            tf.float32,
            shape=[None, self.T])  # get from rollout policy and discriminator
        self.mode_learning = tf.placeholder(tf.int32)

        # Build graphs for training model and sampling captions
        with tf.variable_scope(tf.get_variable_scope()):
            self.loss = self.build_model()
            tf.get_variable_scope().reuse_variables()
            _, _, self.generated_captions = self.build_sampler()

        # ---set an optimizer by update rule
        if update_rule == 'adam':
            self.optimizer = tf.train.AdamOptimizer
        elif update_rule == 'momentum':
            self.optimizer = tf.train.MomentumOptimizer
        elif update_rule == 'rmsprop':
            self.optimizer = tf.train.RMSPropOptimizer

        # ---train op
        if learning_rate:
            with tf.variable_scope(tf.get_variable_scope(), reuse=False):
                optimizer = self.optimizer(learning_rate=learning_rate)
                grads = tf.gradients(self.loss, tf.trainable_variables())
                self.grads_and_vars = list(zip(grads,
                                               tf.trainable_variables()))
                self.train_op = optimizer.apply_gradients(
                    grads_and_vars=self.grads_and_vars)

        # ---init
        self.prev_loss = -1

        self.sess = sess
        # ---load pretrained model
        self.saver = tf.train.Saver(max_to_keep=40)
        if pretrained_model is not None:
            print("Pretrained generator loaded")
            self.saver.restore(sess=self.sess,
                               save_path=os.path.join(pretrained_model,
                                                      'model.ckpt'))
        initialize_uninitialized(self.sess)
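A minimal, self-contained sketch (not from the original class) of the same update-rule dispatch and apply_gradients pattern, using a toy scalar loss and the TF 1.x API seen in the snippet above:

import tensorflow as tf

x = tf.Variable(3.0)
loss = tf.square(x)

update_rule = 'adam'
if update_rule == 'adam':
    optimizer_cls = tf.train.AdamOptimizer
elif update_rule == 'momentum':
    optimizer_cls = tf.train.MomentumOptimizer
elif update_rule == 'rmsprop':
    optimizer_cls = tf.train.RMSPropOptimizer

optimizer = optimizer_cls(learning_rate=0.1)
grads = tf.gradients(loss, tf.trainable_variables())
grads_and_vars = list(zip(grads, tf.trainable_variables()))
train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(5):
        sess.run(train_op)
    print(sess.run(loss))  # loss decreases from the initial 9.0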
Beispiel #20
0
def main():
    # batch size for extracting feature vectors from vggnet.
    batch_size = 100
    # maximum caption length (number of words); captions longer than max_length are discarded.
    max_length = 15
    # words that occur fewer than word_count_threshold times in the training set are mapped to the special unknown token.
    word_count_threshold = 1
    # vgg model path
    vgg_model_path = './data/imagenet-vgg-verydeep-19.mat'

    caption_file = 'data/annotations/captions_train2014.json'
    image_dir = 'image/%s2014_resized/'

    # about 80000 images and 400000 captions for train dataset
    train_dataset = _process_caption_data(
        caption_file='data/annotations/captions_train2014.json',
        image_dir='image/train2014_resized/',
        max_length=max_length)

    # about 40000 images and 200000 captions
    val_dataset = _process_caption_data(
        caption_file='data/annotations/captions_val2014.json',
        image_dir='image/val2014_resized/',
        max_length=max_length)

    # about 4000 images and 20000 captions for val / test dataset
    val_cutoff = int(0.1 * len(val_dataset))
    test_cutoff = int(0.2 * len(val_dataset))
    print('Finished processing caption data')

    save_pickle(train_dataset, 'data/train/train.annotations.pkl')
    save_pickle(val_dataset[:val_cutoff], 'data/val/val.annotations.pkl')
    save_pickle(val_dataset[val_cutoff:test_cutoff].reset_index(drop=True),
                'data/test/test.annotations.pkl')

    for split in ['train', 'val', 'test']:
        annotations = load_pickle('./data/%s/%s.annotations.pkl' %
                                  (split, split))

        if split == 'train':
            word_to_idx = _build_vocab(annotations=annotations,
                                       threshold=word_count_threshold)
            save_pickle(word_to_idx, './data/%s/word_to_idx.pkl' % split)

        captions = _build_caption_vector(annotations=annotations,
                                         word_to_idx=word_to_idx,
                                         max_length=max_length)
        save_pickle(captions, './data/%s/%s.captions.pkl' % (split, split))

        file_names, id_to_idx = _build_file_names(annotations)
        save_pickle(file_names, './data/%s/%s.file.names.pkl' % (split, split))

        image_idxs = _build_image_idxs(annotations, id_to_idx)
        save_pickle(image_idxs, './data/%s/%s.image.idxs.pkl' % (split, split))

        # prepare reference captions to compute bleu scores later
        image_ids = {}
        feature_to_captions = {}
        i = -1
        for caption, image_id in zip(annotations['caption'],
                                     annotations['image_id']):
            if image_id not in image_ids:
                image_ids[image_id] = 0
                i += 1
                feature_to_captions[i] = []
            feature_to_captions[i].append(caption.lower() + ' .')
        save_pickle(feature_to_captions,
                    './data/%s/%s.references.pkl' % (split, split))
        print("Finished building %s caption dataset" % split)

    # extract conv5_3 feature vectors
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        for split in ['train', 'val', 'test']:
            anno_path = './data/%s/%s.annotations.pkl' % (split, split)
            save_path = './data/%s/%s.features.hkl' % (split, split)
            annotations = load_pickle(anno_path)
            image_path = list(annotations['file_name'].unique())
            n_examples = len(image_path)

            all_feats = np.ndarray([n_examples, 196, 512], dtype=np.float32)

            for start, end in zip(
                    range(0, n_examples, batch_size),
                    range(batch_size, n_examples + batch_size, batch_size)):
                image_batch_file = image_path[start:end]
                # list comprehension instead of map() so np.array receives concrete image arrays
                image_batch = np.array(
                    [ndimage.imread(x, mode='RGB')
                     for x in image_batch_file]).astype(np.float32)
                feats = sess.run(vggnet.features,
                                 feed_dict={vggnet.images: image_batch})
                all_feats[start:end, :] = feats
                print("Processed %d %s features.." % (end, split))

            # use hickle to save huge feature vectors
            hickle.dump(all_feats, save_path)
            print("Saved %s.." % (save_path))
Beispiel #21
0
def get_features():
    vgg_model_path = './data/imagenet-vgg-verydeep-19.mat'
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    img_path = r"C:\Users\song\Desktop\511project\show-attend-and-tell-tensorflow\image_data_to_be_labeled\Object_feature\JPEGImages"
    resized_img_path = r'C:\Users\song\Desktop\511project\show-attend-and-tell-tensorflow\image_data_to_be_labeled\Object_feature\resized'
    for image in os.listdir(img_path):

        pil_im = Image.open(os.path.join(img_path, image))
        size = 224, 224
        pil_im = pil_im.resize(size)
        pil_im.save(os.path.join(resized_img_path, image))

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        image_batch_file = []

        for image in os.listdir(
                r"C:\Users\song\Desktop\511project\show-attend-and-tell-tensorflow\image_data_to_be_labeled\Object_feature\resized"
        ):

            image_batch_file.append(
                os.path.join(
                    r".\image_data_to_be_labeled\Object_feature\resized",
                    image.rstrip('\n')))
        print(len(image_batch_file))
        # for image in image_batch_file:
        #     img = Image.open(image)
        #     img = img.resize((224,224))
        #     img.save(os.path.join(r".\image_data_to_be_labeled\resized_image",os.path.basename(image)))
        f = open(
            r'C:\Users\song\Desktop\511project\show-attend-and-tell-tensorflow\image_data_to_be_labeled\Object_feature\train_list.txt',
            'w')
        f.close()
        # f = open(r'C:\Users\song\Desktop\511project\show-attend-and-tell-tensorflow\image_data_to_be_labeled\test_list.txt','w')
        # f.close()
        train_batch_file = image_batch_file.copy()
        with open(
                r'C:\Users\song\Desktop\511project\show-attend-and-tell-tensorflow\image_data_to_be_labeled\Object_feature\train_list.txt',
                'a') as f:
            for train in train_batch_file:
                f.write(train + '\n')
        #
        # test_batch_file = image_batch_file[220:]
        # with open(r'C:\Users\song\Desktop\511project\show-attend-and-tell-tensorflow\image_data_to_be_labeled\test_list.txt','a') as f:
        #     for test in test_batch_file:
        #         f.write(test+'\n')

        train_feats = np.ndarray([len(train_batch_file), 196, 512],
                                 dtype=np.float32)
        # test_feats = np.ndarray([len(test_batch_file), 196, 512], dtype=np.float32)
        train_batch = []
        # test_batch = []
        for image in train_batch_file:
            image_read = ndimage.imread(image, mode='RGB').astype(np.float32)
            train_batch.append(image_read)

        # for image in test_batch_file:
        #     image_read = ndimage.imread(image, mode='RGB').astype(np.float32)
        #     test_batch.append(image_read)

        train_batch = np.array(train_batch)
        # test_batch = np.array(test_batch)

        print(train_batch.shape)
        # print(test_batch.shape)
        # train_feats = np.ndarray([220, 196, 512], dtype=np.float32)
        # test_feats = np.ndarray([70, 196, 512], dtype=np.float32)
        for i in range(22):
            train_feats[i * 10:(i + 1) * 10] = sess.run(
                vggnet.features,
                feed_dict={vggnet.images: train_batch[i * 10:(i + 1) * 10]})

        # for j in range(7):
        #     test_feats[j*10:(j+1)*10] = sess.run(vggnet.features, feed_dict={vggnet.images: test_batch[j*10:(j+1)*10]})

        print(train_feats.shape)
        # print(test_feats.shape)

    # use hickle to save huge feature vectors
    hickle.dump(
        train_feats,
        r"C:\Users\song\Desktop\511project\show-attend-and-tell-tensorflow\image_data_to_be_labeled\Object_feature\our_data\train.features.hkl"
    )