Example #1
    def generate_features(self, out_file, feat_extractor, transform_type):
        # Requires torch, torchvision, tqdm, and the project-local data_utils
        # module to be imported at module level.
        # Concatenate all splits and use the evaluation-time transform throughout.
        data = self.train_data + self.val_data + self.test_data
        transform = data_utils.imagenet_transform('test', transform_type)

        # Default to an ImageNet-pretrained ResNet-18 whose classification head
        # is replaced by an identity, so the 512-d pooled features are returned.
        if feat_extractor is None:
            feat_extractor = torchvision.models.resnet18(pretrained=True)
            feat_extractor.fc = torch.nn.Sequential()
        feat_extractor.eval().cuda()

        image_feats = []
        image_files = []
        for chunk in tqdm.tqdm(data_utils.chunks(data, 512), total=len(data) // 512):
            # Each chunk entry is a (file, ...) tuple; zip(*chunk) transposes it.
            # list() is required because zip returns an iterator in Python 3.
            files = list(zip(*chunk))[0]
            imgs = list(map(self.loader, files))
            imgs = list(map(transform, imgs))
            with torch.no_grad():
                feats = feat_extractor(torch.stack(imgs, 0).cuda())
            image_feats.append(feats.cpu())
            image_files += files
        image_feats = torch.cat(image_feats, 0)
        print('features for %d images generated' % len(image_files))

        torch.save({'features': image_feats, 'files': image_files}, out_file)
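A minimal usage sketch, assuming `dataset` is an instance of the class that defines `generate_features`, with its splits and image `loader` already set up; the output file name and the `transform_type` value below are illustrative assumptions, not from the original:

    import torch

    # Hypothetical call; passing feat_extractor=None falls back to the
    # default ResNet-18 backbone defined inside the method.
    dataset.generate_features(out_file='features.t7',
                              feat_extractor=None,
                              transform_type='imagenet')

    # The saved dictionary can be reloaded later for downstream training.
    saved = torch.load('features.t7')
    print(saved['features'].shape, len(saved['files']))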
Example #2
    def train(self,
              pivot_ids,
              target_ids,
              doc_ids,
              num_epochs,
              idx_to_word,
              switch_loss_epoch=5,
              save_every=1,
              report_every=1,
              print_topics_every=5):
        # Requires tensorflow (1.x API) as tf, numpy as np, and a
        # project-local chunks() helper that yields aligned minibatches.
        data_size = len(pivot_ids)

        # Fraction of the corpus covered by one minibatch; assigned into the
        # graph (e.g. to scale the per-batch LDA loss).
        temp_fraction = self.batch_size * 1.0 / data_size
        self.sesh.run(tf.assign(self.fraction, temp_fraction))

        # Ceiling division: one extra iteration covers the final partial batch.
        iters_per_epoch = int(np.ceil(data_size / self.batch_size))

        # Train on the word2vec loss alone until this global step, then
        # switch to the combined word2vec + LDA loss.
        switch_loss_step = iters_per_epoch * switch_loss_epoch
        self.sesh.run(tf.assign(self.switch_loss, switch_loss_step))

        if self.save_graph:
            saver = tf.train.Saver()
            writer = tf.summary.FileWriter(self.logdir + '/',
                                           graph=self.sesh.graph)

        for epoch in range(num_epochs):
            print('\nEPOCH:', epoch + 1)

            # Run one optimization step per minibatch; the last batch's
            # summary and losses are kept for the reporting below.
            for pivot, target, doc in chunks(self.batch_size, pivot_ids,
                                             target_ids, doc_ids):
                feed_dict = {self.x: pivot, self.y: target, self.docs: doc}
                fetches = [
                    self.merged, self.optimizer, self.loss, self.loss_word2vec,
                    self.loss_lda, self.step
                ]
                summary, _, l, lw2v, llda, step = self.sesh.run(
                    fetches, feed_dict=feed_dict)

            if (epoch + 1) % report_every == 0:
                print('Loss:', l, 'Word2Vec loss:', lw2v, 'LDA loss:', llda)

            if (epoch + 1) % save_every == 0 and self.save_graph:
                # Flush summaries, checkpoint the model, then reopen the
                # writer so later epochs keep logging to the same directory.
                writer.add_summary(summary, step)
                writer.flush()
                writer.close()
                save_path = saver.save(self.sesh, self.logdir + '/model.ckpt')
                writer = tf.summary.FileWriter(self.logdir + '/',
                                               graph=self.sesh.graph)

            if epoch > 0 and (epoch + 1) % print_topics_every == 0:
                # Look up the k nearest words to every topic vector;
                # words/sims hold the neighbours and their similarities.
                idxs = np.arange(self.num_topics)
                words, sims = self.get_k_closest(idxs,
                                                 idx_to_word=idx_to_word,
                                                 k=10)

        # Final flush and checkpoint in case the last epoch did not land on
        # a save boundary.
        if self.save_graph and (epoch + 1) % save_every != 0:
            writer.add_summary(summary, step)
            writer.flush()
            writer.close()
            save_path = saver.save(self.sesh, self.logdir + '/model.ckpt')
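A minimal usage sketch, assuming `model` is an instance of the class that defines `train`, with its TensorFlow session (`sesh`), graph ops, and hyperparameters already built; the input file names and the toy `idx_to_word` mapping are illustrative assumptions, not from the original:

    import numpy as np

    # Hypothetical skipgram-style inputs: pivot/target word ids plus the id
    # of the document each pair came from, aligned element-wise.
    pivot_ids = np.load('pivot_ids.npy')
    target_ids = np.load('target_ids.npy')
    doc_ids = np.load('doc_ids.npy')
    idx_to_word = {0: 'the', 1: 'model', 2: 'topic'}  # stand-in vocabulary

    model.train(pivot_ids, target_ids, doc_ids,
                num_epochs=20,
                idx_to_word=idx_to_word,
                switch_loss_epoch=5,    # combined loss kicks in after 5 epochs
                save_every=1,
                report_every=1,
                print_topics_every=5)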