def generate_features(self, out_file, feat_extractor, transform_type, batch_size=512):
    """Extract CNN features for every image in train+val+test and save to disk.

    Args:
        out_file: path passed to ``torch.save``; receives a dict with keys
            ``'features'`` (N x D float tensor) and ``'files'`` (list of paths).
        feat_extractor: feature network, or ``None`` to use an ImageNet-pretrained
            ResNet-18 with its classification head removed.
        transform_type: forwarded to ``data_utils.imagenet_transform('test', ...)``.
        batch_size: images per forward pass (default 512, the original hard-coded value).
    """
    data = self.train_data + self.val_data + self.test_data
    transform = data_utils.imagenet_transform('test', transform_type)

    if feat_extractor is None:
        feat_extractor = torchvision.models.resnet18(pretrained=True)
        # Replace the fc head with identity so the network emits pooled features.
        feat_extractor.fc = torch.nn.Sequential()
    feat_extractor.eval().cuda()

    image_feats = []
    image_files = []
    # Ceil division so tqdm's total covers the final partial chunk.
    n_chunks = (len(data) + batch_size - 1) // batch_size
    for chunk in tqdm.tqdm(data_utils.chunks(data, batch_size), total=n_chunks):
        # chunk is a sequence of (file, ...) records; zip(*) transposes it.
        # NOTE: zip() returns an iterator on Python 3, so it must be
        # materialized before indexing (the old `zip(*chunk)[0]` raised TypeError).
        files = list(zip(*chunk))[0]
        imgs = list(map(self.loader, files))
        imgs = list(map(transform, imgs))
        # no_grad: inference only — avoids building the autograd graph.
        with torch.no_grad():
            feats = feat_extractor(torch.stack(imgs, 0).cuda())
        image_feats.append(feats.cpu())
        image_files += files

    image_feats = torch.cat(image_feats, 0)
    print('features for %d images generated' % (len(image_files)))
    torch.save({'features': image_feats, 'files': image_files}, out_file)
def train(self, pivot_ids, target_ids, doc_ids, num_epochs, idx_to_word,
          switch_loss_epoch=5, save_every=1, report_every=1, print_topics_every=5):
    """Run the joint word2vec + LDA training loop in the TF1 session.

    Args:
        pivot_ids, target_ids, doc_ids: parallel index arrays forming the
            (pivot word, context word, document) training triples.
        num_epochs: number of full passes over the data.
        idx_to_word: vocabulary lookup used when printing closest words per topic.
        switch_loss_epoch: epoch at which the combined loss switches on.
        save_every: checkpoint/summary frequency in epochs (when save_graph is set).
        report_every: loss-printing frequency in epochs.
        print_topics_every: frequency (epochs) of printing top-k words per topic.
    """
    data_size = len(pivot_ids)
    temp_fraction = self.batch_size * 1.0 / data_size
    self.sesh.run(tf.assign(self.fraction, temp_fraction))

    # Ceil division: one extra iteration covers the final partial batch.
    # (The old formula added np.ceil(data_size % batch_size), i.e. the raw
    # remainder, which inflated the count by up to batch_size-2 iterations.)
    iters_per_epoch = int(np.ceil(data_size / self.batch_size))
    switch_loss_step = iters_per_epoch * switch_loss_epoch
    self.sesh.run(tf.assign(self.switch_loss, switch_loss_step))

    if self.save_graph:
        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(self.logdir + '/', graph=self.sesh.graph)

    # Guard against an empty dataset: these stay None if no batch ever runs,
    # and the save/summary blocks below are skipped instead of raising NameError.
    summary = step = None

    for epoch in range(num_epochs):
        print('\nEPOCH:', epoch + 1)
        for pivot, target, doc in chunks(self.batch_size, pivot_ids,
                                         target_ids, doc_ids):
            feed_dict = {self.x: pivot, self.y: target, self.docs: doc}
            fetches = [self.merged, self.optimizer, self.loss,
                       self.loss_word2vec, self.loss_lda, self.step]
            summary, _, l, lw2v, llda, step = self.sesh.run(
                fetches, feed_dict=feed_dict)

        if (epoch + 1) % report_every == 0:
            print('Loss: ', l, 'Word2Vec Loss: ', lw2v, 'LDA loss: ', llda)

        if (epoch + 1) % save_every == 0 and self.save_graph and summary is not None:
            writer.add_summary(summary, step)
            writer.flush()
            writer.close()
            save_path = saver.save(self.sesh, self.logdir + '/model.ckpt')
            # Re-open the writer so later epochs can keep appending summaries.
            writer = tf.summary.FileWriter(self.logdir + '/',
                                           graph=self.sesh.graph)

        if epoch > 0 and (epoch + 1) % print_topics_every == 0:
            idxs = np.arange(self.num_topics)
            words, sims = self.get_k_closest(idxs, idx_to_word=idx_to_word, k=10)

    # Final save for runs whose last epoch didn't align with save_every.
    if self.save_graph and (epoch + 1) % save_every != 0 and summary is not None:
        writer.add_summary(summary, step)
        writer.flush()
        writer.close()
        save_path = saver.save(self.sesh, self.logdir + '/model.ckpt')