def __init__(self, dataset_name):
    """Load and preprocess one dataset and set up the evaluation files.

    Builds the vocabulary and processed captions via ``data.Dataset``,
    caches test-split captions/images for later evaluation, writes the
    test image filenames and first captions to text files, regenerates
    the MSCOCO evaluation toolkit's ground-truth annotations JSON, and
    creates (or resumes from) the shared tab-separated ``results.txt``.

    dataset_name -- key understood by helper_datasources.DataSources
                    (e.g. 'mscoco' -- TODO confirm the accepted values)
    """
    self.dataset_name = dataset_name

    print()
    print('=' * 100)
    print('Starting dataset:', dataset_name)

    # Project helper exposing the train/val/test splits; presumably each
    # split has .caption_groups, .first_captions, .images,
    # .image_filenames and .size -- verify against helper_datasources.
    datasources = helper_datasources.DataSources(dataset_name)
    lib.create_dir(config.base_dir)

    # Full names ('dataset_architecture_run') already present in
    # results.txt; populated below when resuming a previous session so
    # finished experiments can be skipped.
    self.completed = set()

    # Build the vocabulary and index the captions of all three splits.
    self.dataset = data.Dataset(
        min_token_freq=config.min_token_freq,
        training_datasource=datasources.train,
        validation_datasource=datasources.val,
        testing_datasource=datasources.test,
    )
    self.dataset.process()

    print('Num training captions: ', datasources.train.size)
    # prefixes_indexes seems to include one extra column (start token?),
    # hence the -1 -- NOTE(review): confirm against data.Dataset.
    print('Max training caption length:',
          self.dataset.training_proccaps.prefixes_indexes.shape[1] - 1)
    print('Vocab size: ', self.dataset.vocab_size)
    print()

    # Mean token length over every individual training caption.
    self.mean_training_caps_len = np.mean([
        len(cap)
        for caption_group in datasources.train.caption_groups
        for cap in caption_group
    ])
    # Space-joined training captions; used to recognise generated
    # captions that exist verbatim in the training data.
    self.known_train_caps = {
        ' '.join(cap)
        for caption_group in datasources.train.caption_groups
        for cap in caption_group
    }
    # All test captions as strings, grouped per image.
    self.all_str_test_caps = [[
        ' '.join(cap)
        for cap in caption_group
    ] for caption_group in datasources.test.caption_groups]
    self.test_caps = datasources.test.first_captions
    self.test_imgs = datasources.test.images
    # Retrieval evaluation uses only the first 1000 test items.
    self.test_caps_ret = datasources.test.first_captions[:1000]
    self.test_imgs_ret = datasources.test.images[:1000]

    # Dump the test image filenames and first captions, one per line.
    with open(config.base_dir + '/imgs_' + dataset_name + '.txt', 'w', encoding='utf-8') as f:
        for filename in datasources.test.image_filenames:
            print(str(filename), file=f)
    with open(config.base_dir + '/caps_' + dataset_name + '.txt', 'w', encoding='utf-8') as f:
        for cap in datasources.test.first_captions:
            print(str(' '.join(cap)), file=f)

    # Prepare MSCOCO evaluation toolkit: write the ground-truth test
    # captions in the COCO annotations JSON schema (metadata fields that
    # the toolkit does not need are left as null).
    with open(config.mscoco_dir + '/annotations/captions.json', 'w', encoding='utf-8') as f:
        print(str(
            json.dumps({
                'info': {
                    'description': None,
                    'url': None,
                    'version': None,
                    'year': None,
                    'contributor': None,
                    'date_created': None,
                },
                # One image entry per test caption group; the image id is
                # just the group's position in the test split.
                'images': [{
                    'license': None,
                    'url': None,
                    'file_name': None,
                    'id': image_id,
                    'width': None,
                    'date_captured': None,
                    'height': None
                } for image_id in range(len(datasources.test.caption_groups))],
                'licenses': [],
                'type': 'captions',
                # Flatten the (image, caption) pairs; caption ids
                # enumerate the flattened sequence.
                'annotations': [{
                    'image_id': image_id,
                    'id': caption_id,
                    'caption': ' '.join(caption)
                } for (caption_id, (image_id, caption)) in enumerate(
                    (image_id, caption)
                    for (image_id, caption_group) in enumerate(datasources.test.caption_groups)
                    for caption in caption_group)]
            })), file=f)

    if not lib.file_exists(config.base_dir + '/results.txt'):
        # First run: create the results table with its header row.
        with open(config.base_dir + '/results.txt', 'w', encoding='utf-8') as f:
            print(*[
                'dataset_name',
                'architecture',
                'run',
                'vocab_size',
                'num_training_caps',
                'mean_training_caps_len',
                'num_params',
                'geomean_pplx',
                'num_inf_pplx',
                'vocab_used',
                'vocab_used_frac',
                'mean_cap_len',
                'num_existing_caps',
                'num_existing_caps_frac',
                'existing_caps_CIDEr',
                'unigram_entropy',
                'bigram_entropy',
                'CIDEr',
                'METEOR',
                'ROUGE_L',
                'Bleu_1',
                'Bleu_2',
                'Bleu_3',
                'Bleu_4',
                'R@1',
                'R@5',
                'R@10',
                'median_rank',
                'R@1_frac',
                'R@5_frac',
                'R@10_frac',
                'median_rank_frac',
                'num_epochs',
                'training_time',
                'total_time',
            ], sep='\t', file=f)
    else:
        # Resuming: record every (dataset, architecture, run) combination
        # already present so those experiments are not redone.
        with open(config.base_dir + '/results.txt', 'r', encoding='utf-8') as f:
            for line in f.readlines()[1:]:
                [
                    dataset_name,
                    architecture,
                    run,
                ] = line.split('\t')[:3]
                full_name = '_'.join([dataset_name, architecture, run])
                self.completed.add(full_name)
_ ] = line.split('\t') img_dists[dataset_name].append({ 'max': int(max_sim_image), 'med': int(med_sim_image), 'min': int(min_sim_image) }) with open('results/caplen_freqs.txt', 'r', encoding='utf-8') as f: caplen_freqs = {'mscoco': []} for line in f.read().strip().split('\n')[1:]: [dataset_name, architecture, cap_len, freq] = line.split('\t') caplen_freqs[(dataset_name, architecture, int(cap_len))] = int(freq) for dataset_name in ['mscoco']: # 'flickr8k', 'flickr30k', datasources = helper_datasources.DataSources(dataset_name) for architecture in ['init', 'pre', 'par', 'merge']: output_entries = collections.defaultdict(lambda: {'jsd': list()}) print('{}_{}_{}'.format(dataset_name, architecture, run)) dataset = data.Dataset() dataset.minimal_load('model_data/{}_{}_{}'.format( dataset_name, architecture, run)) with model_normal.NormalModel( dataset=dataset, init_method=config.hyperparams[architecture]['init_method'], min_init_weight=config.hyperparams[architecture] ['min_init_weight'], max_init_weight=config.hyperparams[architecture]
('learnable_init_state', [False, True]), ('optimizer', ['adam']), #[ 'rmsprop', 'adam', 'adagrad' ] ('learning_rate', [0.001]), #[ 1e-4, 1e-3, 1e-2, 1e-1 ] ('normalize_image', [False, True]), ('weights_reg_weight', [0.0, 1e-8 ]), #[ 0.0, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1 ] ('image_dropout_prob', [0.0, 0.5]), ('post_image_dropout_prob', [0.0, 0.5]), ('embedding_dropout_prob', [0.0, 0.5]), ('rnn_dropout_prob', [0.0, 0.5]), ('train_minibatch_size', [32, 64, 128]), ] print('#' * 100) print('Exploring hyperparameters on flickr8k') datasources = helper_datasources.DataSources('flickr8k') lib.create_dir(config.base_dir_hyperpar) dataset = data.Dataset( min_token_freq=config.min_token_freq, training_datasource=datasources.train, validation_datasource=datasources.val, ) dataset.process() val_caps = [[' '.join(cap) for cap in cap_group] for cap_group in datasources.val.caption_groups] val_imgs = datasources.val.images