# Esempio n. 1 (scraped-example separator; stray "0" artifact from the source page)
import os
import sys

# Must be set before the framework (and transitively TensorFlow) is imported.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from framework import lib
from framework import model_neural_trad
from framework import evaluation
from framework import data
from framework import config

########################################################################################
# Output tree for the image-importance experiment:
# <results_dir>/imageimportance/<architecture>
imageimportance_root = config.results_dir + '/imageimportance'
lib.create_dir(imageimportance_root)

architecture = 'langmod'
lib.create_dir(imageimportance_root + '/' + architecture)
# Create the results file once, writing a header row of column names for the
# per-token gradient measurements.
# NOTE(review): this scraped snippet is truncated — the print(...) call below is
# cut off mid-argument-list (no closing paren), so this fragment is not runnable.
if not lib.file_exists(config.results_dir + '/imageimportance/' +
                       architecture + '/results_langmod.txt'):
    with open(config.results_dir + '/imageimportance/' + architecture +
              '/results_langmod.txt',
              'w',
              encoding='utf-8') as f:
        print('dataset',
              'run',
              'sent_len',
              'token_index',
              'gradient_wrt_prefix_next',
              'gradient_wrt_prefix_max',
              'gradient_wrt_prevtoken_next',
              'gradient_wrt_prevtoken_max',
              'gradient_wrt_firsttoken_next',
              'gradient_wrt_firsttoken_max',
              'gradient_wrt_multimodalvec_next',
# Esempio n. 2 (scraped-example separator; stray "0" artifact from the source page)
# MSCOCO test is never used in langmodtrans experiments so we can validate on it.
mscoco_test = data.load_datasources('mscoco')['test']
capgen_test = mscoco_test.shuffle(0).take(
    datasources['test'].num_groups, whole_groups=True
)
del datasources

lib.create_dir(config.hyperpar_dir + '/langmodtrans')

# Hyperparameter-search loop, one sub-directory per corpus (phase 1: language model).
for corpus in corpora:
    lib.create_dir(config.hyperpar_dir + '/langmodtrans/' + corpus)

    print('=' * 100)
    print(lib.formatted_clock())
    print(corpus, '1 (language model)')
    print()

    # Resumability: skip any corpus whose phase-2 marker file already exists.
    if lib.file_exists(config.hyperpar_dir + '/langmodtrans/' + corpus +
                       '/2_best.txt'):
        print('Found ready')
        print()
        continue

    # NOTE(review): truncated in the scrape — this header print(...) is cut off
    # mid-argument-list and is not runnable as-is.
    print('#',
          'init_method',
          'max_init_weight',
          'embed_size',
          'rnn_size',
          'post_image_size',
          'pre_output_size',
          'post_image_activation',
          'rnn_type',
          'optimizer',
          'learning_rate',
########################################################################################
# Corpora to run: comma-separated list in argv[1] when supplied, else the full default set.
corpora = ['lm1b', 'mscoco', 'flickr8k'] if len(sys.argv) == 1 else sys.argv[1].split(',')

# Load the caption-generation dataset once, keep only the train size and the
# test split, then release the rest.
datasources = data.load_datasources(config.langmodtrans_capgen_dataset)
capgen_size, capgen_test = datasources['train'].size, datasources['test']
del datasources

lib.create_dir(config.results_dir + '/langmodtrans')

# Per-corpus results file; the header row is written only on first creation.
for corpus in corpora:
    lib.create_dir(config.results_dir + '/langmodtrans/' + corpus)
    if not lib.file_exists(config.results_dir + '/langmodtrans/' + corpus +
                           '/results.txt'):
        with open(config.results_dir + '/langmodtrans/' + corpus +
                  '/results.txt',
                  'w',
                  encoding='utf-8') as f:
            # NOTE(review): truncated in the scrape — this header print(...) is
            # cut off mid-argument-list and is not runnable as-is.
            print('corpus',
                  'frozen_prefix',
                  'corpus_size_factor_exponent',
                  'run',
                  'corpus_size',
                  'langmod_vocab_size',
                  'langmod_num_params',
                  'langmod_mean_prob',
                  'langmod_median_prob',
                  'langmod_geomean_prob',
                  'langmod_mean_pplx',
# Esempio n. 4 (scraped-example separator; stray "0" artifact from the source page)
# Finalise the sentence data, then grab the test images and the grouped
# reference sentences used for evaluation.
dataset.compile_sents()

test_split = dataset.test
test_images = test_split.get_images()
test_sents = test_split.get_text_sent_groups()

lib.create_dir(config.hyperpar_dir + '/whereimage')

# Hyperparameter-search loop, one sub-directory per architecture.
for architecture in architectures:
    lib.create_dir(config.hyperpar_dir + '/whereimage/' + architecture)

    print('=' * 100)
    print(lib.formatted_clock())
    print(architecture)
    print()

    # Resumability: skip any architecture whose marker file already exists.
    if lib.file_exists(config.hyperpar_dir + '/whereimage/' + architecture +
                       '/best.txt'):
        print('Found ready')
        print()
        continue

    # NOTE(review): truncated in the scrape — this header print(...) is cut off
    # mid-argument-list and is not runnable as-is.
    print('#',
          'init_method',
          'max_init_weight',
          'embed_size',
          'rnn_size',
          'post_image_size',
          'pre_output_size',
          'post_image_activation',
          'rnn_type',
          'optimizer',
          'learning_rate',
########################################################################################
# Corpora to run: comma-separated list in argv[1] when supplied, else the full default set.
corpora = ['flickr8k', 'mscoco', 'lm1b'] if len(sys.argv) == 1 else sys.argv[1].split(',')

# Load the caption-generation dataset once, keep only the train size and the
# test split, then release the rest.
datasources = data.load_datasources(config.langmodtrans_capgen_dataset)
capgen_size, capgen_test = datasources['train'].size, datasources['test']
del datasources

lib.create_dir(config.results_dir + '/partialtraining')

# Per-corpus early-stopping results file; header written only on first creation.
for corpus in corpora:
    lib.create_dir(config.results_dir+'/partialtraining/'+corpus)
    if not lib.file_exists(config.results_dir+'/partialtraining/'+corpus+'/results1_earlystop.txt'):
        with open(config.results_dir+'/partialtraining/'+corpus+'/results1_earlystop.txt', 'w', encoding='utf-8') as f:
            # NOTE(review): truncated in the scrape — this header print(...) is
            # cut off mid-argument-list and is not runnable as-is.
            print(
                    'corpus',
                    'frozen_prefix',
                    'max_epochs',
                    'run',
                    'corpus_size',
                    'langmod_vocab_size',
                    'langmod_num_params',
                    'langmod_mean_prob',
                    'langmod_median_prob',
                    'langmod_geomean_prob',
                    'langmod_mean_pplx',
                    'langmod_median_pplx',
                    'langmod_geomean_pplx',
# Esempio n. 6 (scraped-example separator; stray "0" artifact from the source page)
            # NOTE(review): fragment starts mid-method (the enclosing def is not
            # visible in this scrape); it appears to finish a per-epoch progress
            # log line — TODO confirm against the original class.
            print(' | ', end='\t')
        # Tab-separated epoch summary: train/val log-perplexity and epoch duration.
        print(round(train_logpplx, 3), round(val_logpplx, 3), lib.format_duration(self.epoch_timer.get_duration()), sep='\t')
        # Reset the progress tracker for the next epoch.
        self.training_prog = None
        
        
########################################################################################
# Load the caption-generation dataset once, keep only the train size and the
# test split, then release the rest.
datasources = data.load_datasources(config.langmodtrans_capgen_dataset)
capgen_size, capgen_test = datasources['train'].size, datasources['test']
del datasources

# The random-RNN experiment runs on a single fixed corpus.
corpus = 'flickr8k'
randomrnn_root = config.results_dir + '/randomrnn'
lib.create_dir(randomrnn_root)
lib.create_dir(randomrnn_root + '/' + corpus)
# Create the results file once, writing the header row of column names.
# NOTE(review): truncated in the scrape — the print(...) below is cut off
# mid-argument-list and is not runnable as-is.
if not lib.file_exists(config.results_dir+'/randomrnn/'+corpus+'/results.txt'):
    with open(config.results_dir+'/randomrnn/'+corpus+'/results.txt', 'w', encoding='utf-8') as f:
        print(
                'corpus',
                'frozen_prefix',
                'max_epochs',
                'run',
                'corpus_size',
                'langmod_vocab_size',
                'langmod_num_params',
                'langmod_mean_prob',
                'langmod_median_prob',
                'langmod_geomean_prob',
                'langmod_mean_pplx',
                'langmod_median_pplx',
                'langmod_geomean_pplx',
# Esempio n. 7 (scraped-example separator; stray "0" artifact from the source page)
              # NOTE(review): fragment starts mid-call (the enclosing def and
              # the opening of this print(...) are not visible in this scrape);
              # not runnable on its own.
              lib.format_duration(self.epoch_timer.get_duration()),
              sep='\t')
        # Reset the progress tracker for the next epoch.
        self.training_prog = None


########################################################################################
# Architectures to evaluate: comma-separated list in argv[1] when supplied,
# otherwise the full default set.
architectures = (
    ['ceiling', 'merge', 'par', 'pre', 'init', 'merge-ext']
    if len(sys.argv) == 1
    else sys.argv[1].split(',')
)

lib.create_dir(config.results_dir + '/whereimage')

# Per-architecture results file; the header row is written only on first creation.
for architecture in architectures:
    lib.create_dir(config.results_dir + '/whereimage/' + architecture)
    if not lib.file_exists(config.results_dir + '/whereimage/' + architecture +
                           '/results.txt'):
        with open(config.results_dir + '/whereimage/' + architecture +
                  '/results.txt',
                  'w',
                  encoding='utf-8') as f:
            # NOTE(review): truncated in the scrape — this header print(...) is
            # cut off mid-argument-list and is not runnable as-is.
            print('architecture',
                  'dataset',
                  'run',
                  'vocab_size',
                  'num_params',
                  'mean_prob',
                  'median_prob',
                  'geomean_prob',
                  'mean_pplx',
                  'median_pplx',
                  'geomean_pplx',