def main(*_fold_info, **kwargs):
    """Configure a run, load the data, then train and test the model.

    Positional arguments are accepted but never used. The keyword
    arguments are applied through ``update_args`` after each of the two
    argument-creation stages and are also stored verbatim on
    ``args.diff_kwargs``.

    Returns the value produced by
    ``lstm_seqlabel_circuit_compilation.perform_training_and_testing``.
    """
    import os

    # Stage 1 runs before the data is loaded; stage 2 receives the data.
    # The keyword overrides are re-applied after each stage.
    args = update_args(args_creation_part1(rasengan.Namespace()), kwargs)
    data = transducer_data.main(args)
    args = update_args(args_creation_part2(args, data), kwargs)
    args.diff_kwargs = kwargs

    # Every run gets its own directory: take the first "<folder>_<n>"
    # that does not exist yet, so concurrent processes do not interfere
    # and results of earlier runs are not overwritten.
    base_folder = args.folder
    run_idx = 0
    candidate = base_folder + '_' + str(run_idx)
    while os.path.exists(candidate):
        run_idx += 1
        candidate = base_folder + '_' + str(run_idx)
    args.folder = candidate
    # Same output as the two-item print statement (items joined by a space).
    print('Set args.folder to' + ' ' + args.folder)

    if __name__ != '__main__':
        # Probably running from the hpolibrary. In non-interactive batch
        # runs debug support must be disabled so that an exception does
        # not drop into a post-mortem debugger.
        rasengan.disable_debug_support()
    # When run as a script nothing extra is done. A pretrained pickle
    # could be selected here via
    #   args.pretrained_param_pklfile = (args.folder + r'/' + args.pkl_name)

    with lstm_seqlabel_circuit_compilation.make(args, force=True):
        err = lstm_seqlabel_circuit_compilation.perform_training_and_testing(
            "", args, data)
    return err
def get_train_test_namespace(args):
    """Compile the model(s) described by ``args`` into one namespace.

    The test model is always compiled, after ``set_dropout_to_zero(args)``
    has been called. The train model is compiled first, and only when
    ``args.perform_training`` is truthy. Each compiled model is merged
    into a ``rasengan.Namespace('ttns')`` through
    ``update_and_append_prefix`` with the prefix ``'train_'`` or
    ``'test_'`` respectively.

    Returns the resulting namespace.
    """
    if args.perform_training:
        print('Compiling train_model')
        compiled_train = compile_args(args)

    # The train model must be compiled before dropout is zeroed on args.
    set_dropout_to_zero(args)
    print('Compiling test_model')
    compiled_test = compile_args(args)

    # Merge train (if any) first, then test, each under its own prefix.
    namespace = rasengan.Namespace('ttns')
    prefixed_models = [(compiled_test, 'test_')]
    if args.perform_training:
        prefixed_models.insert(0, (compiled_train, 'train_'))
    for model, prefix in prefixed_models:
        namespace = namespace.update_and_append_prefix(model, prefix)
    return namespace
'''
| Filename    : test_conjunctivemixture.py
| Description : Test ConjunctiveMixture Chip
| Author      : Pushpendre Rastogi
| Created     : Mon Nov 16 01:00:45 2015 (-0500)
| Last-Updated: Mon Nov 16 01:08:20 2015 (-0500)
|           By: Pushpendre Rastogi
|     Update #: 3

Script-style test: builds a small argument namespace for the
ConjunctiveMixture chip and asks the compilation module for the
train/test namespace. Everything runs at import time.
'''
import rasengan
import lstm_seqlabel_circuit
import lstm_seqlabel_circuit_compilation
import util_lstm_seqlabel

# Only the two `conjmix_`-prefixed options are set on the namespace.
# NOTE(review): confirm get_train_test_namespace needs no other fields on
# `args` (e.g. `perform_training`) -- none are set in this script.
args = rasengan.Namespace()
args.conjmix_embed_BOS = 1
args.conjmix_clip_gradient = 0
# NOTE(review): `chips` is built but never attached to `args` or passed to
# anything in this script -- presumably it was meant to be; verify.
chips = [(lstm_seqlabel_circuit.ConjunctiveMixture, 'conjmix')]
# Run the compilation inside the debug_support context manager.
with util_lstm_seqlabel.debug_support():
    ttns = lstm_seqlabel_circuit_compilation.get_train_test_namespace(args)
    # Test value of ttns
    # NOTE(review): no assertions on `ttns` exist yet; the script only
    # checks that compilation runs.
def main(args):
    """Load the train/dev/test corpora and package them into a namespace.

    Reads ``args.train_fn`` and ``args.dev_fn`` with ``read_data``. The dev
    data can then be re-partitioned; both options are applied in this
    order:

    * ``args.partition_dev_into_train = N > 0``: the first ``N`` dev
      examples stay in dev and the remainder is appended to train.
    * ``args.partition_dev_into_test = M > 0``: the first ``M`` of the
      (remaining) dev examples stay in dev and the rest becomes the test
      split. Otherwise the test split is read from ``args.test_fn``.

    When both options are set, ``M`` should be smaller than ``N``;
    otherwise the test split comes out empty.

    Other fields read from ``args``:

    * ``limit_corpus``: if > 0, only that many training examples are kept.
      This happens after the vocabulary has been built from all splits.
    * ``win``: forwarded unchanged to ``numerize``.

    Returns a ``rasengan.Namespace`` with:

    * ``train_data`` / ``val_data`` / ``test_data``: the splits after
      ``add_bos``, before numerization.
    * ``train_set`` / ``dev_set`` / ``test_set``: the numerized splits.
    * ``label2idx`` / ``idx2label``: character <-> integer maps. Indices
      start at 1 because 0 is reserved for epsilon.
    * ``vocsize``: ``len(label2idx) + 1`` (accounts for the reserved 0).
    * ``train_lex`` / ``valid_lex`` / ``test_lex``: element 0 of every
      numerized example.
    * ``train_y``: element 1 of every numerized training example.
    * ``valid_y`` / ``test_y``: element 1 of every numerized example,
      passed through ``util_lstm_seqlabel.convert_id_to_word`` with
      ``idx2label``. Note that ``train_y`` is NOT converted.
    * ``words_train`` / ``words_valid`` / ``words_test``: empty lists.
    """
    with rasengan.debug_support():
        with rasengan.tictoc("Loading Data"):
            data_list = rasengan.namespacer(read_data(args.train_fn))
            val_data_list = rasengan.namespacer(read_data(args.dev_fn))

            if args.partition_dev_into_train > 0:
                # Bug fix: this branch used `partition_dev_into_test` as
                # the cut point, so its own setting was never honoured.
                lim = args.partition_dev_into_train
                data_list.extend(val_data_list[lim:])
                val_data_list = val_data_list[:lim]

            if args.partition_dev_into_test > 0:
                lim = args.partition_dev_into_test
                test_data_list = val_data_list[lim:]
                val_data_list = val_data_list[:lim]
            else:
                test_data_list = rasengan.namespacer(read_data(args.test_fn))

            # data_list = val_data_list = [(u'jason', u'eisner')]
            # The vocabulary is collected over all three splits, before
            # BOS is added and before the training set is truncated.
            lst_char = get_lst_char(
                data_list + val_data_list + test_data_list)
            data_list = add_bos(data_list)
            val_data_list = add_bos(val_data_list)
            test_data_list = add_bos(test_data_list)

            warnings.warn('''
            NOTE: While preparing sigma, we add 1 to the index
            returned by enumerate because the transducer unit that
            Ryan wrote uses index 0 as the index for the epsilon
            symbol. So essentially the epsilon symbol and the
            integer 0 are reserved symbols that cannot appear in the
            vocabulary.

            ALSO, we need to add 1 to the vocsize because of that.
            ''')
            # sigma :: char -> int (1-based, 0 is reserved for epsilon)
            sigma = {char: idx for idx, char in enumerate(lst_char, 1)}
            # sigma_inv :: int -> char
            sigma_inv = {idx: char for idx, char in enumerate(lst_char, 1)}

            if args.limit_corpus > 0:
                data_list = data_list[:args.limit_corpus]

            train_data = numerize(data_list, sigma, args.win)
            val_data = numerize(val_data_list, sigma, args.win)
            test_data = numerize(test_data_list, sigma, args.win)

            data = rasengan.Namespace()
            #-------------------------------------------------------------#
            # Add sets that would be used by the tensorflow seq2seq       #
            # model. See~$PY/tensorflow/models/rnn/translate/translate.py #
            #-------------------------------------------------------------#
            data.train_data = data_list
            data.val_data = val_data_list
            data.test_data = test_data_list
            data.train_set = train_data
            data.dev_set = val_data
            data.test_set = test_data

            # +1 because index 0 is reserved and never appears in sigma.
            data.vocsize = len(sigma) + 1
            data.idx2label = sigma_inv
            data.label2idx = sigma

            data.train_lex = [e[0] for e in train_data]
            data.train_y = [e[1] for e in train_data]

            data.valid_lex = [e[0] for e in val_data]
            data.valid_y = util_lstm_seqlabel.convert_id_to_word(
                [e[1] for e in val_data], data.idx2label)

            data.test_lex = [e[0] for e in test_data]
            data.test_y = util_lstm_seqlabel.convert_id_to_word(
                [e[1] for e in test_data], data.idx2label)

            data.words_train = []
            data.words_valid = []
            data.words_test = []
            return data