def frequency_table():
    filenames = ['nist_train_class_freq.ft',
                 'p07_train_class_freq.ft',
                 'pnist_train_class_freq.ft']
    iterators = [datasets.nist_all(), datasets.nist_P07(), datasets.PNIST07()]
    for dataset, filename in zip(iterators, filenames):
        freq_table = numpy.zeros(62)
        for x, y in dataset.train(1):
            freq_table[int(y)] += 1
        f = open(filename, 'w')
        ft.write(f, freq_table)
        f.close()
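# Usage sketch (not in the original source): reading one of the saved
# frequency tables back, assuming `ft` is pylearn.io.filetensor and that its
# `read` mirrors the `write` used above.
from pylearn.io import filetensor as ft

f = open('nist_train_class_freq.ft')
freq_table = ft.read(f)
f.close()
print 'class frequencies:', freq_table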
def build_test_valid_sets():
    nist_ds = datasets.nist_all()
    pnist_ds = datasets.PNIST07()
    p07_ds = datasets.nist_P07()

    test_valid_fns = [nist_ds.test, nist_ds.valid,
                      pnist_ds.test, pnist_ds.valid,
                      p07_ds.test, p07_ds.valid]

    test_valid_names = ["nist_all__test", "nist_all__valid",
                        "NISTP__test", "NISTP__valid",
                        "P07__test", "P07__valid"]

    return test_valid_fns, test_valid_names
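# Usage sketch (not in the original source): pairing each split iterator with
# its name, e.g. to count examples per split. `minibatch_size` is a
# hypothetical choice here.
test_valid_fns, test_valid_names = build_test_valid_sets()
minibatch_size = 100
for fn, name in zip(test_valid_fns, test_valid_names):
    n = 0
    for x, y in fn(minibatch_size):
        n += len(y)
    print name, 'has', n, 'examples'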
def jobman_entrypoint(state, channel):
    # record mercurial versions of each package
    pylearn.version.record_versions(state, [theano, ift6266, pylearn])
    # TODO: remove this, bad for number of simultaneous requests on DB
    channel.save()

    # For test runs, we don't want to use the whole dataset so
    # reduce it to fewer elements if asked to.
    rtt = None
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    if state.has_key('decrease_lr'):
        decrease_lr = state['decrease_lr']
    else:
        decrease_lr = 0

    if state.has_key('decrease_lr_pretrain'):
        dec = state['decrease_lr_pretrain']
    else:
        dec = 0

    n_ins = 32 * 32

    if state.has_key('subdataset'):
        subdataset_name = state['subdataset']
    else:
        subdataset_name = SUBDATASET_NIST

    # n_outs = 62  # 10 digits, 26*2 (lower, capitals)
    if subdataset_name == "upper":
        n_outs = 26
        subdataset = datasets.nist_upper()
        examples_per_epoch = NIST_UPPER_TRAIN_SIZE
    elif subdataset_name == "lower":
        n_outs = 26
        subdataset = datasets.nist_lower()
        examples_per_epoch = NIST_LOWER_TRAIN_SIZE
    elif subdataset_name == "digits":
        n_outs = 10
        subdataset = datasets.nist_digits()
        examples_per_epoch = NIST_DIGITS_TRAIN_SIZE
    else:
        n_outs = 62
        subdataset = datasets.nist_all()
        examples_per_epoch = NIST_ALL_TRAIN_SIZE

    print 'Using subdataset', subdataset_name

    # Make sure these variables are not defined only inside the if blocks below
    PATH = ''
    nom_reptrain = ''
    nom_serie = ""
    if state['pretrain_choice'] == 0:
        nom_serie = "series_NIST.h5"
    elif state['pretrain_choice'] == 1:
        nom_serie = "series_P07.h5"

    series = create_series(state.num_hidden_layers, nom_serie)

    print "Creating optimizer with state, ", state

    optimizer = SdaSgdOptimizer(dataset_name=subdataset_name,
                                dataset=subdataset,
                                hyperparameters=state,
                                n_ins=n_ins, n_outs=n_outs,
                                examples_per_epoch=examples_per_epoch,
                                series=series,
                                max_minibatches=rtt)

    parameters = []
    # Number of files of P07 used for pretraining
    nb_file = 0

    print('\n\tpretraining with NIST\n')
    optimizer.pretrain(subdataset, decrease=dec)
    channel.save()

    # Set some of the parameters used for the finetuning
    if state.has_key('finetune_set'):
        finetune_choice = state['finetune_set']
    else:
        finetune_choice = FINETUNE_SET

    if state.has_key('max_finetuning_epochs'):
        max_finetune_epoch_NIST = state['max_finetuning_epochs']
    else:
        max_finetune_epoch_NIST = MAX_FINETUNING_EPOCHS

    if state.has_key('max_finetuning_epochs_P07'):
        max_finetune_epoch_P07 = state['max_finetuning_epochs_P07']
    else:
        max_finetune_epoch_P07 = max_finetune_epoch_NIST

    # Decide how the finetuning is done
    if finetune_choice == 0:
        print('\n\n\tfinetune with NIST\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(subdataset, subdataset,
                           max_finetune_epoch_NIST, ind_test=1,
                           decrease=decrease_lr)
        channel.save()
    if finetune_choice == 1:
        print('\n\n\tfinetune with P07\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(),
                           max_finetune_epoch_P07, ind_test=0,
                           decrease=decrease_lr)
        channel.save()
    if finetune_choice == 2:
        print('\n\n\tfinetune with P07 followed by NIST\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(),
                           max_finetune_epoch_P07, ind_test=20,
                           decrease=decrease_lr)
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=21,
                           decrease=decrease_lr)
        channel.save()
    if finetune_choice == 3:
        print('\n\n\tfinetune with NIST only on the logistic regression on top'
              ' (but validation on P07).\n'
              '\tThe outputs of all hidden units are inputs of the logistic regression\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=1, special=1,
                           decrease=decrease_lr)

    if finetune_choice == -1:
        print('\nSERIES OF 4 DIFFERENT FINETUNINGS')
        print('\n\n\tfinetune with NIST\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=1,
                           decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with P07\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(),
                           max_finetune_epoch_P07, ind_test=0,
                           decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_finetune_P07.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=21,
                           decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n'
              '\tThe outputs of all hidden units are inputs of the logistic regression\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=1, special=1,
                           decrease=decrease_lr)
        channel.save()

    channel.save()

    return channel.COMPLETE
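# Standalone driver sketch (not in the original source): running an
# entrypoint like the one above without a jobman scheduler. jobman's DD gives
# both dict and attribute access to the state; the channel stand-in and all
# hyperparameter values below are hypothetical, and the module-level
# constants (REDUCE_TRAIN_TO, SUBDATASET_NIST, FINETUNE_SET, ...) are assumed
# to be defined as in the original module.
from jobman import DD

class FakeChannel(object):
    COMPLETE = 'complete'
    def save(self):
        pass  # a real jobman channel would persist `state` to the DB here

if __name__ == '__main__':
    state = DD({'pretrain_choice': 0,
                'num_hidden_layers': 3,
                'subdataset': 'digits',
                'finetune_set': 0,
                'max_finetuning_epochs': 2})
    jobman_entrypoint(state, FakeChannel())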
def jobman_entrypoint(state, channel):
    # record mercurial versions of each package
    pylearn.version.record_versions(state, [theano, ift6266, pylearn])
    # TODO: remove this, bad for number of simultaneous requests on DB
    channel.save()

    # For test runs, we don't want to use the whole dataset so
    # reduce it to fewer elements if asked to.
    rtt = None
    # REDUCE_TRAIN_TO = 40000
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    if state.has_key('decrease_lr'):
        decrease_lr = state['decrease_lr']
    else:
        decrease_lr = 0

    n_ins = 32 * 32
    n_outs = 62  # 10 digits, 26*2 (lower, capitals)

    examples_per_epoch = 100000  # NIST_ALL_TRAIN_SIZE

    # Make sure these variables are not defined only inside the if blocks below
    PATH = ''
    nom_reptrain = ''
    nom_serie = ""
    if state['pretrain_choice'] == 0:
        nom_serie = "series_NIST.h5"
    elif state['pretrain_choice'] == 1:
        nom_serie = "series_P07.h5"

    series = create_series(state.num_hidden_layers, nom_serie)

    print "Creating optimizer with state, ", state

    optimizer = CSdASgdOptimizer(dataset=datasets.nist_all(),
                                 hyperparameters=state,
                                 n_ins=n_ins, n_outs=n_outs,
                                 examples_per_epoch=examples_per_epoch,
                                 series=series,
                                 max_minibatches=rtt)

    parameters = []
    # Number of files of P07 used for pretraining
    nb_file = 0
    if state['pretrain_choice'] == 0:
        print('\n\tpretraining with NIST\n')
        optimizer.pretrain(datasets.nist_all())
    elif state['pretrain_choice'] == 1:
        # To know how many files will be used during pretraining
        nb_file = int(state['pretraining_epochs_per_layer'])
        state['pretraining_epochs_per_layer'] = 1  # only 1 pass over the dataset
        if nb_file >= 100:
            sys.exit("The code does not support this many pretraining epochs (99 max with P07).\n"
                     "You have to correct the code (and be patient, P07 is huge!)\n"
                     "or reduce the number of pretraining epochs (better idea).\n")
        print('\n\tpretraining with P07')
        optimizer.pretrain(datasets.nist_P07(min_file=0, max_file=nb_file))
    channel.save()

    # Set some of the parameters used for the finetuning
    if state.has_key('finetune_set'):
        finetune_choice = state['finetune_set']
    else:
        finetune_choice = FINETUNE_SET

    if state.has_key('max_finetuning_epochs'):
        max_finetune_epoch_NIST = state['max_finetuning_epochs']
    else:
        max_finetune_epoch_NIST = MAX_FINETUNING_EPOCHS

    if state.has_key('max_finetuning_epochs_P07'):
        max_finetune_epoch_P07 = state['max_finetuning_epochs_P07']
    else:
        max_finetune_epoch_P07 = max_finetune_epoch_NIST

    # Decide how the finetuning is done
    if finetune_choice == 0:
        print('\n\n\tfinetune with NIST\n\n')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=1,
                           decrease=decrease_lr)
        channel.save()
    if finetune_choice == 1:
        print('\n\n\tfinetune with P07\n\n')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(),
                           max_finetune_epoch_P07, ind_test=0,
                           decrease=decrease_lr)
        channel.save()
    if finetune_choice == 2:
        print('\n\n\tfinetune with P07 followed by NIST\n\n')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(),
                           max_finetune_epoch_P07, ind_test=20,
                           decrease=decrease_lr)
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=21,
                           decrease=decrease_lr)
        channel.save()
    if finetune_choice == 3:
        print('\n\n\tfinetune with NIST only on the logistic regression on top'
              ' (but validation on P07).\n'
              '\tThe outputs of all hidden units are inputs of the logistic regression\n\n')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=1, special=1,
                           decrease=decrease_lr)

    if finetune_choice == -1:
        print('\nSERIES OF 4 DIFFERENT FINETUNINGS')
        print('\n\n\tfinetune with NIST\n\n')
        sys.stdout.flush()
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=1,
                           decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with P07\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(),
                           max_finetune_epoch_P07, ind_test=0,
                           decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_finetune_P07.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=21,
                           decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n'
              '\tThe outputs of all hidden units are inputs of the logistic regression\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=1, special=1,
                           decrease=decrease_lr)
        channel.save()

    channel.save()

    return channel.COMPLETE
def jobman_entrypoint(state, channel, set_choice):
    # record mercurial versions of each package
    pylearn.version.record_versions(state, [theano, ift6266, pylearn])
    # TODO: remove this, bad for number of simultaneous requests on DB
    channel.save()

    # For test runs, we don't want to use the whole dataset so
    # reduce it to fewer elements if asked to.
    rtt = None
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    n_ins = 32 * 32
    n_outs = 62  # 10 digits, 26*2 (lower, capitals)
    examples_per_epoch = NIST_ALL_TRAIN_SIZE

    PATH = ''
    if set_choice == 0:
        maximum_exemples = int(500000)  # maximum number of examples seen
    else:
        maximum_exemples = int(1000000000)  # an impossibly large number

    print "Creating optimizer with state, ", state

    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(),
                                hyperparameters=state,
                                n_ins=n_ins, n_outs=n_outs,
                                examples_per_epoch=examples_per_epoch,
                                max_minibatches=rtt)

    # For every saved finetuned model that exists, reload its parameters and
    # report the error on the NIST, P07 and PNIST07 sets (same calls, in the
    # same order, as the six copy-pasted blocks in the original listing).
    finetuned_models = [('params_finetune_NIST.txt', 'NIST'),
                        ('params_finetune_P07.txt', 'P07'),
                        ('params_finetune_NIST_then_P07.txt', 'NIST then P07'),
                        ('params_finetune_P07_then_NIST.txt', 'P07 then NIST'),
                        ('params_finetune_PNIST07.txt', 'PNIST07'),
                        ('params_finetune_PNIST07_then_NIST.txt', 'PNIST07 then NIST')]

    eval_sets = [('NIST', datasets.nist_all),
                 ('P07', datasets.nist_P07),
                 ('PNIST07', datasets.PNIST07)]

    for params_file, finetune_name in finetuned_models:
        if os.path.exists(PATH + params_file):
            print('\n finetune = ' + finetune_name)
            optimizer.reload_parameters(PATH + params_file)
            for set_name, dataset_fn in eval_sets:
                print "For " + str(maximum_exemples) + " over the " + set_name + " set: "
                optimizer.training_error(dataset_fn(maxsize=maximum_exemples), set_choice)

    channel.save()

    return channel.COMPLETE
def jobman_entrypoint(state, channel):
    # record mercurial versions of each package
    pylearn.version.record_versions(state, [theano, ift6266, pylearn])
    # TODO: remove this, bad for number of simultaneous requests on DB
    channel.save()

    # For test runs, we don't want to use the whole dataset so
    # reduce it to fewer elements if asked to.
    rtt = None
    if state.has_key("reduce_train_to"):
        rtt = state["reduce_train_to"]
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    n_ins = 32 * 32
    n_outs = 62  # 10 digits + 26*2 (lower, capitals)
    examples_per_epoch = NIST_ALL_TRAIN_SIZE

    PATH = PATH_P07
    maximum_exemples = int(100)  # maximum number of examples seen

    print "Creating optimizer with state, ", state

    optimizer = SdaSgdOptimizer(
        dataset=datasets.nist_all(),
        hyperparameters=state,
        n_ins=n_ins,
        n_outs=n_outs,
        examples_per_epoch=examples_per_epoch,
        max_minibatches=rtt,
    )

    print "The model is created"

    if os.path.exists(PATH + "params_finetune_NIST.txt"):
        print('\n finetune = NIST ')
        optimizer.reload_parameters(PATH + "params_finetune_NIST.txt")
        print "For " + str(maximum_exemples) + " over the NIST test set: "
        optimizer.see_error(datasets.nist_all(maxsize=maximum_exemples))

    if os.path.exists(PATH + "params_finetune_P07.txt"):
        print('\n finetune = P07 ')
        optimizer.reload_parameters(PATH + "params_finetune_P07.txt")
        print "For " + str(maximum_exemples) + " over the P07 test set: "
        optimizer.see_error(datasets.nist_P07(maxsize=maximum_exemples))

    if os.path.exists(PATH + "params_finetune_NIST_then_P07.txt"):
        print('\n finetune = NIST then P07')
        optimizer.reload_parameters(PATH + "params_finetune_NIST_then_P07.txt")
        print "For " + str(maximum_exemples) + " over the NIST test set: "
        optimizer.see_error(datasets.nist_all(maxsize=maximum_exemples))
        print "For " + str(maximum_exemples) + " over the P07 test set: "
        optimizer.see_error(datasets.nist_P07(maxsize=maximum_exemples))

    if os.path.exists(PATH + "params_finetune_P07_then_NIST.txt"):
        print('\n finetune = P07 then NIST')
        optimizer.reload_parameters(PATH + "params_finetune_P07_then_NIST.txt")
        print "For " + str(maximum_exemples) + " over the P07 test set: "
        optimizer.see_error(datasets.nist_P07(maxsize=maximum_exemples))
        print "For " + str(maximum_exemples) + " over the NIST test set: "
        optimizer.see_error(datasets.nist_all(maxsize=maximum_exemples))

    channel.save()

    return channel.COMPLETE
def jobman_entrypoint(state, channel):
    # record mercurial versions of each package
    pylearn.version.record_versions(state, [theano, ift6266, pylearn])
    # TODO: remove this, bad for number of simultaneous requests on DB
    channel.save()

    # For test runs, we don't want to use the whole dataset so
    # reduce it to fewer elements if asked to.
    rtt = None
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    n_ins = 32 * 32
    n_outs = 62  # 10 digits, 26*2 (lower, capitals)
    examples_per_epoch = NIST_ALL_TRAIN_SIZE

    PATH = ''
    NIST_BY_CLASS = 0

    print "Creating optimizer with state, ", state

    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(),
                                hyperparameters=state,
                                n_ins=n_ins, n_outs=n_outs,
                                examples_per_epoch=examples_per_epoch,
                                max_minibatches=rtt)

    def evaluate():
        # Either break the NIST error down by character class, or report the
        # validation error on P07 and PNIST07 (this is the block that was
        # copy-pasted for every model in the original listing).
        if NIST_BY_CLASS == 1:
            print "NIST DIGITS"
            optimizer.training_error(datasets.nist_digits(), part=2)
            print "NIST LOWER CASE"
            optimizer.training_error(datasets.nist_lower(), part=2)
            print "NIST UPPER CASE"
            optimizer.training_error(datasets.nist_upper(), part=2)
        else:
            print "P07 valid"
            optimizer.training_error(datasets.nist_P07(), part=1)
            print "PNIST valid"
            optimizer.training_error(datasets.PNIST07(), part=1)

    finetuned_models = [('params_finetune_NIST.txt', 'NIST'),
                        ('params_finetune_P07.txt', 'P07'),
                        ('params_finetune_NIST_then_P07.txt', 'NIST then P07'),
                        ('params_finetune_P07_then_NIST.txt', 'P07 then NIST'),
                        ('params_finetune_PNIST07.txt', 'PNIST07'),
                        ('params_finetune_PNIST07_then_NIST.txt', 'PNIST07 then NIST')]

    for params_file, finetune_name in finetuned_models:
        if os.path.exists(PATH + params_file):
            print('\n finetune = ' + finetune_name)
            optimizer.reload_parameters(PATH + params_file)
            evaluate()

    channel.save()

    return channel.COMPLETE
def go(state, channel):
    from ift6266 import datasets
    from ift6266.deep.convolutional_dae.sgd_opt import sgd_opt

    import pylearn, theano, ift6266
    import pylearn.version
    import sys

    # params: bsize, pretrain_lr, train_lr, nfilts1, nfilts2, nfilts3, nfilts4,
    #         pretrain_rounds, noise, mlp_sz

    pylearn.version.record_versions(state, [theano, ift6266, pylearn])
    # TODO: maybe record pynnet version?
    channel.save()

    dset = datasets.nist_P07()

    nfilts = []
    fsizes = []
    if state.nfilts1 != 0:
        nfilts.append(state.nfilts1)
        fsizes.append((5, 5))
    if state.nfilts2 != 0:
        nfilts.append(state.nfilts2)
        fsizes.append((3, 3))
    if state.nfilts3 != 0:
        nfilts.append(state.nfilts3)
        fsizes.append((3, 3))
    if state.nfilts4 != 0:
        nfilts.append(state.nfilts4)
        fsizes.append((2, 2))

    subs = [(2, 2)] * len(nfilts)
    noise = [state.noise] * len(nfilts)

    pretrain_funcs, trainf, evalf, net = build_funcs(
        img_size=(32, 32),
        batch_size=state.bsize, filter_sizes=fsizes,
        num_filters=nfilts, subs=subs, noise=noise,
        mlp_sizes=[state.mlp_sz], out_size=62,
        dtype=numpy.float32,
        pretrain_lr=state.pretrain_lr, train_lr=state.train_lr)

    t_it = repeat_itf(dset.train, state.bsize)
    pretrain_fs, train, valid, test = massage_funcs(
        t_it, t_it, dset, state.bsize,
        pretrain_funcs, trainf, evalf)

    series = create_series()

    print "pretraining ..."
    sys.stdout.flush()
    do_pretrain(pretrain_fs, state.pretrain_rounds, series['recons_error'])

    print "training ..."
    sys.stdout.flush()
    best_valid, test_score = sgd_opt(train, valid, test,
                                     training_epochs=800000, patience=2000,
                                     patience_increase=2.,
                                     improvement_threshold=0.995,
                                     validation_frequency=500,
                                     series=series, net=net)

    state.best_valid = best_valid
    state.test_score = test_score
    channel.save()
    return channel.COMPLETE
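# Sketch (not in the original source) of one plausible definition of
# `repeat_itf`, used above to feed training: cycle through a dataset split
# forever, yielding its minibatches, so the caller can draw as many
# minibatches as it wants. This is an assumption about the helper's
# behaviour, not its actual source.
def repeat_itf(itf, *args, **kwargs):
    while True:
        for e in itf(*args, **kwargs):
            yield e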
def jobman_entrypoint(state, channel):
    global TEST_RUN
    minibatch_size = state.minibatch_size

    print_every = 100000
    COMPUTE_ERROR_EVERY = 10**7 / minibatch_size  # compute error every 10 million examples
    if TEST_RUN:
        print_every = 100
        COMPUTE_ERROR_EVERY = 1000 / minibatch_size

    print "entrypoint, state is"
    print state

    ######################
    # select dataset and dataset subset, plus adjust epoch num to make number
    # of examples seen independent of dataset
    # example: for the DIGITS_ONLY case the number of epochs must change, and
    # for pure NIST (no transformations) we multiply by 100 from the start
    # since we don't have the variations

    # compute this in terms of the P07 dataset size (=80M)
    MINIBATCHES_TO_SEE = state.n_epochs * 8 * (10**6) / minibatch_size

    if state.train_on == 'NIST' and state.train_subset == 'ALL':
        dataset_obj = datasets.nist_all()
    elif state.train_on == 'NIST' and state.train_subset == 'DIGITS_ONLY':
        dataset_obj = datasets.nist_digits()
    elif state.train_on == 'NISTP' and state.train_subset == 'ALL':
        dataset_obj = datasets.PNIST07()
    elif state.train_on == 'NISTP' and state.train_subset == 'DIGITS_ONLY':
        # the original referenced a bare PNIST07_digits; presumably the
        # datasets module attribute, as in the P07 case below
        dataset_obj = datasets.PNIST07_digits
    elif state.train_on == 'P07' and state.train_subset == 'ALL':
        dataset_obj = datasets.nist_P07()
    elif state.train_on == 'P07' and state.train_subset == 'DIGITS_ONLY':
        dataset_obj = datasets.P07_digits

    dataset = dataset_obj

    if state.train_subset == 'ALL':
        n_classes = 62
    elif state.train_subset == 'DIGITS_ONLY':
        n_classes = 10
    else:
        raise NotImplementedError()

    ###############################
    # construct model

    print "constructing model..."
    x = T.matrix('x')
    y = T.ivector('y')

    rng = numpy.random.RandomState(state.rng_seed)

    # construct the MLP class
    model = MLP(rng=rng, input=x, n_in=N_INPUTS,
                n_hidden_layers=state.n_hidden_layers,
                n_hidden=state.n_hidden, n_out=n_classes)

    # cost and training fn
    cost = T.mean(model.negative_log_likelihood(y)) \
                 + state.L1_reg * model.L1 \
                 + state.L2_reg * model.L2_sqr
    print "L1, L2: ", state.L1_reg, state.L2_reg

    gradient_nll_wrt_params = []
    for param in model.params:
        gparam = T.grad(cost, param)
        gradient_nll_wrt_params.append(gparam)

    learning_rate = 10**float(state.learning_rate_log10)
    print "Learning rate", learning_rate

    train_updates = {}
    for param, gparam in zip(model.params, gradient_nll_wrt_params):
        train_updates[param] = param - learning_rate * gparam

    train_fn = theano.function([x, y], cost, updates=train_updates)

    #######################
    # create series
    basedir = os.getcwd()

    h5f = tables.openFile(os.path.join(basedir, "series.h5"), "w")

    series = {}
    add_error_series(series, "training_error", h5f,
                     index_names=('minibatch_idx',), use_accumulator=True,
                     reduce_every=REDUCE_EVERY)

    ##########################
    # training loop

    start_time = time.clock()

    print "begin training..."
    print "will train for", MINIBATCHES_TO_SEE, "minibatches"

    mb_idx = 0

    # the original condition compared mb_idx*minibatch_size against an
    # undefined nb_max_exemples; MINIBATCHES_TO_SEE is the cap computed above
    while mb_idx < MINIBATCHES_TO_SEE:
        last_costs = []
        for mb_x, mb_y in dataset.train(minibatch_size):
            if TEST_RUN and mb_idx > 1000:
                break

            last_cost = train_fn(mb_x, mb_y)
            series["training_error"].append((mb_idx,), last_cost)
            last_costs.append(last_cost)

            if (len(last_costs)+1) % print_every == 0:
                print "Mean over last", print_every, "minibatches: ", numpy.mean(last_costs)
                last_costs = []

            if (mb_idx+1) % COMPUTE_ERROR_EVERY == 0:
                # compute errors
                print "computing errors on all datasets..."
                print "Time since training began: ", (time.clock()-start_time)/60., "minutes"
                compute_and_save_errors(state, model, series, h5f, mb_idx)
                channel.save()
                sys.stdout.flush()

            # the increment was lost in the original listing; without it the
            # loop never advances
            mb_idx += 1

    end_time = time.clock()

    print "-"*80
    print "Finished. Training took", (end_time-start_time)/60., "minutes"
    print state
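# Sketch (not in the original source) of the kind of evaluation
# compute_and_save_errors presumably performs for each dataset split:
# compile an error function once and average it over the minibatches of the
# split. The `model.errors` attribute is an assumption (it is standard in
# the Theano-tutorial-style MLP this code builds on).
def mean_test_error(model, dataset, minibatch_size, x, y):
    errf = theano.function([x, y], model.errors(y))
    errs = [errf(mb_x, mb_y) for mb_x, mb_y in dataset.test(minibatch_size)]
    return numpy.mean(errs)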
def mlp_full_nist(verbose=1,
                  adaptive_lr=0,
                  data_set=0,
                  learning_rate=0.01,
                  L1_reg=0.00,
                  L2_reg=0.0001,
                  nb_max_exemples=1000000,
                  batch_size=20,
                  nb_hidden=30,
                  nb_targets=62,
                  tau=1e6,
                  lr_t2_factor=0.5,
                  init_model=0,
                  channel=0,
                  detection_mode=0):

    if channel != 0:
        channel.save()

    configuration = [learning_rate, nb_max_exemples, nb_hidden, adaptive_lr]

    # save initial learning rate if classical adaptive lr is used
    initial_lr = learning_rate
    max_div_count = 1000
    optimal_test_error = 0

    total_validation_error_list = []
    total_train_error_list = []
    learning_rate_list = []
    best_training_error = float('inf')
    divergence_flag_list = []

    if data_set == 0:
        print 'using nist'
        dataset = datasets.nist_all()
    elif data_set == 1:
        print 'using p07'
        dataset = datasets.nist_P07()
    elif data_set == 2:
        print 'using pnist'
        dataset = datasets.PNIST07()

    ishape = (32, 32)  # this is the size of NIST images

    # allocate symbolic variables for the data
    x = T.fmatrix()  # the data is presented as rasterized images
    y = T.lvector()  # the labels are presented as a 1D vector of [long int] labels

    # construct the logistic regression class
    classifier = MLP(input=x,
                     n_in=32*32,
                     n_hidden=nb_hidden,
                     n_out=nb_targets,
                     learning_rate=learning_rate,
                     detection_mode=detection_mode)

    # check if we want to initialise the weights with a previously calculated
    # model; dimensions must be consistent between the old model and the
    # current configuration! (nb_hidden and nb_targets)
    if init_model != 0:
        print 'using old model'
        print init_model
        old_model = numpy.load(init_model)
        classifier.W1.value = old_model['W1']
        classifier.W2.value = old_model['W2']
        classifier.b1.value = old_model['b1']
        classifier.b2.value = old_model['b2']

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    if detection_mode == 0:
        cost = classifier.negative_log_likelihood(y) \
             + L1_reg * classifier.L1 \
             + L2_reg * classifier.L2_sqr
    else:
        cost = classifier.cross_entropy(y) \
             + L1_reg * classifier.L1 \
             + L2_reg * classifier.L2_sqr

    # compiling a theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function([x, y], classifier.errors(y))

    # compute the gradient of cost with respect to theta = (W1, b1, W2, b2)
    g_W1 = T.grad(cost, classifier.W1)
    g_b1 = T.grad(cost, classifier.b1)
    g_W2 = T.grad(cost, classifier.W2)
    g_b2 = T.grad(cost, classifier.b2)

    # specify how to update the parameters of the model as a dictionary
    updates = \
        {classifier.W1: classifier.W1 - classifier.lr * g_W1,
         classifier.b1: classifier.b1 - classifier.lr * g_b1,
         classifier.W2: classifier.W2 - classifier.lr * g_W2,
         classifier.b2: classifier.b2 - classifier.lr * g_b2}

    # compiling a theano function `train_model` that returns the cost, but at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function([x, y], cost, updates=updates)

    # conditions for stopping the adaptation:
    # 1) we have reached nb_max_exemples (this is rounded up to be a multiple
    #    of the train size so we always do at least 1 epoch)
    # 2) the validation error goes up several times in a row (probable
    #    overfitting)
    # This means we no longer stop on slow convergence, as low learning rates
    # stopped too fast; instead we wait for the validation error to go up
    # several times in a row. We save the curve of the validation error so we
    # can always go back to check on it, and we save the absolute best model
    # anyway, so we might as well explore a bit when diverging.

    # approximate number of samples in the nist training set
    # this is just to have a validation frequency
    # roughly proportional to the original nist training set
    n_minibatches = 650000 / batch_size

    patience = 2 * nb_max_exemples / batch_size  # in units of minibatches
    validation_frequency = n_minibatches / 4

    best_validation_loss = float('inf')
    best_iter = 0
    test_score = 0.
    start_time = time.clock()
    time_n = 0  # in units of examples
    minibatch_index = 0
    epoch = 0
    temp = 0
    divergence_flag = 0

    print 'starting training'
    sys.stdout.flush()
    while minibatch_index * batch_size < nb_max_exemples:

        for x, y in dataset.train(batch_size):

            # if we are using the classic learning rate decay, adjust it
            # before training on the current mini-batch
            if adaptive_lr == 2:
                classifier.lr.value = tau * initial_lr / (tau + time_n)

            # train model
            cost_ij = train_model(x, y)

            if (minibatch_index) % validation_frequency == 0:
                # save the current learning rate
                learning_rate_list.append(classifier.lr.value)
                divergence_flag_list.append(divergence_flag)

                # compute the validation error
                this_validation_loss = 0.
                temp = 0
                for xv, yv in dataset.valid(1):
                    # sum up the errors for each minibatch
                    this_validation_loss += test_model(xv, yv)
                    temp = temp + 1
                # get the average by dividing by the number of minibatches
                this_validation_loss /= temp
                # save the validation loss
                total_validation_error_list.append(this_validation_loss)

                print(('epoch %i, minibatch %i, learning rate %f, current validation error %f ') %
                      (epoch, minibatch_index+1, classifier.lr.value,
                       this_validation_loss*100.))
                sys.stdout.flush()

                # save temporary results so we can check during training
                numpy.savez('temp_results.npy', config=configuration,
                            total_validation_error_list=total_validation_error_list,
                            learning_rate_list=learning_rate_list,
                            divergence_flag_list=divergence_flag_list)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = minibatch_index
                    # reset divergence flag
                    divergence_flag = 0

                    # save the best model; overwrite the current saved best
                    # model so we only keep the best
                    numpy.savez('best_model.npy', config=configuration,
                                W1=classifier.W1.value, W2=classifier.W2.value,
                                b1=classifier.b1.value, b2=classifier.b2.value,
                                minibatch_index=minibatch_index)

                    # test it on the test set
                    test_score = 0.
                    temp = 0
                    for xt, yt in dataset.test(batch_size):
                        test_score += test_model(xt, yt)
                        temp = temp + 1
                    test_score /= temp

                    print(('epoch %i, minibatch %i, test error of best '
                           'model %f %%') % (epoch, minibatch_index+1,
                                             test_score*100.))
                    sys.stdout.flush()
                    optimal_test_error = test_score

                # if the validation error is going up, we are overfitting (or
                # oscillating); check if we are allowed to continue and
                # whether we will adjust the learning rate
                elif this_validation_loss >= best_validation_loss:

                    # with non-classic learning rate decay, we modify the
                    # learning rate only when the validation error goes up
                    if adaptive_lr == 1:
                        classifier.lr.value = classifier.lr.value * lr_t2_factor

                    # we are allowed to diverge at most max_div_count times
                    # in a row; past that, early stopping kicks in below
                    divergence_flag = divergence_flag + 1

                    # calculate the test error at this point
                    test_score = 0.
                    temp = 0
                    for xt, yt in dataset.test(batch_size):
                        test_score += test_model(xt, yt)
                        temp = temp + 1
                    test_score /= temp

                    print ' validation error is going up, possibly stopping soon'
                    print(('     epoch %i, minibatch %i, test error of best '
                           'model %f %%') % (epoch, minibatch_index+1,
                                             test_score*100.))
                    sys.stdout.flush()

            # check early stopping condition
            if divergence_flag == max_div_count:
                minibatch_index = nb_max_exemples
                print 'we have diverged, early stopping kicks in'
                break

            # check if we have seen enough examples
            # force at least one epoch
            if epoch > 0 and minibatch_index * batch_size > nb_max_exemples:
                break

            time_n = time_n + batch_size
            minibatch_index = minibatch_index + 1

        # we have finished looping through the training set
        epoch = epoch + 1

    end_time = time.clock()

    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter, test_score*100.))
    print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
    print minibatch_index
    sys.stdout.flush()

    # save the model and the weights
    numpy.savez('model.npy', config=configuration,
                W1=classifier.W1.value, W2=classifier.W2.value,
                b1=classifier.b1.value, b2=classifier.b2.value)
    numpy.savez('results.npy', config=configuration,
                total_train_error_list=total_train_error_list,
                total_validation_error_list=total_validation_error_list,
                learning_rate_list=learning_rate_list,
                divergence_flag_list=divergence_flag_list)

    return (best_training_error*100.0, best_validation_loss*100.,
            optimal_test_error*100., best_iter*batch_size,
            (end_time-start_time)/60)
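# Usage sketch (not in the original source): training a small MLP on NIST
# with the classical 1/t learning rate decay (adaptive_lr=2). The returned
# tuple gives training, validation and test errors (in %), the best
# iteration (in examples) and the wall-clock time in minutes; the
# hyperparameter values are hypothetical.
(train_err, valid_err, test_err,
 best_iter, minutes) = mlp_full_nist(verbose=1,
                                     adaptive_lr=2,
                                     data_set=0,  # 0 = NIST
                                     learning_rate=0.01,
                                     nb_max_exemples=100000,
                                     batch_size=20,
                                     nb_hidden=500,
                                     tau=1e6)
print 'valid %f%%, test %f%%' % (valid_err, test_err)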
def mlp_get_nist_error(model_name='/u/mullerx/ift6266h10_sandbox_db/xvm_final_lr1_p073/8/best_model.npy.npz',
                       data_set=0):

    # load the saved model and create an mlp based on its dimensions
    model = numpy.load(model_name)
    W1 = model['W1']
    W2 = model['W2']
    b1 = model['b1']
    b2 = model['b2']

    total_error_count = 0.0
    total_exemple_count = 0.0
    nb_error_count = 0.0
    nb_exemple_count = 0.0
    char_error_count = 0.0
    char_exemple_count = 0.0
    min_error_count = 0.0
    min_exemple_count = 0.0
    maj_error_count = 0.0
    maj_exemple_count = 0.0

    vtotal_error_count = 0.0
    vtotal_exemple_count = 0.0
    vnb_error_count = 0.0
    vnb_exemple_count = 0.0
    vchar_error_count = 0.0
    vchar_exemple_count = 0.0
    vmin_error_count = 0.0
    vmin_exemple_count = 0.0
    vmaj_error_count = 0.0
    vmaj_exemple_count = 0.0

    nbc_error_count = 0.0
    vnbc_error_count = 0.0

    if data_set == 0:
        print 'using nist'
        dataset = datasets.nist_all()
    elif data_set == 1:
        print 'using p07'
        dataset = datasets.nist_P07()
    elif data_set == 2:
        print 'using pnist'
        dataset = datasets.PNIST07()

    # get the test error
    # use a batch size of 1 so we can get the sub-class error
    # without messing with matrices (will be upgraded later)
    test_score = 0
    temp = 0
    for xt, yt in dataset.test(1):
        total_exemple_count = total_exemple_count + 1
        # get activation for layer 1
        a0 = numpy.dot(numpy.transpose(W1), numpy.transpose(xt[0])) + b1
        # apply the non-linearity to the layer 1 activation
        a0_out = numpy.tanh(a0)
        # get activation for the output layer
        a1 = numpy.dot(numpy.transpose(W2), a0_out) + b2
        # apply the output non-linearity (softmax)
        a1_exp = numpy.exp(a1)
        sum_a1 = numpy.sum(a1_exp)
        a1_out = a1_exp / sum_a1

        predicted_class = numpy.argmax(a1_out)
        wanted_class = yt[0]
        if predicted_class != wanted_class:
            total_error_count = total_error_count + 1

        # error ignoring case confusions between the two alphabets
        if not (predicted_class == wanted_class or
                ((((predicted_class+26) == wanted_class) or
                  ((predicted_class-26) == wanted_class)) and wanted_class > 9)):
            nbc_error_count = nbc_error_count + 1

        # treat digit error
        if wanted_class < 10:
            nb_exemple_count = nb_exemple_count + 1
            predicted_class = numpy.argmax(a1_out[0:10])
            if predicted_class != wanted_class:
                nb_error_count = nb_error_count + 1

        if wanted_class > 9:
            char_exemple_count = char_exemple_count + 1
            predicted_class = numpy.argmax(a1_out[10:62]) + 10
            if ((predicted_class != wanted_class) and
                    ((predicted_class+26) != wanted_class) and
                    ((predicted_class-26) != wanted_class)):
                char_error_count = char_error_count + 1

        # upper case (classes 10-35; the original comment said "minuscule",
        # but these are the capitals)
        if wanted_class > 9 and wanted_class < 36:
            maj_exemple_count = maj_exemple_count + 1
            # was a1_out[10:35], which dropped the last capital class
            predicted_class = numpy.argmax(a1_out[10:36]) + 10
            if predicted_class != wanted_class:
                maj_error_count = maj_error_count + 1

        # lower case (classes 36-61)
        if wanted_class > 35:
            min_exemple_count = min_exemple_count + 1
            predicted_class = numpy.argmax(a1_out[36:62]) + 36
            if predicted_class != wanted_class:
                min_error_count = min_error_count + 1

    vtest_score = 0
    vtemp = 0
    for xt, yt in dataset.valid(1):
        vtotal_exemple_count = vtotal_exemple_count + 1
        # get activation for layer 1
        a0 = numpy.dot(numpy.transpose(W1), numpy.transpose(xt[0])) + b1
        # apply the non-linearity to the layer 1 activation
        a0_out = numpy.tanh(a0)
        # get activation for the output layer
        a1 = numpy.dot(numpy.transpose(W2), a0_out) + b2
        # apply the output non-linearity (softmax)
        a1_exp = numpy.exp(a1)
        sum_a1 = numpy.sum(a1_exp)
        a1_out = a1_exp / sum_a1

        predicted_class = numpy.argmax(a1_out)
        wanted_class = yt[0]
        if predicted_class != wanted_class:
            vtotal_error_count = vtotal_error_count + 1

        if not (predicted_class == wanted_class or
                ((((predicted_class+26) == wanted_class) or
                  ((predicted_class-26) == wanted_class)) and wanted_class > 9)):
            # the original incremented from nbc_error_count here, a copy-paste bug
            vnbc_error_count = vnbc_error_count + 1

        # treat digit error
        if wanted_class < 10:
            vnb_exemple_count = vnb_exemple_count + 1
            predicted_class = numpy.argmax(a1_out[0:10])
            if predicted_class != wanted_class:
                vnb_error_count = vnb_error_count + 1

        if wanted_class > 9:
            vchar_exemple_count = vchar_exemple_count + 1
            predicted_class = numpy.argmax(a1_out[10:62]) + 10
            if ((predicted_class != wanted_class) and
                    ((predicted_class+26) != wanted_class) and
                    ((predicted_class-26) != wanted_class)):
                vchar_error_count = vchar_error_count + 1

        # upper case (classes 10-35)
        if wanted_class > 9 and wanted_class < 36:
            vmaj_exemple_count = vmaj_exemple_count + 1
            predicted_class = numpy.argmax(a1_out[10:36]) + 10  # was [10:35]
            if predicted_class != wanted_class:
                vmaj_error_count = vmaj_error_count + 1

        # lower case (classes 36-61)
        if wanted_class > 35:
            vmin_exemple_count = vmin_exemple_count + 1
            predicted_class = numpy.argmax(a1_out[36:62]) + 36
            if predicted_class != wanted_class:
                vmin_error_count = vmin_error_count + 1

    print(('total error = %f') % ((total_error_count/total_exemple_count)*100.0))
    print(('number error = %f') % ((nb_error_count/nb_exemple_count)*100.0))
    print(('char error = %f') % ((char_error_count/char_exemple_count)*100.0))
    print(('min error = %f') % ((min_error_count/min_exemple_count)*100.0))
    print(('maj error = %f') % ((maj_error_count/maj_exemple_count)*100.0))
    print(('36 error = %f') % ((nbc_error_count/total_exemple_count)*100.0))

    print(('valid total error = %f') % ((vtotal_error_count/vtotal_exemple_count)*100.0))
    print(('valid number error = %f') % ((vnb_error_count/vnb_exemple_count)*100.0))
    print(('valid char error = %f') % ((vchar_error_count/vchar_exemple_count)*100.0))
    print(('valid min error = %f') % ((vmin_error_count/vmin_exemple_count)*100.0))
    print(('valid maj error = %f') % ((vmaj_error_count/vmaj_exemple_count)*100.0))
    print(('valid 36 error = %f') % ((vnbc_error_count/vtotal_exemple_count)*100.0))

    print(('num total = %d,%d') % (total_exemple_count, total_error_count))
    print(('num nb = %d,%d') % (nb_exemple_count, nb_error_count))
    print(('num min = %d,%d') % (min_exemple_count, min_error_count))
    print(('num maj = %d,%d') % (maj_exemple_count, maj_error_count))
    print(('num char = %d,%d') % (char_exemple_count, char_error_count))

    total_error_count /= total_exemple_count
    nb_error_count /= nb_exemple_count
    char_error_count /= char_exemple_count
    min_error_count /= min_exemple_count
    maj_error_count /= maj_exemple_count
    nbc_error_count /= total_exemple_count

    vtotal_error_count /= vtotal_exemple_count
    vnb_error_count /= vnb_exemple_count
    vchar_error_count /= vchar_exemple_count
    vmin_error_count /= vmin_exemple_count
    vmaj_error_count /= vmaj_exemple_count
    vnbc_error_count /= vtotal_exemple_count

    return (total_error_count, nb_error_count, char_error_count,
            min_error_count, maj_error_count, nbc_error_count,
            vtotal_error_count, vnb_error_count, vchar_error_count,
            vmin_error_count, vmaj_error_count, vnbc_error_count)
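# Usage sketch (not in the original source): evaluating a saved model on the
# NIST test and validation splits. The default model path hard-coded in the
# function only resolves on the original cluster, so a local path would
# normally be passed in.
errors = mlp_get_nist_error(data_set=0)
(total_err, nb_err, char_err, min_err, maj_err, nbc_err,
 vtotal_err, vnb_err, vchar_err, vmin_err, vmaj_err, vnbc_err) = errors
print 'test total error: %f%%' % (total_err * 100.0)
print 'valid total error: %f%%' % (vtotal_err * 100.0)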