if len(args) > 0 and args[0] == 'sigmoid':
    type = 0
elif len(args) > 0 and args[0] == 'tanh':
    type = 1

part = 2   # 0=train, 1=valid, 2=test
PATH = ''  # Can be changed if the model is not in the current directory

if os.path.exists(PATH + 'params_finetune_NIST.txt'):
    start_time = time.clock()
    print ('\n finetune = NIST ')
    print "NIST DIGITS"
    test_data(PATH + 'params_finetune_NIST.txt', datasets.nist_digits(), part=part, type=type)
    print "NIST LOWER CASE"
    test_data(PATH + 'params_finetune_NIST.txt', datasets.nist_lower(), part=part, type=type)
    print "NIST UPPER CASE"
    test_data(PATH + 'params_finetune_NIST.txt', datasets.nist_upper(), part=part, type=type)
    end_time = time.clock()
    print ('It took %f minutes' % ((end_time - start_time) / 60.))

if os.path.exists(PATH + 'params_finetune_P07.txt'):
    start_time = time.clock()
    print ('\n finetune = P07 ')
    print "NIST DIGITS"
    test_data(PATH + 'params_finetune_P07.txt', datasets.nist_digits(), part=part, type=type)
    print "NIST LOWER CASE"
    test_data(PATH + 'params_finetune_P07.txt', datasets.nist_lower(), part=part, type=type)
    print "NIST UPPER CASE"
    test_data(PATH + 'params_finetune_P07.txt', datasets.nist_upper(), part=part, type=type)
def jobman_entrypoint(state, channel):
    """Reload each available set of finetuned parameters and report its error."""
    # record mercurial versions of each package
    pylearn.version.record_versions(state, [theano, ift6266, pylearn])
    # TODO: remove this, bad for number of simultaneous requests on DB
    channel.save()

    # For test runs, we don't want to use the whole dataset so
    # reduce it to fewer elements if asked to.
    rtt = None
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    n_ins = 32 * 32
    n_outs = 62  # 10 digits, 26*2 (lower case, capitals)
    examples_per_epoch = NIST_ALL_TRAIN_SIZE

    PATH = ''
    NIST_BY_CLASS = 0

    print "Creating optimizer with state, ", state

    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(),
                                hyperparameters=state,
                                n_ins=n_ins, n_outs=n_outs,
                                examples_per_epoch=examples_per_epoch,
                                max_minibatches=rtt)

    # Evaluate each available set of finetuned parameters on the same benchmarks.
    finetune_files = [('params_finetune_NIST.txt',              'NIST'),
                      ('params_finetune_P07.txt',               'P07'),
                      ('params_finetune_NIST_then_P07.txt',     'NIST then P07'),
                      ('params_finetune_P07_then_NIST.txt',     'P07 then NIST'),
                      ('params_finetune_PNIST07.txt',           'PNIST07'),
                      ('params_finetune_PNIST07_then_NIST.txt', 'PNIST07 then NIST')]

    for param_file, label in finetune_files:
        if not os.path.exists(PATH + param_file):
            continue
        print ('\n finetune = ' + label)
        optimizer.reload_parameters(PATH + param_file)
        if NIST_BY_CLASS == 1:
            # test error, reported separately for each NIST character class
            print "NIST DIGITS"
            optimizer.training_error(datasets.nist_digits(), part=2)
            print "NIST LOWER CASE"
            optimizer.training_error(datasets.nist_lower(), part=2)
            print "NIST UPPER CASE"
            optimizer.training_error(datasets.nist_upper(), part=2)
        else:
            # validation error on the P07 and PNIST07 datasets
            print "P07 valid"
            optimizer.training_error(datasets.nist_P07(), part=1)
            print "PNIST valid"
            optimizer.training_error(datasets.PNIST07(), part=1)

    channel.save()
    return channel.COMPLETE
def jobman_entrypoint(state, channel):
    # record mercurial versions of each package
    pylearn.version.record_versions(state, [theano, ift6266, pylearn])
    # TODO: remove this, bad for number of simultaneous requests on DB
    channel.save()

    # For test runs, we don't want to use the whole dataset so
    # reduce it to fewer elements if asked to.
    rtt = None
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    if state.has_key('decrease_lr'):
        decrease_lr = state['decrease_lr']
    else:
        decrease_lr = 0

    if state.has_key('decrease_lr_pretrain'):
        dec = state['decrease_lr_pretrain']
    else:
        dec = 0

    n_ins = 32 * 32

    if state.has_key('subdataset'):
        subdataset_name = state['subdataset']
    else:
        subdataset_name = SUBDATASET_NIST

    # n_outs = 62  # 10 digits, 26*2 (lower case, capitals)
    if subdataset_name == "upper":
        n_outs = 26
        subdataset = datasets.nist_upper()
        examples_per_epoch = NIST_UPPER_TRAIN_SIZE
    elif subdataset_name == "lower":
        n_outs = 26
        subdataset = datasets.nist_lower()
        examples_per_epoch = NIST_LOWER_TRAIN_SIZE
    elif subdataset_name == "digits":
        n_outs = 10
        subdataset = datasets.nist_digits()
        examples_per_epoch = NIST_DIGITS_TRAIN_SIZE
    else:
        n_outs = 62
        subdataset = datasets.nist_all()
        examples_per_epoch = NIST_ALL_TRAIN_SIZE

    print 'Using subdataset ', subdataset_name

    # To be sure variables will not exist only inside the if statement
    PATH = ''
    nom_reptrain = ''
    nom_serie = ""
    if state['pretrain_choice'] == 0:
        nom_serie = "series_NIST.h5"
    elif state['pretrain_choice'] == 1:
        nom_serie = "series_P07.h5"

    series = create_series(state.num_hidden_layers, nom_serie)

    print "Creating optimizer with state, ", state

    optimizer = SdaSgdOptimizer(dataset_name=subdataset_name,
                                dataset=subdataset,
                                hyperparameters=state,
                                n_ins=n_ins, n_outs=n_outs,
                                examples_per_epoch=examples_per_epoch,
                                series=series,
                                max_minibatches=rtt)

    parameters = []
    # Number of files of P07 used for pretraining
    nb_file = 0

    print('\n\tpretraining with NIST\n')
    optimizer.pretrain(subdataset, decrease=dec)
    channel.save()

    # Set some of the parameters used for the finetuning
    if state.has_key('finetune_set'):
        finetune_choice = state['finetune_set']
    else:
        finetune_choice = FINETUNE_SET

    if state.has_key('max_finetuning_epochs'):
        max_finetune_epoch_NIST = state['max_finetuning_epochs']
    else:
        max_finetune_epoch_NIST = MAX_FINETUNING_EPOCHS

    if state.has_key('max_finetuning_epochs_P07'):
        max_finetune_epoch_P07 = state['max_finetuning_epochs_P07']
    else:
        max_finetune_epoch_P07 = max_finetune_epoch_NIST

    # Decide how the finetune is done
    if finetune_choice == 0:
        print('\n\n\tfinetune with NIST\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(subdataset, subdataset, max_finetune_epoch_NIST, ind_test=1, decrease=decrease_lr)
        channel.save()
    if finetune_choice == 1:
        print('\n\n\tfinetune with P07\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(), max_finetune_epoch_P07, ind_test=0, decrease=decrease_lr)
        channel.save()
    if finetune_choice == 2:
        print('\n\n\tfinetune with P07 followed by NIST\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(), max_finetune_epoch_P07, ind_test=20, decrease=decrease_lr)
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(), max_finetune_epoch_NIST, ind_test=21, decrease=decrease_lr)
        channel.save()
    if finetune_choice == 3:
        print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n'
              'All hidden units output are input of the logistic regression\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(), max_finetune_epoch_NIST, ind_test=1, special=1, decrease=decrease_lr)

    if finetune_choice == -1:
        print('\nSERIE OF 4 DIFFERENT FINETUNINGS')
        print('\n\n\tfinetune with NIST\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(), max_finetune_epoch_NIST, ind_test=1, decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with P07\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(), max_finetune_epoch_P07, ind_test=0, decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_finetune_P07.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(), max_finetune_epoch_NIST, ind_test=21, decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n'
              'All hidden units output are input of the logistic regression\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(), max_finetune_epoch_NIST, ind_test=1, special=1, decrease=decrease_lr)
        channel.save()

    channel.save()
    return channel.COMPLETE
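# How this entrypoint is normally driven (a sketch, not from this source): jobman
# supplies a `state` of hyperparameters and a result `channel`. For a quick local
# run without the scheduler, something like the following is commonly used, where
# DEFAULT_HP_NIST is a hypothetical DD/dict holding the hyperparameters read above
# (pretrain_choice, finetune_set, num_hidden_layers, learning rates, ...):
#
#     from jobman import DD
#     chanmock = DD({'COMPLETE': 0, 'save': (lambda: None)})
#     jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock)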
def test_error(model_file):
    print((' test error on all NIST'))

    # load the saved model parameters and training configuration
    a = numpy.load(model_file)
    W1 = a['W1']
    W2 = a['W2']
    b1 = a['b1']
    b2 = a['b2']
    configuration = a['config']
    # configuration = [learning_rate, nb_max_exemples, nb_hidden, adaptive_lr]
    learning_rate = configuration[0]
    nb_max_exemples = configuration[1]
    nb_hidden = configuration[2]
    adaptive_lr = configuration[3]

    if len(configuration) == 6:
        detection_mode = configuration[4]
        reduce_label = configuration[5]
    else:
        detection_mode = 0
        reduce_label = 0

    # define the batch size
    batch_size = 20
    # define the number of target classes
    nb_targets = 62

    # create the mlp
    ishape = (32, 32)  # this is the size of NIST images

    # allocate symbolic variables for the data
    x = T.fmatrix()  # the data is presented as rasterized images
    y = T.lvector()  # the labels are presented as a 1D vector of [long int] labels

    # construct the MLP classifier
    classifier = MLP(input=x,
                     n_in=32 * 32,
                     n_hidden=nb_hidden,
                     n_out=nb_targets,
                     learning_rate=learning_rate,
                     detection_mode=detection_mode)

    # set the saved weights into the model
    classifier.W1.value = W1
    classifier.b1.value = b1
    classifier.W2.value = W2
    classifier.b2.value = b2

    # compiling a theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function([x, y], classifier.errors(y))

    # evaluate on the test split of each NIST subset
    for name, dataset in [('NIST ALL',    datasets.nist_all()),
                          ('NIST digits', datasets.nist_digits()),
                          ('NIST lower',  datasets.nist_lower()),
                          ('NIST upper',  datasets.nist_upper())]:
        test_score = 0.
        temp = 0
        for xt, yt in dataset.test(batch_size):
            if reduce_label:
                # fold labels above 35 onto 10-35 (merge the two letter cases)
                yt[yt > 35] = yt[yt > 35] - 26
            test_score += test_model(xt, yt)
            temp = temp + 1
        test_score /= temp
        print((' test error %s : %f %%') % (name, test_score * 100.0))
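# Minimal usage sketch (an assumption, not part of the original source): evaluate a
# saved MLP checkpoint on the NIST test sets. The file name is a placeholder; the
# numpy archive is expected to contain W1, W2, b1, b2 and 'config' exactly as
# test_error() reads them above.
if __name__ == '__main__':
    test_error('mlp_model.npz')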