def train_again(yaml): ''' Relaunch training of a model with conditions specified by the YAML Looks for the model file defined by save path and replace the model instanciated by the one that was trained before ------------------------------------------------------------------- yaml : string, filename YAML file defining the exp to be continued ''' context = serial.load_train_file(yaml) print "\tLoaded YAML" # Load the trained model model_file = context.save_path if not os.path.isfile(model_file): model_file = context.extensions[-1].save_path with open(model_file, 'r') as m_f: trained_model = pkl.load(m_f) # Define the continuing one new_model = push_monitor(trained_model, 'trained_model', transfer_experience=True) # Define it as the model to be trained context.model = new_model # Train again context.main_loop()
def main(argv, freeze): try: opts, args = getopt.getopt(argv, '') yaml = args[0] model = args[1] except getopt.GetoptError: usage() sys.exit(2) # Load yaml with open(yaml, "r") as sty: train = serial.load_train_file(yaml) #train = yaml_parse.load(sty) # Load pretrained model with bad sigmoid output with open(model, 'r') as fo: model = pkl.load(fo) # Remove the last layer, puts a real sigmoid instead if freeze: for i in range(0, len(model.layers) - 2): model.freeze(model.layers[i].get_params()) ### Add last conv elemwise layer = ConvElemwise(layer_name= 'out', output_channels= 1, kernel_shape=(1,1), irange=0.05, nonlinearity=IdentityConvNonlinearity(), max_kernel_norm= 7.9, tied_b=1) layer.set_mlp(model) layer.set_input_space(model.layers[-3].get_output_space()) model.layers[-2] = layer ### Add Sigmoid layer = SigmoidExtended(layer_name='y', n_classes=1) layer.set_mlp(model) layer.set_input_space(model.layers[-2].get_output_space()) model.layers[-1] = layer #print model.layers #model.monitor = train.model.monitor #train.model = model train.model = push_monitor(model, "old") print train.model #train = Train(train.dataset, model, train.algorithm, train.save_path, # train.save_freq, train.extensions, train.allow_overwrite) train.main_loop()
def main(argv, freeze): try: opts, args = getopt.getopt(argv, '') yaml = args[0] model = args[1] except getopt.GetoptError: usage() sys.exit(2) # Load yaml with open(yaml, "r") as sty: train = serial.load_train_file(yaml) #train = yaml_parse.load(sty) # Load pretrained model with bad sigmoid output with open(model, 'r') as fo: model = pkl.load(fo) # Remove the last layer, puts a real sigmoid instead if freeze: for i in range(0, len(model.layers) - 2): model.freeze(model.layers[i].get_params()) ### Add last conv elemwise layer = ConvElemwise(layer_name='out', output_channels=1, kernel_shape=(1, 1), irange=0.05, nonlinearity=IdentityConvNonlinearity(), max_kernel_norm=7.9, tied_b=1) layer.set_mlp(model) layer.set_input_space(model.layers[-3].get_output_space()) model.layers[-2] = layer ### Add Sigmoid layer = SigmoidExtended(layer_name='y', n_classes=1) layer.set_mlp(model) layer.set_input_space(model.layers[-2].get_output_space()) model.layers[-1] = layer #print model.layers #model.monitor = train.model.monitor #train.model = model train.model = push_monitor(model, "old") print train.model #train = Train(train.dataset, model, train.algorithm, train.save_path, # train.save_freq, train.extensions, train.allow_overwrite) train.main_loop()
def test_transfer_experience():
    """Make sure the transfer_experience flag of push_monitor carries the
    old monitor's epoch/batch/example counts over to the fresh monitor."""
    model = DummyModel(num_features=3)
    monitor = Monitor.get_monitor(model)
    # Two batches of sizes 2 and 3, then one epoch boundary
    monitor.report_batch(2)
    monitor.report_batch(3)
    monitor.report_epoch()
    model = push_monitor(model, "old_monitor", transfer_experience=True)
    # The previous monitor must be preserved under the requested attribute
    assert model.old_monitor is monitor
    # The replacement monitor starts with the old monitor's experience
    monitor = model.monitor
    assert monitor.get_epochs_seen() == 1
    assert monitor.get_batches_seen() == 2
    # The original asserted epochs_seen twice (copy-paste); the intended
    # third check is the example count: 2 + 3 = 5
    assert monitor.get_examples_seen() == 5
def load(path):
    """Loads a model from path.

    We need this wrapper to make the loaded monitor continuable (currently
    deserialized monitor is non-functional in PyLearn2). For this we had to
    create a new monitor and initialize with the data from the old one.

    Parameters
    ----------
    path : str
        The model path.
    """
    deserialized = serial.load(path)
    refreshed = push_monitor(deserialized, "_delete_me",
                             transfer_experience=True,
                             save_records=True)
    # The stale monitor was only parked on the model so push_monitor could
    # copy its records; drop the reference before handing the model back.
    del refreshed._delete_me
    return refreshed
def train_again(yaml): ''' Relaunch training of a model with conditions specified by the YAML Looks for the model file defined by save path and replace the model instanciated by the one that was trained before ------------------------------------------------------------------- yaml : string, filename YAML file defining the exp to be continued ''' context = serial.load_train_file(yaml) print "\tLoaded YAML" # Load the trained model model_file = context.save_path for ext in range(len(context.extensions)): if isinstance(context.extensions[ext], MonitorBasedSaveBest): pos = ext else: raise AssertionError( 'No MonitorBasedSaveBest extension in the model!') if not os.path.isfile(model_file): model_file = context.extensions[pos].save_path with open(model_file, 'r') as m_f: trained_model = pkl.load(m_f) # Define the continuing one new_model = push_monitor(trained_model, 'trained_model', transfer_experience=True) # Define it as the model to be trained context.model = new_model context.save_path = context.extensions[pos].save_path[ 0:-4] + "_continue.pkl" context.extensions[pos].save_path = context.save_path[0:-4] + "_best.pkl" # Train again context.main_loop()
def produce_train_obj(new_epochs, model=None):
    """Build a small Train object for the epoch-counter tests.

    If a model is supplied, its monitor is pushed aside (kept as
    'old_monitor') with transfer_experience=True so training resumes from
    its history; otherwise a fresh one-layer softmax MLP is created.
    """
    if model is not None:
        model = push_monitor(model, 'old_monitor', transfer_experience=True)
    else:
        model = MLP(
            layers=[Softmax(layer_name='y', n_classes=2, irange=0.)],
            nvis=3)
    data = DenseDesignMatrix(X=np.random.normal(size=(6, 3)),
                             y=np.random.normal(size=(6, 2)))
    # Stop after `new_epochs` additional epochs, capped at the global N
    criterion = EpochCounter(max_epochs=N, new_epochs=new_epochs)
    sgd_algorithm = SGD(batch_size=2, learning_rate=0.1,
                        termination_criterion=criterion)
    return Train(dataset=data, model=model, algorithm=sgd_algorithm)
def train_again(yaml): ''' Relaunch training of a model with conditions specified by the YAML Looks for the model file defined by save path and replace the model instanciated by the one that was trained before ------------------------------------------------------------------- yaml : string, filename YAML file defining the exp to be continued ''' context = serial.load_train_file(yaml) print "\tLoaded YAML" # Load the trained model model_file = context.save_path for ext in range(len(context.extensions)): if isinstance(context.extensions[ext],MonitorBasedSaveBest): pos = ext else: raise AssertionError('No MonitorBasedSaveBest extension in the model!') if not os.path.isfile(model_file): model_file = context.extensions[pos].save_path with open(model_file, 'r') as m_f: trained_model = pkl.load(m_f) # Define the continuing one new_model = push_monitor(trained_model, 'trained_model', transfer_experience=True) # Define it as the model to be trained context.model = new_model context.save_path = context.extensions[pos].save_path[0:-4] + "_continue.pkl" context.extensions[pos].save_path = context.save_path[0:-4] + "_best.pkl" # Train again context.main_loop()
def produce_train_obj(new_epochs, max_epochs, model=None):
    """Build a small Train object for the epoch-counter tests.

    If a model is supplied, its monitor is pushed aside (kept as
    'old_monitor') with transfer_experience=True so training resumes from
    its history; otherwise a fresh one-layer softmax MLP is created.
    """
    if model is not None:
        model = push_monitor(model, 'old_monitor', transfer_experience=True)
    else:
        model = MLP(
            layers=[Softmax(layer_name='y', n_classes=2, irange=0.)],
            nvis=3)
    data = DenseDesignMatrix(X=np.random.normal(size=(6, 3)),
                             y=np.random.normal(size=(6, 2)))
    # Stop after `new_epochs` additional epochs, capped at max_epochs total
    criterion = EpochCounter(max_epochs=max_epochs, new_epochs=new_epochs)
    sgd_algorithm = SGD(batch_size=2, learning_rate=0.1,
                        termination_criterion=criterion)
    return Train(dataset=data, model=model, algorithm=sgd_algorithm)
def setup():
    """Wire up and return a BasicQAgent for the Atari DQN experiment.

    Resumes from `best_model.pkl` (pushing the stale monitor aside) when it
    exists, otherwise instantiates a fresh model from the conv YAML
    template.  Also builds the replay dataset, the SGD/RPROP training
    algorithm, the Train object and the percept preprocessor.
    """
    N = 200000  # The paper keeps 1,000,000 memories
    num_frames = 4  # Prescribed by paper
    img_dims = (84, 84)  # Prescribed by paper
    action_dims = 4  # Prescribed by ALE
    batch_size = 32
    learning_rate = 0.05
    batches_per_iter = 1  # How many batches to pull from memory
    discount_factor = 0.95
    # NOTE(review): hard-coded lab path -- this only runs on that cluster
    base_dir = '/data/lisa/exp/webbd/drl/experiments/2014-11-02'
    model_pickle_path = os.path.join(base_dir, 'best_model.pkl')
    log.info("Creating action cost.")
    action_cost = ActionCost.Action()
    # Load the model if it exists
    if os.path.exists(model_pickle_path):
        # NOTE(review): the file handle from open() is never closed here
        model = cPickle.load(open(model_pickle_path, 'rb'))
        # Keep the deserialized monitor (as "at") and attach a working one
        model = monitor.push_monitor(model, "at", transfer_experience=True)
    # Otherwise create a new model
    else:
        # TODO This is a hacky way to find the model yaml
        model_yaml = os.path.dirname(os.path.realpath(__file__))
        model_yaml = os.path.join(model_yaml, '../models/model_conv.yaml')
        log.info("Loading model yaml (%s)" % model_yaml)
        yaml_params = {
            'num_channels': num_frames,
            'action_dims': action_dims,
        }
        model = utils.load_yaml_template(model_yaml, yaml_params)
    log.info("Creating dataset.")
    # Replay memory holding up to N transitions of stacked frames
    dataset = Replay(N, img_dims, num_frames, action_dims)
    #monitoring_dataset = {}
    #monitoring_dataset['train'] = dataset
    log.info("Creating terminiation criterion.")
    # One epoch per invocation of the training loop
    termination_criterion = EpochCounter(1)
    log.info("Creating training algorithm.")
    algo = SGD(
        batch_size=batch_size,
        learning_rate=learning_rate,
        batches_per_iter=batches_per_iter,
        #monitoring_dataset=monitoring_dataset
        monitoring_dataset=None,
        cost=action_cost,
        termination_criterion=termination_criterion,
        learning_rule=RPROP()
    )
    log.info("Creating training object.")
    # The agent feeds minibatches itself, hence dataset=None here
    train = Train(dataset=None, model=model, algorithm=algo)
    log.info("Creating percept_preprocessor.")
    percept_preprocessor = ppp.DeepMindPreprocessor(img_dims, base_dir)
    log.info("Creating agent.")
    # Map the agent's 4 action indices onto ALE action codes
    action_map = {
        0: 0,
        1: 1,
        2: 3,
        3: 4,
    }
    return BasicQAgent(
        model,
        dataset,
        train,
        percept_preprocessor,
        action_map,
        base_dir,
        model_pickle_path,
        discount_factor=discount_factor,
        k=num_frames,
        epsilon=1,
        epsilon_anneal_frames=5000000
    )
def cnn_train(
    train_path,
    test_path,
    valid_path,
    save_path,
    predict_path,
    image_path,
    num_rows=28,
    num_cols=28,
    num_channels=2,
    batch_size=128,
    output_channels=[64, 64],
    kernel_shape=[[12, 12], [5, 5]],
    pool_shape=[[4, 4], [2, 2]],
    pool_stride=[[2, 2], [2, 2]],
    irange=[0.05, 0.05, 0.05],
    max_kernel_norm=[1.9365, 1.9365],
    learning_rate=0.001,
    init_momentum=0.9,
    weight_decay=[0.0002, 0.0002, 0.0002],
    n_epoch=1000,
):
    """Continue training a convolutional regression network on SAR patches.

    Loads train/valid/test data, builds an SGD trainer with weight decay and
    window-flip augmentation, resumes from a previously pickled network
    (attaching a fresh monitor via push_monitor), runs the training loop and
    returns the network.  The best model (valid_objective) is pickled to
    predict_path + save_path by MonitorBasedSaveBest.

    NOTE(review): the list-valued defaults are mutable default arguments;
    they are only read here so the usual shared-state bug does not bite,
    but callers should not mutate them.  `image_path` is unused in this
    function.  The `layers` list built below is dead code at the moment:
    the mlp.MLP construction is commented out and a pickled network is
    loaded instead.
    """
    #load data
    #t = time.time()
    ds = load_data(valid_path, num_rows, num_cols, num_channels)
    vld = SarDataset(np.array(ds[0]), ds[1])
    ds = load_data(train_path, num_rows, num_cols, num_channels)
    trn = SarDataset(np.array(ds[0]), ds[1])
    ds = load_data(test_path, num_rows, num_cols, num_channels)
    tst = SarDataset(np.array(ds[0]), ds[1])
    #load balanced data
    #ds = load_data_balance_under_sample(train_path, num_rows,num_cols, num_channels)
    #trn = SarDataset(np.array(ds[0]),ds[1])
    #ds = load_data_balance(valid_path, num_rows,num_cols, num_channels)
    #vld = SarDataset(np.array(ds[0]),ds[1])
    #ds = load_data_balance(test_path, num_rows,num_cols, num_channels)
    #tst = SarDataset(np.array(ds[0]),ds[1])
    #print 'Take {}s to read data'.format( time.time()-t)
    #use gaussian convlution on the origional image to see if it can concentrate in the center
    #trn,tst,vld = load_data_lidar()
    #mytransformer = transformer.TransformationPipeline(input_space=space.Conv2DSpace(shape=[num_rows,num_cols],num_channels=num_channels),transformations=[transformer.Rotation(),transformer.Flipping()])
    #trn = contestTransformerDataset.TransformerDataset(trn,mytransformer,space_preserving=True)
    #tst = contestTransformerDataset.TransformerDataset(tst,mytransformer,space_preserving=True)
    #vld = contestTransformerDataset.TransformerDataset(vld,mytransformer,space_preserving=True)
    #trn = transformer_dataset.TransformerDataset(trn,mytransformer,space_preserving=True)
    #tst = transformer_dataset.TransformerDataset(tst,mytransformer,space_preserving=True)
    #vld = transformer_dataset.TransformerDataset(vld,mytransformer,space_preserving=True)
    #setup the network
    t = time.time()
    # Conv stack: one ConvRectifiedLinear per entry of output_channels
    layers = []
    for i in range(len(output_channels)):
        layer_name = 'h{}'.format(i + 1)
        convlayer = mlp.ConvRectifiedLinear(layer_name=layer_name,
                                            output_channels=output_channels[i],
                                            irange=irange[i],
                                            kernel_shape=kernel_shape[i],
                                            pool_shape=pool_shape[i],
                                            pool_stride=pool_stride[i],
                                            max_kernel_norm=max_kernel_norm[i])
        layers.append(convlayer)
    # Scalar linear output with absolute-error loss (regression head);
    # irange[-1] is reserved for this layer
    output_mlp = mlp.Linear(dim=1,
                            layer_name='output',
                            irange=irange[-1],
                            use_abs_loss=True)
    #output_mlp = mlp.linear_mlp_ace(dim=1,layer_name='output',irange=irange[-1])
    layers.append(output_mlp)
    #ann = cPickle.load(open('../output/train_with_2010_2l_40_64/original_500/f/f0.pkl'))
    #layers = []
    #for layer in ann.layers:
    #    layer.set_mlp_force(None)
    #    layers.append(layer)
    trainer = sgd.SGD(
        learning_rate=learning_rate,
        batch_size=batch_size,
        termination_criterion=EpochCounter(n_epoch),
        #termination_criterion = termination_criteria.And([termination_criteria.MonitorBased(channel_name = 'train_objective', prop_decrease=0.01,N=10),EpochCounter(n_epoch)]),
        #cost = dropout.Dropout(),
        cost=cost.SumOfCosts(
            [cost.MethodCost('cost_from_X'),
             WeightDecay(weight_decay)]),
        init_momentum=init_momentum,
        train_iteration_mode='even_shuffled_sequential',
        monitor_iteration_mode='even_shuffled_sequential',
        monitoring_dataset={
            'test': tst,
            'valid': vld,
            'train': trn
        })
    input_space = space.Conv2DSpace(shape=[num_rows, num_cols],
                                    num_channels=num_channels)
    #ann = mlp.MLP(layers,input_space=input_space,batch_size=batch_size)
    # Resume from a previously trained network; push its monitor aside
    # (kept as 'stage_0') so a fresh one is attached for this stage
    ann = serial.load(
        '../output/train_with_2010_2l_40_64/original_500/f/f0.pkl')
    ann = monitor.push_monitor(ann, 'stage_0')
    watcher = best_params.MonitorBasedSaveBest(channel_name='valid_objective',
                                               save_path=predict_path +
                                               save_path)
    # Random-window + horizontal-flip augmentation on all three splits
    flip = window_flip.WindowAndFlip((num_rows, num_cols),
                                     randomize=[tst, vld, trn])
    experiment = Train(dataset=trn,
                       model=ann,
                       algorithm=trainer,
                       extensions=[watcher, flip])
    print 'Take {}s to compile code'.format(time.time() - t)
    #train the network
    t = time.time()
    experiment.main_loop()
    print 'Training time: {}h'.format((time.time() - t) / 3600)
    utils.sms_notice('Training time:{}'.format((time.time() - t) / 3600))
    return ann
def supervisedLayerwisePRL(trainset, testset):
    '''
    The supervised layerwise training as used in the PRL Paper.

    Trains the network in three stages: after each stage the best model
    (valid_y_misclass) is reloaded, its monitor is pushed aside via
    push_monitor, the top layers are removed and new conv/output layers are
    stacked on before training resumes.

    Input
    ------
    trainset : A path to an hdf5 file created through h5py.
    testset : A path to an hdf5 file created through h5py.

    NOTE(review): `vld`, used as the 'valid' monitoring dataset in every
    stage below, is never defined in this function; unless a module-level
    `vld` exists, each trainer construction raises NameError.  Also see the
    stage-3 notes inline.
    '''
    batch_size = 100
    # Both train and test h5py files are expected to have a 'topo_view' and 'y'
    # datasets side them corresponding to the 'b01c' data format as used in pylearn2
    # and 'y' equivalent to the one hot encoded labels
    trn = HDF5Dataset(filename=trainset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    tst = HDF5Dataset(filename=testset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    '''
    The 1st Convolution and Pooling Layers are added below.
    '''
    h1 = mlp.ConvRectifiedLinear(layer_name='h1',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)
    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)
    layers = [h1, fc, output]
    mdl = mlp.MLP(layers,
                  input_space=Conv2DSpace(shape=(70, 70), num_channels=1))
    trainer = sgd.SGD(
        learning_rate=0.002,
        batch_size=batch_size,
        learning_rule=learning_rule.RMSProp(),
        # One weight-decay coefficient per layer
        cost=SumOfCosts(
            costs=[Default(),
                   WeightDecay(coeffs=[0.0005, 0.0005, 0.0005])]),
        train_iteration_mode='shuffled_sequential',
        monitor_iteration_mode='sequential',
        termination_criterion=EpochCounter(max_epochs=15),
        monitoring_dataset={
            'test': tst,
            # NOTE(review): undefined name -- see docstring
            'valid': vld
        })
    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best1.pkl')
    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)
    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )
    experiment.main_loop()
    # Reload the best stage-1 model; its stale monitor is kept as 'k' and a
    # fresh, working monitor is attached by push_monitor
    del mdl
    mdl = serial.load('./Saved Models/conv_supervised_layerwise_best1.pkl')
    mdl = push_monitor(mdl, 'k')
    '''
    The 2nd Convolution and Pooling Layers are added below.
    '''
    h2 = mlp.ConvRectifiedLinear(layer_name='h2',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)
    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)
    # Strip the old 'y' and 'fc' layers, then stack conv + fc + softmax
    del mdl.layers[-1]
    mdl.layer_names.remove('y')
    del mdl.layers[-1]
    mdl.layer_names.remove('fc')
    mdl.add_layers([h2, fc, output])
    trainer = sgd.SGD(learning_rate=0.002,
                      batch_size=batch_size,
                      learning_rule=learning_rule.RMSProp(),
                      cost=SumOfCosts(costs=[
                          Default(),
                          WeightDecay(coeffs=[0.0005, 0.0005, 0.0005, 0.0005])
                      ]),
                      train_iteration_mode='shuffled_sequential',
                      monitor_iteration_mode='sequential',
                      termination_criterion=EpochCounter(max_epochs=15),
                      monitoring_dataset={
                          'test': tst,
                          # NOTE(review): undefined name -- see docstring
                          'valid': vld
                      })
    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best2.pkl')
    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)
    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )
    experiment.main_loop()
    # Reload the best stage-2 model; stale monitor kept as 'l'
    del mdl
    mdl = serial.load('./Saved Models/conv_supervised_layerwise_best2.pkl')
    mdl = push_monitor(mdl, 'l')
    '''
    The 3rd Convolution and Pooling Layers are added below.
    '''
    # NOTE(review): layer_name='h2' duplicates the stage-2 conv layer's
    # name -- presumably 'h3' was intended; confirm against the paper setup
    h3 = mlp.ConvRectifiedLinear(layer_name='h2',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)
    # NOTE(review): this `fc` is built but never added to the model below
    # (add_layers only receives [h3, output])
    fc = mlp.RectifiedLinear(layer_name='h3', dim=1500, irange=0.05)
    # NOTE(review): n_classes drops from 171 to 10 here -- verify intended
    output = mlp.Softmax(layer_name='y',
                         n_classes=10,
                         irange=.005,
                         max_col_norm=1.9365)
    del mdl.layers[-1]
    mdl.layer_names.remove('y')
    del mdl.layers[-1]
    mdl.layer_names.remove('fc')
    mdl.add_layers([h3, output])
    trainer = sgd.SGD(
        learning_rate=.002,
        batch_size=batch_size,
        learning_rule=learning_rule.RMSProp(),
        cost=SumOfCosts(costs=[
            Default(),
            WeightDecay(coeffs=[0.0005, 0.0005, 0.0005, 0.0005, 0.0005])
        ]),
        train_iteration_mode='shuffled_sequential',
        monitor_iteration_mode='sequential',
        termination_criterion=EpochCounter(max_epochs=15),
        monitoring_dataset={
            'test': tst,
            # NOTE(review): undefined name -- see docstring
            'valid': vld
        })
    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best3.pkl')
    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)
    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )
    experiment.main_loop()
def cnn_train(
    train_path,
    test_path,
    valid_path,
    save_path,
    predict_path,
    image_path,
    num_rows=28,
    num_cols=28,
    num_channels=2,
    batch_size=128,
    output_channels=[64, 64],
    kernel_shape=[[12, 12], [5, 5]],
    pool_shape=[[4, 4], [2, 2]],
    pool_stride=[[2, 2], [2, 2]],
    irange=[0.05, 0.05, 0.05],
    max_kernel_norm=[1.9365, 1.9365],
    learning_rate=0.001,
    init_momentum=0.9,
    weight_decay=[0.0002, 0.0002, 0.0002],
    n_epoch=1000,
):
    """Continue training a convolutional regression network on SAR patches.

    Loads train/valid/test data, builds an SGD trainer with weight decay and
    window-flip augmentation, resumes from a previously pickled network
    (attaching a fresh monitor via push_monitor), runs the training loop and
    returns the network.  The best model (valid_objective) is pickled to
    predict_path + save_path by MonitorBasedSaveBest.

    NOTE(review): the list-valued defaults are mutable default arguments;
    they are only read here so the usual shared-state bug does not bite,
    but callers should not mutate them.  `image_path` is unused in this
    function.  The `layers` list built below is dead code at the moment:
    the mlp.MLP construction is commented out and a pickled network is
    loaded instead.
    """
    #load data
    #t = time.time()
    ds = load_data(valid_path, num_rows, num_cols, num_channels)
    vld = SarDataset(np.array(ds[0]), ds[1])
    ds = load_data(train_path, num_rows, num_cols, num_channels)
    trn = SarDataset(np.array(ds[0]), ds[1])
    ds = load_data(test_path, num_rows, num_cols, num_channels)
    tst = SarDataset(np.array(ds[0]), ds[1])
    #load balanced data
    #ds = load_data_balance_under_sample(train_path, num_rows,num_cols, num_channels)
    #trn = SarDataset(np.array(ds[0]),ds[1])
    #ds = load_data_balance(valid_path, num_rows,num_cols, num_channels)
    #vld = SarDataset(np.array(ds[0]),ds[1])
    #ds = load_data_balance(test_path, num_rows,num_cols, num_channels)
    #tst = SarDataset(np.array(ds[0]),ds[1])
    #print 'Take {}s to read data'.format( time.time()-t)
    #use gaussian convlution on the origional image to see if it can concentrate in the center
    #trn,tst,vld = load_data_lidar()
    #mytransformer = transformer.TransformationPipeline(input_space=space.Conv2DSpace(shape=[num_rows,num_cols],num_channels=num_channels),transformations=[transformer.Rotation(),transformer.Flipping()])
    #trn = contestTransformerDataset.TransformerDataset(trn,mytransformer,space_preserving=True)
    #tst = contestTransformerDataset.TransformerDataset(tst,mytransformer,space_preserving=True)
    #vld = contestTransformerDataset.TransformerDataset(vld,mytransformer,space_preserving=True)
    #trn = transformer_dataset.TransformerDataset(trn,mytransformer,space_preserving=True)
    #tst = transformer_dataset.TransformerDataset(tst,mytransformer,space_preserving=True)
    #vld = transformer_dataset.TransformerDataset(vld,mytransformer,space_preserving=True)
    #setup the network
    t = time.time()
    # Conv stack: one ConvRectifiedLinear per entry of output_channels
    layers = []
    for i in range(len(output_channels)):
        layer_name = 'h{}'.format(i + 1)
        convlayer = mlp.ConvRectifiedLinear(layer_name=layer_name,
                                            output_channels=output_channels[i],
                                            irange=irange[i],
                                            kernel_shape=kernel_shape[i],
                                            pool_shape=pool_shape[i],
                                            pool_stride=pool_stride[i],
                                            max_kernel_norm=max_kernel_norm[i])
        layers.append(convlayer)
    # Scalar linear output with absolute-error loss (regression head);
    # irange[-1] is reserved for this layer
    output_mlp = mlp.Linear(dim=1,
                            layer_name='output',
                            irange=irange[-1],
                            use_abs_loss=True)
    #output_mlp = mlp.linear_mlp_ace(dim=1,layer_name='output',irange=irange[-1])
    layers.append(output_mlp)
    #ann = cPickle.load(open('../output/train_with_2010_2l_40_64/original_500/f/f0.pkl'))
    #layers = []
    #for layer in ann.layers:
    #    layer.set_mlp_force(None)
    #    layers.append(layer)
    trainer = sgd.SGD(
        learning_rate=learning_rate,
        batch_size=batch_size,
        termination_criterion=EpochCounter(n_epoch),
        #termination_criterion = termination_criteria.And([termination_criteria.MonitorBased(channel_name = 'train_objective', prop_decrease=0.01,N=10),EpochCounter(n_epoch)]),
        #cost = dropout.Dropout(),
        cost=cost.SumOfCosts(
            [cost.MethodCost('cost_from_X'),
             WeightDecay(weight_decay)]),
        init_momentum=init_momentum,
        train_iteration_mode='even_shuffled_sequential',
        monitor_iteration_mode='even_shuffled_sequential',
        monitoring_dataset={
            'test': tst,
            'valid': vld,
            'train': trn
        })
    input_space = space.Conv2DSpace(shape=[num_rows, num_cols],
                                    num_channels=num_channels)
    #ann = mlp.MLP(layers,input_space=input_space,batch_size=batch_size)
    # Resume from a previously trained network; push its monitor aside
    # (kept as 'stage_0') so a fresh one is attached for this stage
    ann = serial.load(
        '../output/train_with_2010_2l_40_64/original_500/f/f0.pkl')
    ann = monitor.push_monitor(ann, 'stage_0')
    watcher = best_params.MonitorBasedSaveBest(channel_name='valid_objective',
                                               save_path=predict_path +
                                               save_path)
    # Random-window + horizontal-flip augmentation on all three splits
    flip = window_flip.WindowAndFlip((num_rows, num_cols),
                                     randomize=[tst, vld, trn])
    experiment = Train(dataset=trn,
                       model=ann,
                       algorithm=trainer,
                       extensions=[watcher, flip])
    print 'Take {}s to compile code'.format(time.time() - t)
    #train the network
    t = time.time()
    experiment.main_loop()
    print 'Training time: {}h'.format((time.time() - t) / 3600)
    utils.sms_notice('Training time:{}'.format((time.time() - t) / 3600))
    return ann