Example #1
def train_again(yaml):
    '''
    Relaunch training of a model with the conditions specified by the YAML.
    Looks for the model file defined by the save path and replaces the
    instantiated model with the one that was trained before.
    -------------------------------------------------------------------
    yaml : string, filename
           YAML file defining the experiment to be continued
    '''

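    # load_train_file parses the YAML description and returns the pylearn2
    # Train object it defines.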
    context = serial.load_train_file(yaml)
    print "\tLoaded YAML"

    # Load the trained model
    model_file = context.save_path

    if not os.path.isfile(model_file):
        model_file = context.extensions[-1].save_path

    with open(model_file, 'r') as m_f:
        trained_model = pkl.load(m_f)

    # Define the continuing one
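    # push_monitor attaches a fresh Monitor to the model and keeps the old one
    # as new_model.trained_model; transfer_experience=True carries the
    # epoch/batch/example counters over so training resumes where the previous
    # run stopped instead of starting the monitor from zero.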
    new_model = push_monitor(trained_model, 'trained_model',
                             transfer_experience=True)
    # Define it as the model to be trained
    context.model = new_model
    # Train again
    context.main_loop()
Example #2
def train_again(yaml):
    '''
    Relaunch training of a model with the conditions specified by the YAML.
    Looks for the model file defined by the save path and replaces the
    instantiated model with the one that was trained before.
    -------------------------------------------------------------------
    yaml : string, filename
           YAML file defining the experiment to be continued
    '''

    context = serial.load_train_file(yaml)
    print "\tLoaded YAML"

    # Load the trained model
    model_file = context.save_path

    if not os.path.isfile(model_file):
        model_file = context.extensions[-1].save_path

    with open(model_file, 'r') as m_f:
        trained_model = pkl.load(m_f)

    # Define the continuing one
    new_model = push_monitor(trained_model,
                             'trained_model',
                             transfer_experience=True)
    # Define it as the model to be trained
    context.model = new_model
    # Train again
    context.main_loop()
Example #3
def main(argv, freeze):

  try:
    opts, args = getopt.getopt(argv, '')
    yaml = args[0]
    model = args[1]
  except getopt.GetoptError:
    usage()
    sys.exit(2)

  # Load yaml
  with open(yaml, "r") as sty:
    train = serial.load_train_file(yaml)
    #train = yaml_parse.load(sty)


  # Load pretrained model with bad sigmoid output
  with open(model, 'r') as fo:
    model = pkl.load(fo)

  # Optionally freeze all but the last two layers so only the replacement
  # output layers added below get trained
  if freeze:
    for i in range(0, len(model.layers) - 2):
      model.freeze(model.layers[i].get_params())


  ### Add last conv elemwise
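  # Wire the replacement 1x1 ConvElemwise layer into the existing network by
  # hand: point it at the parent MLP, give it the output space of the layer
  # two positions from the end, then overwrite the old second-to-last layer.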
  layer = ConvElemwise(layer_name='out',
                       output_channels=1,
                       kernel_shape=(1, 1),
                       irange=0.05,
                       nonlinearity=IdentityConvNonlinearity(),
                       max_kernel_norm=7.9,
                       tied_b=1)
  layer.set_mlp(model)
  layer.set_input_space(model.layers[-3].get_output_space())
  model.layers[-2] = layer

  ### Add Sigmoid
  layer = SigmoidExtended(layer_name='y', n_classes=1)
  layer.set_mlp(model)
  layer.set_input_space(model.layers[-2].get_output_space())
  model.layers[-1] = layer

  #print model.layers
  #model.monitor = train.model.monitor
  #train.model = model
  train.model = push_monitor(model, "old")
  print train.model


  #train = Train(train.dataset, model, train.algorithm, train.save_path,
  #                train.save_freq, train.extensions, train.allow_overwrite)
  train.main_loop()
Example #4
def main(argv, freeze):

    try:
        opts, args = getopt.getopt(argv, '')
        yaml = args[0]
        model = args[1]
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Load yaml
    with open(yaml, "r") as sty:
        train = serial.load_train_file(yaml)
        #train = yaml_parse.load(sty)

    # Load pretrained model with bad sigmoid output
    with open(model, 'r') as fo:
        model = pkl.load(fo)

    # Optionally freeze all but the last two layers so only the replacement
    # output layers added below get trained
    if freeze:
        for i in range(0, len(model.layers) - 2):
            model.freeze(model.layers[i].get_params())

    ### Add last conv elemwise
    layer = ConvElemwise(layer_name='out',
                         output_channels=1,
                         kernel_shape=(1, 1),
                         irange=0.05,
                         nonlinearity=IdentityConvNonlinearity(),
                         max_kernel_norm=7.9,
                         tied_b=1)
    layer.set_mlp(model)
    layer.set_input_space(model.layers[-3].get_output_space())
    model.layers[-2] = layer

    ### Add Sigmoid
    layer = SigmoidExtended(layer_name='y', n_classes=1)
    layer.set_mlp(model)
    layer.set_input_space(model.layers[-2].get_output_space())
    model.layers[-1] = layer

    #print model.layers
    #model.monitor = train.model.monitor
    #train.model = model
    train.model = push_monitor(model, "old")
    print train.model

    #train = Train(train.dataset, model, train.algorithm, train.save_path,
    #                train.save_freq, train.extensions, train.allow_overwrite)
    train.main_loop()
Example #5
def test_transfer_experience():

    # Makes sure the transfer_experience flag of push_monitor works

    model = DummyModel(num_features = 3)
    monitor = Monitor.get_monitor(model)
    monitor.report_batch(2)
    monitor.report_batch(3)
    monitor.report_epoch()
    model = push_monitor(model, "old_monitor", transfer_experience=True)
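    # The old monitor is kept as model.old_monitor while a brand new Monitor
    # is attached; transfer_experience copies its counters, so the new monitor
    # already reports 1 epoch, 2 batches and 5 examples seen.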
    assert model.old_monitor is monitor
    monitor = model.monitor
    assert monitor.get_epochs_seen() == 1
    assert monitor.get_batches_seen() == 2
    assert monitor.get_examples_seen() == 5
Example #6
    def load(path):
        """Loads a model from path.

        We need this wrapper to make the loaded monitor continuable
        (currently deserialized monitor is non-functional in PyLearn2).
        For this we had to create a new monitor and initialize with the
        data from the old one.

        Parameters
        ----------
        path : str
            The model path.

        """
        # push_monitor builds a fresh, functional Monitor for the model and
        # parks the stale deserialized one as model._delete_me (removed right
        # below); transfer_experience keeps the epoch/batch/example counters
        # and save_records preserves the recorded channel values.
        model = push_monitor(serial.load(path), "_delete_me",
                             transfer_experience=True, save_records=True)
        del model._delete_me
        return model
Example #7
def train_again(yaml):
    '''
    Relaunch training of a model with the conditions specified by the YAML.
    Looks for the model file defined by the save path and replaces the
    instantiated model with the one that was trained before.
    -------------------------------------------------------------------
    yaml : string, filename
           YAML file defining the experiment to be continued
    '''

    context = serial.load_train_file(yaml)
    print "\tLoaded YAML"

    # Load the trained model
    model_file = context.save_path

    # Locate the MonitorBasedSaveBest extension that tracks the best model
    pos = None
    for ext in range(len(context.extensions)):
        if isinstance(context.extensions[ext], MonitorBasedSaveBest):
            pos = ext
            break
    if pos is None:
        raise AssertionError(
            'No MonitorBasedSaveBest extension in the model!')

    if not os.path.isfile(model_file):
        model_file = context.extensions[pos].save_path

    with open(model_file, 'r') as m_f:
        trained_model = pkl.load(m_f)

    # Define the continuing one
    new_model = push_monitor(trained_model,
                             'trained_model',
                             transfer_experience=True)

    # Define it as the model to be trained
    context.model = new_model
    context.save_path = context.extensions[pos].save_path[
        0:-4] + "_continue.pkl"
    context.extensions[pos].save_path = context.save_path[0:-4] + "_best.pkl"

    # Train again
    context.main_loop()
Example #8
    def load(path):
        """Loads a model from path.

        We need this wrapper to make the loaded monitor continuable
        (currently deserialized monitor is non-functional in PyLearn2).
        For this we had to create a new monitor and initialize with the
        data from the old one.

        Parameters
        ----------
        path : str
            The model path.

        """
        model = push_monitor(serial.load(path),
                             "_delete_me",
                             transfer_experience=True,
                             save_records=True)
        del model._delete_me
        return model
Example #9
    def produce_train_obj(new_epochs, model=None):
        if model is None:
            model = MLP(
                layers=[Softmax(layer_name='y', n_classes=2, irange=0.)],
                nvis=3)
        else:
            model = push_monitor(model,
                                 'old_monitor',
                                 transfer_experience=True)

        dataset = DenseDesignMatrix(X=np.random.normal(size=(6, 3)),
                                    y=np.random.normal(size=(6, 2)))

        epoch_counter = EpochCounter(max_epochs=N, new_epochs=new_epochs)
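        # The new_epochs flag above decides whether max_epochs counts only the
        # epochs of this run (True) or the total number of epochs the model
        # has ever been trained for, including previous runs (False).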

        algorithm = SGD(batch_size=2,
                        learning_rate=0.1,
                        termination_criterion=epoch_counter)

        return Train(dataset=dataset, model=model, algorithm=algorithm)
Example #10
def train_again(yaml):
    '''
    Relaunch training of a model with the conditions specified by the YAML.
    Looks for the model file defined by the save path and replaces the
    instantiated model with the one that was trained before.
    -------------------------------------------------------------------
    yaml : string, filename
           YAML file defining the experiment to be continued
    '''

    context = serial.load_train_file(yaml)
    print "\tLoaded YAML"

    # Load the trained model
    model_file = context.save_path
    
    
    # Locate the MonitorBasedSaveBest extension that tracks the best model
    pos = None
    for ext in range(len(context.extensions)):
        if isinstance(context.extensions[ext], MonitorBasedSaveBest):
            pos = ext
            break
    if pos is None:
        raise AssertionError(
            'No MonitorBasedSaveBest extension in the model!')

    if not os.path.isfile(model_file):
        model_file = context.extensions[pos].save_path

    with open(model_file, 'r') as m_f:
        trained_model = pkl.load(m_f)

    # Define the continuing one
    new_model = push_monitor(trained_model, 'trained_model',
                             transfer_experience=True)

    # Define it as the model to be trained
    context.model = new_model
    context.save_path = context.extensions[pos].save_path[0:-4] + "_continue.pkl"
    context.extensions[pos].save_path = context.save_path[0:-4] + "_best.pkl"

    # Train again
    context.main_loop()
Example #11
    def produce_train_obj(new_epochs, max_epochs, model=None):
        if model is None:
            model = MLP(layers=[Softmax(layer_name='y',
                                        n_classes=2,
                                        irange=0.)],
                        nvis=3)
        else:
            model = push_monitor(model, 'old_monitor',
                                 transfer_experience=True)

        dataset = DenseDesignMatrix(X=np.random.normal(size=(6, 3)),
                                    y=np.random.normal(size=(6, 2)))

        epoch_counter = EpochCounter(max_epochs=max_epochs,
                                     new_epochs=new_epochs)

        algorithm = SGD(batch_size=2,
                        learning_rate=0.1,
                        termination_criterion=epoch_counter)

        return Train(dataset=dataset, model=model, algorithm=algorithm)
Example #12
def setup():
    N = 200000  # The paper keeps 1,000,000 memories
    num_frames = 4  # Prescribed by paper
    img_dims = (84, 84)  # Prescribed by paper
    action_dims = 4  # Prescribed by ALE
    batch_size = 32
    learning_rate = 0.05
    batches_per_iter = 1  # How many batches to pull from memory
    discount_factor = 0.95
    base_dir = '/data/lisa/exp/webbd/drl/experiments/2014-11-02'
    model_pickle_path = os.path.join(base_dir, 'best_model.pkl')

    log.info("Creating action cost.")
    action_cost = ActionCost.Action()

    # Load the model if it exists
    if os.path.exists(model_pickle_path):
        model = cPickle.load(open(model_pickle_path, 'rb'))
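        # Re-attach a working Monitor to the unpickled model, stashing the old
        # one as model.at; transfer_experience=True keeps the epoch, batch and
        # example counters from the previous run.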
        model = monitor.push_monitor(model, "at", transfer_experience=True)

    # Otherwise create a new model
    else:
        # TODO This is a hacky way to find the model yaml
        model_yaml = os.path.dirname(os.path.realpath(__file__))
        model_yaml = os.path.join(model_yaml, '../models/model_conv.yaml')
        log.info("Loading model yaml (%s)" % model_yaml)
        yaml_params = {
            'num_channels': num_frames,
            'action_dims': action_dims,
        }
        model = utils.load_yaml_template(model_yaml, yaml_params)

    log.info("Creating dataset.")
    dataset = Replay(N, img_dims, num_frames, action_dims)

    #monitoring_dataset = {}
    #monitoring_dataset['train'] = dataset

    log.info("Creating termination criterion.")
    termination_criterion = EpochCounter(1)

    log.info("Creating training algorithm.")
    algo = SGD(
        batch_size=batch_size,
        learning_rate=learning_rate,
        batches_per_iter=batches_per_iter,
        #monitoring_dataset=monitoring_dataset
        monitoring_dataset=None,
        cost=action_cost,
        termination_criterion=termination_criterion,
        learning_rule=RPROP()
    )

    log.info("Creating training object.")
    train = Train(dataset=None, model=model, algorithm=algo)

    log.info("Creating percept_preprocessor.")
    percept_preprocessor = ppp.DeepMindPreprocessor(img_dims, base_dir)

    log.info("Creating agent.")
    action_map = {
        0: 0,
        1: 1,
        2: 3,
        3: 4,
    }

    return BasicQAgent(
        model,
        dataset,
        train,
        percept_preprocessor,
        action_map,
        base_dir,
        model_pickle_path,
        discount_factor=discount_factor,
        k=num_frames,
        epsilon=1,
        epsilon_anneal_frames=5000000
    )
Example #13
def cnn_train(
    train_path,
    test_path,
    valid_path,
    save_path,
    predict_path,
    image_path,
    num_rows=28,
    num_cols=28,
    num_channels=2,
    batch_size=128,
    output_channels=[64, 64],
    kernel_shape=[[12, 12], [5, 5]],
    pool_shape=[[4, 4], [2, 2]],
    pool_stride=[[2, 2], [2, 2]],
    irange=[0.05, 0.05, 0.05],
    max_kernel_norm=[1.9365, 1.9365],
    learning_rate=0.001,
    init_momentum=0.9,
    weight_decay=[0.0002, 0.0002, 0.0002],
    n_epoch=1000,
):
    #load data
    #t = time.time()
    ds = load_data(valid_path, num_rows, num_cols, num_channels)
    vld = SarDataset(np.array(ds[0]), ds[1])
    ds = load_data(train_path, num_rows, num_cols, num_channels)
    trn = SarDataset(np.array(ds[0]), ds[1])
    ds = load_data(test_path, num_rows, num_cols, num_channels)
    tst = SarDataset(np.array(ds[0]), ds[1])
    #load balanced data
    #ds = load_data_balance_under_sample(train_path, num_rows,num_cols, num_channels)
    #trn = SarDataset(np.array(ds[0]),ds[1])
    #ds = load_data_balance(valid_path, num_rows,num_cols, num_channels)
    #vld = SarDataset(np.array(ds[0]),ds[1])
    #ds = load_data_balance(test_path, num_rows,num_cols, num_channels)
    #tst = SarDataset(np.array(ds[0]),ds[1])
    #print 'Take {}s to read data'.format( time.time()-t)
    # use Gaussian convolution on the original image to see if it can
    # concentrate in the center
    #trn,tst,vld = load_data_lidar()

    #mytransformer = transformer.TransformationPipeline(input_space=space.Conv2DSpace(shape=[num_rows,num_cols],num_channels=num_channels),transformations=[transformer.Rotation(),transformer.Flipping()])
    #trn = contestTransformerDataset.TransformerDataset(trn,mytransformer,space_preserving=True)
    #tst = contestTransformerDataset.TransformerDataset(tst,mytransformer,space_preserving=True)
    #vld = contestTransformerDataset.TransformerDataset(vld,mytransformer,space_preserving=True)

    #trn = transformer_dataset.TransformerDataset(trn,mytransformer,space_preserving=True)
    #tst = transformer_dataset.TransformerDataset(tst,mytransformer,space_preserving=True)
    #vld = transformer_dataset.TransformerDataset(vld,mytransformer,space_preserving=True)

    #setup the network
    t = time.time()
    layers = []
    for i in range(len(output_channels)):
        layer_name = 'h{}'.format(i + 1)
        convlayer = mlp.ConvRectifiedLinear(layer_name=layer_name,
                                            output_channels=output_channels[i],
                                            irange=irange[i],
                                            kernel_shape=kernel_shape[i],
                                            pool_shape=pool_shape[i],
                                            pool_stride=pool_stride[i],
                                            max_kernel_norm=max_kernel_norm[i])
        layers.append(convlayer)

    output_mlp = mlp.Linear(dim=1,
                            layer_name='output',
                            irange=irange[-1],
                            use_abs_loss=True)
    #output_mlp = mlp.linear_mlp_ace(dim=1,layer_name='output',irange=irange[-1])
    layers.append(output_mlp)

    #ann = cPickle.load(open('../output/train_with_2010_2l_40_64/original_500/f/f0.pkl'))
    #layers = []
    #for layer in ann.layers:
    #    layer.set_mlp_force(None)
    #    layers.append(layer)

    trainer = sgd.SGD(
        learning_rate=learning_rate,
        batch_size=batch_size,
        termination_criterion=EpochCounter(n_epoch),
        #termination_criterion = termination_criteria.And([termination_criteria.MonitorBased(channel_name = 'train_objective', prop_decrease=0.01,N=10),EpochCounter(n_epoch)]),
        #cost = dropout.Dropout(),
        cost=cost.SumOfCosts(
            [cost.MethodCost('cost_from_X'),
             WeightDecay(weight_decay)]),
        init_momentum=init_momentum,
        train_iteration_mode='even_shuffled_sequential',
        monitor_iteration_mode='even_shuffled_sequential',
        monitoring_dataset={
            'test': tst,
            'valid': vld,
            'train': trn
        })

    input_space = space.Conv2DSpace(shape=[num_rows, num_cols],
                                    num_channels=num_channels)
    #ann = mlp.MLP(layers,input_space=input_space,batch_size=batch_size)
    ann = serial.load(
        '../output/train_with_2010_2l_40_64/original_500/f/f0.pkl')
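    # Keep the loaded network's previous monitor around as ann.stage_0; a
    # fresh Monitor with counters starting from zero is created when this
    # stage's training begins, since transfer_experience is not requested.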
    ann = monitor.push_monitor(ann, 'stage_0')
    watcher = best_params.MonitorBasedSaveBest(channel_name='valid_objective',
                                               save_path=predict_path +
                                               save_path)
    flip = window_flip.WindowAndFlip((num_rows, num_cols),
                                     randomize=[tst, vld, trn])
    experiment = Train(dataset=trn,
                       model=ann,
                       algorithm=trainer,
                       extensions=[watcher, flip])
    print 'Take {}s to compile code'.format(time.time() - t)

    #train the network
    t = time.time()
    experiment.main_loop()
    print 'Training time: {}h'.format((time.time() - t) / 3600)
    utils.sms_notice('Training time:{}'.format((time.time() - t) / 3600))

    return ann
Example #14
def supervisedLayerwisePRL(trainset, testset):
    '''
    The supervised layerwise training as used in the PRL paper.

    Input
    ------
    trainset : A path to an hdf5 file created through h5py.
    testset  : A path to an hdf5 file created through h5py.
    '''
    batch_size = 100

    # Both train and test h5py files are expected to have 'topo_view' and 'y'
    # datasets inside them, with 'topo_view' stored in the 'b01c' data format
    # used by pylearn2 and 'y' holding the one-hot encoded labels.
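    # For illustration, such a file could be created with h5py roughly like:
    #   with h5py.File('train.h5', 'w') as f:
    #       f.create_dataset('topo_view', data=X_b01c)  # (N, rows, cols, channels)
    #       f.create_dataset('y', data=y_onehot)        # (N, n_classes)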
    trn = HDF5Dataset(filename=trainset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    tst = HDF5Dataset(filename=testset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    # Only train and test files are passed in, so the test set doubles as the
    # 'valid' monitoring dataset referenced below.
    vld = tst
    '''
    The 1st Convolution and Pooling Layers are added below.
    '''
    h1 = mlp.ConvRectifiedLinear(layer_name='h1',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)

    layers = [h1, fc, output]

    mdl = mlp.MLP(layers,
                  input_space=Conv2DSpace(shape=(70, 70), num_channels=1))

    trainer = sgd.SGD(
        learning_rate=0.002,
        batch_size=batch_size,
        learning_rule=learning_rule.RMSProp(),
        cost=SumOfCosts(
            costs=[Default(),
                   WeightDecay(coeffs=[0.0005, 0.0005, 0.0005])]),
        train_iteration_mode='shuffled_sequential',
        monitor_iteration_mode='sequential',
        termination_criterion=EpochCounter(max_epochs=15),
        monitoring_dataset={
            'test': tst,
            'valid': vld
        })

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best1.pkl')

    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)

    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )

    experiment.main_loop()

    del mdl
    mdl = serial.load('./Saved Models/conv_supervised_layerwise_best1.pkl')
    mdl = push_monitor(mdl, 'k')
    '''
    The 2nd Convolution and Pooling Layers are added below.
    '''
    h2 = mlp.ConvRectifiedLinear(layer_name='h2',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)

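    # Strip the softmax and fully connected layers from the stage-1 network
    # before stacking the next convolutional block and a new classifier on top
    # of what remains.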
    del mdl.layers[-1]
    mdl.layer_names.remove('y')
    del mdl.layers[-1]
    mdl.layer_names.remove('fc')
    mdl.add_layers([h2, fc, output])

    trainer = sgd.SGD(learning_rate=0.002,
                      batch_size=batch_size,
                      learning_rule=learning_rule.RMSProp(),
                      cost=SumOfCosts(costs=[
                          Default(),
                          WeightDecay(coeffs=[0.0005, 0.0005, 0.0005, 0.0005])
                      ]),
                      train_iteration_mode='shuffled_sequential',
                      monitor_iteration_mode='sequential',
                      termination_criterion=EpochCounter(max_epochs=15),
                      monitoring_dataset={
                          'test': tst,
                          'valid': vld
                      })

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best2.pkl')

    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)

    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )

    experiment.main_loop()

    del mdl
    mdl = serial.load('./Saved Models/conv_supervised_layerwise_best2.pkl')
    mdl = push_monitor(mdl, 'l')
    '''
    The 3rd Convolution and Pooling Layers are added below.
    '''
    h3 = mlp.ConvRectifiedLinear(layer_name='h3',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    fc = mlp.RectifiedLinear(layer_name='h3', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=10,
                         irange=.005,
                         max_col_norm=1.9365)

    del mdl.layers[-1]
    mdl.layer_names.remove('y')
    del mdl.layers[-1]
    mdl.layer_names.remove('fc')
    mdl.add_layers([h3, output])

    trainer = sgd.SGD(
        learning_rate=.002,
        batch_size=batch_size,
        learning_rule=learning_rule.RMSProp(),
        cost=SumOfCosts(costs=[
            Default(),
            WeightDecay(coeffs=[0.0005, 0.0005, 0.0005, 0.0005, 0.0005])
        ]),
        train_iteration_mode='shuffled_sequential',
        monitor_iteration_mode='sequential',
        termination_criterion=EpochCounter(max_epochs=15),
        monitoring_dataset={
            'test': tst,
            'valid': vld
        })

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best3.pkl')

    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)

    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )

    experiment.main_loop()
Example #15
def cnn_train(train_path, test_path, valid_path, save_path, predict_path,
              image_path,
              num_rows=28,
              num_cols=28,
              num_channels=2,
              batch_size=128,
              output_channels=[64, 64],
              kernel_shape=[[12, 12], [5, 5]],
              pool_shape=[[4, 4], [2, 2]],
              pool_stride=[[2, 2], [2, 2]],
              irange=[0.05, 0.05, 0.05],
              max_kernel_norm=[1.9365, 1.9365],
              learning_rate=0.001,
              init_momentum=0.9,
              weight_decay=[0.0002, 0.0002, 0.0002],
              n_epoch=1000,
              ):
    #load data
    #t = time.time()
    ds = load_data(valid_path, num_rows,num_cols, num_channels)
    vld = SarDataset(np.array(ds[0]),ds[1])
    ds = load_data(train_path, num_rows,num_cols, num_channels)
    trn = SarDataset(np.array(ds[0]),ds[1])
    ds = load_data(test_path, num_rows,num_cols, num_channels)
    tst = SarDataset(np.array(ds[0]),ds[1])
    #load balanced data
    #ds = load_data_balance_under_sample(train_path, num_rows,num_cols, num_channels)
    #trn = SarDataset(np.array(ds[0]),ds[1])
    #ds = load_data_balance(valid_path, num_rows,num_cols, num_channels)
    #vld = SarDataset(np.array(ds[0]),ds[1])
    #ds = load_data_balance(test_path, num_rows,num_cols, num_channels)
    #tst = SarDataset(np.array(ds[0]),ds[1])
    #print 'Take {}s to read data'.format( time.time()-t)
    # use Gaussian convolution on the original image to see if it can
    # concentrate in the center
    #trn,tst,vld = load_data_lidar()

    #mytransformer = transformer.TransformationPipeline(input_space=space.Conv2DSpace(shape=[num_rows,num_cols],num_channels=num_channels),transformations=[transformer.Rotation(),transformer.Flipping()])
    #trn = contestTransformerDataset.TransformerDataset(trn,mytransformer,space_preserving=True)
    #tst = contestTransformerDataset.TransformerDataset(tst,mytransformer,space_preserving=True)
    #vld = contestTransformerDataset.TransformerDataset(vld,mytransformer,space_preserving=True)

    #trn = transformer_dataset.TransformerDataset(trn,mytransformer,space_preserving=True)
    #tst = transformer_dataset.TransformerDataset(tst,mytransformer,space_preserving=True)
    #vld = transformer_dataset.TransformerDataset(vld,mytransformer,space_preserving=True)

    #setup the network
    t = time.time()
    layers = []
    for i in range(len(output_channels)):
        layer_name = 'h{}'.format(i+1)
        convlayer = mlp.ConvRectifiedLinear(layer_name=layer_name,
                                            output_channels=output_channels[i],
                                            irange=irange[i],
                                            kernel_shape=kernel_shape[i],
                                            pool_shape=pool_shape[i],
                                            pool_stride=pool_stride[i],
                                            max_kernel_norm=max_kernel_norm[i])
        layers.append(convlayer)


    output_mlp = mlp.Linear(dim=1,layer_name='output',irange=irange[-1], use_abs_loss=True)
    #output_mlp = mlp.linear_mlp_ace(dim=1,layer_name='output',irange=irange[-1])
    layers.append(output_mlp)


    #ann = cPickle.load(open('../output/train_with_2010_2l_40_64/original_500/f/f0.pkl'))
    #layers = []
    #for layer in ann.layers:
    #    layer.set_mlp_force(None)
    #    layers.append(layer)

    trainer = sgd.SGD(learning_rate=learning_rate,batch_size=batch_size,
                      termination_criterion=EpochCounter(n_epoch),
                      #termination_criterion = termination_criteria.And([termination_criteria.MonitorBased(channel_name = 'train_objective', prop_decrease=0.01,N=10),EpochCounter(n_epoch)]),
                      #cost = dropout.Dropout(),
                      cost = cost.SumOfCosts([cost.MethodCost('cost_from_X'), WeightDecay(weight_decay)]),
                      init_momentum=init_momentum,
                      train_iteration_mode='even_shuffled_sequential',
                      monitor_iteration_mode='even_shuffled_sequential',
                      monitoring_dataset={'test':  tst,
                                          'valid': vld,
                                          'train': trn})

    input_space = space.Conv2DSpace(shape=[num_rows,num_cols],num_channels=num_channels)
    #ann = mlp.MLP(layers,input_space=input_space,batch_size=batch_size)
    ann = serial.load('../output/train_with_2010_2l_40_64/original_500/f/f0.pkl')
    ann = monitor.push_monitor(ann,'stage_0')
    watcher = best_params.MonitorBasedSaveBest(
            channel_name='valid_objective',
            save_path = predict_path+save_path)
    flip = window_flip.WindowAndFlip((num_rows,num_cols),randomize=[tst,vld,trn])
    experiment = Train(dataset=trn,
                       model=ann,
                       algorithm=trainer,
                       extensions=[watcher,flip])
    print 'Take {}s to compile code'.format(time.time()-t)

    #train the network
    t = time.time()
    experiment.main_loop()
    print 'Training time: {}h'.format((time.time()-t)/3600)
    utils.sms_notice('Training time:{}'.format((time.time()-t)/3600))

    return ann