# Assumed imports for this fragment (the original file's header is not shown):
import gc
import sys

import dill
import theano.tensor as T

from toupee import config
from toupee.data import load_data
from toupee.mlp import test_mlp

if __name__ == '__main__':
    params = config.load_parameters(sys.argv[1])
    dataset = load_data(params.dataset,
                        resize_to=params.resize_data_to,
                        shared=False,
                        pickled=params.pickled)
    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar('index')
    method = params.method
    method.prepare(params, dataset)
    train_set = method.resampler.get_train()
    valid_set = method.resampler.get_valid()
    test_set = method.resampler.get_test()
    test_set_x, test_set_y = test_set
    shared_dataset = [train_set, valid_set, test_set]
    continuations = dill.load(open(sys.argv[2], "rb"))  # dill needs binary mode
    members = []
    for i, c in enumerate(continuations):
        print "training member {0}".format(i)
        m = test_mlp(shared_dataset, params, continuation=c, x=x, y=y,
                     index=index)
        members.append(m.get_weights())
        # release the trained member's memory before starting the next one
        m.clear()
        del m
        gc.collect()
    dill.dump(members, open(sys.argv[3], "wb"))
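The script reads its per-member training state from the dill-pickled file named by sys.argv[2]. What each continuation contains is whatever toupee's test_mlp accepts for its `continuation` argument; as a minimal sketch of producing such a file, assuming one opaque state object per member (state_for_member and n_members are hypothetical placeholders):

import dill

# hypothetical: one opaque continuation object per ensemble member
continuations = [state_for_member(i) for i in range(n_members)]
with open("continuations.pkl", "wb") as f:
    dill.dump(continuations, f)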
Example #2
        print "saving results to {0}@{1}".format(params.results_db, host)
        conn = MongoClient(host=host)
        db = conn[params.results_db]
        if 'results_table' in params.__dict__:
            table_name = params.results_table
        else:
            table_name = 'results'
        table = db[table_name]
        results = {
            "params": params.__dict__,
            "test_losses": test_losses,
            "test_score": test_score,
        }
        table.insert(json.loads(json.dumps(results, default=common.serialize)))
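The json.dumps/json.loads round trip above exists to coerce numpy scalars and arrays into plain Python values that pymongo can store. Toupee's actual common.serialize is not shown here; a fallback encoder of this kind typically looks something like:

import numpy

def serialize(obj):
    # fallback for json.dumps: coerce numpy types to plain Python ones
    if isinstance(obj, numpy.integer):
        return int(obj)
    if isinstance(obj, numpy.floating):
        return float(obj)
    if isinstance(obj, numpy.ndarray):
        return obj.tolist()
    return str(obj)  # last resort: stringify anything unrecognised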

    train, valid, test = dataset
    train_x, train_y = train
    valid_x, valid_y = valid
    test_x, test_y = test
    shared_train_x = sharedX(train_x)
    shared_valid_x = sharedX(valid_x)
    shared_test_x = sharedX(test_x)
    # distillation: the ensemble's predictions on the training set become
    # the targets for training a single student network
    distilled_train_y = ensemble.classify(shared_train_x).eval()
    distilled_set = (
        (shared_train_x, sharedX(distilled_train_y, dtype=numpy.int32)),
        (shared_valid_x, sharedX(valid_y, dtype=numpy.int32)),
        (shared_test_x, sharedX(test_y, dtype=numpy.int32)),
    )
    #y = T.vector()
    mlp = test_mlp(distilled_set, params, y=y)
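Both halves of this example lean on a sharedX helper to wrap numpy data in Theano shared variables. Its definition is not shown; a minimal sketch in the pylearn2 style, matching the signature used above (a value plus an optional dtype):

import numpy
import theano

def sharedX(value, dtype=theano.config.floatX, name=None, borrow=False):
    # wrap a numpy value in a Theano shared variable of the given dtype
    return theano.shared(numpy.asarray(value, dtype=dtype),
                         name=name, borrow=borrow)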
Example #3
    # map command-line overrides onto entries in the parameter file
    arg_param_pairings = [
        (args.seed, 'random_seed'),
        (args.results_db, 'results_db'),
        (args.results_host, 'results_host'),
        (args.results_table, 'results_table'),
        (args.epochs, 'n_epochs'),
    ]
    from toupee import config
    params = config.load_parameters(args.params_file)

    def arg_params(arg_value, param):
        if arg_value is not None:
            params.__dict__[param] = arg_value

    for arg, param in arg_param_pairings:
        arg_params(arg, param)

    from toupee import data
    from toupee.mlp import MLP, test_mlp
    dataset = data.load_data(params.dataset,
                             resize_to=params.resize_data_to,
                             shared=False,
                             pickled=params.pickled,
                             center_and_normalise=params.center_and_normalise,
                             join_train_and_valid=params.join_train_and_valid)
    pretraining_set = data.make_pretraining_set(dataset, params.pretraining)
    mlp = test_mlp(dataset, params, pretraining_set=pretraining_set)
    if args.save_file is not None:
        dill.dump(mlp, open(args.save_file, "wb"))
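This example (and the next) assumes an `args` namespace built earlier in the script. A minimal argparse sketch that would supply the attributes referenced above; the flag names are assumptions, not toupee's documented CLI:

import argparse

parser = argparse.ArgumentParser(description='train an MLP from a parameter file')
parser.add_argument('params_file')
parser.add_argument('--seed', type=int, default=None)
parser.add_argument('--epochs', type=int, default=None)
parser.add_argument('--results-db', dest='results_db', default=None)
parser.add_argument('--results-host', dest='results_host', default=None)
parser.add_argument('--results-table', dest='results_table', default=None)
parser.add_argument('--save-file', dest='save_file', default=None)
args = parser.parse_args()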
Example #4
    # (as in Example #3) map command-line overrides onto parameter-file entries
    arg_param_pairings = [
        (args.seed, 'random_seed'),
        (args.results_db, 'results_db'),
        (args.results_host, 'results_host'),
        (args.results_table, 'results_table'),
        (args.epochs, 'n_epochs'),
    ]
    from toupee import config
    params = config.load_parameters(args.params_file)

    def arg_params(arg_value, param):
        if arg_value is not None:
            params.__dict__[param] = arg_value

    for arg, param in arg_param_pairings:
        arg_params(arg, param)

    from toupee import data
    from toupee.mlp import MLP, test_mlp
    dataset = data.load_data(params.dataset,
                             resize_to=params.resize_data_to,
                             shared=False,
                             pickled=params.pickled,
                             center_and_normalise=params.center_and_normalise,
                             join_train_and_valid=params.join_train_and_valid)
    pretraining_set = data.make_pretraining_set(dataset, params.pretraining)
    mlp = test_mlp(dataset, params, pretraining_set=pretraining_set)
    if args.save_file is not None:
        dill.dump(mlp, open(args.save_file, "wb"))
    params.pretraining = None
    dataset = load_data(params.dataset,
                        resize_to=params.resize_data_to,
                        shared=False,
                        pickled=params.pickled)
    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar('index')
    method = params.method
    method.prepare(params, dataset)
    train_set = method.resampler.get_train()
    valid_set = method.resampler.get_valid()
    test_set = method.resampler.get_test()
    shared_dataset = [train_set, valid_set, test_set]
    continuations = dill.load(open(sys.argv[2], "rb"))  # dill needs binary mode
    members = [test_mlp(shared_dataset, params, continuation=c, x=x, y=y,
                        index=index) for c in continuations]
    ensemble = params.method.create_aggregator(params, members, x, y,
                                               train_set, valid_set)
    test_set_x, test_set_y = method.resampler.get_test()
    test_model = theano.function(
        inputs=[index],
        outputs=ensemble.errors,
        givens={
            x: test_set_x[index * params.batch_size:(index + 1) *
                          params.batch_size],
            y: test_set_y[index * params.batch_size:(index + 1) *
                          params.batch_size]})
    n_test_batches = test_set_x.shape[0].eval() // params.batch_size
    test_losses = [test_model(i) for i in xrange(n_test_batches)]
    test_score = numpy.mean(test_losses)
    print 'Final error: {0} %'.format(test_score * 100.)
    # Persist results to MongoDB. The original fragment references `db`
    # without defining it; the setup below mirrors Example #2 and is an
    # assumption (host taken from params.results_host when present).
    host = getattr(params, 'results_host', 'localhost')
    print "saving results to {0}@{1}".format(params.results_db, host)
    conn = MongoClient(host=host)
    db = conn[params.results_db]
    if 'results_table' in params.__dict__:
        table_name = params.results_table
    else:
        table_name = 'results'
    table = db[table_name]
    results = {
        "params": params.__dict__,
        "test_losses": test_losses,
        "test_score": test_score,
    }
    table.insert(json.loads(json.dumps(results, default=common.serialize)))
    
    train, valid, test = dataset
    train_x, train_y = train
    valid_x, valid_y = valid
    test_x, test_y = test
    shared_train_x = sharedX(train_x)
    shared_valid_x = sharedX(valid_x)
    shared_test_x = sharedX(test_x)
    # as in Example #2: distil the ensemble into targets for a single MLP
    distilled_train_y = ensemble.classify(shared_train_x).eval()
    distilled_set = (
        (shared_train_x, sharedX(distilled_train_y, dtype=numpy.int32)),
        (shared_valid_x, sharedX(valid_y, dtype=numpy.int32)),
        (shared_test_x, sharedX(test_y, dtype=numpy.int32)),
    )
    #y = T.vector()
    mlp = test_mlp(distilled_set, params, y=y)
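The evaluation step in this example compiles a Theano function whose `givens` substitute one minibatch of the shared test set per call, selected by a symbolic index, then averages the per-batch errors. A generic, self-contained sketch of that pattern (the names are illustrative, not toupee API):

import numpy
import theano
import theano.tensor as T

def mean_error(errors, x, y, set_x, set_y, batch_size):
    # compile a per-batch evaluator: `givens` swaps one slice of the
    # shared dataset into the graph per call, chosen by `index`
    index = T.lscalar('index')
    evaluate = theano.function(
        inputs=[index],
        outputs=errors,
        givens={x: set_x[index * batch_size:(index + 1) * batch_size],
                y: set_y[index * batch_size:(index + 1) * batch_size]})
    n_batches = set_x.get_value(borrow=True).shape[0] // batch_size
    return numpy.mean([evaluate(i) for i in xrange(n_batches)])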