Example #1
 def SaveModel(self, file):
     print('Saving model parameters')
     model = []
     for param in self.params:
         model.append(param.get_value())
     with open(file, 'wb') as f:
         pkl_utils.dump(model, f)
Example #2
 def SaveModel(self, file):
     print('Saving model parameters')
     model = []
     for param in self.params:
         model.append(param.get_value())
     with open(file, 'wb') as f:
         pkl_utils.dump(model, f)
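The two examples above only show the saving side. For symmetry, here is a minimal loading counterpart, offered as a sketch only: it assumes the same self.params list and pkl_utils import used above, and the LoadModel name is illustrative rather than taken from either project.

 def LoadModel(self, file):
     # Read the pickled parameter values back and push them into the
     # existing shared variables (assumes the model was built with the
     # same architecture that produced the file).
     print('Loading model parameters')
     with open(file, 'rb') as f:
         model = pkl_utils.load(f)
     for param, value in zip(self.params, model):
         param.set_value(value)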
Example #3
    def test_dump_load_mrg(self):
        rng = MRG_RandomStreams()

        with open('test', 'wb') as f:
            dump(rng, f)

        with open('test', 'rb') as f:
            rng = load(f)

        assert type(rng) == MRG_RandomStreams
Example #4
    def test_dump_load_mrg(self):
        rng = MRG_RandomStreams(use_cuda=cuda_ndarray.cuda_enabled)

        with open('test', 'wb') as f:
            dump(rng, f)

        with open('test', 'rb') as f:
            rng = load(f)

        assert type(rng) == MRG_RandomStreams
Example #5
    def test_dump_load_mrg(self):
        rng = MRG_RandomStream()

        with open("test", "wb") as f:
            dump(rng, f)

        with open("test", "rb") as f:
            rng = load(f)

        assert type(rng) == MRG_RandomStream
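Beyond the type assertion, one might also check that the reloaded stream is still usable. The lines below are a hypothetical extension of the test, assuming theano.function is importable in this module and that MRG_RandomStream exposes the usual uniform(size) method:

        # Sketch: the reloaded stream can still build new random variables.
        sample_fn = theano.function([], rng.uniform((2, 2)))
        assert sample_fn().shape == (2, 2)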
Example #6
    def test_dump_load_mrg(self):
        rng = MRG_RandomStreams(use_cuda=cuda_ndarray.cuda_enabled)

        with open('test', 'wb') as f:
            dump(rng, f)

        with open('test', 'rb') as f:
            rng = load(f)

        assert type(rng) == MRG_RandomStreams
Example #7
def test_dump_zip_names():
    foo_1 = theano.shared(0, name='foo')
    foo_2 = theano.shared(1, name='foo')
    with open('model.zip', 'wb') as f:
        dump((foo_1, foo_2, numpy.array(2)), f)
    keys = list(numpy.load('model.zip').keys())
    assert keys == ['foo', 'foo_2', 'array_0', 'pkl']
    foo = numpy.load('model.zip')['foo']
    assert foo == numpy.array(0)
    with open('model.zip', 'rb') as f:
        foo_1, foo_2, array = load(f)
    assert array == numpy.array(2)
Example #8
 def test_dump_zip_names(self):
     foo_1 = theano.shared(0, name="foo")
     foo_2 = theano.shared(1, name="foo")
     foo_3 = theano.shared(2, name="foo")
     with open("model.zip", "wb") as f:
         dump((foo_1, foo_2, foo_3, np.array(3)), f)
     keys = list(np.load("model.zip").keys())
     assert keys == ["foo", "foo_2", "foo_3", "array_0", "pkl"]
     foo_3 = np.load("model.zip")["foo_3"]
     assert foo_3 == np.array(2)
     with open("model.zip", "rb") as f:
         foo_1, foo_2, foo_3, array = load(f)
     assert array == np.array(3)
Example #9
 def test_dump_zip_names(self):
     foo_1 = theano.shared(0, name='foo')
     foo_2 = theano.shared(1, name='foo')
     foo_3 = theano.shared(2, name='foo')
     with open('model.zip', 'wb') as f:
         dump((foo_1, foo_2, foo_3, np.array(3)), f)
     keys = list(np.load('model.zip').keys())
     assert keys == ['foo', 'foo_2', 'foo_3', 'array_0', 'pkl']
     foo_3 = np.load('model.zip')['foo_3']
     assert foo_3 == np.array(2)
     with open('model.zip', 'rb') as f:
         foo_1, foo_2, foo_3, array = load(f)
     assert array == np.array(3)
Example #10
 def test_dump_zip_names(self):
     foo_1 = theano.shared(0, name='foo')
     foo_2 = theano.shared(1, name='foo')
     foo_3 = theano.shared(2, name='foo')
     with open('model.zip', 'wb') as f:
         dump((foo_1, foo_2, foo_3, np.array(3)), f)
     keys = list(np.load('model.zip').keys())
     assert keys == ['foo', 'foo_2', 'foo_3', 'array_0', 'pkl']
     foo_3 = np.load('model.zip')['foo_3']
     assert foo_3 == np.array(2)
     with open('model.zip', 'rb') as f:
         foo_1, foo_2, foo_3, array = load(f)
     assert array == np.array(3)
Example #11
def test_dump_load():
    x = GpuArraySharedVariable(
        'x',
        GpuArrayType('float32', (1, 1), name='x', context_name=test_ctx_name),
        [[1]], False)

    with open('test', 'wb') as f:
        dump(x, f)

    with open('test', 'rb') as f:
        x = load(f)

    assert x.name == 'x'
    np.testing.assert_allclose(x.get_value(), [[1]])
Example #12
def test_dump_load():
    x = GpuArraySharedVariable('x',
                               GpuArrayType('float32', (1, 1), name='x',
                                            context_name=test_ctx_name),
                               [[1]], False)

    with open('test', 'wb') as f:
        dump(x, f)

    with open('test', 'rb') as f:
        x = load(f)

    assert x.name == 'x'
    np.testing.assert_allclose(x.get_value(), [[1]])
Example #13
    def test_dump_load(self):
        if not cuda_ndarray.cuda_enabled:
            raise SkipTest('Optional package cuda disabled')

        x = CudaNdarraySharedVariable('x', CudaNdarrayType((1, 1), name='x'),
                                      [[1]], False)

        with open('test', 'wb') as f:
            dump(x, f)

        with open('test', 'rb') as f:
            x = load(f)

        assert x.name == 'x'
        assert_allclose(x.get_value(), [[1]])
Example #14
    def test_dump_load(self):
        if not cuda_ndarray.cuda_enabled:
            raise SkipTest('Optional package cuda disabled')

        x = CudaNdarraySharedVariable('x', CudaNdarrayType((1, 1), name='x'),
                                      [[1]], False)

        with open('test', 'wb') as f:
            dump(x, f)

        with open('test', 'rb') as f:
            x = load(f)

        assert x.name == 'x'
        assert_allclose(x.get_value(), [[1]])
Example #15
def test_dump_load():
    x = GpuArraySharedVariable(
        "x",
        GpuArrayType("float32", (1, 1), name="x", context_name=test_ctx_name),
        [[1]],
        False,
    )

    with open("test", "wb") as f:
        dump(x, f)

    with open("test", "rb") as f:
        x = load(f)

    assert x.name == "x"
    np.testing.assert_allclose(x.get_value(), [[1]])
Example #16
    def post(self, *args, **kwargs):
        """
        Trains and saves an Ordinal Perceptron model
        :params:
        dataset: Dataset to be trained
        results: Results for supervised training
        :return: Dict with model_id to be later used for prediction
        """

        # Receives request files
        try:
            dataset = request.files['dataset']
            results = request.files['results']
        except KeyError:
            raise exceptions.NotAcceptable(
                "You need to send a dataset and a result set.")

        # Connects to db
        session = kwargs["db_connection"]

        ord = creation_and_training(dataset, results)

        # Create variables for database entry
        model_id = str(uuid.uuid4())
        timestamp = datetime.datetime.now()

        # Save model for future use
        with open("models/" + model_id + ".zip", "wb") as f:
            pkl.dump(ord, f)

        # Create new row and add it to db
        try:
            new_model = Model(MODEL=model_id,
                              AI="ordinal_perceptron",
                              TIMESTAMP=timestamp)

            session.add(new_model)

        finally:
            session.commit()

        # Create response
        response = dict()

        response['model_id'] = model_id

        return response
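The prediction side is not shown in this example. Below is a minimal read-back sketch, assuming pkl here is the same pickling module used for pkl.dump above; the helper name load_saved_model is purely illustrative:

def load_saved_model(model_id):
    # Reload a previously trained model by its id; mirrors the dump above.
    with open("models/" + model_id + ".zip", "rb") as f:
        return pkl.load(f)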
Example #17
def dump_weights_pickle(classifier, file_name='../weights/weight_3DCNN.zip'):
    W0 = classifier.params[0]
    W1 = classifier.params[2]
    W2 = classifier.params[4]
    W3 = classifier.params[6]
    W4 = classifier.params[8]
    W5 = classifier.params[10]

    b0 = classifier.params[1]
    b1 = classifier.params[3]
    b2 = classifier.params[5]
    b3 = classifier.params[7]
    b4 = classifier.params[9]
    b5 = classifier.params[11]

    with open(file_name, 'wb') as f:
        dump((W0, W1, W2, W3, W4, W5, b0, b1, b2, b3, b4, b5), f)
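The matching loader is not part of the example. The sketch below assumes load is theano.misc.pkl_utils.load and that classifier.params interleaves weights and biases in the order implied by the indices above; the function name is illustrative.

def load_weights_pickle(classifier, file_name='../weights/weight_3DCNN.zip'):
    # Restore the tuple written by dump_weights_pickle and copy the values
    # back into the classifier's shared variables.
    with open(file_name, 'rb') as f:
        W0, W1, W2, W3, W4, W5, b0, b1, b2, b3, b4, b5 = load(f)
    saved = [W0, b0, W1, b1, W2, b2, W3, b3, W4, b4, W5, b5]
    for param, value in zip(classifier.params, saved):
        param.set_value(value.get_value())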
Example #18
            mrrs.append(metrics_results['mrr_at_n'])

            eval_sessions_metrics_log.append({
                'hitrate_at_n_gru4rec': metrics_results['hitrate_at_n'],
                'mrr_at_n_gru4rec': metrics_results['mrr_at_n'],
                'clicks_count': len(test_df),
                'sessions_count': test_df['SessionId'].nunique()
            })
            save_eval_benchmark_metrics_csv(
                eval_sessions_metrics_log,
                temp_folder,
                training_hours_for_each_eval=ARGS.training_hours_for_each_eval,
                output_csv=EVAL_METRICS_FILE)

    finally:
        print("AVG HitRate: {}".format(sum(hit_rates) / len(hit_rates)))
        print("AVG MRR: {}".format(sum(mrrs) / len(mrrs)))

        # Export trained model
        with open(os.path.join(temp_folder, MODEL_FILE), "wb+") as gru_file:
            pkl.dump(gru, gru_file)

    print('Trained model and eval results exported to temporary folder: {}'.
          format(temp_folder))
Example #19
    batch_size = 10

    session_ids = valid.SessionId.values[0:batch_size]
    input_item_ids = valid.ItemId.values[0:batch_size]

    out_idx = valid.ItemId.values[0:batch_size]
    uniq_out = np.unique(np.array(out_idx, dtype=np.int32))
    #predict_for_item_ids = np.hstack([data, uniq_out[~np.in1d(uniq_out,data)]])
    #LP: comment this if above works!
    predict_for_item_ids = None

    print('session_ids: {}'.format(session_ids))
    print('input_item_ids: {}'.format(input_item_ids))
    print('uniq_out: {}'.format(uniq_out))
    print('predict_for_item_ids: {}'.format(predict_for_item_ids))

    preds = gru.predict_next_batch(session_ids, input_item_ids,
                                   predict_for_item_ids, batch_size)
    preds.fillna(0, inplace=True)
    if break_ties:
        preds += np.random.rand(*preds.values.shape) * 1e-8

    print('Preds: {}'.format(preds))

    # save model
    with open(os.path.join(os.environ['HOME'], 'model.theano'), 'wb') as fd:
        dump(gru, fd)

    print('Model: {}'.format(gru))
Example #20
def save_model(params, path, name, suffix=''):
    from theano.misc import pkl_utils
    with open(path + name + suffix, 'wb') as fout:
        for param in params:
            pkl_utils.dump(param.get_value(), fout)
Example #21
    def train(self,
              dataset,
              lr=1.,
              gamma=.9,
              beta1=0.9,
              beta2=0.999,
              min_batch_size=100,
              max_batch_size=None,
              num_epochs=200,
              save_as=None,
              early_stopping=-1E+6):
        """Train the RNN-RBM via stochastic gradient descent (SGD) using MIDI
        files converted to piano-rolls.

        dataset : list of numpy arrays
        min_batch_size, max_batch_size : integers
            Training sequences will be split into subsequences whose lengths
            fall within this range before applying the SGD updates.
        num_epochs : integer
            Number of epochs (pass over the training set) performed. The user
            can safely interrupt training with Ctrl+C at any time."""

        if self.optimizer == 'adadelta':
            hyperparams = (lr, gamma)
        elif self.optimizer == 'adam':
            hyperparams = (lr, beta1, beta2)
        else:
            hyperparams = (lr, )

        if type(dataset) is not list:
            dataset = [dataset]

        self.dataset = dataset
        nsamples = len(dataset)

        # flatten all parameters into an array for FLANN NN computation:
        # TODO: not sure if this is very useful, since the parameter space
        # is very high dimensional... basically parameters can bounce around
        # minimum and NN will not converge to zero in a long time... take e.g.
        # large dim. arrays with random 0, 1's and compute nn after generating
        # a new one each time! But hmm I think it should be steadily decreasing...
        # anyway think about it
        # import pyflann
        # flann = pyflann.FLANN()
        # pyflann.set_distance_type('euclidean')

        param_vec = self.param_vec
        if param_vec is None:
            param_vec = np.array([])
            for param in self.parameters:
                param_vec = np.concatenate(
                    (param_vec, param.get_value().flatten()))
            param_vec = param_vec[None, :]

        if max_batch_size is None:
            max_batch_size = min_batch_size
            mean_batch_size = min_batch_size
        else:
            mean_batch_size = int((min_batch_size + max_batch_size) / 2)

        best_monitor = 1E+6
        done = False
        try:
            for epoch in range(num_epochs):
                if done:
                    break
                start_time = time.time()

                costs = []
                monitors = []

                # shuffle dataset:
                shuffle(dataset)

                for sample_number, sample in enumerate(dataset):

                    # split to batches:
                    sample_size = len(sample)
                    idx = np.random.randint(min_batch_size, max_batch_size + 1,
                                            int(sample_size /
                                                min_batch_size)).cumsum()
                    idx = idx[idx < sample_size - min_batch_size]
                    batches = np.split(sample, idx)
                    shuffle(batches)
                    nbatches = len(batches)

                    for n, batch in enumerate(batches):

                        # don't train with almost empty batch:
                        if np.sum(batch) < min_steps_in_batch:
                            continue
                        if batch.shape[0] < 3:  # just in case...
                            continue

                        monitor, cost = self.train_function(
                            batch, *hyperparams)

                        #TODO: revert to saved parameters in case of nans (?)
                        if np.isnan(cost):
                            raise ValueError(
                                '\nNaN encountered, breaking out!')

                        if np.abs(cost) > 1E+9:
                            raise ValueError('\nCost blew up, breaking out!')

                        costs.append(cost)
                        monitors.append(monitor)
                        pct_progress = int(100 * n / nbatches)
                        print('\rSample: {:6}/{} -- Progress: {:3}% -- '
                              'Cost={:6.3f} -- Monitor={:6.3f}'.format(
                                  sample_number + 1, nsamples, pct_progress,
                                  float(cost), float(monitor)),
                              end='')
                        sys.stdout.flush()
                        if monitor < early_stopping:
                            print('\nEarly stop.')
                            done = True
                            break

                costs = np.asarray(costs)
                monitors = np.asarray(monitors)
                costs[costs > 1e+6] = 1e+6  # getting rid of infs
                monitors[monitors > 1e+6] = 1e+6  # getting rid of infs
                self.costs = costs
                self.monitors = monitors
                avg_cost = np.round(np.mean(costs), 4)
                std_cost = np.round(np.std(costs), 4)
                avg_monitor = np.round(np.mean(monitors), 4)
                std_monitor = np.round(np.std(monitors), 4)
                time_elapsed = time.time() - start_time

                # Nearest neighbors in parameter space:
                # param_vec_next = np.array([])
                # for param in self.parameters:
                #     param_vec_next = np.concatenate((param_vec_next, param.get_value().flatten()))

                #flann_params = flann.build_index(param_vec, target_precision=.9)
                #nn_dist = np.sqrt(flann.nn(param_vec, param_vec_next, 1)[1][0])

                # add to previous parameter vectors:
                # param_vec = np.vstack((param_vec, param_vec_next))

                print('\rEpoch {:4}/{} | Cost mean={:6.3f}, std={:6.3f} | '
                      'Monitor mean={:6.4f}, std={:6.3f} | '
                      'Time={} s\n'.format(epoch + 1, num_epochs, avg_cost,
                                           std_cost, avg_monitor, std_monitor,
                                           np.round(time_elapsed, 0)),
                      end='')
                sys.stdout.flush()

                if save_as is not None and avg_monitor < best_monitor:
                    #print('Saving results...')
                    best_monitor = avg_monitor
                    # save full state, not just parameters:
                    # param_list = []
                    # gtm1_list = []
                    # stm1_list = []
                    # for n in range(len(self.parameters)):
                    #     try:
                    #         param_list.append(self.parameters[n])
                    #         gtm1_list.append(self.gtm1[n])
                    #         stm1_list.append(self.stm1[n])
                    #     except:
                    #         break

                    saved_state = self.parameters + self.gtm1 + self.stm1

                    with open(save_as + '.zip', 'wb') as f:
                        dump(saved_state, f)

                    # savefile = file(save_as + '.save', mode='wb')
                    # cPickle.dump(saved_state, savefile)
                    # savefile.close()

        except KeyboardInterrupt:
            self.costs = costs
            self.monitors = monitors
            print('\nInterrupted by user.')
Example #22
        x, l = list(res_buys.items())[list(res_buys.keys()).index(k)]
        print("Buys Validation:")
        for e in l:
            print(x, ':', e[0], ' ', e[1])

    if export_csv is not None:

        file = open(export_csv, 'w+')
        file.write('Metrics;')

        for k, l in res.items():
            for e in l:
                file.write(e[0])
                file.write(';')
            break

        file.write('\n')

        for k, l in res.items():
            file.write(k)
            file.write(';')
            for e in l:
                file.write(str(e[1]))
                file.write(';')
            file.write('\n')

        file.close()

    # comment out when not using gru
    with open("mdl/test-gru.mdl", "wb+") as model_file:
        pkl.dump(gru, model_file)
Example #23
def save_model_params(model, model_path):
    with open(os.path.join(model_path, "params.zip"), 'wb') as f:
        pkl_utils.dump((model.parameters, model.update_rule.parameters), f)
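The matching reader is not shown. Here is a minimal sketch, assuming the same pkl_utils import and that the caller wants the two parameter collections back as a tuple; the function name is illustrative.

def load_model_params(model_path):
    # Counterpart to save_model_params: returns
    # (model_parameters, update_rule_parameters).
    with open(os.path.join(model_path, "params.zip"), 'rb') as f:
        return pkl_utils.load(f)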
Example #24
 def save(self, filename):
     with open(filename, 'wb') as f:
         dump(self.get_save(), f)
     print("\nSuccessfully saved architecture to file: %s" % filename)
Example #25
 def dumps(self, file_path):
     with open(file_path, 'wb') as f:
         dump(self.params, f)
Example #26
 def save_feature_maps(self, filename):
     cwd = os.path.dirname(os.path.realpath(__file__))
     with open(os.path.join(cwd, filename + '.zip'), 'wb') as f:
         params = []
         for k in self.conv_layers:
             params += [param.get_value() for param in self.params[k]]
         dump(params, f)
Example #27
 def save_params(self, filename):
     cwd = os.path.dirname(os.path.realpath(__file__))
     with open(os.path.join(cwd, filename + '.zip'), 'wb') as f:
         params = [param.get_value() for param in self.full_params]
         dump(params, f)
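For completeness, a sketch of the inverse operation, assuming load is theano.misc.pkl_utils.load and that self.full_params is the same list the values were dumped from; the method name is illustrative.

 def load_params(self, filename):
     # Read the saved value list back and copy each value into the
     # corresponding shared variable of self.full_params.
     cwd = os.path.dirname(os.path.realpath(__file__))
     with open(os.path.join(cwd, filename + '.zip'), 'rb') as f:
         values = load(f)
     for param, value in zip(self.full_params, values):
         param.set_value(value)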
Example #28
def save_model_params(model, model_path):
    with open(os.path.join(model_path, "params.zip"), 'wb') as f:
        pkl_utils.dump((model.parameters, model.update_rule.parameters), f)
Example #29
    def train(self, dataset, lr=1., gamma=.9, beta1=0.9, beta2=0.999,
              min_batch_size=100, max_batch_size=None, num_epochs=200,
              save_as=None, early_stopping=-1E+6):
        """Train the RNN-RBM via stochastic gradient descent (SGD) using MIDI
        files converted to piano-rolls.

        dataset : list of numpy arrays
        min_batch_size, max_batch_size : integers
            Training sequences will be split into subsequences whose lengths
            fall within this range before applying the SGD updates.
        num_epochs : integer
            Number of epochs (pass over the training set) performed. The user
            can safely interrupt training with Ctrl+C at any time."""

        if self.optimizer == 'adadelta':
            hyperparams = (lr, gamma)
        elif self.optimizer == 'adam':
            hyperparams = (lr, beta1, beta2)
        else:
            hyperparams = (lr, )

        if type(dataset) is not list:
            dataset = [dataset]

        self.dataset = dataset
        nsamples = len(dataset)

        # flatten all parameters into an array for FLANN NN computation:
        # TODO: not sure if this is very useful, since the parameter space
        # is very high dimensional... basically parameters can bounce around
        # minimum and NN will not converge to zero in a long time... take e.g.
        # large dim. arrays with random 0, 1's and compute nn after generating
        # a new one each time! But hmm I think it should be steadily decreasing...
        # anyway think about it
        # import pyflann
        # flann = pyflann.FLANN()
        # pyflann.set_distance_type('euclidean')

        param_vec = self.param_vec
        if param_vec is None:
            param_vec = np.array([])
            for param in self.parameters:
                param_vec = np.concatenate((param_vec, param.get_value().flatten()))
            param_vec = param_vec[None, :]

        if max_batch_size is None:
            max_batch_size = min_batch_size
            mean_batch_size = min_batch_size
        else:
            mean_batch_size = int((min_batch_size + max_batch_size) / 2)

        best_monitor = 1E+6
        done = False
        try:
            for epoch in range(num_epochs):
                if done:
                    break
                start_time = time.time()

                costs = []
                monitors = []

                # shuffle dataset:
                shuffle(dataset)

                for sample_number, sample in enumerate(dataset):

                    # split to batches:
                    sample_size = len(sample)
                    idx = np.random.randint(min_batch_size,
                                            max_batch_size + 1,
                                            int(sample_size / min_batch_size)).cumsum()
                    idx = idx[idx < sample_size - min_batch_size]
                    batches = np.split(sample, idx)
                    shuffle(batches)
                    nbatches = len(batches)

                    for n, batch in enumerate(batches):

                        # don't train with almost empty batch:
                        if np.sum(batch) < min_steps_in_batch:
                            continue
                        if batch.shape[0] < 3:  # just in case...
                            continue

                        monitor, cost = self.train_function(batch, *hyperparams)

                        #TODO: revert to saved parameters in case of nans (?)
                        if np.isnan(cost):
                            raise ValueError('\nNaN encountered, breaking out!')

                        if np.abs(cost) > 1E+9:
                            raise ValueError('\nCost blew up, breaking out!')

                        costs.append(cost)
                        monitors.append(monitor)
                        pct_progress = int(100 * n / nbatches)
                        print('\rSample: {:6}/{} -- Progress: {:3}% -- '
                              'Cost={:6.3f} -- Monitor={:6.3f}'.format(sample_number + 1,
                                                                       nsamples,
                                                                       pct_progress,
                                                                       float(cost),
                                                                       float(monitor)), end='')
                        sys.stdout.flush()
                        if monitor < early_stopping:
                            print('\nEarly stop.')
                            done = True
                            break

                costs = np.asarray(costs)
                monitors = np.asarray(monitors)
                costs[costs > 1e+6] = 1e+6  # getting rid of infs
                monitors[monitors > 1e+6] = 1e+6  # getting rid of infs
                self.costs = costs
                self.monitors = monitors
                avg_cost = np.round(np.mean(costs), 4)
                std_cost = np.round(np.std(costs), 4)
                avg_monitor = np.round(np.mean(monitors), 4)
                std_monitor = np.round(np.std(monitors), 4)
                time_elapsed = time.time() - start_time

                # Nearest neighbors in parameter space:
                # param_vec_next = np.array([])
                # for param in self.parameters:
                #     param_vec_next = np.concatenate((param_vec_next, param.get_value().flatten()))

                #flann_params = flann.build_index(param_vec, target_precision=.9)
                #nn_dist = np.sqrt(flann.nn(param_vec, param_vec_next, 1)[1][0])

                # add to previous parameter vectors:
                # param_vec = np.vstack((param_vec, param_vec_next))

                print('\rEpoch {:4}/{} | Cost mean={:6.3f}, std={:6.3f} | '
                      'Monitor mean={:6.4f}, std={:6.3f} | '
                      'Time={} s\n'.format(epoch + 1, num_epochs, avg_cost,
                                           std_cost, avg_monitor, std_monitor,
                                           np.round(time_elapsed, 0)),
                      end='')
                sys.stdout.flush()

                if save_as is not None and avg_monitor < best_monitor:
                    #print('Saving results...')
                    best_monitor = avg_monitor
                    # save full state, not just parameters:
                    # param_list = []
                    # gtm1_list = []
                    # stm1_list = []
                    # for n in range(len(self.parameters)):
                    #     try:
                    #         param_list.append(self.parameters[n])
                    #         gtm1_list.append(self.gtm1[n])
                    #         stm1_list.append(self.stm1[n])
                    #     except:
                    #         break

                    saved_state = self.parameters + self.gtm1 + self.stm1

                    with open(save_as + '.zip', 'wb') as f:
                        dump(saved_state, f)

                    # savefile = file(save_as + '.save', mode='wb')
                    # cPickle.dump(saved_state, savefile)
                    # savefile.close()

        except KeyboardInterrupt:
            self.costs = costs
            self.monitors = monitors
            print('\nInterrupted by user.')
Example #30
def dump_params_pickle(file, params_to_pickle):
    with open(file, 'wb') as f:
        dump(params_to_pickle, f)
Example #31
def dump_params_pickle(file, params_to_pickle):
    with open(file, 'wb') as f:
        dump(params_to_pickle, f)