# Imports this excerpt needs; melt is the author's helper library.
# FLAGS (via tf.app.flags) and the module-level trainset_file, testset_file,
# batch_size, learning_rate and num_epochs are presumed to be defined
# earlier in the original file.
import tensorflow as tf
import melt

method = FLAGS.method

trainset = melt.load_dataset(trainset_file)
print "finish loading train set ",trainset_file
testset = melt.load_dataset(testset_file)
print "finish loading test set ", testset_file

assert trainset.num_features == testset.num_features
num_features = trainset.num_features
print 'num_features: ', num_features
print 'trainSet size: ', trainset.num_instances()
print 'testSet size: ', testset.num_instances()
print 'batch_size:', batch_size, ' learning_rate:', learning_rate, ' num_epochs:', num_epochs


trainer = melt.gen_binary_classification_trainer(trainset)

class LogisticRegression:
    def model(self, X, w):
        return melt.matmul(X, w)

    def forward(self, trainer):
        w = melt.init_weights([trainer.num_features, 1])
        py_x = self.model(trainer.X, w)
        return py_x


class Mlp:
    def model(self, X, w_h, w_o):
        # a basic MLP: think of it as two stacked logistic regressions
        h = tf.nn.sigmoid(melt.matmul(X, w_h))
        # note that we don't apply the output activation here because the cost function does it for us
        return tf.matmul(h, w_o)
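    # The excerpt ends before Mlp's forward(); a sketch in the style of
    # LogisticRegression.forward might look like this. hidden_size is a
    # hypothetical parameter, not part of the original code.
    def forward(self, trainer, hidden_size=256):
        w_h = melt.init_weights([trainer.num_features, hidden_size])
        w_o = melt.init_weights([hidden_size, 1])
        return self.model(trainer.X, w_h, w_o)


# A minimal sketch (an assumption, not shown in the original excerpt) of
# wiring the model into a training op. It presumes trainer also exposes a
# label placeholder trainer.Y. In the pre-1.0 TensorFlow used here,
# sigmoid_cross_entropy_with_logits takes (logits, targets) positionally,
# which is why model() returns raw logits.
py_x = LogisticRegression().forward(trainer)
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(py_x, trainer.Y))
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)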
	
Example #3
    # Excerpt from a larger class; batch_size, os, tf, melt, FLAGS and
    # CalibratorFactory are presumed to be imported or defined at module
    # level in the original file.
    def train(self, trainset_file, testset_file, method, num_epochs,
              learning_rate, model_path):
        print 'batch_size:', batch_size, ' learning_rate:', learning_rate, ' num_epochs:', num_epochs
        print 'method:', method

        trainset = melt.load_dataset(trainset_file)
        print "finish loading train set ", trainset_file
        self.num_features = trainset.num_features
        print 'num_features: ', self.num_features
        print 'trainSet size: ', trainset.num_instances()
        testset = melt.load_dataset(testset_file)
        print "finish loading test set ", testset_file
        assert trainset.num_features == testset.num_features
        print 'testSet size: ', testset.num_instances()

        algo = self.gen_algo(method)
        trainer = melt.gen_binary_classification_trainer(trainset)
        self.algo = algo
        self.trainer = trainer
        print 'trainer_type:', trainer.type
        print 'trainer_index_only:', trainer.index_only

        # 'foward' (sic) matches the method name defined elsewhere in the original class
        cost, train_op, predict_op, evaluate_op = self.foward(
            algo, trainer, learning_rate)

        # log_device_placement echoes op placement when --show_device is set
        config = tf.ConfigProto(log_device_placement=FLAGS.show_device)
        config.gpu_options.allocator_type = 'BFC'

        self.session = tf.Session(config=config)
        init = tf.initialize_all_variables()  # pre-1.0 TensorFlow API, as elsewhere in this excerpt
        self.session.run(init)

        summary_writer = None
        if FLAGS.use_summary:
            tf.scalar_summary("cross_entropy", self.cost)  # self.cost is presumably set inside foward()
            if FLAGS.use_auc_op:
                tf.scalar_summary("auc", evaluate_op)
            merged_summary_op = tf.merge_all_summaries()
            summary_writer = tf.train.SummaryWriter(FLAGS.summary_path,
                                                    self.session.graph_def)

        #os.system('rm -rf ' + FLAGS.model)
        os.system('mkdir -p ' + FLAGS.model)

        self.save_info(model_path)

        for epoch in range(num_epochs):
            if epoch > 0 and FLAGS.shuffle:
                trainset = melt.load_dataset(trainset_file)

            self.train_(trainset)
            need_stop = self.test_(testset, epoch=epoch)

            if need_stop:
                print 'need to stop as improvement is smaller than %f' % FLAGS.min_improve
                break

            #print weight
            #@FIXME
            if epoch % FLAGS.save_epochs == 0 and not trainer.index_only:
                self.save_model(model_path, epoch)

        self.save_model(model_path)
        if FLAGS.calibrate:
            dataset = trainset
            if not FLAGS.calibrate_trainset:
                dataset = testset
            self.calibrate_(dataset)  #@TODO maybe the test set is the right one?
            CalibratorFactory.Save(self.calibrator,
                                   model_path + '/calibrator.bin')
            #self.calibrator.Save(model_path + '/calibrator.bin')
            self.calibrator.SaveText(model_path + '/calibrator.txt')

        if FLAGS.use_summary:
            teX, teY = testset.full_batch()
            summary_str = self.session.run(merged_summary_op,
                                           feed_dict=melt.gen_feed_dict(
                                               self.trainer,
                                               self.algo,
                                               teX,
                                               teY,
                                               test_mode=True))
            summary_writer.add_summary(summary_str, epoch)  # epoch keeps its final value from the training loop
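# Hypothetical driver for the train() method above; the enclosing class name
# and the train/test/num_epochs/learning_rate flag names are assumptions,
# not part of the original excerpt:
#
#   runner = MlTrainer()
#   runner.train(FLAGS.train, FLAGS.test, FLAGS.method,
#                FLAGS.num_epochs, FLAGS.learning_rate, FLAGS.model)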