method = FLAGS.method

trainset = melt.load_dataset(trainset_file)
print "finish loading train set ", trainset_file
testset = melt.load_dataset(testset_file)
print "finish loading test set ", testset_file

assert trainset.num_features == testset.num_features
num_features = trainset.num_features
print 'num_features: ', num_features
print 'trainSet size: ', trainset.num_instances()
print 'testSet size: ', testset.num_instances()
print 'batch_size:', batch_size, ' learning_rate:', learning_rate, ' num_epochs:', num_epochs

trainer = melt.gen_binary_classification_trainer(trainset)


class LogisticRegresssion:
    def model(self, X, w):
        return melt.matmul(X, w)

    def forward(self, trainer):
        w = melt.init_weights([trainer.num_features, 1])
        py_x = self.model(trainer.X, w)
        return py_x


class Mlp:
    def model(self, X, w_h, w_o):
        # this is a basic mlp, think 2 stacked logistic regressions
        h = tf.nn.sigmoid(melt.matmul(X, w_h))
        # note that we don't take the softmax at the end
        # because our cost fn does that for us
        return tf.matmul(h, w_o)
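    # NOTE (editor sketch, not from the original source): this excerpt only shows
    # model() for Mlp; a forward() counterpart analogous to
    # LogisticRegresssion.forward is sketched below. It assumes a hypothetical
    # FLAGS.hidden_size flag for the hidden-layer width, which is not defined
    # anywhere in this file.
    def forward(self, trainer):
        hidden_size = FLAGS.hidden_size  # hypothetical flag, adjust to the real config
        w_h = melt.init_weights([trainer.num_features, hidden_size])
        w_o = melt.init_weights([hidden_size, 1])
        py_x = self.model(trainer.X, w_h, w_o)
        return py_x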
def train(self, trainset_file, testset_file, method, num_epochs, learning_rate, model_path):
    print 'batch_size:', batch_size, ' learning_rate:', learning_rate, ' num_epochs:', num_epochs
    print 'method:', method

    trainset = melt.load_dataset(trainset_file)
    print "finish loading train set ", trainset_file
    self.num_features = trainset.num_features
    print 'num_features: ', self.num_features
    print 'trainSet size: ', trainset.num_instances()

    testset = melt.load_dataset(testset_file)
    print "finish loading test set ", testset_file
    assert trainset.num_features == testset.num_features
    print 'testSet size: ', testset.num_instances()

    algo = self.gen_algo(method)
    trainer = melt.gen_binary_classification_trainer(trainset)
    self.algo = algo
    self.trainer = trainer
    print 'trainer_type:', trainer.type
    print 'trainer_index_only:', trainer.index_only

    cost, train_op, predict_op, evaluate_op = self.foward(algo, trainer, learning_rate)
    #self.foward(algo, trainer, learning_rate)

    config = None
    if not FLAGS.show_device:
        config = tf.ConfigProto()
    else:
        config = tf.ConfigProto(log_device_placement=True)
    config.gpu_options.allocator_type = 'BFC'

    self.session = tf.Session(config=config)
    init = tf.initialize_all_variables()
    self.session.run(init)

    summary_writer = None
    if FLAGS.use_summary:
        tf.scalar_summary("cross_entropy", self.cost)
        if FLAGS.use_auc_op:
            tf.scalar_summary("auc", evaluate_op)
        merged_summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.summary_path, self.session.graph_def)

    #os.system('rm -rf ' + FLAGS.model)
    os.system('mkdir -p ' + FLAGS.model)
    self.save_info(model_path)

    for epoch in range(num_epochs):
        if epoch > 0 and FLAGS.shuffle:
            trainset = melt.load_dataset(trainset_file)
        self.train_(trainset)
        need_stop = self.test_(testset, epoch=epoch)
        if need_stop:
            print 'need to stop as improvement is smaller than %f' % FLAGS.min_improve
            break
        #print weight
        #@FIXME
        if epoch % FLAGS.save_epochs == 0 and not trainer.index_only:
            self.save_model(model_path, epoch)

    self.save_model(model_path)

    if FLAGS.calibrate:
        dataset = trainset
        if not FLAGS.calibrate_trainset:
            dataset = testset
        self.calibrate_(dataset)  #@TODO maybe test set is right?
        CalibratorFactory.Save(self.calibrator, model_path + '/calibrator.bin')
        #self.calibrator.Save(model_path + '/calibrator.bin')
        self.calibrator.SaveText(model_path + '/calibrator.txt')

    if FLAGS.use_summary:
        teX, teY = testset.full_batch()
        summary_str = self.session.run(
            merged_summary_op,
            feed_dict=melt.gen_feed_dict(self.trainer, self.algo, teX, teY, test_mode=True))
        summary_writer.add_summary(summary_str, epoch)
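# Sketch (not from the original source): gen_algo is called in train() above but
# its definition is not part of this excerpt. Assuming `method` simply selects
# between the two algorithm classes defined earlier, a minimal dispatch could look
# like the following; the string values 'logistic' and 'mlp' are assumptions and
# may differ from the actual FLAGS.method values used by the project.
def gen_algo(self, method):
    if method == 'logistic':
        return LogisticRegresssion()
    elif method == 'mlp':
        return Mlp()
    else:
        raise ValueError('unknown method: %s' % method)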