def test_(self, testset, epoch=0):
    """Score ``testset``, print AUC and per-instance cost, and vote on early stop.

    When ``self.evaluate_op`` is truthy the full test batch is scored in one
    session run and the AUC comes from that op.  Otherwise the test set is
    consumed in ``batch_size`` chunks, the per-batch costs are summed, and the
    AUC is computed by ``auc`` from ``testset.labels`` and the collected
    predictions.

    Args:
        testset: dataset; its ``num_features`` must equal ``self.num_features``.
        epoch: value echoed at the start of the printed report line.

    Returns:
        True when ``FLAGS.auto_stop`` is set and the AUC gain over the stored
        ``self.auc`` is below ``FLAGS.min_improve``; False otherwise.
        ``self.auc`` is overwritten with the new AUC in either case.
    """
    assert (testset.num_features == self.num_features)
    auc_ = 0.5
    if self.evaluate_op:
        # One pass over the whole set; the graph's own op supplies the AUC.
        inputs, labels = testset.full_batch()
        feed = melt.gen_feed_dict(
            self.trainer, self.algo, inputs, labels, test_mode=True)
        scores, auc_, total_cost = self.session.run(
            [self.predict_op, self.evaluate_op, self.cost], feed_dict=feed)
    else:
        # The original stored this count in a local that was never read;
        # the call itself is retained.
        testset.num_instances()
        scores = []
        total_cost = 0
        inputs, labels = testset.next_batch(batch_size)
        while inputs is not None:
            feed = melt.gen_feed_dict(
                self.trainer, self.algo, inputs, labels, test_mode=True)
            batch_scores, batch_cost = self.session.run(
                [self.predict_op, self.cost], feed_dict=feed)
            scores.extend(batch_scores)
            total_cost += batch_cost
            inputs, labels = testset.next_batch(batch_size)
        # AUC is computed against the dataset's complete label vector.
        labels = testset.labels
        scores = np.array(scores)
        auc_ = auc(labels, scores)
    # Single-argument print: same text as the Python 2 print statement.
    print('%s  auc: %s cost: %s' % (epoch, auc_, total_cost / len(labels)))
    stop_now = bool(
        FLAGS.auto_stop and (auc_ - self.auc) < FLAGS.min_improve)
    self.auc = auc_
    return stop_now
def test_(self, testset, epoch=0):
    """Score ``testset`` and print its AUC and per-instance cost.

    When ``self.evaluate_op`` is truthy the full test batch is scored in one
    session run and the AUC comes from that op.  Otherwise the test set is
    consumed in ``batch_size`` chunks, the (label, prediction) pairs are
    printed as an array, and the AUC is computed by ``auc``.

    Args:
        testset: dataset; its ``num_features`` must equal ``self.num_features``.
        epoch: value echoed at the start of the printed report line.

    Returns:
        None (there is no return statement).
    """
    assert (testset.num_features == self.num_features)
    auc_ = 0.0
    if self.evaluate_op:
        inputs, labels = testset.full_batch()
        feed = melt.gen_feed_dict(
            self.trainer, self.algo, inputs, labels, test_mode=True)
        scores, auc_, total_cost = self.session.run(
            [self.predict_op, self.evaluate_op, self.cost], feed_dict=feed)
    else:
        # The original stored this count in a local that was never read;
        # the call itself is retained.
        testset.num_instances()
        scores = []
        total_cost = 0
        inputs, labels = testset.next_batch(batch_size)
        while inputs is not None:
            feed = melt.gen_feed_dict(
                self.trainer, self.algo, inputs, labels, test_mode=True)
            batch_scores, batch_cost = self.session.run(
                [self.predict_op, self.cost], feed_dict=feed)
            scores.extend(batch_scores)
            total_cost += batch_cost
            inputs, labels = testset.next_batch(batch_size)
        labels = testset.labels
        scores = np.array(scores)
        # Debug dump of every (label, prediction) pair.
        print(np.array(list(zip(labels, scores))))
        auc_ = auc(labels, scores)
    # Single-argument print: same text as the Python 2 print statement.
    print('%s  auc: %s cost: %s' % (epoch, auc_, total_cost / len(labels)))
def calibrate_(self, dataset):
    """Feed the model's predictions on ``dataset`` to the calibrator.

    The dataset is walked in consecutive ``batch_size`` windows.  Each window
    is scored with ``self.predict_op`` and handed to ``self.calibrate``
    together with its labels; ``self.calibrator.FinishTraining()`` is called
    once at the end.

    The previous iteration paired ``range(0, n, batch_size)`` with
    ``range(batch_size, n, batch_size)``, which always dropped the final
    window (partial or full) and did nothing at all when the dataset was
    smaller than one batch.  Every instance is now covered.

    Args:
        dataset: object providing ``num_instances()`` and
            ``mini_batch(start, end)``.
    """
    num_instances = dataset.num_instances()
    for start in range(0, num_instances, batch_size):
        # Clamp so the trailing window stops at the last instance.
        # NOTE(review): assumes mini_batch accepts end == num_instances and a
        # window shorter than batch_size -- confirm against melt's dataset.
        end = min(start + batch_size, num_instances)
        trX, trY = dataset.mini_batch(start, end)
        feed_dict = melt.gen_feed_dict(self.trainer, self.algo, trX, trY)
        predicts = self.session.run(self.predict_op, feed_dict=feed_dict)
        self.calibrate(trY, predicts)
    self.calibrator.FinishTraining()
def predict(self, feature_vecs):
    """Run ``self.predict_op`` on one feature vector or a list of them.

    Args:
        feature_vecs: a list of feature vectors, or a single vector.  Anything
            whose exact type is not ``list`` is treated as a single vector
            and wrapped in a one-element list.

    Returns:
        The result of ``session.run([self.predict_op], ...)``; because the
        fetch is a one-element list, so is the returned value.
    """
    # Exact-type test on purpose: only a plain ``list`` counts as a batch.
    vectors = [feature_vecs] if type(feature_vecs) != list else feature_vecs
    sparse_features = melt.sparse_vectors2sparse_features(vectors)
    feed = melt.gen_feed_dict(
        self.trainer, self.algo, sparse_features, test_mode=True)
    return self.session.run([self.predict_op], feed_dict=feed)
def train_(self, trainset):
    """Run ``self.train_op`` once per batch until ``trainset`` is exhausted.

    Batches of ``batch_size`` are pulled with ``trainset.next_batch``; the
    loop ends when the returned features are ``None``.

    Args:
        trainset: dataset providing ``num_instances()`` and ``next_batch(n)``.
    """
    # The original stored this count in a local that was never read;
    # the call itself is retained.
    trainset.num_instances()
    features, labels = trainset.next_batch(batch_size)
    while features is not None:
        feed = melt.gen_feed_dict(self.trainer, self.algo, features, labels)
        self.session.run(self.train_op, feed_dict=feed)
        features, labels = trainset.next_batch(batch_size)
def predict(self, feature_vecs):
    """Run ``self.predict_op`` on one feature vector or a list of them.

    The vectors are converted with ``melt.sparse_vectors2sparse_features``
    when ``self.trainer.type`` is ``'sparse'`` and with
    ``melt.dense_vectors2features`` otherwise.

    Args:
        feature_vecs: a list of feature vectors, or a single vector.  Anything
            whose exact type is not ``list`` is treated as a single vector
            and wrapped in a one-element list.

    Returns:
        The result of ``session.run([self.predict_op], ...)``; because the
        fetch is a one-element list, so is the returned value.
    """
    # Exact-type test on purpose: only a plain ``list`` counts as a batch.
    vectors = [feature_vecs] if type(feature_vecs) != list else feature_vecs
    if self.trainer.type == 'sparse':
        features = melt.sparse_vectors2sparse_features(vectors)
    else:
        # Dense input.
        features = melt.dense_vectors2features(
            vectors, self.trainer.index_only)
    feed = melt.gen_feed_dict(
        self.trainer, self.algo, features, test_mode=True)
    return self.session.run([self.predict_op], feed_dict=feed)
def train_(self, trainset, testset=None, epoch=0):
    """Run one pass over ``trainset`` with periodic logging and testing.

    Every batch runs ``self.train_op`` and fetches the current cost and
    accuracy.  Every 100th batch a progress line with the wall-clock time
    since the previous progress line is printed; every 1000th batch
    ``self.test_`` is run on ``testset``.

    Args:
        trainset: dataset providing ``next_batch(n)``.
        testset: optional dataset for the periodic evaluation.  When it is
            ``None`` (the default) evaluation is skipped; previously
            ``self.test_`` was still called at the first batch and failed on
            the ``None`` dataset.
        epoch: epoch number, used for logging and passed on to ``self.test_``.
    """
    # ``step`` instead of ``round`` so the builtin is not shadowed.
    step = 0
    start_time = time.time()
    while True:
        trX, trY = trainset.next_batch(batch_size)
        if trX is None:
            break
        feed_dict = melt.gen_feed_dict(self.trainer, self.algo, trX, trY)
        _, cost_, accuracy_ = self.session.run(
            [self.train_op, self.cost, self.accuracy], feed_dict=feed_dict)
        if step % 100 == 0:
            end_time = time.time()
            duration = end_time - start_time
            # Reset before any evaluation below, so the next reported
            # duration includes evaluation time (unchanged behaviour).
            start_time = end_time
            # Single-argument print: same text as the Python 2 statement.
            print('epoch: %s round: %s train precision@1: %s cost: %s '
                  'duration: %s' % (epoch, step, accuracy_, cost_, duration))
        if testset is not None and step % 1000 == 0:
            self.test_(testset, epoch, step)
        step += 1
def test_(self, testset, epoch=0, round=0):
    """Report mean cost and precision@1 of the model over ``testset``.

    The test set is consumed in chunks of ``batch_size * 10``.  Cost and
    accuracy are fetched per chunk and averaged over the number of chunks.
    The mean accuracy is stored in ``self.avg_accuracy``.

    Fixes over the previous version: the cost is now actually averaged (the
    old ``cost_ / round`` expression discarded its result, so the summed cost
    was printed), and an empty test set no longer raises ZeroDivisionError.

    Args:
        testset: dataset providing ``next_batch(n)``.
        epoch: epoch number, used only in the printed report.
        round: training round number, used only in the printed report.

    Returns:
        Always False: the auto-stop check on accuracy improvement is
        currently disabled.
    """
    cost_ = 0.
    accuracy_ = 0.
    num_batches = 0
    while True:
        teX, teY = testset.next_batch(batch_size * 10)
        if teX is None:
            break
        feed_dict = melt.gen_feed_dict(
            self.trainer, self.algo, teX, teY, test_mode=True)
        now_cost, now_accuracy = self.session.run(
            [self.cost, self.accuracy], feed_dict=feed_dict)
        cost_ += now_cost
        accuracy_ += now_accuracy
        num_batches += 1
    if num_batches:
        # Unweighted mean over chunks; a shorter final chunk counts as much
        # as a full one.
        cost_ /= num_batches
        accuracy_ /= num_batches
    # Single-argument print: same layout as the Python 2 print statement.
    print('epoch: %s round: %s test precision@1: %s cost: %s'
          % (epoch, round, accuracy_, cost_))
    # Auto-stop (FLAGS.auto_stop / FLAGS.min_improve against the previous
    # self.avg_accuracy) is intentionally not applied here.
    need_stop = False
    self.avg_accuracy = accuracy_
    return need_stop
def train(self, trainset_file, testset_file, method, num_epochs, learning_rate, model_path):
    """Load the datasets, build the graph, train, and save the model.

    Steps, in order:
      1. Load train and test sets with ``melt.load_dataset`` and assert that
         their feature counts match.
      2. Build the algorithm (``self.gen_algo``), the trainer
         (``melt.gen_binary_classification_trainer``) and the graph ops
         (``self.foward``).
      3. Create ``self.session`` and initialise all variables.
      4. Optionally register scalar summaries (``FLAGS.use_summary``).
      5. For each epoch run ``self.train_`` then ``self.test_``; stop early
         when ``self.test_`` returns a truthy value; periodically save.
      6. Save the final model, optionally calibrate, optionally write one
         summary record.

    Args:
        trainset_file: path handed to ``melt.load_dataset`` for training data.
        testset_file: path handed to ``melt.load_dataset`` for test data.
        method: passed to ``self.gen_algo``.
        num_epochs: upper bound on the number of train/test rounds.
        learning_rate: passed to ``self.foward``.
        model_path: target for ``save_info`` / ``save_model`` and the
            calibrator files.

    NOTE(review): the nesting below was inferred from the statements
    themselves (the source formatting did not show it) -- in particular where
    the epoch loop ends and whether the allocator setting applies to both
    ConfigProto branches.  Confirm against the intended layout.
    """
    print 'batch_size:', batch_size, ' learning_rate:', learning_rate, ' num_epochs:', num_epochs
    print 'method:', method
    trainset = melt.load_dataset(trainset_file)
    print "finish loading train set ", trainset_file
    self.num_features = trainset.num_features
    print 'num_features: ', self.num_features
    print 'trainSet size: ', trainset.num_instances()
    testset = melt.load_dataset(testset_file)
    print "finish loading test set ", testset_file
    assert (trainset.num_features == testset.num_features)
    print 'testSet size: ', testset.num_instances()
    algo = self.gen_algo(method)
    trainer = melt.gen_binary_classification_trainer(trainset)
    self.algo = algo
    self.trainer = trainer
    print 'trainer_type:', trainer.type
    print 'trainer_index_only:', trainer.index_only
    # Only evaluate_op is used below from this tuple; the other methods read
    # self.cost / self.train_op / self.predict_op, presumably set by foward
    # -- TODO confirm.
    cost, train_op, predict_op, evaluate_op = self.foward(
        algo, trainer, learning_rate)
    #self.foward(algo, trainer, learning_rate)
    config = None
    if not FLAGS.show_device:
        config = tf.ConfigProto()
    else:
        # Log which device each op is placed on.
        config = tf.ConfigProto(log_device_placement=True)
    config.gpu_options.allocator_type = 'BFC'
    self.session = tf.Session(config=config)
    init = tf.initialize_all_variables()
    self.session.run(init)
    summary_writer = None
    if FLAGS.use_summary:
        tf.scalar_summary("cross_entropy", self.cost)
        if FLAGS.use_auc_op:
            tf.scalar_summary("auc", evaluate_op)
        # merged_summary_op exists only on this branch; its single later use
        # is guarded by the same flag.
        merged_summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.summary_path, self.session.graph_def)
    #os.system('rm -rf ' + FLAGS.model)
    # NOTE(review): shell command built by string concatenation; a path with
    # spaces or shell metacharacters in FLAGS.model will misbehave.  Also
    # note the directory created is FLAGS.model while saving uses model_path.
    os.system('mkdir -p ' + FLAGS.model)
    self.save_info(model_path)
    for epoch in range(num_epochs):
        # Reload the training set on later epochs when shuffling is requested
        # (presumably load_dataset shuffles -- TODO confirm).
        if epoch > 0 and FLAGS.shuffle:
            trainset = melt.load_dataset(trainset_file)
        self.train_(trainset)
        need_stop = self.test_(testset, epoch=epoch)
        if need_stop:
            print 'need stop as improve is smaller then %f' % FLAGS.min_improve
            break
        #print weight
        #@FIXME
        # Periodic checkpoint, skipped for index-only trainers.
        if epoch % FLAGS.save_epochs == 0 and not trainer.index_only:
            self.save_model(model_path, epoch)

    # Final save after the last (or early-stopped) epoch.
    self.save_model(model_path)
    if FLAGS.calibrate:
        # Calibrate on the training set unless FLAGS.calibrate_trainset is off.
        dataset = trainset
        if not FLAGS.calibrate_trainset:
            dataset = testset
        self.calibrate_(dataset)  #@TODO may be test set is right?
        CalibratorFactory.Save(self.calibrator, model_path + '/calibrator.bin')
        #self.calibrator.Save(model_path + '/calibrator.bin')
        self.calibrator.SaveText(model_path + '/calibrator.txt')
    if FLAGS.use_summary:
        # NOTE(review): `epoch` is the loop variable left over from above; it
        # is undefined if num_epochs is 0.
        teX, teY = testset.full_batch()
        summary_str = self.session.run(merged_summary_op,
                                       feed_dict=melt.gen_feed_dict(
                                           self.trainer, self.algo, teX, teY, test_mode=True))
        summary_writer.add_summary(summary_str, epoch)