def train():
    """Train an XGBoost regressor on the BOW features and persist it.

    Loads the training and validation splits via ``dp.load_data``, trains
    with RMSE early stopping keyed on the validation set, then dumps the
    booster to ``MODEL_DIR`` with joblib. No return value.
    """
    # Load data
    logger.info("Loading data...")
    x_train, y_train = dp.load_data(TRAININGSET_DIR)
    x_val, y_val = dp.load_data(VALIDATION_DIR)
    d_train = xgb.DMatrix(x_train, label=y_train)
    d_val = xgb.DMatrix(x_val, label=y_val)
    # Watch both splits; early stopping monitors the last entry ('valid').
    watchlist = [(d_train, 'train'), (d_val, 'valid')]
    logger.info("Finish building BOW.")
    params_xgb = {
        'objective': 'reg:linear',
        'eta': 0.001,
        'max_depth': 10,
        'eval_metric': 'rmse'
    }
    # Fix: announce training BEFORE the long-running xgb.train call;
    # previously this message was logged only after training had finished.
    logger.info("Training model...")
    model = xgb.train(params_xgb,
                      d_train,
                      10000,
                      evals=watchlist,
                      early_stopping_rounds=20,
                      verbose_eval=10)
    logger.info("Finish training. Saving model...")
    joblib.dump(model, MODEL_DIR)
def main(args):
    """Evaluate a frozen face-embedding model on the configured datasets.

    For each dataset in ``args.eval_datasets``: runs a batched forward pass
    to compute embeddings, then reports accuracy, validation rate @ FAR,
    mean FPR/TPR, AUC and EER via ``evaluate``/``metrics.auc``/``brentq``.
    """
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # prepare validate datasets
            ver_list = []
            ver_name_list = []
            for db in args.eval_datasets:
                print('begin db %s convert.' % db)
                data_set = load_data(db, args.image_size, args)
                ver_list.append(data_set)
                ver_name_list.append(db)
            # Load the model
            load_model(args.model)
            # Get input and output tensors, ignore phase_train_placeholder for it have default value.
            inputs_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            # image_size = images_placeholder.get_shape()[1]  # For some reason this doesn't work for frozen graphs
            embedding_size = embeddings.get_shape()[1]
            for db_index in range(len(ver_list)):
                # Run forward pass to calculate embeddings
                print('\nRunnning forward pass on {} images'.format(ver_name_list[db_index]))
                start_time = time.time()
                # data_sets: image tensor batch; issame_list: pair labels.
                # NOTE(review): assumes each ver_list entry is a
                # (images, issame) pair — confirm against load_data.
                data_sets, issame_list = ver_list[db_index]
                # Ceil-divide the dataset into test batches.
                if data_sets.shape[0] % args.test_batch_size == 0:
                    nrof_batches = data_sets.shape[0] // args.test_batch_size
                else:
                    nrof_batches = data_sets.shape[0] // args.test_batch_size + 1
                emb_array = np.zeros((data_sets.shape[0], embedding_size))
                for index in range(nrof_batches):
                    start_index = index * args.test_batch_size
                    # The last batch may be short; clamp to dataset size.
                    end_index = min((index + 1) * args.test_batch_size, data_sets.shape[0])
                    feed_dict = {inputs_placeholder: data_sets[start_index:end_index, ...]}
                    emb_array[start_index:end_index, :] = sess.run(embeddings, feed_dict=feed_dict)
                tpr, fpr, accuracy, val, val_std, far = evaluate(
                    emb_array, issame_list, nrof_folds=args.eval_nrof_folds)
                duration = time.time() - start_time
                print("total time %.3fs to evaluate %d images of %s" %
                      (duration, data_sets.shape[0], ver_name_list[db_index]))
                print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy), np.std(accuracy)))
                print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
                print('fpr and tpr: %1.3f %1.3f' % (np.mean(fpr, 0), np.mean(tpr, 0)))
                auc = metrics.auc(fpr, tpr)
                print('Area Under Curve (AUC): %1.3f' % auc)
                # EER: the point where false-accept and false-reject rates meet,
                # found by root-finding on the interpolated ROC curve.
                eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.)
                print('Equal Error Rate (EER): %1.3f' % eer)
def train():
    """Fit a decision-tree regressor on the BOW features and persist it."""
    logger.info("Loading data...")
    features, targets = dp.load_data(TRAININGSET_DIR)
    logger.info("Finish building BOW.")
    # Same hyper-parameters as before: squared-error split criterion,
    # best-split strategy.
    regressor = DecisionTreeRegressor(splitter="best", criterion="mse")
    logger.info("Training model...")
    regressor.fit(features, targets)
    logger.info("Finish training. Saving model...")
    joblib.dump(regressor, MODEL_DIR)
def train():
    """Fit a support-vector regressor on the BOW features and persist it."""
    logger.info("Loading data...")
    features, targets = dp.load_data(TRAININGSET_DIR)
    logger.info("Finish building BOW.")
    # Default-parameter SVR, exactly as before.
    regressor = SVR()
    logger.info("Training model...")
    regressor.fit(features, targets)
    logger.info("Finish training. Saving model...")
    joblib.dump(regressor, MODEL_DIR)
def test():
    """Score the persisted model on the test split and log PCC/DOA/RMSE/R2."""
    logger.info("Loading data...")
    features, gold = dp.load_data(TEST_DIR)
    logger.info("Loading model...")
    estimator = joblib.load(MODEL_DIR)
    logger.info("Predicting...")
    predictions = estimator.predict(features)
    logger.info("Calculate Metrics...")
    # Project-specific metrics (Pearson correlation, degree of agreement).
    pcc, doa = dp.evaluation(gold, predictions)
    mse = mean_squared_error(gold, predictions)
    rmse = mse ** 0.5
    r2 = r2_score(gold, predictions)
    logger.info("SVM: PCC {0:g} | DOA {1:g} | RMSE {2:g} | R2 {3:g}".format(
        pcc, doa, rmse, r2))
    logger.info("All Done.")
args = get_parser() # define placeholder inputs = tf.placeholder(name='img_inputs', shape=[None, *args.image_size, 3], dtype=tf.float32) phase_train_placeholder = tf.placeholder_with_default( tf.constant(False, dtype=tf.bool), shape=None, name='phase_train') # prepare validate datasets ver_list = [] ver_name_list = [] for db in args.eval_datasets: # for db in ['lfw']: print('begin db %s convert.' % db) data_set = load_data(db, args.image_size, args) ver_list.append(data_set) ver_name_list.append(db) # identity the input, for inference inputs = tf.identity(inputs, 'input') w_init_method = slim.initializers.xavier_initializer() prelogits, net_points = inference(inputs, phase_train=phase_train_placeholder, weight_decay=args.weight_decay) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # define sess gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
def my_loss(y_true, y_pred):
    """Pass-through Keras loss: only the prediction is needed.

    ``y_pred`` already holds the per-sample losses for the batch, so the
    loss is simply their mean; ``y_true`` is ignored.
    """
    return tf.reduce_mean(y_pred)


if __name__ == '__main__':
    with tf.Graph().as_default():
        args = get_parser()
        # prepare the validation datasets
        ver_list = []
        ver_name_list = []
        for db in args.eval_datasets:
            print('begin db %s convert.' % db)
            data_set = load_data(db, args.image_size, args.eval_db_path)
            ver_list.append(data_set)
            ver_name_list.append(db)
        # output file path: ensure all output directories exist
        check_path([
            args.log_file_path, args.h5_best_path, args.h5_path,
            args.tflite_path, args.summary_path
        ])
        # create log dir, one timestamped subdirectory per run
        subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
        log_dir = os.path.join(os.path.expanduser(args.log_file_path), subdir)
        if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
            os.makedirs(log_dir)
model = Model(inputs=inputs, outputs=output) # from tensor to model adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy']) return model def get_layer2(embedding): return 0 if __name__ == '__main__': x_data, y_data = load_data() corpus = [sentence_seg(x) for x in x_data] tokenizer = Tokenizer() tokenizer.fit_on_texts(corpus) sequences = tokenizer.texts_to_sequences(corpus) max_sequence_len = max([len(s) for s in sequences]) print("max sequence lenght:", max_sequence_len) data = pad_sequences(sequences, maxlen=max_sequence_len) labels = to_categorical(np.asarray(y_data)) X_train, X_test, y_train, y_test = train_test_split(data, labels,
def main():
    """End-to-end MobileFaceNet training loop (TF1 graph mode).

    Builds the graph (dataset pipeline, network, loss, summaries, train op),
    restores the latest checkpoint or a pretrained model if available, then
    trains for up to ``args.max_epoch`` epochs, periodically logging,
    writing summaries, saving checkpoints and validating on the configured
    datasets.
    """
    cur_time = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    print(f'\n\n\n***TRAINING SESSION START AT {cur_time}***\n\n\n')
    with tf.Graph().as_default():
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        args = get_parser()
        # define global params; all three counters live in the graph so
        # they are saved/restored with the checkpoint.
        global_step = tf.Variable(name='global_step', initial_value=0, trainable=False)
        epoch_step = tf.Variable(name='epoch_step', initial_value=0, trainable=False)
        epoch = tf.Variable(name='epoch', initial_value=0, trainable=False)
        # def placeholders
        print(f'***Input of size: {args.image_size}')
        print(
            f'***Perform evaluation after each {args.validate_interval} on datasets: {args.eval_datasets}'
        )
        inputs = tf.placeholder(name='img_inputs',
                                shape=[None, *args.image_size, 3],
                                dtype=tf.float32)
        labels = tf.placeholder(name='img_labels', shape=[None, ], dtype=tf.int64)
        # Defaults to inference mode (False) unless fed True during training.
        phase_train_placeholder = tf.placeholder_with_default(
            tf.constant(False, dtype=tf.bool), shape=None, name='phase_train')
        # prepare train dataset
        # the image is substracted 127.5 and multiplied 1/128.
        # random flip left right
        # NOTE(review): the normalization/flip above presumably happens in
        # parse_function — confirm there.
        tfrecords_f = os.path.join(args.tfrecords_file_path, 'train.tfrecords')
        dataset = tf.data.TFRecordDataset(tfrecords_f)
        dataset = dataset.map(parse_function)
        # dataset = dataset.shuffle(buffer_size=args.buffer_size)
        dataset = dataset.batch(args.train_batch_size)
        iterator = dataset.make_initializable_iterator()
        next_element = iterator.get_next()
        # identity the input, for inference: stable tensor name 'input'.
        inputs = tf.identity(inputs, 'input')
        prelogits, net_points = inference(
            inputs,
            bottleneck_layer_size=args.embedding_size,
            phase_train=phase_train_placeholder,
            weight_decay=args.weight_decay)
        # record the network architecture (layer name -> output shape)
        hd = open("./arch/txt/MobileFaceNet_architecture.txt", 'w')
        for key in net_points.keys():
            info = '{}:{}\n'.format(key, net_points[key].get_shape().as_list())
            hd.write(info)
        hd.close()
        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')
        # Norm for the prelogits: penalizes large prelogit norms as an extra
        # regularization loss.
        eps = 1e-5
        prelogits_norm = tf.reduce_mean(
            tf.norm(tf.abs(prelogits) + eps, ord=args.prelogits_norm_p, axis=1))
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             prelogits_norm * args.prelogits_norm_loss_factor)
        # inference_loss, logit = cos_loss(prelogits, labels, args.class_number)
        w_init_method = slim.initializers.xavier_initializer()
        # Select the margin-based classification loss.
        if args.loss_type == 'insightface':
            print(
                f'INSIGHTFACE LOSS WITH s={args.margin_s}, m={args.margin_m}')
            inference_loss, logit = insightface_loss(embeddings,
                                                     labels,
                                                     args.class_number,
                                                     w_init_method,
                                                     s=args.margin_s,
                                                     m=args.margin_m)
        elif args.loss_type == 'cosine':
            inference_loss, logit = cosineface_loss(embeddings, labels,
                                                    args.class_number,
                                                    w_init_method)
        elif args.loss_type == 'combine':
            inference_loss, logit = combine_loss(embeddings, labels,
                                                 args.train_batch_size,
                                                 args.class_number,
                                                 w_init_method)
        else:
            assert 0, 'loss type error, choice item just one of [insightface, cosine, combine], please check!'
        tf.add_to_collection('losses', inference_loss)
        # total losses = classification loss + all regularization terms
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([inference_loss] + regularization_losses,
                              name='total_loss')
        # define the learning rate schedule: stepped decay keyed on epoch.
        learning_rate = tf.train.piecewise_constant(
            epoch,
            boundaries=args.lr_schedule,
            values=[0.1, 0.01, 0.001, 0.0001, 0.00001],
            name='lr_schedule')
        # define sess
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=args.log_device_mapping,
                                gpu_options=gpu_options)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        # calculate accuracy op (top-1 over the batch)
        pred = tf.nn.softmax(logit)
        correct_prediction = tf.cast(
            tf.equal(tf.argmax(pred, 1), tf.cast(labels, tf.int64)), tf.float32)
        Accuracy_Op = tf.reduce_mean(correct_prediction)
        # summary writer
        summary = tf.summary.FileWriter(args.summary_path, sess.graph)
        summaries = []
        # add train info to tensorboard summary
        summaries.append(tf.summary.scalar('inference_loss', inference_loss))
        summaries.append(tf.summary.scalar('total_loss', total_loss))
        summaries.append(tf.summary.scalar('learning_rate', learning_rate))
        summaries.append(tf.summary.scalar('training_acc', Accuracy_Op))
        summary_op = tf.summary.merge(summaries)
        # train op (project helper; also handles moving averages/histograms)
        train_op = train(total_loss, global_step, args.optimizer,
                         learning_rate, args.moving_average_decay,
                         tf.global_variables(), summaries,
                         args.log_histograms)
        # Counter maintenance ops; note inc_global_step_op is defined but the
        # loop below tracks the step in the Python-side cur_global_step.
        inc_global_step_op = tf.assign_add(global_step, 1,
                                           name='increment_global_step')
        inc_epoch_step_op = tf.assign_add(epoch_step, 1,
                                          name='increment_epoch_step')
        reset_epoch_step_op = tf.assign(epoch_step, 0, name='reset_epoch_step')
        inc_epoch_op = tf.assign_add(epoch, 1, name='increment_epoch')
        # record trainable variable
        hd = open("./arch/txt/trainable_var.txt", "w")
        for var in tf.trainable_variables():
            hd.write(str(var))
            hd.write('\n')
        hd.close()
        # init all variables
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        # RELOAD CHECKPOINT FOR PRETRAINED MODEL
        # pretrained model path
        pretrained_model = None
        if args.pretrained_model:
            pretrained_model = os.path.expanduser(args.pretrained_model)
            print('***Pre-trained model: %s' % pretrained_model)
        if pretrained_model is None:
            # saver to load pretrained model or save model; also tracks the
            # epoch/step counters so training can resume where it stopped.
            saver = tf.train.Saver(tf.trainable_variables() +
                                   [epoch, epoch_step, global_step],
                                   max_to_keep=args.saver_maxkeep)
        else:
            # A pretrained model won't contain our counters, so save/restore
            # trainable variables only.
            saver = tf.train.Saver(tf.trainable_variables(),
                                   max_to_keep=args.saver_maxkeep)
        # lask checkpoint path (sic — last checkpoint path)
        checkpoint_path = None
        if args.ckpt_path:
            ckpts = os.listdir(args.ckpt_path)
            if 'checkpoint' in ckpts:
                ckpts.remove('checkpoint')
            # Checkpoint files are named '<step>_MobileFaceNet.ckpt*'; sort
            # numerically by step, newest first.
            ckpts_prefix = [x.split('_')[0] for x in ckpts]
            ckpts_prefix.sort(key=lambda x: int(x), reverse=True)
            # Get last checkpoint
            if len(ckpts_prefix) > 0:
                last_ckpt = f"{ckpts_prefix[0]}_MobileFaceNet.ckpt"
                checkpoint_path = os.path.expanduser(
                    os.path.join(args.ckpt_path, last_ckpt))
                print('***Last checkpoint: %s' % checkpoint_path)
        # load checkpoint model (takes priority over pretrained model)
        if checkpoint_path is not None:
            print('***Restoring checkpoint: %s' % checkpoint_path)
            saver.restore(sess, checkpoint_path)
        # load pretrained model
        elif pretrained_model:
            print('***Restoring pretrained model: %s' % pretrained_model)
            # ckpt = tf.train.get_checkpoint_state(pretrained_model)
            # print(ckpt)
            saver.restore(sess, pretrained_model)
        else:
            print('***No checkpoint or pretrained model found.')
            print('***Training from scratch')
        # output file path
        if not os.path.exists(args.log_file_path):
            os.makedirs(args.log_file_path)
        if not os.path.exists(args.ckpt_best_path):
            os.makedirs(args.ckpt_best_path)
        # prepare validate datasets
        ver_list = []
        ver_name_list = []
        print('***LOADING VALIDATION DATABASES..')
        for db in args.eval_datasets:
            print('\t- Loading database: %s' % db)
            data_set = load_data(db, args.image_size, args)
            ver_list.append(data_set)
            ver_name_list.append(db)
        # Read back the (possibly restored) counters so the loop resumes
        # from where the last run stopped.
        cur_epoch, cur_global_step, cur_epoch_step = sess.run(
            [epoch, global_step, epoch_step])
        print('****************************************')
        print(
            f'Continuous training on EPOCH={cur_epoch}, GLOBAL_STEP={cur_global_step}, EPOCH_STEP={cur_epoch_step}'
        )
        print('****************************************')
        # Running accumulators, flushed each summary interval.
        total_losses_per_summary = []
        inference_losses_per_summary = []
        train_acc_per_summary = []
        avg_total_loss_per_summary = 0
        avg_inference_loss_per_summary = 0
        avg_train_acc_per_summary = 0
        for i in range(cur_epoch, args.max_epoch + 1):
            sess.run(iterator.initializer)
            # Trained steps are ignored: fast-forward the (unshuffled)
            # dataset past batches already consumed in an interrupted epoch.
            print(f'Skipping {cur_epoch_step} trained step..')
            start = time.time()
            for _j in range(cur_epoch_step):
                images_train, labels_train = sess.run(next_element)
                if _j % 1000 == 0:
                    end = time.time()
                    iter_time = end - start
                    start = time.time()
                    print(f'{_j}, time: {iter_time} seconds')
            print('***Traing started***')
            while True:
                try:
                    start = time.time()
                    images_train, labels_train = sess.run(next_element)
                    feed_dict = {
                        inputs: images_train,
                        labels: labels_train,
                        phase_train_placeholder: True
                    }
                    # One optimization step; also bumps the in-graph
                    # epoch_step counter via inc_epoch_step_op.
                    _, total_loss_val, inference_loss_val, reg_loss_val, _, acc_val = \
                        sess.run([train_op, total_loss, inference_loss, regularization_losses,
                                  inc_epoch_step_op, Accuracy_Op],
                                 feed_dict=feed_dict)
                    end = time.time()
                    pre_sec = args.train_batch_size / (end - start)
                    cur_global_step += 1
                    cur_epoch_step += 1
                    total_losses_per_summary.append(total_loss_val)
                    inference_losses_per_summary.append(inference_loss_val)
                    train_acc_per_summary.append(acc_val)
                    # print training information
                    if cur_global_step > 0 and cur_global_step % args.show_info_interval == 0:
                        print(
                            'epoch %d, total_step %d, epoch_step %d, total loss %.2f , inference loss %.2f, reg_loss %.2f, training accuracy %.6f, rate %.3f samples/sec'
                            % (i, cur_global_step, cur_epoch_step,
                               total_loss_val, inference_loss_val,
                               np.sum(reg_loss_val), acc_val, pre_sec))
                    # save summary
                    if cur_global_step > 0 and cur_global_step % args.summary_interval == 0:
                        # Re-feeds the last batch to evaluate the summary op.
                        feed_dict = {
                            inputs: images_train,
                            labels: labels_train,
                            phase_train_placeholder: True
                        }
                        summary_op_val = sess.run(summary_op,
                                                  feed_dict=feed_dict)
                        summary.add_summary(summary_op_val, cur_global_step)
                        # Average the accumulated per-step metrics, then
                        # reset the accumulators for the next interval.
                        avg_total_loss_per_summary = sum(
                            total_losses_per_summary) / len(
                                total_losses_per_summary)
                        total_losses_per_summary = []
                        avg_inference_loss_per_summary = sum(
                            inference_losses_per_summary) / len(
                                inference_losses_per_summary)
                        inference_losses_per_summary = []
                        avg_train_acc_per_summary = sum(
                            train_acc_per_summary) / len(train_acc_per_summary)
                        train_acc_per_summary = []
                        # Create a new Summary object with your measure
                        summary2 = tf.Summary()
                        summary2.value.add(
                            tag='avg_total_loss',
                            simple_value=avg_total_loss_per_summary)
                        summary2.value.add(
                            tag='avg_inference_loss',
                            simple_value=avg_inference_loss_per_summary)
                        summary2.value.add(
                            tag='avg_train_acc',
                            simple_value=avg_train_acc_per_summary)
                        # Add it to the Tensorboard summary writer
                        # Make sure to specify a step parameter to get nice graphs over time
                        summary.add_summary(summary2, cur_global_step)
                    # save ckpt files
                    if cur_global_step > 0 and cur_global_step % args.ckpt_interval == 0:
                        filename = '{:d}_MobileFaceNet'.format(
                            cur_global_step) + '.ckpt'
                        filename = os.path.join(args.ckpt_path, filename)
                        saver.save(sess, filename)
                    # validate
                    if cur_global_step > 0 and cur_global_step % args.validate_interval == 0:
                        print(
                            '-------------------------------------------------'
                        )
                        print('\nIteration', cur_global_step, 'validating...')
                        for db_index in range(len(ver_list)):
                            start_time = time.time()
                            data_sets, issame_list = ver_list[db_index]
                            emb_array = np.zeros(
                                (data_sets.shape[0], args.embedding_size))
                            # Ceil-divide into test batches.
                            if data_sets.shape[0] % args.test_batch_size == 0:
                                nrof_batches = data_sets.shape[
                                    0] // args.test_batch_size
                            else:
                                nrof_batches = data_sets.shape[
                                    0] // args.test_batch_size + 1
                            for index in range(
                                    nrof_batches
                            ):  # actual is same multiply 2, test data total
                                start_index = index * args.test_batch_size
                                end_index = min(
                                    (index + 1) * args.test_batch_size,
                                    data_sets.shape[0])
                                feed_dict = {
                                    inputs:
                                    data_sets[start_index:end_index, ...],
                                    phase_train_placeholder: False
                                }
                                emb_array[start_index:end_index, :] = sess.run(
                                    embeddings, feed_dict=feed_dict)
                            tpr, fpr, accuracy, val, val_std, far = evaluate(
                                emb_array,
                                issame_list,
                                nrof_folds=args.eval_nrof_folds)
                            duration = time.time() - start_time
                            print(
                                "---Total time %.3fs to evaluate %d images of %s"
                                % (duration, data_sets.shape[0],
                                   ver_name_list[db_index]))
                            print('\t- Accuracy: %1.3f+-%1.3f' %
                                  (np.mean(accuracy), np.std(accuracy)))
                            print(
                                '\t- Validation rate: %2.5f+-%2.5f @ FAR=%2.5f'
                                % (val, val_std, far))
                            print('\t- FPR and TPR: %1.3f %1.3f' %
                                  (np.mean(fpr, 0), np.mean(tpr, 0)))
                            auc = metrics.auc(fpr, tpr)
                            print('\t- Area Under Curve (AUC): %1.3f' % auc)
                            # eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.)
                            # print('Equal Error Rate (EER): %1.3f\n' % eer)
                            # Append this validation result to the per-dataset log.
                            with open(
                                    os.path.join(
                                        args.log_file_path,
                                        '{}_result.txt'.format(
                                            ver_name_list[db_index])),
                                    'at') as f:
                                f.write('%d\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\n' %
                                        (cur_global_step, np.mean(accuracy),
                                         val, val_std, far, auc))
                            # Keep an extra "best" checkpoint when LFW
                            # accuracy crosses 99.4%.
                            if ver_name_list[db_index] == 'lfw' and np.mean(
                                    accuracy) > 0.994:
                                print('High accuracy: %.5f' %
                                      np.mean(accuracy))
                                filename = 'MobileFaceNet_iter_best_{:d}'.format(
                                    cur_global_step) + '.ckpt'
                                filename = os.path.join(
                                    args.ckpt_best_path, filename)
                                saver.save(sess, filename)
                        print(
                            '---------------------------------------------------'
                        )
                except tf.errors.OutOfRangeError:
                    # Dataset exhausted: epoch is done. Advance the in-graph
                    # epoch counter, reset the epoch step, checkpoint, and
                    # move on to the next epoch.
                    _, _ = sess.run([inc_epoch_op, reset_epoch_step_op])
                    # Save checkpoint
                    filename = '{:d}_MobileFaceNet'.format(
                        cur_global_step) + '.ckpt'
                    filename = os.path.join(args.ckpt_path, filename)
                    saver.save(sess, filename)
                    cur_epoch_step = 0
                    print("\n\n-------End of epoch %d\n\n" % i)
                    break
# Script-level setup for a graph-network experiment on the Cora dataset:
# load features/adjacency, convert sparse matrices to tuple form, and build
# train/validation/test node masks.
sys.path.append('..')
import models.graph as mg
import scipy.sparse
from utils import data_process, sparse
from utils import configs, metrics

# Set random seed for reproducibility across numpy and TF.
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)
# Fraction of nodes used for training.
train_ratio = 0.03
dataset = configs.FILES.cora
print("train_ratio:", train_ratio)
x, _, adj_norm, labels, train_indexes, validation_indexes, test_indexes = \
    data_process.load_data(dataset, str(train_ratio), x_flag='feature')
node_num = adj_norm.shape[0]
label_num = labels.shape[1]
# Convert to (coords, values, shape) tuples for sparse TF placeholders.
adj_norm_tuple = sparse.sparse_to_tuple(scipy.sparse.coo_matrix(adj_norm))
feat_x_nn_tuple = sparse.sparse_to_tuple(scipy.sparse.coo_matrix(x))
# node-node network train and validate masks (1.0 marks membership).
nn_train_mask = np.zeros([node_num, ])
nn_validation = np.zeros([node_num, ])
nn_test_mask = np.zeros([node_num, ])
for i in train_indexes:
    nn_train_mask[i] = 1
# Script-level training configuration and output-directory setup.
log_histograms = False
prelogits_norm_loss_factor = 2e-5
prelogits_norm_p = 1.0          # order of the norm used on prelogits
max_epoch = 12
image_size = [112, 112]         # input H x W
embedding_size = 128
lr_schedule = [4, 7, 9, 11]     # epoch boundaries for the stepped LR below
values = [0.1, 0.01, 0.001, 0.0001, 0.00001]
# prepare validate datasets
ver_list = []
ver_name_list = []
for db in eval_datasets:
    print('begin db %s convert.' % db)
    data_set = load_data(db, image_size, eval_db_path)
    ver_list.append(data_set)
    ver_name_list.append(db)
# output file path: ensure the output directories exist
if not os.path.exists(log_file_path):
    os.makedirs(log_file_path)
if not os.path.exists(ckpt_best_path):
    os.makedirs(ckpt_best_path)
if not os.path.exists(ckpt_path):
    os.makedirs(ckpt_path)
# create log dir: one timestamped subdirectory per run
subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
log_dir = os.path.join(os.path.expanduser(log_file_path), subdir)
if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
    os.makedirs(log_dir)
            # NOTE(review): fragment — the enclosing print/loop begins
            # outside this view; `epoch + 1, accuracy, loss))` closes it.
            epoch + 1, accuracy, loss))
        losses_validate.append(loss)
        accuracies_validate.append(accuracy)
    # Persist training/validation curves for this run.
    plot_save_loss(
        losses, losses_validate, outputDir +
        '/loss_csnn_pretrain_2_csnn_layers_run{}_affine_false.png'.format(run))
    plot_save_acc(
        accuracies, accuracies_validate, outputDir +
        '/acc_csnn_pretrain_2_csnn_layers_run{}_affine_false.png'.format(run))
    print(
        'Done pre-train, best validation acc {:.4f}'.format(bestValidationAcc))


if __name__ == "__main__":
    # ngsim data
    (x_train, y_train, x_validate, y_validate, x_ood) = load_data()
    dim = x_train.shape[1]
    # Scale inputs by 1/sqrt(dim); presumably to normalize feature magnitude
    # — confirm against the model's expectations.
    x_train = x_train / np.sqrt(dim)
    x_validate = x_validate / np.sqrt(dim)
    x_ood = x_ood / np.sqrt(dim)
    # Combined in-distribution + OOD set with binary OOD labels (1 = OOD).
    x_combined = np.concatenate((x_validate, x_ood))
    label_ood = np.zeros(x_combined.shape[0])
    label_ood[x_validate.shape[0]:] = 1
    ds_train = torch.utils.data.TensorDataset(
        torch.from_numpy(x_train).float(),
        F.one_hot(torch.from_numpy(y_train)).float())
    # NOTE(review): statement truncated here — the TensorDataset call
    # continues outside this view.
    ds_test = torch.utils.data.TensorDataset(
        torch.from_numpy(x_validate).float(),