# --- CLI wiring: the ArgumentParser `parser` is created earlier in the file ---
parser.add_argument('-dataset_faces_folder', default=DEFAULT_DATA_FACES_PATH,
                    help='Path to the images file')
parser.add_argument('-dataset_audios_folder', default=DEFAULT_DATA_AUDIOS_PATH,
                    help='Path to the audios file')
parser.add_argument('-checkpoint_dir', default=DEFAULT_CHECKPOINT_DIR,
                    help='Model checkpoint to use')
# BUGFIX: help text was a copy/paste of -checkpoint_dir's description.
parser.add_argument('-log_dir', default=DEFAULT_LOG_DIR,
                    help='Directory for TensorBoard log files')
# NOTE(review): -resume is a plain string compared against "False" below,
# not a real boolean flag; kept for CLI backward compatibility.
parser.add_argument('-resume', default="True",
                    help='Resume training ("True" or "False")')
args = parser.parse_args()

# Fresh (non-resumed) run: wipe previous logs so TensorBoard curves
# from different runs do not overlap.
if args.resume == "False":
    if tf.gfile.Exists(args.log_dir):
        tf.gfile.DeleteRecursively(args.log_dir)
    tf.gfile.MakeDirs(args.log_dir)

# Ensure the checkpoint's parent directory exists. makedirs also covers
# nested paths, which the original single-level os.mkdir could not create.
if not os.path.isdir(os.path.dirname(args.checkpoint_dir)):
    os.makedirs(os.path.dirname(args.checkpoint_dir))

train(batch_size=16,
      epochs=10,
      dataset=DataInput(args.dataset_faces_folder, args.dataset_audios_folder, "train"),
      log_dir=args.log_dir)
# Training hyper-parameter flags (tf.flags).
flags.DEFINE_float('decay_rate', 0.75, 'decay rate, default: 0.75')
flags.DEFINE_float('keep_prob', 0.5, 'keep_prob for training, default: 0.5')
flags.DEFINE_integer('batch_size', 50, 'batch_size')
flags.DEFINE_integer('decay_step', 1000, 'decay_step, default: 1000')
flags.DEFINE_integer('valid_step', 500, 'valid_step, default: 500')
flags.DEFINE_float('last_f1', 0.10, 'if valid_f1 > last_f1, save new model. default: 0.10')
FLAGS = flags.FLAGS

# NOTE(review): FLAGS.lr and FLAGS.max_max_epoch are read here but their
# DEFINE_* calls are not in this chunk — presumably defined earlier; verify.
lr = FLAGS.lr
last_f1 = FLAGS.last_f1
epoch = FLAGS.max_max_epoch
train_batch_size = FLAGS.batch_size
# NOTE(review): hard-coded absolute path to a developer machine — should come
# from a flag or config file.
checkpoint_dir = '/Users/slade/Documents/YMM/Code/tf/model/ckpt'

# Let TF grow GPU memory on demand instead of grabbing it all up front.
gpu_config = tf.ConfigProto()
gpu_config.gpu_options.allow_growth = True
with tf.Session(config=gpu_config) as sess:
    # NOTE(review): `args` is not defined anywhere in this chunk — confirm it
    # exists at module level, otherwise this raises NameError.
    model = Model(args)
    # init variables
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    time0 = time.time()
    for batch in tqdm(range(epoch)):
        global_step = sess.run(model.global_step)
        # `train_set` and `train_fetches` are defined outside this chunk.
        for _, uij in DataInput(train_set, train_batch_size):
            # training
            feed_dict = {model.inputs: uij, model.keep_prob: FLAGS.keep_prob, model.lr: lr}
            summary, _cost, _, _ = sess.run(train_fetches, feed_dict)
            # the cost is the mean cost of one batch
from data_input import DataInput
from PIL import Image
import numpy as np
from scipy.misc import imsave
from skimage.exposure import histogram

# Root folder holding the face images.
data_path_faces = "/storage/dataset"

if __name__ == '__main__':
    threshold = 700
    dataset = DataInput("/storage/dataset", "/storage/dataset_videos/cropped_videos/outputb", "train")
    items_faces, items_audio = dataset.get_items()
    # Buffer for all 64x64 RGB face crops (filled later, beyond this chunk).
    input_images = np.empty([len(items_faces), 64, 64, 3])
    count = 0
    # Hand-picked indices of images used as histogram references.
    index = [0, 3, 6, 8, 9, 17, 21, 29]
    # Only the leftmost 10-pixel column strip of each reference is kept.
    references = np.empty(shape=[len(index), 64, 10, 3])
    hist_references = np.empty(shape=[len(index), 256])   # 256 histogram counts
    bins_references = np.empty(shape=[len(index), 257])   # 257 bin edges
    ind_count = 0
    for ind in index:
        reference = Image.open(items_faces[ind])
        reference = np.asarray(reference, dtype=float)
        reference = reference[:, 0:10, :]   # left-edge strip only
        references[ind_count] = reference
        # Intensity histogram over all pixels/channels, range 0..255.
        hist_reference, bins_reference = np.histogram(reference, bins=256, range=(0, 255))
        hist_references[ind_count] = hist_reference
        bins_references[ind_count] = bins_reference
        # NOTE(review): ind_count is never incremented within this chunk — if
        # the increment is not just past the cut, every reference overwrites
        # slot 0. Confirm against the full file.
def main():
    """Build, train and validate a small image classifier on 28x28 inputs.

    Everything is driven by ``config.json``: model choice, loss, learning-rate
    schedule, batch size and output directory. Checkpoints, a copy of the
    config/model source, and TensorBoard summaries are written to a
    timestamped folder under OUTPUT_DIR.
    """
    config = json.load(open("config.json", "r"))
    DATA_PATH = config["DATA_PATH"]
    INITIAL_LR = float(config["INITIAL_LR"])
    DECAY_STEPS_LR = int(config["DECAY_STEPS_LR"])
    DECAY_FACTOR_LR = float(config["DECAY_FACTOR_LR"])
    BATCH_SIZE = int(config["BATCH_SIZE"])
    NUM_STEPS = int(config["NUM_STEPS"])
    OUTPUT_DIR = config["OUTPUT_DIR"]
    VAL_SET_SIZE = int(config["VAL_SET_SIZE"])
    KEEP_DROPOUT_PROB = float(config["KEEP_DROPOUT_PROB"])
    WEIGHT_DECAY = float(config["WEIGHT_DECAY"])
    MODEL = config["MODEL"]
    AUGMENT_PROB = float(config["AUGMENT_PROB"])
    LOSS = config["LOSS"]

    # create model output folder and copy corresponding config file to it
    now = datetime.now()
    current_time = now.strftime("%D_%H%M%S")
    current_time = current_time.replace("/", "")  # %D contains '/' separators
    model_path = os.path.join(OUTPUT_DIR, "_".join([MODEL, current_time]))
    if not os.path.exists(model_path):
        os.mkdir(model_path)
    shutil.copy("config.json", os.path.join(model_path, "config.json"))
    shutil.copy("CNN_models/%s.py" % MODEL, os.path.join(model_path, "model.py"))

    # placeholders
    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name="learning_rate_ph")
    images_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name="input_images_ph")
    labels_ph = tf.placeholder(tf.int32, shape=[None], name="labels_ph")
    accuracy_ph = tf.placeholder(tf.float32, shape=[], name="accuracy_ph")
    accuracy_per_class_phs = [tf.placeholder(tf.float32, shape=[],
                                             name="accuracy_per_classs/class_%s_ph" % str(i))
                              for i in range(10)]
    training_ph = tf.placeholder(tf.bool, shape=[], name="training_ph")

    # choose model
    if MODEL == "simple_model_1":
        logits = simple_model_1(images_ph, dropout_prob=KEEP_DROPOUT_PROB,
                                weight_decay=WEIGHT_DECAY, is_training=training_ph)
    elif MODEL == "simple_model_2":
        logits = simple_model_2(images_ph, dropout_prob=KEEP_DROPOUT_PROB,
                                weight_decay=WEIGHT_DECAY, is_training=training_ph)
    elif MODEL == "inception":
        logits = inception(images_ph, dropout_prob=KEEP_DROPOUT_PROB,
                           weight_decay=WEIGHT_DECAY, is_training=training_ph)
    elif MODEL == "resnet":
        logits = resnet(images_ph, dropout_prob=KEEP_DROPOUT_PROB,
                        weight_decay=WEIGHT_DECAY, is_training=training_ph)
    elif MODEL == "inception_resnet":
        logits = inception_resnet(images_ph, dropout_prob=KEEP_DROPOUT_PROB,
                                  weight_decay=WEIGHT_DECAY, is_training=training_ph)
    else:
        # Fail fast instead of crashing later with an undefined `logits`.
        raise ValueError("Unknown MODEL in config: %s" % MODEL)

    # create loss
    if LOSS == "CROSS_ENTROPY":
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels_ph,
                                                           name="c_entropy"))
    elif LOSS == "CROSS_ENTROPY_WEIGHTED":
        class_weights = tf.constant([2, 1, 2, 1, 2, 1, 2, 1, 1, 1])
        weights = tf.gather(class_weights, labels_ph)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels_ph, logits=logits,
                                                      weights=weights)
    elif LOSS == "FOCAL":
        gamma = 2
        preds = tf.nn.softmax(logits, dim=-1)
        labels_one_hot = tf.one_hot(labels_ph, depth=preds.shape[1])
        # BUGFIX: clip the softmax output before the log — tf.log(0) yields
        # -inf and poisons the loss once any probability saturates.
        loss = -labels_one_hot * ((1 - preds) ** gamma) \
            * tf.log(tf.clip_by_value(preds, 1e-8, 1.0))
        loss = tf.reduce_mean(tf.reduce_sum(loss, axis=1))
    else:
        # Fail fast instead of crashing later with an undefined `loss`.
        raise ValueError("Unknown LOSS in config: %s" % LOSS)

    optimizer = tf.train.AdamOptimizer(learning_rate_ph)
    # Run UPDATE_OPS (e.g. batch-norm moving averages) before each train step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss)

    # create input reader object
    data_input = DataInput(DATA_PATH, BATCH_SIZE, VAL_SET_SIZE, AUGMENT_PROB)
    val_images, val_labels = data_input.get_val_set()

    # saver
    saver = tf.train.Saver()

    # summaries
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("learning_rate", learning_rate_ph)
    tf.summary.scalar("accuracy", accuracy_ph)
    for i in range(10):
        tf.summary.scalar("accuracy_per_class/class_%s" % str(i), accuracy_per_class_phs[i])
    summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_writer = tf.summary.FileWriter(os.path.join(model_path, "train"), sess.graph)
        val_writer = tf.summary.FileWriter(os.path.join(model_path, "validation"), sess.graph)

        # number of parameters
        total_parameters = 0
        for variable in tf.trainable_variables():
            shape = variable.get_shape()
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            total_parameters += variable_parameters
        print("total_params: " + str(total_parameters))

        train_summary_pred = []
        train_summary_labels = []
        for i in range(1, NUM_STEPS + 1):
            print("Step: " + str(i))
            train_images, train_labels = data_input.get_batch()
            # Staircase exponential learning-rate decay.
            power = i // DECAY_STEPS_LR
            learning_rate = INITIAL_LR * DECAY_FACTOR_LR ** power
            train_feed_dict = {images_ph: train_images, labels_ph: train_labels,
                               learning_rate_ph: learning_rate, training_ph: True}
            sess.run(train_op, feed_dict=train_feed_dict)

            if i % 5000 == 0:
                saver.save(sess, os.path.join(model_path, "model.ckpt"), i)

            # Accumulate inference-mode training predictions every 4th step.
            if i % 4 == 0:
                train_logits = sess.run(logits, feed_dict={images_ph: train_images,
                                                           training_ph: False})
                pred = np.argmax(train_logits, axis=1)
                train_summary_pred += [p for p in pred]
                train_summary_labels += [l for l in train_labels]

            if i % 500 == 0:
                # Training summary from the accumulated predictions.
                train_summary_pred = np.array(train_summary_pred)
                train_summary_labels = np.array(train_summary_labels)
                hits = train_summary_pred == train_summary_labels
                accuracy = np.round(np.sum(hits) / len(hits) * 100, decimals=2)
                accuracy_per_class = []
                for cl in range(10):
                    accuracy_cl = np.round(np.sum(hits[train_summary_labels == cl]) /
                                           np.sum(train_summary_labels == cl) * 100, decimals=2)
                    accuracy_per_class.append(accuracy_cl)
                train_feed_dict[accuracy_ph] = accuracy
                for cl in range(10):
                    train_feed_dict[accuracy_per_class_phs[cl]] = accuracy_per_class[cl]
                summary_train = sess.run(summary_op, feed_dict=train_feed_dict)
                train_writer.add_summary(summary_train, i)
                train_summary_labels = []
                train_summary_pred = []

                # validation summary
                val_logits = sess.run(logits, feed_dict={images_ph: val_images,
                                                         training_ph: False})
                pred = np.argmax(val_logits, axis=1)
                hits = pred == val_labels
                accuracy = np.round(np.sum(hits) / len(pred) * 100, decimals=2)
                accuracy_per_class = []
                for cl in range(10):
                    accuracy_cl = np.round(np.sum(hits[val_labels == cl]) /
                                           np.sum(val_labels == cl) * 100, decimals=2)
                    accuracy_per_class.append(accuracy_cl)
                # BUGFIX: the validation summary used to be computed with
                # training_ph=True, so dropout/BN ran in training mode while
                # logging "validation" loss; inference mode is correct here.
                val_feed_dict = {images_ph: val_images, labels_ph: val_labels,
                                 accuracy_ph: accuracy, learning_rate_ph: learning_rate,
                                 training_ph: False}
                for cl in range(10):
                    val_feed_dict[accuracy_per_class_phs[cl]] = accuracy_per_class[cl]
                summary_val = sess.run(summary_op, feed_dict=val_feed_dict)
                val_writer.add_summary(summary_val, i)
# ##========================= train LSGAN =========================###
# (Fragment: tail of the LSGAN training loop — one joint step of generator
# and discriminator; the enclosing function and loops are outside this chunk.)
summary_str, gLoss, dLoss, _, _ = sess.run(
    [summary, g_loss, d_loss, g_optim, d_optim],
    feed_dict={images: input_images, z: input_z,
               y_gan_real: labels_real, y_gan_fake: labels_fake,
               y_generator: labels_generator})
print("Epoch: %2d Iteration: %2d gLoss: %.8f dLoss: %.8f." % (j, iteration, gLoss, dLoss))
summary_writer.add_summary(summary_str, iteration)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Predict script')
    parser.add_argument('-dataset_faces_folder', default=DEFAULT_DATA_FACES_PATH,
                        help='Path to the images file')
    parser.add_argument('-dataset_audios_folder', default=DEFAULT_DATA_AUDIOS_PATH,
                        help='Path to the audios file')
    parser.add_argument('-checkpoint_dir', default=DEFAULT_CHECKPOINT_DIR,
                        help='Model checkpoint to use')
    # BUGFIX: help text was a copy/paste of -checkpoint_dir's description.
    parser.add_argument('-log_dir', default=DEFAULT_LOG_DIR,
                        help='Directory for TensorBoard log files')
    parser.add_argument('-resume', default="True",
                        help='Resume training ("True" or "False")')
    args = parser.parse_args()

    # Fresh run: wipe stale TensorBoard logs so curves do not overlap.
    if args.resume == "False":
        if tf.gfile.Exists(args.log_dir):
            tf.gfile.DeleteRecursively(args.log_dir)
        tf.gfile.MakeDirs(args.log_dir)

    # Ensure the checkpoint's parent directory exists; makedirs also handles
    # nested paths, unlike the original single-level os.mkdir.
    if not os.path.isdir(os.path.dirname(args.checkpoint_dir)):
        os.makedirs(os.path.dirname(args.checkpoint_dir))

    train(batch_size=16,
          epochs=10,
          dataset=DataInput(args.dataset_faces_folder, args.dataset_audios_folder, "train"),
          log_dir=args.log_dir)
    # BUGFIX: removed a stray trailing "t" character (syntax error) that
    # followed the train(...) call in the original.
# tf.flags-based CLI configuration for the DMNN runner.
logging = tf.logging
flags = tf.flags
flags.DEFINE_bool("verbose", False, "To talk or not to talk")
flags.DEFINE_string("save_path", None, "Model output directory")
flags.DEFINE_string("config_file", None, "Model config file")
FLAGS = flags.FLAGS

if __name__ == "__main__":
    # Default output folder for saved models/configs.
    if not tf.gfile.Exists('./save'):
        tf.gfile.MkDir('./save')

    # Config stuff
    config = get_config(FLAGS)
    data_input = DataInput(config)
    train_batches = data_input.train_epoch_size
    val_batches = data_input.val_epoch_size

    # Model building
    # NOTE(review): if model_type/model_version is anything other than
    # 'dmnn'/'v1', `model_wrap` is never assigned and the code below raises
    # NameError — confirm whether other types are handled past this chunk.
    if config.model_type == 'dmnn':
        if config.model_version == 'v1':
            model_wrap = DMNNv1(config)

    if FLAGS.verbose:
        print('DMNN model:')
        print(model_wrap.model.summary())

    # Resume from saved Keras weights when a previous epoch exists.
    if config.epoch > 0:
        model_wrap.model = restore_keras_model(
            model_wrap.model, config.save_path + '_weights.hdf5')
# tf.flags-based CLI configuration for the motiongan runner.
logging = tf.logging
flags = tf.flags
flags.DEFINE_bool("verbose", False, "To talk or not to talk")
flags.DEFINE_string("save_path", None, "Model output directory")
flags.DEFINE_string("config_file", None, "Model config file")
FLAGS = flags.FLAGS

if __name__ == "__main__":
    # Seed the RNG for reproducibility (helper defined elsewhere in the file).
    _reset_rand_seed()
    if not tf.gfile.Exists('./save'):
        tf.gfile.MkDir('./save')

    # Config stuff
    config = get_config(FLAGS)
    data_input = DataInput(config)
    # Re-seed so the batch stream is independent of however many random draws
    # DataInput's constructor consumed.
    _reset_rand_seed()
    train_batches = data_input.train_epoch_size
    train_generator = data_input.batch_generator(True)   # training stream
    val_batches = data_input.val_epoch_size
    val_generator = data_input.batch_generator(False)    # validation stream

    # Model building
    # NOTE(review): model_types other than 'motiongan' leave `model_wrap`
    # unassigned — confirm whether they are handled past this chunk.
    if config.model_type == 'motiongan':
        model_wrap = get_model(config)

    if FLAGS.verbose:
        print('Discriminator model:')
        print(model_wrap.disc_model.summary())
        print('Generator model:')
        print(model_wrap.gen_model.summary())
flags.DEFINE_string("config_file", "motiongan_v1_fae_h36", "Model config file") FLAGS = flags.FLAGS def _reset_rand_seed(): seed = 42 np.random.seed(seed) if __name__ == "__main__": # Config stuff config = get_config(FLAGS) # config.only_val = True config.normalize_data = False # config.pick_num = 0 data_input = DataInput(config) _reset_rand_seed() n_batches = 4 n_splits = 32 print('Plotting %d batches in %d splits for the %s dataset' % (n_batches, n_splits, config.data_set)) for b in range(n_batches): labs_batch, poses_batch = data_input.batch_generator(False).next() n_seqs = (config.batch_size // n_splits) for i in trange(n_splits): plot_seq_gif( poses_batch[i * n_seqs:(i + 1) * n_seqs, :, :, :3], labs_batch[i * n_seqs:(i + 1) * n_seqs, ...],
def __init__(self):
    """Load the input data and greet the user on the console."""
    # Pull the dataset in through the project's DataInput pipeline.
    self.data = DataInput().inputFunction()
    # Same banner as before: blank lines around the welcome message.
    banner = "\n\n" + "WELCOME TO THE MACHINE LEARNING PREPROCESSOR CLI!!!\n\n\n"
    print(banner)
# (Fragment: queue-runner-driven main training loop — this try block sits
# inside a function whose definition is outside this chunk.)
try:
    iteration = 0
    while not coord.should_stop():
        iteration += 1
        # ##========================= train SRGAN =========================###
        # One combined step: k update, global measure, and both optimizers.
        kt, mGlobal, _, _ = sess.run(
            [k_update, m_global, g_optim, d_optim])
        print("kt: %.8f Mglobal: %.8f" % (kt, mGlobal))
        summary_str = sess.run(summary)
        summary_writer.add_summary(summary_str, iteration)
        summary_writer.flush()
        # ##========================= evaluate data =========================###
except tf.errors.OutOfRangeError:
    # Input queues exhausted — normal end of training.
    print('Done -- epoch limit reached')
finally:
    # Always stop and join the queue-runner threads, even on error.
    coord.request_stop()
    coord.join(threads)

if __name__ == '__main__':
    # NOTE(review): hard-coded storage paths — consider CLI flags like the
    # sibling scripts in this project.
    data_path = "/storage/dataset_videos/audio2faces_dataset/"
    log_dir = "/storage/irina/logs"
    train(batch_size=16, epochs=1000, dataset=DataInput(data_path, "train"), log_dir=log_dir)
# (Fragment: continuation of a pandas read call whose opening line is outside
# this chunk — the trailing arguments of that call appear first.)
        nrows=3629, encoding='utf-8', dtype={'TICKER_SYMBOL': 'str'}, usecols=[1, 6, 8],
        parse_dates=[2])
# Keep only tickers present in pre_list and rescale market value.
market_info = market_info[market_info.TICKER_SYMBOL.isin(pre_list.TICKER_SYMBOL)].\
    assign(MARKET_VALUE = market_info.MARKET_VALUE/10e7).reset_index(drop=True)

### Macroeconomic data
macro_info = pd.read_excel(path%'fddc1_data/Macro&Industry.xlsx',header=0,\
    sheet_name='INDIC_DATA',encoding='utf8',dtype = {'indic_id': 'str'},\
    parse_dates=[1],usecols=[0,4,5],index_col='PERIOD_DATE')['20101231':]
# Shift the macro index forward by 85 days — presumably to account for the
# publication lag of macro indicators; verify against the data source.
macro_info.index = macro_info.index + datetime.timedelta(85)

### Financial statement data
df_lst = DataInput(path='../../fddc1_data/financial_data/%s',
                   comlist=pre_list.TICKER_SYMBOL, last=False)
#df_lst.to_csv(path%'/47_152/data/df_fst.csv',index=False)
#df_lst = pd.read_csv(path%'/47_152/data/df_lst.csv',dtype = {'TICKER_SYMBOL':'str'})

### Quarterly data
df_normal = GroupSeries(df_lst, dropnan=True)

### Add macroeconomic features
df_model_macro = AddMacroData(df_normal, macro_info)

### Model training
df_xgb_ind = RevenuePre(df_model_macro)
df_predict_ind = RevenueTran(df_model_macro, df_xgb_ind, market_info,
                             [1.24, 1.07, 1.00, 1.15], 0.5, 0.75, 0.24)
df_submit_ind = RevenueCom(df_predict_ind, pre_list)

### Industry revenue model
def __init__(self):
    """Load the input data and print a bold welcome banner."""
    # Pull the dataset in through the project's DataInput pipeline.
    self.data = DataInput().inputFunction()
    # self.bold_text_start / self.bold_text_end are assumed to be class-level
    # string attributes (e.g. ANSI escapes) wrapping the banner text.
    pieces = ["\n\n", self.bold_text_start,
              "MACHINE LEARNING PREPROCESSOR CLI",
              self.bold_text_end, "\n\n"]
    print("".join(pieces))