def dataset_creator(config):
    opt = config["opt"]
    hyper_params = config["hyper_params"]
    train_data, _ = data_utils.get_dataset("voc/2007", "train+validation")
    val_data, _ = data_utils.get_dataset("voc/2007", "test")
    if opt.with_voc12:
        voc_2012_data, _ = data_utils.get_dataset("voc/2012",
                                                  "train+validation")
        train_data = train_data.concatenate(voc_2012_data)
    img_size = hyper_params["img_size"]
    train_data = train_data.map(lambda x: data_utils.preprocessing(
        x, img_size, img_size, augmentation.apply))
    val_data = val_data.map(
        lambda x: data_utils.preprocessing(x, img_size, img_size))
    data_shapes = data_utils.get_data_shapes()
    padding_values = data_utils.get_padding_values()
    train_data = train_data.shuffle(opt.batch_size * 4).padded_batch(
        opt.batch_size, padded_shapes=data_shapes,
        padding_values=padding_values)
    val_data = val_data.padded_batch(opt.batch_size,
                                     padded_shapes=data_shapes,
                                     padding_values=padding_values)
    prior_boxes = bbox_utils.generate_prior_boxes(
        hyper_params["feature_map_shapes"], hyper_params["aspect_ratios"])
    ssd_train_feed = train_utils.generator(train_data, prior_boxes,
                                           hyper_params)
    ssd_val_feed = train_utils.generator(val_data, prior_boxes, hyper_params)
    return ssd_train_feed, ssd_val_feed
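# --- Usage sketch (not from the repo): dataset_creator expects a config dict
# holding an argparse-style `opt` and the backbone's hyper_params; in the
# actual scripts these come from io_utils.handle_args() and
# train_utils.get_hyper_params(). All values below are illustrative
# assumptions (SSD300-style defaults).
from types import SimpleNamespace

opt = SimpleNamespace(with_voc12=False, batch_size=8)
hyper_params = {
    "img_size": 300,
    "feature_map_shapes": [38, 19, 10, 5, 3, 1],
    "aspect_ratios": [[1., 2., 1. / 2.]] * 6,
}
ssd_train_feed, ssd_val_feed = dataset_creator(
    {"opt": opt, "hyper_params": hyper_params})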
from utils.data_utils import get_dataset
from utils.preprocess import get_model_info_from_path
from utils.models import EnsembleModel
import config  # provides TECHNICAL_INDICATORS_LIST, used below

data_dir = 'data'
# model_paths = [
#     'models/models/a2c_dow29_steps100000_start2000-01-01_end2018-01-01.model',
#     'models/models/ddpg_dow29_steps100000_start2000-01-01_end2018-01-01.model',
#     'models/models/ppo_dow29_steps100000_start2000-01-01_end2018-01-01.model',
#     'models/models/sac_dow29_steps100000_start2000-01-01_end2018-01-01.model',
#     'models/models/td3_dow29_steps100000_start2000-01-01_end2018-01-01.model',
# ]
model_paths = 'models/models/ppo_nas29_steps1000000_start2005-01-01_end2018-11-28.model'
start_date, split_date, data_type, model = get_model_info_from_path(model_paths)
end_date = '2020-12-31'  # Model is tested from split_date to end_date

# Get data
df = get_dataset(data_dir, data_type, split_date, end_date)
print(f'Testing from {split_date} to {end_date}')
stock_dimension = len(df.tic.unique())
indicators = config.TECHNICAL_INDICATORS_LIST
state_space = 1 + 2 * stock_dimension + len(indicators) * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
env_kwargs = {
    "hmax": 500,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
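    # --- Worked example of the state-space arithmetic above: one cash-balance
    # entry, plus a price and a holding count per stock, plus one value per
    # technical indicator per stock. With 29 tickers and 8 indicators
    # (illustrative numbers, not read from this repo):
    #   1 + 2*29 + 8*29 = 291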
args = io_utils.handle_args()
if args.smoke_test:
    ray.init(num_cpus=2)
else:
    ray.init(address=args.address)

if args.backbone == "mobilenet_v2":
    from models.ssd_mobilenet_v2 import get_model, init_model
else:
    from models.ssd_vgg16 import get_model, init_model

ssd_log_path = io_utils.get_log_path(args.backbone)
ssd_model_path = io_utils.get_model_path(args.backbone)
hyper_params = train_utils.get_hyper_params(args.backbone)

_, info = data_utils.get_dataset("voc/2007", "train+validation")
_, voc_2012_info = data_utils.get_dataset("voc/2012", "train+validation")
voc_2012_total_items = data_utils.get_total_item_size(
    voc_2012_info, "train+validation")
train_total_items = data_utils.get_total_item_size(info, "train+validation")
val_total_items = data_utils.get_total_item_size(info, "test")
if args.with_voc12:
    train_total_items += voc_2012_total_items

labels = data_utils.get_labels(info)
labels = ["bg"] + labels
hyper_params["total_labels"] = len(labels)
step_size_train = train_utils.get_step_size(train_total_items,
epochs = 150
load_weights = False
with_voc_2012 = True
backbone = args.backbone
io_utils.is_valid_backbone(backbone)
#
if backbone == "mobilenet_v2":
    from models.ssd_mobilenet_v2 import get_model, init_model
else:
    from models.ssd_vgg16 import get_model, init_model
#
hyper_params = train_utils.get_hyper_params(backbone)
#
# Prepare the training data
train_data, info = data_utils.get_dataset("voc/2007", "train+validation")
val_data, _ = data_utils.get_dataset("voc/2007", "test")
train_total_items = data_utils.get_total_item_size(info, "train+validation")
val_total_items = data_utils.get_total_item_size(info, "test")
if with_voc_2012:
    voc_2012_data, voc_2012_info = data_utils.get_dataset(
        "voc/2012", "train+validation")
    voc_2012_total_items = data_utils.get_total_item_size(
        voc_2012_info, "train+validation")
    train_total_items += voc_2012_total_items
    train_data = train_data.concatenate(voc_2012_data)
#
# Get labels
labels = data_utils.get_labels(info)
batch_size = 4
use_custom_images = False
custom_image_path = "data/images/"
# If you have a trained Faster R-CNN model, you can load its weights here
load_weights_from_frcnn = False
backbone = args.backbone
io_utils.is_valid_backbone(backbone)

if backbone == "mobilenet_v2":
    from models.rpn_mobilenet_v2 import get_model
else:
    from models.rpn_vgg16 import get_model

hyper_params = train_utils.get_hyper_params(backbone)
test_data, dataset_info = data_utils.get_dataset("voc/2007", "test")
labels = data_utils.get_labels(dataset_info)
labels = ["bg"] + labels
hyper_params["total_labels"] = len(labels)
img_size = hyper_params["img_size"]

data_types = data_utils.get_data_types()
data_shapes = data_utils.get_data_shapes()
padding_values = data_utils.get_padding_values()

if use_custom_images:
    img_paths = data_utils.get_custom_imgs(custom_image_path)
    total_items = len(img_paths)
    test_data = tf.data.Dataset.from_generator(
        lambda: data_utils.custom_data_generator(img_paths, img_size,
                                                 img_size),
        data_types, data_shapes)
hyper_params = train_utils.get_hyper_params()
img_size = hyper_params["img_size"]

data_types = data_utils.get_data_types()
data_shapes = data_utils.get_data_shapes()
padding_values = data_utils.get_padding_values()

if use_custom_images:
    img_paths = data_utils.get_custom_imgs(custom_image_path)
    total_items = len(img_paths)
    test_data = tf.data.Dataset.from_generator(
        lambda: data_utils.custom_data_generator(img_paths, img_size,
                                                 img_size),
        data_types, data_shapes)
else:
    test_split = "train[80%:]"
    test_data, info = data_utils.get_dataset("the300w_lp", test_split)
    total_items = data_utils.get_total_item_size(info, test_split)
    test_data = test_data.map(
        lambda x: data_utils.preprocessing(x, img_size, img_size))
#
test_data = test_data.padded_batch(batch_size, padded_shapes=data_shapes,
                                   padding_values=padding_values)

model = blazeface.get_model(hyper_params)
model_path = io_utils.get_model_path()
model.load_weights(model_path)
prior_boxes = bbox_utils.generate_prior_boxes(
    hyper_params["feature_map_shapes"], hyper_params["aspect_ratios"])
default="simplenn", help="Name of the type of fairness algorithm to use.") parser.add_argument("-dataset", choices=dataset_names, default="adult", help="Name of dataset to train on.") args = parser.parse_args() loaddir = None if args.load_dir is not None: loaddir = os.path.join(base_logdir, args.load_dir) logdir = increment_path(os.path.join(base_logdir, args.experiment_name, "run")) os.makedirs(logdir, exist_ok=True) print("Logging data to {}".format(logdir)) print("Loading {} dataset...".format(args.dataset)) train_dataset, validation_dataset = get_dataset(args.dataset, base_datadir=base_datadir) print("Launching Tensorboard.\nTo visualize, navigate to " "http://0.0.0.0:6006/\nTo close Tensorboard," " press ctrl+C") tensorboard_process = launch_tensorboard(logdir) # ===== SPECIFY HYPERPARAMETERS (INCLUDING CLASSIFIER-TYPE) ===== inputsize = train_dataset["data"].shape[1] layersizes = [100] classifier_type = "paritynn" hparams = { "classifier_type": classifier_type, "layersizes": layersizes, "inputsize": inputsize, } # =============================================================== print("Initializing classifier...")
def main(_):
    """Builds the model and runs."""
    tf.logging.set_verbosity(tf.logging.INFO)
    tx.utils.maybe_create_dir(FLAGS.output_dir)
    bert_pretrain_dir = ('bert_pretrained_models/%s'
                         % FLAGS.config_bert_pretrain)

    # Loads BERT model configuration
    if FLAGS.config_format_bert == "json":
        bert_config = model_utils.transform_bert_to_texar_config(
            os.path.join(bert_pretrain_dir, 'bert_config.json'))
    elif FLAGS.config_format_bert == 'texar':
        bert_config = importlib.import_module(
            'bert_config_lib.config_model_%s' % FLAGS.config_bert_pretrain)
    else:
        raise ValueError('Unknown config_format_bert.')

    # Loads data
    processors = {
        "cola": data_utils.ColaProcessor,
        "mnli": data_utils.MnliProcessor,
        "mrpc": data_utils.MrpcProcessor,
        "xnli": data_utils.XnliProcessor,
        'sst': data_utils.SSTProcessor
    }
    processor = processors[FLAGS.task.lower()]()
    num_classes = len(processor.get_labels())
    num_train_data = len(processor.get_train_examples(config_data.data_dir))

    tokenizer = tokenization.FullTokenizer(
        vocab_file=os.path.join(bert_pretrain_dir, 'vocab.txt'),
        do_lower_case=FLAGS.do_lower_case)

    train_dataset = data_utils.get_dataset(
        processor, tokenizer, config_data.data_dir,
        config_data.max_seq_length, config_data.train_batch_size,
        mode='train', output_dir=FLAGS.output_dir)
    eval_dataset = data_utils.get_dataset(
        processor, tokenizer, config_data.data_dir,
        config_data.max_seq_length, config_data.eval_batch_size,
        mode='eval', output_dir=FLAGS.output_dir)
    test_dataset = data_utils.get_dataset(
        processor, tokenizer, config_data.data_dir,
        config_data.max_seq_length, config_data.test_batch_size,
        mode='test', output_dir=FLAGS.output_dir)

    iterator = tx.data.FeedableDataIterator({
        'train': train_dataset,
        'eval': eval_dataset,
        'test': test_dataset
    })
    batch = iterator.get_next()
    input_ids = batch["input_ids"]
    segment_ids = batch["segment_ids"]
    batch_size = tf.shape(input_ids)[0]
    input_length = tf.reduce_sum(1 - tf.to_int32(tf.equal(input_ids, 0)),
                                 axis=1)

    # Builds BERT
    with tf.variable_scope('bert'):
        embedder = tx.modules.WordEmbedder(
            vocab_size=bert_config.vocab_size,
            hparams=bert_config.embed)
        word_embeds = embedder(input_ids)

        # Creates segment embeddings for each type of tokens.
        segment_embedder = tx.modules.WordEmbedder(
            vocab_size=bert_config.type_vocab_size,
            hparams=bert_config.segment_embed)
        segment_embeds = segment_embedder(segment_ids)
        input_embeds = word_embeds + segment_embeds

        # The BERT model (a TransformerEncoder)
        encoder = tx.modules.TransformerEncoder(hparams=bert_config.encoder)
        output = encoder(input_embeds, input_length)

        # Builds layers for downstream classification, which is also
        # initialized with BERT pre-trained checkpoint.
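        # --- Side note (illustrative, not in the original): the
        # `input_length` computed earlier in this function counts non-pad
        # tokens per row, with pad id 0. For
        # input_ids = [[101, 2023, 102, 0, 0]], the expression
        # tf.reduce_sum(1 - tf.to_int32(tf.equal(input_ids, 0)), axis=1)
        # yields [3].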
with tf.variable_scope("pooler"): # Uses the projection of the 1st-step hidden vector of BERT output # as the representation of the sentence bert_sent_hidden = tf.squeeze(output[:, 0:1, :], axis=1) bert_sent_output = tf.layers.dense(bert_sent_hidden, config_downstream.hidden_dim, activation=tf.tanh) output = tf.layers.dropout(bert_sent_output, rate=0.1, training=tx.global_mode_train()) # Adds the final classification layer logits = tf.layers.dense( output, num_classes, kernel_initializer=tf.truncated_normal_initializer(stddev=0.02)) preds = tf.argmax(logits, axis=-1, output_type=tf.int32) accu = tx.evals.accuracy(batch['label_ids'], preds) # Optimization loss = tf.losses.sparse_softmax_cross_entropy(labels=batch["label_ids"], logits=logits) global_step = tf.Variable(0, trainable=False) # Builds learning rate decay scheduler static_lr = config_downstream.lr['static_lr'] num_train_steps = int(num_train_data / config_data.train_batch_size * config_data.max_train_epoch) num_warmup_steps = int(num_train_steps * config_data.warmup_proportion) lr = model_utils.get_lr( global_step, num_train_steps, # lr is a Tensor num_warmup_steps, static_lr) train_op = tx.core.get_train_op(loss, global_step=global_step, learning_rate=lr, hparams=config_downstream.opt) # Train/eval/test routine def _run(sess, mode): fetches = { 'accu': accu, 'batch_size': batch_size, 'step': global_step, 'loss': loss, } if mode == 'train': fetches['train_op'] = train_op while True: try: feed_dict = { iterator.handle: iterator.get_handle(sess, 'train'), tx.global_mode(): tf.estimator.ModeKeys.TRAIN, } rets = sess.run(fetches, feed_dict) if rets['step'] % 50 == 0: tf.logging.info('step:%d loss:%f' % (rets['step'], rets['loss'])) if rets['step'] == num_train_steps: break except tf.errors.OutOfRangeError: break if mode == 'eval': cum_acc = 0.0 nsamples = 0 while True: try: feed_dict = { iterator.handle: iterator.get_handle(sess, 'eval'), tx.context.global_mode(): tf.estimator.ModeKeys.EVAL, } rets = sess.run(fetches, feed_dict) cum_acc += rets['accu'] * rets['batch_size'] nsamples += rets['batch_size'] except tf.errors.OutOfRangeError: break tf.logging.info('dev accu: {}'.format(cum_acc / nsamples)) if mode == 'test': _all_preds = [] while True: try: feed_dict = { iterator.handle: iterator.get_handle(sess, 'test'), tx.context.global_mode(): tf.estimator.ModeKeys.PREDICT, } _preds = sess.run(preds, feed_dict=feed_dict) _all_preds.extend(_preds.tolist()) except tf.errors.OutOfRangeError: break output_file = os.path.join(FLAGS.output_dir, "test_results.tsv") with tf.gfile.GFile(output_file, "w") as writer: writer.write('\n'.join(str(p) for p in _all_preds)) with tf.Session() as sess: # Loads pretrained BERT model parameters init_checkpoint = os.path.join(bert_pretrain_dir, 'bert_model.ckpt') model_utils.init_bert_checkpoint(init_checkpoint) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sess.run(tf.tables_initializer()) # Restores trained model if specified saver = tf.train.Saver() if FLAGS.checkpoint: saver.restore(sess, FLAGS.checkpoint) iterator.initialize_dataset(sess) if FLAGS.do_train: iterator.restart_dataset(sess, 'train') _run(sess, mode='train') saver.save(sess, FLAGS.output_dir + '/model.ckpt') if FLAGS.do_eval: iterator.restart_dataset(sess, 'eval') _run(sess, mode='eval') if FLAGS.do_test: iterator.restart_dataset(sess, 'test') _run(sess, mode='test')
    })
    hparams_list[-1][hparam] = lamda

n_epochs = 20
experiment_name = "comparisontest"
# ===========================================
masterdir = "/tmp/fairml-farm/"
base_datadir = masterdir + "data/"
os.makedirs(base_datadir, exist_ok=True)
experiment_dir = increment_path(
    os.path.join(masterdir, "logs", experiment_name, "exp"))
os.makedirs(experiment_dir)
print("Logging experiments data to {}".format(experiment_dir))
print("Loading Adult dataset...")
train_dataset, validation_dataset = get_dataset("adult",
                                                base_datadir=base_datadir)
print("...dataset loaded.")
inputsize = train_dataset["data"].shape[1]
print("Launching Tensorboard.\nTo visualize, navigate to "
      "http://0.0.0.0:6006/\nTo close Tensorboard, press ctrl+C")
tensorboard_process = U.launch_tensorboard(experiment_dir)
for hparams in hparams_list:
    if "experiment_name" in hparams:
        logdir = os.path.join(experiment_dir, hparams["experiment_name"])
    else:
        logdir = increment_path(
            os.path.join(experiment_dir, hparams["classifier_type"]))
    expname = logdir.split('/')[-1]  # minor note: logdir shouldn't end with '/'
    print("Starting new experiment, logged at {}".format(logdir))
        _, acc_2, _ = evaluate(tmp_net2, G_loss_fun, test_iter, Args)
        print('F: %.3f' % acc_1, 'TF: %.3f' % acc_2)
        flag = False
        if np.abs(acc_1 - acc_2) < 0.03:
            flag = True
        return ter_dict, flag
    else:
        return f_dict, flag


if __name__ == '__main__':
    torch.manual_seed(Args.seed)
    C_iter, train_iter, test_iter, stats = data_utils.get_dataset(args=Args)

    # build global network
    G_net = Fed_Model()
    print(G_net)
    G_net.train()
    G_loss_fun = torch.nn.CrossEntropyLoss()

    # copy weights
    w_glob = G_net.state_dict()
    m = max(int(Args.frac * Args.num_C), 1)
    gv_acc = []

    net_best = None
    val_acc_list, net_list = [], []
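    # --- Quick illustration (assumed values, not from the repo) of the
    # client-sampling count above: with Args.num_C = 100 clients and
    # participation fraction Args.frac = 0.1,
    # m = max(int(0.1 * 100), 1) = 10 clients take part in each round;
    # the max(..., 1) floor guarantees at least one client even for tiny
    # fractions.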
startdate = args.start_date
splitdate = args.split_date
enddate = args.end_date
train_steps = args.train_steps
modelName = '{}_{}_steps{}_start{}_end{}.model'.format(
    args.model, args.data_type, train_steps, startdate, splitdate)

if args.data_type == 'dow290' or args.data_type == 'dow29w0':
    indicators = config.TECHNICAL_INDICATORS_LIST_W_CROSSINGS
else:
    indicators = config.TECHNICAL_INDICATORS_LIST

# Get data
df_train = get_dataset(args.datadir, args.data_type, args.start_date,
                       args.split_date)
df_test = get_dataset(args.datadir, args.data_type, args.split_date,
                      args.end_date)
stock_dimension = len(df_train.tic.unique())
state_space = 1 + 2 * stock_dimension + len(indicators) * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
env_kwargs = {
    "hmax": 500,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": indicators,
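    # --- For concreteness, the modelName template above yields names like
    # those in the ensemble snippet's commented-out list, e.g. with
    # model='a2c', data_type='dow29', train_steps=100000,
    # start='2000-01-01', split='2018-01-01':
    #   a2c_dow29_steps100000_start2000-01-01_end2018-01-01.model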
from utils import bbox_utils, data_utils, io_utils, train_utils, \
    drawing_utils, landmark_utils, augmentation
import blazeface
import random

args = io_utils.handle_args()
if args.handle_gpu:
    io_utils.handle_gpu_compatibility()

batch_size = 32
epochs = 150
load_weights = False

hyper_params = train_utils.get_hyper_params()

train_split = "train[:80%]"
val_split = "train[80%:]"
train_data, info = data_utils.get_dataset("the300w_lp", train_split)
val_data, _ = data_utils.get_dataset("the300w_lp", val_split)
train_total_items = data_utils.get_total_item_size(info, train_split)
val_total_items = data_utils.get_total_item_size(info, val_split)
#
img_size = hyper_params["img_size"]
train_data = train_data.map(lambda x: data_utils.preprocessing(
    x, img_size, img_size, augmentation.apply))
val_data = val_data.map(
    lambda x: data_utils.preprocessing(x, img_size, img_size))
#
data_shapes = data_utils.get_data_shapes()
padding_values = data_utils.get_padding_values()
train_data = train_data.shuffle(batch_size * 12).padded_batch(
    batch_size, padded_shapes=data_shapes, padding_values=padding_values)