def main():
    """CLI entry point: compute k-month gains from price data.

    Validates the flag combination, then delegates to computeGain().
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--price_dir', required=True)
    parser.add_argument('--k', type=int, required=True,
                        help='number of months to look for gain')
    parser.add_argument('--min_raw_price', type=float, default=MIN_RAW_PRICE)
    parser.add_argument('--max_raw_price', type=float, default=MAX_RAW_PRICE)
    parser.add_argument('--raw_price_dir',
                        help='raw price dir, required if min_raw_price or '
                             'max_raw_price is specified')
    parser.add_argument('--membership_dir',
                        help='dir to year indexed membership files')
    parser.add_argument('--fill', action='store_true')
    parser.add_argument('--gain_dir', required=True)
    args = parser.parse_args()
    util.configLogging()

    # Sanity checks.
    # parser.error() instead of assert: asserts are stripped under
    # `python -O`, so flag validation must not rely on them; parser.error()
    # also prints usage and exits with status 2, the argparse convention.
    if not args.gain_dir.endswith(str(args.k)):
        parser.error('gain_dir should be suffixed by k for safety')
    if args.min_raw_price >= args.max_raw_price:
        parser.error('min_raw_price >= max_raw_price: %f vs %f' % (
            args.min_raw_price, args.max_raw_price))
    if args.min_raw_price > MIN_RAW_PRICE or args.max_raw_price < MAX_RAW_PRICE:
        # Raw-price filtering was requested, so the raw price data is needed.
        if not args.raw_price_dir:
            parser.error('must specify --raw_price_dir')
    computeGain(args.price_dir, args.k, args.min_raw_price,
                args.max_raw_price, args.raw_price_dir, args.membership_dir,
                args.fill, args.gain_dir)
def main():
    """Compute k-month gains; flags mirror computeGain()'s parameters."""
    flag_parser = argparse.ArgumentParser()
    flag_parser.add_argument('--price_dir', required=True)
    flag_parser.add_argument('--k', type=int, required=True,
                             help='number of months to look for gain')
    flag_parser.add_argument('--min_raw_price', type=float,
                             default=MIN_RAW_PRICE)
    flag_parser.add_argument('--max_raw_price', type=float,
                             default=MAX_RAW_PRICE)
    flag_parser.add_argument('--raw_price_dir',
                             help='raw price dir, required if min_raw_price or '
                                  'max_raw_price is specified')
    flag_parser.add_argument('--membership_dir',
                             help='dir to year indexed membership files')
    flag_parser.add_argument('--fill', action='store_true')
    flag_parser.add_argument('--gain_dir', required=True)
    flags = flag_parser.parse_args()
    util.configLogging()

    # Sanity checks before doing any work.
    assert flags.gain_dir.endswith(str(flags.k)), (
        'gain_dir should be suffixed by k for safety')
    assert flags.min_raw_price < flags.max_raw_price, (
        'min_raw_price >= max_raw_price: %f vs %f' % (
            flags.min_raw_price, flags.max_raw_price))
    if flags.min_raw_price > MIN_RAW_PRICE or flags.max_raw_price < MAX_RAW_PRICE:
        # Non-default raw price bounds require the raw price data.
        assert flags.raw_price_dir, 'must specify --raw_price_dir'
    computeGain(flags.price_dir, flags.k, flags.min_raw_price,
                flags.max_raw_price, flags.raw_price_dir,
                flags.membership_dir, flags.fill, flags.gain_dir)
def main():
    """Parse flags and run excess-gain computation."""
    arg_parser = argparse.ArgumentParser()
    # All three flags are plain required strings.
    for flag in ('--gain_dir', '--market_file', '--egain_dir'):
        arg_parser.add_argument(flag, required=True)
    parsed = arg_parser.parse_args()
    util.configLogging()
    computeEgain(parsed.gain_dir, parsed.market_file, parsed.egain_dir)
def main():
    """CLI entry point: select a training window of data and train a model.

    Joins --data_file/--label_file/--meta_file (optionally filtered by
    --train_meta_file), materializes the selected window into tmp files,
    trains the model defined by --model_def, then optionally deletes the
    tmp files.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_file', required=True)
    parser.add_argument('--label_file', required=True)
    parser.add_argument('--meta_file', required=True)
    # If specified, will be used in the fit function.
    parser.add_argument('--weight_file')
    # If specified, will be used to filter --meta_file.
    # Eg, --meta_file may contain metadata for all available data
    # while --train_meta_file may contain metadata for all data
    # with min_raw_price >= 10 and part of SP500 membership.
    # In this case, only data within --train_meta_file will be
    # collected for training, but --meta_file is still needed
    # for joining with --data_file and --label_file.
    parser.add_argument('--train_meta_file')
    parser.add_argument('--yyyymm', required=True,
                        help='last date of training period')
    parser.add_argument('--months', type=int, required=True,
                        help='length of training period in months, '
                             'use -1 to denote entire history')
    parser.add_argument('--model_def', required=True,
                        help='string of model def; eg, "Model(alpha=0.5)"')
    parser.add_argument('--perc', type=float, default=1.0,
                        help='if < 1, will randomly sample specified perc '
                             'of data for training')
    parser.add_argument('--imputer_strategy', default='zero',
                        help='strategy for filling in missing values')
    parser.add_argument('--model_file', required=True)
    parser.add_argument('--imputer_file', required=True)
    parser.add_argument('--tmp_data_file', required=True,
                        help='location of tmp data file within specified '
                             'training period; this can be used later for '
                             'evaluation, or specify --delete_tmp_files '
                             'to delete it upon finish')
    parser.add_argument('--tmp_label_file', required=True,
                        help='location of tmp label file within specified '
                             'training period; this can be used later for '
                             'evaluation, or specify --delete_tmp_files '
                             'to delete it upon finish')
    parser.add_argument('--tmp_weight_file')
    parser.add_argument('--delete_tmp_files', action='store_true')
    args = parser.parse_args()
    util.configLogging()
    # parser.error() instead of assert: asserts are stripped under
    # `python -O`, so flag validation must not rely on them.
    if args.weight_file and not args.tmp_weight_file:
        parser.error(
            'must specify --tmp_weight_file since --weight_file is specified')
    selectData(args.data_file, args.label_file, args.meta_file,
               args.weight_file, args.train_meta_file, args.yyyymm,
               args.months, args.tmp_data_file, args.tmp_label_file,
               args.tmp_weight_file)
    trainModel(args.tmp_data_file, args.tmp_label_file, args.tmp_weight_file,
               args.model_def, args.perc, args.imputer_strategy,
               args.model_file, args.imputer_file)
    if args.delete_tmp_files:
        deleteTmpFiles(args.tmp_data_file, args.tmp_label_file)
    # tmp_weight_file will not be used after this step so is not guarded by
    # --delete_tmp_files.
    if args.tmp_weight_file:
        os.remove(args.tmp_weight_file)
def main():
    """Build the labeling/weighting flag set and run collect()."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--meta_file', required=True)
    # Absolute and percentile thresholds for positive/negative labels.
    for threshold_flag in ('--min_pos', '--max_neg',
                           '--min_pos_perc', '--max_neg_perc'):
        parser.add_argument(threshold_flag, type=float)
    parser.add_argument('--label_file', required=True)
    parser.add_argument('--weight_power', type=float, default=1)
    parser.add_argument('--weight_file')
    # Logging is configured before flags are parsed (parse happens lazily
    # inside the collect() call), matching the original behavior.
    util.configLogging()
    collect(parser.parse_args())
def main():
    """Compute gains over a k-day horizon from yahoo price data."""
    ap = argparse.ArgumentParser()
    ap.add_argument('--yahoo_dir', required=True)
    ap.add_argument('--k', type=int, required=True,
                    help='number of days to look for gain')
    ap.add_argument('--fill', action='store_true')
    ap.add_argument('--gain_dir', required=True)
    parsed_flags = ap.parse_args()
    util.configLogging()
    # computeOpenGain consumes the whole namespace rather than
    # individual fields.
    computeOpenGain(parsed_flags)
def main():
    """CLI entry point: convert the quandl SF1 dump into raw files."""
    parser = argparse.ArgumentParser()
    # Typo fix in help text: 'SF!' -> 'SF1' (the quandl dataset name,
    # consistent with the --indicator_file help below).
    parser.add_argument('--sf1_file', required=True,
                        help='unzipped file of entire SF1 database from quandl')
    parser.add_argument('--indicator_file', required=True,
                        help='file of supported indicators in SF1')
    parser.add_argument('--raw_dir', required=True,
                        help='output dir of raw files')
    parser.add_argument('--max_lines', type=int, default=0,
                        help='max number of lines to process from sf1_file; '
                             'only use this for debugging')
    args = parser.parse_args()
    util.configLogging()
    convertSf1Raw(args.sf1_file, args.indicator_file, args.raw_dir,
                  args.max_lines)
def main():
    """CLI entry point: convert the quandl SF1 dump into raw files."""
    parser = argparse.ArgumentParser()
    # Typo fix in help text: 'SF!' -> 'SF1' (the quandl dataset name,
    # consistent with the --indicator_file help below).
    parser.add_argument(
        '--sf1_file', required=True,
        help='unzipped file of entire SF1 database from quandl')
    parser.add_argument('--indicator_file', required=True,
                        help='file of supported indicators in SF1')
    parser.add_argument('--raw_dir', required=True,
                        help='output dir of raw files')
    parser.add_argument('--max_lines', type=int, default=0,
                        help='max number of lines to process from sf1_file; '
                             'only use this for debugging')
    args = parser.parse_args()
    util.configLogging()
    convertSf1Raw(args.sf1_file, args.indicator_file, args.raw_dir,
                  args.max_lines)
def main():
    """CLI entry point: collect labeled training data from gains/features."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--gain_dir', required=True)
    parser.add_argument('--date_file')
    parser.add_argument('--max_neg', type=float, default=0.01)
    parser.add_argument('--min_pos', type=float, default=0.01)
    parser.add_argument('--feature_base_dir', required=True)
    parser.add_argument('--feature_list', required=True)
    parser.add_argument('--feature_stats', required=True,
                        help='feature stats file with 1/99 percentiles '
                             'to filter out bad feature values')
    parser.add_argument('--min_date', default='0000-00-00')
    parser.add_argument('--max_date', default='9999-99-99')
    # Most features have a max lag of one quarter.
    parser.add_argument('--window', type=int, default=120)
    parser.add_argument('--min_feature_perc', type=float, default=0.8,
                        help='only use a feature vector if at least certain '
                             'perc of features are populated')
    parser.add_argument('--data_file', required=True)
    parser.add_argument('--label_file', required=True)
    parser.add_argument('--rlabel_file', required=True)
    parser.add_argument('--meta_file', required=True)
    parser.add_argument('--weight_power', type=float, default=1.0)
    parser.add_argument('--weight_file',
                        help='if specified, will assign a weight to each '
                             'training sample with its distance to the '
                             'pos/neg threshold')
    args = parser.parse_args()
    # parser.error() instead of assert: asserts are stripped under
    # `python -O`, so flag validation must not rely on them.
    if args.max_neg > args.min_pos:
        parser.error('max_neg > min_pos: %f vs %f' % (
            args.max_neg, args.min_pos))
    util.configLogging()
    collectData(args.gain_dir, args.date_file, args.max_neg, args.min_pos,
                args.feature_base_dir, args.feature_list, args.feature_stats,
                args.min_date, args.max_date, args.window,
                args.min_feature_perc, args.data_file, args.label_file,
                args.rlabel_file, args.meta_file, args.weight_power,
                args.weight_file)
def main():
    """Collect feature data (no labels) for prediction-time use."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--gain_dir', required=True)
    parser.add_argument('--feature_base_dir', required=True)
    parser.add_argument('--feature_list', required=True)
    parser.add_argument('--feature_stats', required=True,
                        help='feature stats file with 1/99 percentiles '
                             'to filter out bad feature values')
    parser.add_argument('--min_date', default='0000-00-00')
    parser.add_argument('--max_date', default='9999-99-99')
    # Most features have a max lag of one quarter.
    parser.add_argument('--window', type=int, default=120)
    parser.add_argument('--min_feature_perc', type=float, default=0.8,
                        help='only use a feature vector if at least certain '
                             'perc of features are populated')
    parser.add_argument('--data_file', required=True)
    parser.add_argument('--meta_file', required=True)
    flags = parser.parse_args()
    util.configLogging()
    collectData(flags.gain_dir, flags.feature_base_dir, flags.feature_list,
                flags.feature_stats, flags.min_date, flags.max_date,
                flags.window, flags.min_feature_perc, flags.data_file,
                flags.meta_file)
def main():
    """Parse the --config flag and launch the experiment."""
    cli = argparse.ArgumentParser()
    cli.add_argument('--config', required=True)
    cli_args = cli.parse_args()
    util.configLogging()
    runExperiment(cli_args.config)
# Shortcut to util.run() with dry run option. def run(cmd, step=None): util.run(cmd, dry_run=DRY_RUN, step=step) # Shortcut to util.markDone(). def markDone(step): util.markDone(step) ############ ## Script ## ############ util.configLogging(LOG_LEVEL) # Prepare dirs. util.maybeMakeDirs([ SYMBOL_DIR, TICKER_DIR, YAHOO_SF1_DIR, SF1_RAW_DIR, SF1_PROCESSED_DIR, EOD_RAW_DIR, EOD_PROCESSED_DIR, YAHOO_PROCESSED_DIR, FEATURE_DIR, FEATURE_INFO_DIR, MISC_DIR, EOD_PRICE_DIR,
def main():
    """Entry point: run the experiment described by --config."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--config', required=True)
    options = arg_parser.parse_args()
    util.configLogging()
    runExperiment(options.config)
logging.info('skipping step: %s' % step) return False # Shortcut to util.run() with dry run option. def run(cmd, step=None): util.run(cmd, dry_run=DRY_RUN, step=step) # Shortcut to util.markDone(). def markDone(step): util.markDone(step) ############ ## Script ## ############ util.configLogging(LOG_LEVEL) # Prepare dirs. util.maybeMakeDirs([ SYMBOL_DIR, TICKER_DIR, YAHOO_SF1_DIR, SF1_RAW_DIR, SF1_PROCESSED_DIR, EOD_RAW_DIR, EOD_PROCESSED_DIR, YAHOO_PROCESSED_DIR, FEATURE_DIR, FEATURE_INFO_DIR, MISC_DIR, EOD_PRICE_DIR,
def main():
    """CLI entry point: select a training window of data and train a model.

    Joins --data_file/--label_file/--meta_file (optionally filtered by
    --train_meta_file), materializes the selected window into tmp files,
    trains the model defined by --model_def, then optionally deletes the
    tmp files.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_file', required=True)
    parser.add_argument('--label_file', required=True)
    parser.add_argument('--meta_file', required=True)
    # If specified, will be used in the fit function.
    parser.add_argument('--weight_file')
    # If specified, will be used to filter --meta_file.
    # Eg, --meta_file may contain metadata for all available data
    # while --train_meta_file may contain metadata for all data
    # with min_raw_price >= 10 and part of SP500 membership.
    # In this case, only data within --train_meta_file will be
    # collected for training, but --meta_file is still needed
    # for joining with --data_file and --label_file.
    parser.add_argument('--train_meta_file')
    parser.add_argument('--yyyymm', required=True,
                        help='last date of training period')
    parser.add_argument('--months', type=int, required=True,
                        help='length of training period in months, '
                             'use -1 to denote entire history')
    parser.add_argument('--model_def', required=True,
                        help='string of model def; eg, "Model(alpha=0.5)"')
    parser.add_argument('--perc', type=float, default=1.0,
                        help='if < 1, will randomly sample specified perc '
                             'of data for training')
    parser.add_argument('--imputer_strategy', default='zero',
                        help='strategy for filling in missing values')
    parser.add_argument('--model_file', required=True)
    parser.add_argument('--imputer_file', required=True)
    parser.add_argument('--tmp_data_file', required=True,
                        help='location of tmp data file within specified '
                             'training period; this can be used later for '
                             'evaluation, or specify --delete_tmp_files '
                             'to delete it upon finish')
    parser.add_argument('--tmp_label_file', required=True,
                        help='location of tmp label file within specified '
                             'training period; this can be used later for '
                             'evaluation, or specify --delete_tmp_files '
                             'to delete it upon finish')
    parser.add_argument('--tmp_weight_file')
    parser.add_argument('--delete_tmp_files', action='store_true')
    args = parser.parse_args()
    util.configLogging()
    # parser.error() instead of assert: asserts are stripped under
    # `python -O`, so flag validation must not rely on them.
    if args.weight_file and not args.tmp_weight_file:
        parser.error(
            'must specify --tmp_weight_file since --weight_file is specified')
    selectData(args.data_file, args.label_file, args.meta_file,
               args.weight_file, args.train_meta_file, args.yyyymm,
               args.months, args.tmp_data_file, args.tmp_label_file,
               args.tmp_weight_file)
    trainModel(args.tmp_data_file, args.tmp_label_file, args.tmp_weight_file,
               args.model_def, args.perc, args.imputer_strategy,
               args.model_file, args.imputer_file)
    if args.delete_tmp_files:
        deleteTmpFiles(args.tmp_data_file, args.tmp_label_file)
    # tmp_weight_file will not be used after this step so is not guarded by
    # --delete_tmp_files.
    if args.tmp_weight_file:
        os.remove(args.tmp_weight_file)
def main():
    """Walk forward through prediction dates, scoring each date with the most
    recent model trained far enough in the past to avoid lookahead bias, and
    write ranked (ticker, gain, score) blocks to --result_file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_file', required=True)
    parser.add_argument('--label_file', required=True)
    parser.add_argument('--meta_file', required=True)
    # Similar to --train_meta_file in train_model.py
    parser.add_argument('--predict_meta_file')
    parser.add_argument('--model_dir', required=True)
    parser.add_argument('--model_prefix', required=True)
    parser.add_argument('--model_suffix', required=True)
    parser.add_argument('--imputer_dir', required=True)
    parser.add_argument('--imputer_prefix', required=True)
    parser.add_argument('--imputer_suffix', required=True)
    parser.add_argument('--prediction_window', type=int, required=True)
    parser.add_argument('--delay_window', type=int, required=True)
    parser.add_argument('--result_file', required=True)
    args = parser.parse_args()
    util.configLogging()
    # get dates for prediction
    with open(args.meta_file, 'r') as fp:
        lines = fp.read().splitlines()
    dates = set()
    for line in lines:
        # Meta rows are tab-separated with 4 columns; only the date
        # (second column) is needed here.
        tmp1, date, tmp2, tmp3 = line.split('\t')
        dates.add(date)
    dates = sorted(dates)
    # get model dates
    # Model filenames are <prefix><yyyymmdd><suffix>; extract the dates.
    models = os.listdir(args.model_dir)
    model_dates = []
    for model in models:
        assert model.startswith(args.model_prefix)
        assert model.endswith(args.model_suffix)
        date = model[len(args.model_prefix):-len(args.model_suffix)]
        assert len(date) == 8  # yyyymmdd
        model_dates.append(date)
    model_dates.sort()
    ofp = open(args.result_file, 'w')
    # `started` tracks whether any prediction has been emitted yet; used to
    # check no 'hole' in simulation period.
    started = False
    # In qd3 gain is calculated between day k+1 and day +1,
    # so delta should be prediction window + 1 to avoid
    # lookahead bias.
    delta = args.prediction_window + args.delay_window + 1
    for date in dates:
        # Only models trained at least `delta` days before `date` may be used.
        ymd = util.getPreviousYmd(date, delta)
        y, m, d = ymd.split('-')
        model_date = '%s%s%s' % (y, m, d)
        # bisect() - 1 selects the latest model date <= the cutoff.
        index = bisect.bisect(model_dates, model_date) - 1
        if index < 0:
            # No usable model yet; this is only legal before the first
            # prediction has been made (otherwise the period has a hole).
            assert not started
            continue
        model_date = model_dates[index]
        logging.info('predicting %s using model from %s' % (date, model_date))
        model_name = '%s%s%s' % (args.model_prefix, model_date,
                                 args.model_suffix)
        imputer_name = '%s%s%s' % (args.imputer_prefix, model_date,
                                   args.imputer_suffix)
        model_file = '%s/%s' % (args.model_dir, model_name)
        imputer_file = '%s/%s' % (args.imputer_dir, imputer_name)
        # NOTE(review): only the imputer file's existence is checked here;
        # a missing model_file would surface later at open().
        assert os.path.isfile(imputer_file)
        started = True
        meta = prepareData(date, args.data_file, args.label_file,
                           args.meta_file, args.predict_meta_file,
                           TMP_DATA_FILE)
        data = numpy.loadtxt(TMP_DATA_FILE)
        assert data.shape[0] == len(
            meta), 'inconsistent data size: %d vs %d' % (data.shape[0],
                                                         len(meta))
        # Imputer and model are pickled sklearn-style objects
        # (transform / predict[_proba] interface).
        with open(imputer_file, 'rb') as fp:
            imputer = pickle.load(fp)
        data = imputer.transform(data)
        with open(model_file, 'rb') as fp:
            model = pickle.load(fp)
        if 'predict_proba' in dir(model):
            # Classifier: score is the probability of the positive class.
            prob = model.predict_proba(data)
            prob = [item[1] for item in prob]
        else:
            # Regressor: score is the raw prediction.
            prob = model.predict(data)
        assert len(prob) == len(meta)
        # meta rows are [ticker, gain, ...]; pair them with scores and rank
        # best-first.
        items = [[meta[i][0], meta[i][1], prob[i]] for i in range(len(prob))]
        items.sort(key=lambda item: item[2], reverse=True)
        # NOTE(review): `print >> ofp` is Python 2 syntax; this function is
        # py2-only as written.
        print >> ofp, 'date: %s' % date
        for item in items:
            ticker, gain, score = item
            print >> ofp, '\t%s\t%f\t%f' % (ticker, gain, score)
    ofp.close()
    # Clean up the temporary data file produced by prepareData().
    if os.path.isfile(TMP_DATA_FILE):
        os.remove(TMP_DATA_FILE)
def main(config):
    """Train a model with PyTorch Lightning according to `config`.

    Parses the config, optionally applies debug overrides, builds the model
    (fresh or from checkpoint), wires up data loaders, checkpointing and the
    wandb logger, then runs trainer.fit(). Also stores a deep copy of the
    resolved options in the module-level OPT.
    """
    opt = parse_config(config, TRAIN)

    # update debug config (if in debug mode)
    if opt[DEBUG]:
        # Shrink the run so a full train/valid cycle finishes quickly.
        debug_config = {
            DATALOADER_NUM_WORKER: 0,
            NAME: DEBUG,
            LOG_EVERY: 1,
            VALID_EVERY: 1,
            NUM_EPOCH: 2
        }
        opt.update(debug_config)
        console.log(
            '[red]>>>> [[ WARN ]] You are in debug mode, update configs. <<<<[/red]'
        )
        console.log(debug_config)
        console.log(
            '[red]>>>> [[ WARN ]] You are in debug mode, update configs. <<<<[/red]'
        )

    # logging
    console.log('Running config:', opt, log_locals=False)
    opt[LOG_DIRPATH], opt[IMG_DIRPATH] = configLogging(TRAIN, opt)
    # Stop lightning's logger from propagating to the root logger.
    pl_logger = logging.getLogger("lightning")
    pl_logger.propagate = False

    # init model:
    ModelClass = parse_model_class(opt[RUNTIME][MODELNAME])
    ckpt = opt[CHECKPOINT_PATH]
    if ckpt:
        # Resume/finetune from an existing checkpoint.
        model = ModelClass.load_from_checkpoint(ckpt, opt=opt)
        console.log(f'Loading model from: {ckpt}')
    else:
        model = ModelClass(opt)

    # Loading data:
    transform = parseAugmentation(opt)
    training_dataset = ImagesDataset(opt, ds_type=DATA, transform=transform)
    trainloader = torch.utils.data.DataLoader(
        training_dataset,
        batch_size=opt[BATCHSIZE],
        shuffle=True,
        num_workers=opt[DATALOADER_NUM_WORKER],
        drop_last=True)
    # Validation loader is optional; only built when valid input is
    # configured.
    valid_loader = None
    if opt[VALID_DATA] and opt[VALID_DATA][INPUT]:
        valid_dataset = ImagesDataset(opt, ds_type=VALID_DATA,
                                      transform=transform)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=opt[VALID_BATCHSIZE],
            shuffle=False,
            num_workers=opt[DATALOADER_NUM_WORKER])
    console.log('Finish loading data.')

    # callbacks:
    checkpoint_callback = ModelCheckpoint(
        dirpath=opt[LOG_DIRPATH],
        save_last=True,
        save_weights_only=True,
        filename='{epoch:}-{step}',
        save_top_k=10,  # save 10 model
        monitor=opt[CHECKPOINT_MONITOR],
    )

    # trainer logger:
    mylogger = WandbLogger(name=opt[NAME],
                           project='vielab',
                           notes=None if not opt[COMMENT] else opt[COMMENT],
                           tags=[opt[RUNTIME][MODELNAME], opt[DATA][NAME]],
                           save_dir=ROOT_PATH)

    # init trainer:
    trainer = pl.Trainer(
        gpus=opt[GPU],
        distributed_backend='dp',
        # auto_select_gpus=True,
        max_epochs=opt[NUM_EPOCH],
        logger=mylogger,
        callbacks=[checkpoint_callback],
        precision=opt[RUNTIME_PRECISION],
        check_val_every_n_epoch=opt[VALID_EVERY])

    # training loop
    # Keep a module-level deep copy of the resolved options so other code
    # can read them after training starts.
    global OPT
    OPT = copy.deepcopy(opt)
    trainer.fit(model, trainloader, val_dataloaders=valid_loader)