コード例 #1
0
ファイル: compute_gain.py プロジェクト: galabing/qd2
def main():
  """CLI entry point: parse flags, sanity-check them, and compute gains."""
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument('--price_dir', required=True)
  arg_parser.add_argument('--k', type=int, required=True,
                          help='number of months to look for gain')
  arg_parser.add_argument('--min_raw_price', type=float, default=MIN_RAW_PRICE)
  arg_parser.add_argument('--max_raw_price', type=float, default=MAX_RAW_PRICE)
  arg_parser.add_argument('--raw_price_dir',
                          help='raw price dir, required if min_raw_price or '
                               'max_raw_price is specified')
  arg_parser.add_argument('--membership_dir',
                          help='dir to year indexed membership files')
  arg_parser.add_argument('--fill', action='store_true')
  arg_parser.add_argument('--gain_dir', required=True)
  flags = arg_parser.parse_args()
  util.configLogging()
  # Sanity checks.
  # Guard against writing gains for one horizon into another horizon's dir.
  assert flags.gain_dir.endswith(str(flags.k)), (
      'gain_dir should be suffixed by k for safety')
  assert flags.min_raw_price < flags.max_raw_price, (
      'min_raw_price >= max_raw_price: %f vs %f' %
      (flags.min_raw_price, flags.max_raw_price))
  # Raw prices are only needed when the default bounds were narrowed.
  defaults_narrowed = (flags.min_raw_price > MIN_RAW_PRICE
                       or flags.max_raw_price < MAX_RAW_PRICE)
  if defaults_narrowed:
    assert flags.raw_price_dir, 'must specify --raw_price_dir'
  computeGain(flags.price_dir, flags.k, flags.min_raw_price,
              flags.max_raw_price, flags.raw_price_dir, flags.membership_dir,
              flags.fill, flags.gain_dir)
コード例 #2
0
ファイル: compute_gain.py プロジェクト: galabing/qd2
def main():
    """CLI wrapper: parse and validate flags, then run computeGain."""
    p = argparse.ArgumentParser()
    p.add_argument('--price_dir', required=True)
    p.add_argument('--k', type=int, required=True,
                   help='number of months to look for gain')
    p.add_argument('--min_raw_price', type=float, default=MIN_RAW_PRICE)
    p.add_argument('--max_raw_price', type=float, default=MAX_RAW_PRICE)
    p.add_argument('--raw_price_dir',
                   help='raw price dir, required if min_raw_price or '
                        'max_raw_price is specified')
    p.add_argument('--membership_dir',
                   help='dir to year indexed membership files')
    p.add_argument('--fill', action='store_true')
    p.add_argument('--gain_dir', required=True)
    opts = p.parse_args()
    util.configLogging()
    # Sanity checks.
    # The output dir must be suffixed with the horizon k so different
    # horizons cannot be mixed up.
    assert opts.gain_dir.endswith(str(opts.k)), (
        'gain_dir should be suffixed by k for safety')
    assert opts.min_raw_price < opts.max_raw_price, (
        'min_raw_price >= max_raw_price: %f vs %f' %
        (opts.min_raw_price, opts.max_raw_price))
    # --raw_price_dir only matters when the price bounds deviate from
    # the module defaults.
    if opts.min_raw_price > MIN_RAW_PRICE or opts.max_raw_price < MAX_RAW_PRICE:
        assert opts.raw_price_dir, 'must specify --raw_price_dir'
    computeGain(opts.price_dir, opts.k, opts.min_raw_price, opts.max_raw_price,
                opts.raw_price_dir, opts.membership_dir, opts.fill,
                opts.gain_dir)
コード例 #3
0
def main():
    """Parse the three required dirs/files and run computeEgain."""
    parser = argparse.ArgumentParser()
    # All three flags are mandatory paths.
    for flag in ('--gain_dir', '--market_file', '--egain_dir'):
        parser.add_argument(flag, required=True)
    parsed = parser.parse_args()
    util.configLogging()
    computeEgain(parsed.gain_dir, parsed.market_file, parsed.egain_dir)
コード例 #4
0
ファイル: train_model.py プロジェクト: galabing/qd2
def main():
  """Train a model on a sliding window of historical data.

  Selects training rows from --data_file/--label_file (joined via
  --meta_file, optionally narrowed by --train_meta_file) for the period
  ending at --yyyymm, writes them to the tmp files, trains the model
  described by --model_def, and saves the fitted model and imputer.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--data_file', required=True)
  parser.add_argument('--label_file', required=True)
  parser.add_argument('--meta_file', required=True)
  # If specified, will be used in the fit function.
  parser.add_argument('--weight_file')
  # If specified, will be used to filter --meta_file.
  # Eg, --meta_file may contain metadata for all available data
  # while --train_meta_file may contain metadata for all data
  # with min_raw_price >= 10 and part of SP500 membership.
  # In this case, only data within --train_meta_file will be
  # collected for training, but --meta_file is still needed
  # for joining with --data_file and --label_file.
  parser.add_argument('--train_meta_file')
  parser.add_argument('--yyyymm', required=True,
                      help='last date of training period')
  parser.add_argument('--months', type=int, required=True,
                      help='length of training period in months, '
                           'use -1 to denote entire history')
  parser.add_argument('--model_def', required=True,
                      help='string of model def; eg, "Model(alpha=0.5)"')
  parser.add_argument('--perc', type=float, default=1.0,
                      help='if < 1, will randomly sample specified perc '
                           'of data for training')
  parser.add_argument('--imputer_strategy', default='zero',
                      help='strategy for filling in missing values')
  parser.add_argument('--model_file', required=True)
  parser.add_argument('--imputer_file', required=True)
  parser.add_argument('--tmp_data_file', required=True,
                      help='location of tmp data file within specified '
                           'training period; this can be used later for '
                           'evaluation, or specify --delete_tmp_files '
                           'to delete it upon finish')
  parser.add_argument('--tmp_label_file', required=True,
                      help='location of tmp label file within specified '
                           'training period; this can be used later for '
                           'evaluation, or specify --delete_tmp_files '
                           'to delete it upon finish')
  parser.add_argument('--tmp_weight_file')
  parser.add_argument('--delete_tmp_files', action='store_true')
  args = parser.parse_args()
  util.configLogging()
  if args.weight_file:
    assert args.tmp_weight_file, (
        'must specify --tmp_weight_file since --weight_file is specified')
  selectData(args.data_file, args.label_file, args.meta_file, args.weight_file,
             args.train_meta_file, args.yyyymm, args.months,
             args.tmp_data_file, args.tmp_label_file, args.tmp_weight_file)
  trainModel(args.tmp_data_file, args.tmp_label_file, args.tmp_weight_file,
             args.model_def, args.perc, args.imputer_strategy,
             args.model_file, args.imputer_file)
  if args.delete_tmp_files:
    deleteTmpFiles(args.tmp_data_file, args.tmp_label_file)
  # tmp_weight_file will not be used after this step so is not guarded by
  # --delete_tmp_files.
  # Fix: only remove the file if it actually exists.  --tmp_weight_file may
  # be specified without --weight_file, in which case the weight file is
  # never written and an unconditional os.remove() would raise OSError.
  if args.tmp_weight_file and os.path.isfile(args.tmp_weight_file):
    os.remove(args.tmp_weight_file)
コード例 #5
0
ファイル: collect_labels.py プロジェクト: galabing/qd2
def main():
  """Parse label-collection flags, configure logging, and run collect()."""
  flag_parser = argparse.ArgumentParser()
  flag_parser.add_argument('--meta_file', required=True)
  # Both absolute thresholds and percentile variants are optional;
  # interpretation is up to collect().
  flag_parser.add_argument('--min_pos', type=float)
  flag_parser.add_argument('--max_neg', type=float)
  flag_parser.add_argument('--min_pos_perc', type=float)
  flag_parser.add_argument('--max_neg_perc', type=float)
  flag_parser.add_argument('--label_file', required=True)
  flag_parser.add_argument('--weight_power', type=float, default=1)
  flag_parser.add_argument('--weight_file')
  util.configLogging()
  # collect() receives the full parsed namespace rather than individual args.
  collect(flag_parser.parse_args())
コード例 #6
0
ファイル: compute_open_gain.py プロジェクト: galabing/qd3
def main():
    """Parse open-gain flags and delegate to computeOpenGain()."""
    cli = argparse.ArgumentParser()
    cli.add_argument('--yahoo_dir', required=True)
    cli.add_argument('--k', type=int, required=True,
                     help='number of days to look for gain')
    cli.add_argument('--fill', action='store_true')
    cli.add_argument('--gain_dir', required=True)
    parsed = cli.parse_args()
    util.configLogging()
    # computeOpenGain takes the whole namespace.
    computeOpenGain(parsed)
コード例 #7
0
ファイル: convert_sf1_raw.py プロジェクト: galabing/qd2
def main():
  """Convert the unzipped quandl SF1 dump into per-indicator raw files."""
  parser = argparse.ArgumentParser()
  # Typo fix in the user-visible help text: 'SF!' -> 'SF1' (matches the
  # wording used by the other flags below).
  parser.add_argument('--sf1_file', required=True,
                      help='unzipped file of entire SF1 database from quandl')
  parser.add_argument('--indicator_file', required=True,
                      help='file of supported indicators in SF1')
  parser.add_argument('--raw_dir', required=True,
                      help='output dir of raw files')
  parser.add_argument('--max_lines', type=int, default=0,
                      help='max number of lines to process from sf1_file; '
                           'only use this for debugging')
  args = parser.parse_args()
  util.configLogging()
  convertSf1Raw(args.sf1_file, args.indicator_file, args.raw_dir,
                args.max_lines)
コード例 #8
0
def main():
    """Convert the unzipped quandl SF1 dump into per-indicator raw files."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--sf1_file',
        required=True,
        # Typo fix in the user-visible help text: 'SF!' -> 'SF1'.
        help='unzipped file of entire SF1 database from quandl')
    parser.add_argument('--indicator_file',
                        required=True,
                        help='file of supported indicators in SF1')
    parser.add_argument('--raw_dir',
                        required=True,
                        help='output dir of raw files')
    parser.add_argument('--max_lines',
                        type=int,
                        default=0,
                        help='max number of lines to process from sf1_file; '
                        'only use this for debugging')
    args = parser.parse_args()
    util.configLogging()
    convertSf1Raw(args.sf1_file, args.indicator_file, args.raw_dir,
                  args.max_lines)
コード例 #9
0
ファイル: collect_data.py プロジェクト: galabing/qd3
def main():
    """Parse data-collection flags, validate thresholds, and collect data."""
    ap = argparse.ArgumentParser()
    ap.add_argument('--gain_dir', required=True)
    ap.add_argument('--date_file')
    ap.add_argument('--max_neg', type=float, default=0.01)
    ap.add_argument('--min_pos', type=float, default=0.01)
    ap.add_argument('--feature_base_dir', required=True)
    ap.add_argument('--feature_list', required=True)
    ap.add_argument('--feature_stats', required=True,
                    help='feature stats file with 1/99 percentiles '
                         'to filter out bad feature values')
    ap.add_argument('--min_date', default='0000-00-00')
    ap.add_argument('--max_date', default='9999-99-99')
    # Most features have a max lag of one quarter.
    ap.add_argument('--window', type=int, default=120)
    ap.add_argument('--min_feature_perc', type=float, default=0.8,
                    help='only use a feature vector if at least certain '
                         'perc of features are populated')
    ap.add_argument('--data_file', required=True)
    ap.add_argument('--label_file', required=True)
    ap.add_argument('--rlabel_file', required=True)
    ap.add_argument('--meta_file', required=True)
    ap.add_argument('--weight_power', type=float, default=1.0)
    ap.add_argument('--weight_file',
                    help='if specified, will assign a weight to each '
                         'training sample with its distance to the '
                         'pos/neg threshold')
    flags = ap.parse_args()
    # The negative cutoff must not exceed the positive cutoff.
    threshold_msg = ('max_neg > min_pos: %f vs %f' %
                     (flags.max_neg, flags.min_pos))
    assert flags.max_neg <= flags.min_pos, threshold_msg
    util.configLogging()
    collectData(flags.gain_dir, flags.date_file, flags.max_neg, flags.min_pos,
                flags.feature_base_dir, flags.feature_list,
                flags.feature_stats, flags.min_date, flags.max_date,
                flags.window, flags.min_feature_perc, flags.data_file,
                flags.label_file, flags.rlabel_file, flags.meta_file,
                flags.weight_power, flags.weight_file)
コード例 #10
0
ファイル: collect_data.py プロジェクト: galabing/qd2
def main():
  """Parse feature-collection flags and run collectData()."""
  ap = argparse.ArgumentParser()
  ap.add_argument('--gain_dir', required=True)
  ap.add_argument('--feature_base_dir', required=True)
  ap.add_argument('--feature_list', required=True)
  ap.add_argument('--feature_stats', required=True,
                  help='feature stats file with 1/99 percentiles '
                       'to filter out bad feature values')
  # Sentinel defaults cover the entire date range.
  ap.add_argument('--min_date', default='0000-00-00')
  ap.add_argument('--max_date', default='9999-99-99')
  # Most features have a max lag of one quarter.
  ap.add_argument('--window', type=int, default=120)
  ap.add_argument('--min_feature_perc', type=float, default=0.8,
                  help='only use a feature vector if at least certain '
                       'perc of features are populated')
  ap.add_argument('--data_file', required=True)
  ap.add_argument('--meta_file', required=True)
  flags = ap.parse_args()
  util.configLogging()
  collectData(flags.gain_dir, flags.feature_base_dir, flags.feature_list,
              flags.feature_stats, flags.min_date, flags.max_date,
              flags.window, flags.min_feature_perc, flags.data_file,
              flags.meta_file)
コード例 #11
0
ファイル: run_experiment_2.py プロジェクト: galabing/qd3
def main():
    """Run the experiment described by the --config file."""
    ap = argparse.ArgumentParser()
    ap.add_argument('--config', required=True)
    parsed = ap.parse_args()
    util.configLogging()
    runExperiment(parsed.config)
コード例 #12
0
ファイル: run.py プロジェクト: galabing/qd3
# Shortcut to util.run() with dry run option.
def run(cmd, step=None):
    """Run *cmd* through util.run, honoring the module-level DRY_RUN flag.

    step, if given, is forwarded to util.run unchanged; presumably it
    identifies the pipeline step for bookkeeping — confirm in util.run.
    """
    util.run(cmd, dry_run=DRY_RUN, step=step)


# Shortcut to util.markDone().
def markDone(step):
    """Record *step* as completed by delegating to util.markDone."""
    util.markDone(step)


############
## Script ##
############

util.configLogging(LOG_LEVEL)

# Prepare dirs.
util.maybeMakeDirs([
    SYMBOL_DIR,
    TICKER_DIR,
    YAHOO_SF1_DIR,
    SF1_RAW_DIR,
    SF1_PROCESSED_DIR,
    EOD_RAW_DIR,
    EOD_PROCESSED_DIR,
    YAHOO_PROCESSED_DIR,
    FEATURE_DIR,
    FEATURE_INFO_DIR,
    MISC_DIR,
    EOD_PRICE_DIR,
コード例 #13
0
ファイル: run_experiment.py プロジェクト: galabing/qd2
def main():
  """Drive an experiment from the config file given on the command line."""
  ap = argparse.ArgumentParser()
  ap.add_argument('--config', required=True)
  parsed = ap.parse_args()
  util.configLogging()
  runExperiment(parsed.config)
コード例 #14
0
ファイル: run.py プロジェクト: galabing/qd2
  logging.info('skipping step: %s' % step)
  return False

# Shortcut to util.run() with dry run option.
def run(cmd, step=None):
  """Run *cmd* through util.run, honoring the module-level DRY_RUN flag.

  step, if given, is forwarded to util.run unchanged; presumably it
  identifies the pipeline step for bookkeeping — confirm in util.run.
  """
  util.run(cmd, dry_run=DRY_RUN, step=step)

# Shortcut to util.markDone().
def markDone(step):
  """Record *step* as completed by delegating to util.markDone."""
  util.markDone(step)

############
## Script ##
############

util.configLogging(LOG_LEVEL)

# Prepare dirs.
util.maybeMakeDirs([
    SYMBOL_DIR,
    TICKER_DIR,
    YAHOO_SF1_DIR,
    SF1_RAW_DIR,
    SF1_PROCESSED_DIR,
    EOD_RAW_DIR,
    EOD_PROCESSED_DIR,
    YAHOO_PROCESSED_DIR,
    FEATURE_DIR,
    FEATURE_INFO_DIR,
    MISC_DIR,
    EOD_PRICE_DIR,
コード例 #15
0
ファイル: train_model.py プロジェクト: galabing/qd2
def main():
    """Train a model on a sliding window of historical data.

    Selects training rows from --data_file/--label_file (joined via
    --meta_file, optionally narrowed by --train_meta_file) for the period
    ending at --yyyymm, writes them to the tmp files, trains the model
    described by --model_def, and saves the fitted model and imputer.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_file', required=True)
    parser.add_argument('--label_file', required=True)
    parser.add_argument('--meta_file', required=True)
    # If specified, will be used in the fit function.
    parser.add_argument('--weight_file')
    # If specified, will be used to filter --meta_file.
    # Eg, --meta_file may contain metadata for all available data
    # while --train_meta_file may contain metadata for all data
    # with min_raw_price >= 10 and part of SP500 membership.
    # In this case, only data within --train_meta_file will be
    # collected for training, but --meta_file is still needed
    # for joining with --data_file and --label_file.
    parser.add_argument('--train_meta_file')
    parser.add_argument('--yyyymm',
                        required=True,
                        help='last date of training period')
    parser.add_argument('--months',
                        type=int,
                        required=True,
                        help='length of training period in months, '
                        'use -1 to denote entire history')
    parser.add_argument('--model_def',
                        required=True,
                        help='string of model def; eg, "Model(alpha=0.5)"')
    parser.add_argument('--perc',
                        type=float,
                        default=1.0,
                        help='if < 1, will randomly sample specified perc '
                        'of data for training')
    parser.add_argument('--imputer_strategy',
                        default='zero',
                        help='strategy for filling in missing values')
    parser.add_argument('--model_file', required=True)
    parser.add_argument('--imputer_file', required=True)
    parser.add_argument('--tmp_data_file',
                        required=True,
                        help='location of tmp data file within specified '
                        'training period; this can be used later for '
                        'evaluation, or specify --delete_tmp_files '
                        'to delete it upon finish')
    parser.add_argument('--tmp_label_file',
                        required=True,
                        help='location of tmp label file within specified '
                        'training period; this can be used later for '
                        'evaluation, or specify --delete_tmp_files '
                        'to delete it upon finish')
    parser.add_argument('--tmp_weight_file')
    parser.add_argument('--delete_tmp_files', action='store_true')
    args = parser.parse_args()
    util.configLogging()
    if args.weight_file:
        assert args.tmp_weight_file, (
            'must specify --tmp_weight_file since --weight_file is specified')
    selectData(args.data_file, args.label_file, args.meta_file,
               args.weight_file, args.train_meta_file, args.yyyymm,
               args.months, args.tmp_data_file, args.tmp_label_file,
               args.tmp_weight_file)
    trainModel(args.tmp_data_file, args.tmp_label_file, args.tmp_weight_file,
               args.model_def, args.perc, args.imputer_strategy,
               args.model_file, args.imputer_file)
    if args.delete_tmp_files:
        deleteTmpFiles(args.tmp_data_file, args.tmp_label_file)
    # tmp_weight_file will not be used after this step so is not guarded by
    # --delete_tmp_files.
    # Fix: only remove the file if it actually exists.  --tmp_weight_file may
    # be specified without --weight_file, in which case the weight file is
    # never written and an unconditional os.remove() would raise OSError.
    if args.tmp_weight_file and os.path.isfile(args.tmp_weight_file):
        os.remove(args.tmp_weight_file)
コード例 #16
0
ファイル: predict_all_2.py プロジェクト: galabing/qd3
def main():
    """Walk every date in --meta_file, predict it with the newest model that
    avoids lookahead bias, and write ranked results to --result_file.

    For each date, the model trained at least
    (prediction_window + delay_window + 1) days earlier is selected via a
    bisect over the sorted model dates; data for the date is scored with
    that model (probability of the positive class when the model supports
    predict_proba) and the (ticker, gain, score) rows are written sorted by
    descending score.

    NOTE: uses Python 2 `print >> file` syntax, so this module is
    Python 2 only.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_file', required=True)
    parser.add_argument('--label_file', required=True)
    parser.add_argument('--meta_file', required=True)
    # Similar to --train_meta_file in train_model.py
    parser.add_argument('--predict_meta_file')
    parser.add_argument('--model_dir', required=True)
    parser.add_argument('--model_prefix', required=True)
    parser.add_argument('--model_suffix', required=True)
    parser.add_argument('--imputer_dir', required=True)
    parser.add_argument('--imputer_prefix', required=True)
    parser.add_argument('--imputer_suffix', required=True)
    parser.add_argument('--prediction_window', type=int, required=True)
    parser.add_argument('--delay_window', type=int, required=True)
    parser.add_argument('--result_file', required=True)
    args = parser.parse_args()

    util.configLogging()

    # get dates for prediction
    # Meta file rows are 4 tab-separated fields; the 2nd is the date.
    with open(args.meta_file, 'r') as fp:
        lines = fp.read().splitlines()
    dates = set()
    for line in lines:
        tmp1, date, tmp2, tmp3 = line.split('\t')
        dates.add(date)
    dates = sorted(dates)

    # get model dates
    # Model file names are <prefix><yyyymmdd><suffix>; extract and sort the
    # dates so bisect can find the newest usable model per prediction date.
    models = os.listdir(args.model_dir)
    model_dates = []
    for model in models:
        assert model.startswith(args.model_prefix)
        assert model.endswith(args.model_suffix)
        date = model[len(args.model_prefix):-len(args.model_suffix)]
        assert len(date) == 8  # yyyymmdd
        model_dates.append(date)
    model_dates.sort()

    ofp = open(args.result_file, 'w')

    started = False  # check no 'hole' in simulation period
    # In qd3 gain is calculated between day k+1 and day +1,
    # so delta should be prediction window + 1 to avoid
    # lookahead bias.
    delta = args.prediction_window + args.delay_window + 1
    for date in dates:
        # Latest model date that is at least `delta` days before `date`.
        ymd = util.getPreviousYmd(date, delta)
        y, m, d = ymd.split('-')
        model_date = '%s%s%s' % (y, m, d)
        index = bisect.bisect(model_dates, model_date) - 1
        if index < 0:
            # No model exists yet; only legal before the simulation starts.
            assert not started
            continue
        model_date = model_dates[index]
        logging.info('predicting %s using model from %s' % (date, model_date))
        model_name = '%s%s%s' % (args.model_prefix, model_date,
                                 args.model_suffix)
        imputer_name = '%s%s%s' % (args.imputer_prefix, model_date,
                                   args.imputer_suffix)
        model_file = '%s/%s' % (args.model_dir, model_name)
        imputer_file = '%s/%s' % (args.imputer_dir, imputer_name)

        # The model file is known to exist (it came from listdir); the
        # matching imputer must exist too.
        assert os.path.isfile(imputer_file)
        started = True

        # prepareData writes the feature matrix to TMP_DATA_FILE and returns
        # per-row metadata (row i here yields ticker meta[i][0] and
        # gain meta[i][1], per the unpacking below).
        meta = prepareData(date, args.data_file, args.label_file,
                           args.meta_file, args.predict_meta_file,
                           TMP_DATA_FILE)
        data = numpy.loadtxt(TMP_DATA_FILE)
        assert data.shape[0] == len(
            meta), 'inconsistent data size: %d vs %d' % (data.shape[0],
                                                         len(meta))

        # NOTE(review): unpickling model/imputer files — only safe if they
        # come from a trusted source.
        with open(imputer_file, 'rb') as fp:
            imputer = pickle.load(fp)
        data = imputer.transform(data)

        with open(model_file, 'rb') as fp:
            model = pickle.load(fp)

        # Prefer class probability of the positive class; fall back to the
        # raw prediction for models without predict_proba.
        if 'predict_proba' in dir(model):
            prob = model.predict_proba(data)
            prob = [item[1] for item in prob]
        else:
            prob = model.predict(data)

        assert len(prob) == len(meta)
        # Rank (ticker, gain, score) rows by descending score.
        items = [[meta[i][0], meta[i][1], prob[i]] for i in range(len(prob))]
        items.sort(key=lambda item: item[2], reverse=True)
        print >> ofp, 'date: %s' % date
        for item in items:
            ticker, gain, score = item
            print >> ofp, '\t%s\t%f\t%f' % (ticker, gain, score)

    ofp.close()
    # Clean up the scratch file written by prepareData.
    if os.path.isfile(TMP_DATA_FILE):
        os.remove(TMP_DATA_FILE)
コード例 #17
0
def main(config):
    """Train the model described by *config* with PyTorch Lightning.

    Parses the config, applies fast-run overrides in debug mode, builds the
    model (optionally restored from a checkpoint), wires up the train/valid
    dataloaders, checkpointing, and W&B logging, then runs trainer.fit().
    """
    opt = parse_config(config, TRAIN)

    # update debug config (if in debug mode)
    if opt[DEBUG]:
        # Overrides that make a debug run short and single-threaded.
        debug_config = {
            DATALOADER_NUM_WORKER: 0,
            NAME: DEBUG,
            LOG_EVERY: 1,
            VALID_EVERY: 1,
            NUM_EPOCH: 2
        }
        opt.update(debug_config)
        # The warning is printed twice so it brackets the config dump.
        console.log(
            '[red]>>>> [[ WARN ]] You are in debug mode, update configs. <<<<[/red]'
        )
        console.log(debug_config)
        console.log(
            '[red]>>>> [[ WARN ]] You are in debug mode, update configs. <<<<[/red]'
        )

    # logging
    console.log('Running config:', opt, log_locals=False)
    opt[LOG_DIRPATH], opt[IMG_DIRPATH] = configLogging(TRAIN, opt)
    # Keep the lightning logger from propagating to the root logger
    # (avoids duplicated log lines).
    pl_logger = logging.getLogger("lightning")
    pl_logger.propagate = False

    # init model:
    ModelClass = parse_model_class(opt[RUNTIME][MODELNAME])
    ckpt = opt[CHECKPOINT_PATH]
    if ckpt:
        # Resume from checkpoint; opt is still passed so current config wins.
        model = ModelClass.load_from_checkpoint(ckpt, opt=opt)
        console.log(f'Loading model from: {ckpt}')
    else:
        model = ModelClass(opt)

    # Loading data:
    transform = parseAugmentation(opt)
    training_dataset = ImagesDataset(opt, ds_type=DATA, transform=transform)
    trainloader = torch.utils.data.DataLoader(
        training_dataset,
        batch_size=opt[BATCHSIZE],
        shuffle=True,
        num_workers=opt[DATALOADER_NUM_WORKER],
        drop_last=True)

    # Validation loader is optional: built only when valid data is configured.
    valid_loader = None
    if opt[VALID_DATA] and opt[VALID_DATA][INPUT]:
        # NOTE(review): validation reuses the *training* transform — confirm
        # there is intentionally no separate eval-time augmentation.
        valid_dataset = ImagesDataset(opt,
                                      ds_type=VALID_DATA,
                                      transform=transform)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=opt[VALID_BATCHSIZE],
            shuffle=False,
            num_workers=opt[DATALOADER_NUM_WORKER])
    console.log('Finish loading data.')

    # callbacks:
    checkpoint_callback = ModelCheckpoint(
        dirpath=opt[LOG_DIRPATH],
        save_last=True,
        save_weights_only=True,
        filename='{epoch:}-{step}',
        save_top_k=10,  # save 10 model
        monitor=opt[CHECKPOINT_MONITOR],
    )

    # trainer logger:
    mylogger = WandbLogger(name=opt[NAME],
                           project='vielab',
                           notes=None if not opt[COMMENT] else opt[COMMENT],
                           tags=[opt[RUNTIME][MODELNAME], opt[DATA][NAME]],
                           save_dir=ROOT_PATH)

    # init trainer:
    trainer = pl.Trainer(
        gpus=opt[GPU],
        distributed_backend='dp',
        # auto_select_gpus=True,
        max_epochs=opt[NUM_EPOCH],
        logger=mylogger,
        callbacks=[checkpoint_callback],
        precision=opt[RUNTIME_PRECISION],
        check_val_every_n_epoch=opt[VALID_EVERY])

    # training loop
    # Publish a deep copy of the final config in the module-level OPT global.
    global OPT
    OPT = copy.deepcopy(opt)
    trainer.fit(model, trainloader, val_dataloaders=valid_loader)