コード例 #1
0
ファイル: run_experiment.py プロジェクト: galabing/qd2
def predict(experiment_dir, config_map, predict_meta_file):
  """Run predict_all.py for an experiment and store its result file.

  Creates the experiment's result directory if needed, resolves the data,
  label, meta, model and imputer locations, and passes a single command
  line to util.run().

  Args:
    experiment_dir: experiment directory handed to the get*Dir() helpers.
    config_map: experiment config; reads 'use_classification',
      'train_window', 'predict_window' and 'delay_window' (plus whatever
      getModelName() reads).
    predict_meta_file: value forwarded as --predict_meta_file.
  """
  result_dir = getResultDir(experiment_dir)
  util.maybeMakeDir(result_dir)
  result_file = getResultPath(result_dir)

  data_dir = getDataDir(experiment_dir)
  data_file = getDataPath(data_dir)
  # Classification uses the label file, regression the rlabel file.
  locate_labels = (
      getLabelPath if config_map['use_classification'] else getRlabelPath)
  label_file = locate_labels(data_dir)
  meta_file = getMetaPath(data_dir)
  model_dir = getModelDir(experiment_dir)
  imputer_dir = getImputerDir(experiment_dir)

  model_prefix = '%s-' % getModelName(config_map)
  # Model and imputer file names share the same train-window suffix.
  window_suffix = '-%d' % config_map['train_window']

  # One (template, value) pair per command-line token, in argument order.
  tokens = [
      ('%s/predict_all.py', CODE_DIR),
      ('--data_file=%s', data_file),
      ('--label_file=%s', label_file),
      ('--meta_file=%s', meta_file),
      ('--model_dir=%s', model_dir),
      ('--model_prefix="%s"', model_prefix),
      ('--model_suffix="%s"', window_suffix),
      ('--imputer_dir=%s', imputer_dir),
      ('--imputer_prefix="%s"', 'imputer-'),
      ('--imputer_suffix="%s"', window_suffix),
      ('--prediction_window=%d', config_map['predict_window']),
      ('--delay_window=%d', config_map['delay_window']),
      ('--predict_meta_file=%s', predict_meta_file),
      ('--result_file=%s', result_file),
  ]
  util.run(' '.join(template % (value,) for template, value in tokens))
コード例 #2
0
ファイル: run_experiment_2.py プロジェクト: galabing/qd3
def predict(experiment_dir, config_map, predict_meta_file):
    """Run predict_all_2.py for an experiment and store its result file.

    Creates the experiment's result directory if needed, resolves the data,
    label, meta, model and imputer locations, and passes a single command
    line to util.run().

    Args:
        experiment_dir: experiment directory handed to the get*Dir() helpers.
        config_map: experiment config; reads 'use_classification',
            'train_window', 'predict_window' and 'delay_window' (plus
            whatever getModelName() reads).
        predict_meta_file: value forwarded as --predict_meta_file.
    """
    result_dir = getResultDir(experiment_dir)
    util.maybeMakeDir(result_dir)
    result_file = getResultPath(result_dir)

    data_dir = getDataDir(experiment_dir)
    data_file = getDataPath(data_dir)
    # Classification uses the label file, regression the rlabel file.
    if config_map['use_classification']:
        label_file = getLabelPath(data_dir)
    else:
        label_file = getRlabelPath(data_dir)
    meta_file = getMetaPath(data_dir)
    model_dir = getModelDir(experiment_dir)
    imputer_dir = getImputerDir(experiment_dir)

    model_prefix = '%s-' % getModelName(config_map)
    # Model and imputer file names share the same train-window suffix.
    window_suffix = '-%d' % config_map['train_window']

    # One template token per line; joined with single spaces.
    template = ' '.join([
        '%s/predict_all_2.py',
        '--data_file=%s',
        '--label_file=%s',
        '--meta_file=%s',
        '--model_dir=%s',
        '--model_prefix="%s"',
        '--model_suffix="%s"',
        '--imputer_dir=%s',
        '--imputer_prefix="%s"',
        '--imputer_suffix="%s"',
        '--prediction_window=%d',
        '--delay_window=%d',
        '--predict_meta_file=%s',
        '--result_file=%s',
    ])
    arguments = (
        CODE_DIR,
        data_file,
        label_file,
        meta_file,
        model_dir,
        model_prefix,
        window_suffix,
        imputer_dir,
        'imputer-',
        window_suffix,
        config_map['predict_window'],
        config_map['delay_window'],
        predict_meta_file,
        result_file,
    )
    util.run(template % arguments)
コード例 #3
0
ファイル: run_experiment_2.py プロジェクト: galabing/qd3
def collectData(experiment_dir, config_map):
    """Run collect_data.py to build the experiment's data files.

    Creates the experiment's data directory if needed and passes the data,
    label, rlabel, meta, weight and date file locations to collect_data.py
    through util.run().

    Args:
        experiment_dir: experiment directory handed to the path helpers.
        config_map: experiment config; reads 'label', 'predict_date_file',
            'max_neg', 'min_pos', 'min_date', 'max_date', 'feature_window',
            'min_feature_perc' and 'weight_power'.
    """
    data_dir = getDataDir(experiment_dir)
    util.maybeMakeDir(data_dir)

    gain_dir = getLabelDir(config_map['label'])
    feature_list = getFeatureListPath(experiment_dir)
    data_file = getDataPath(data_dir)
    label_file = getLabelPath(data_dir)
    rlabel_file = getRlabelPath(data_dir)
    meta_file = getMetaPath(data_dir)
    weight_file = getWeightPath(data_dir)
    date_file = getDatePath(config_map['predict_date_file'])

    # One (template, value) pair per command-line token, in argument order.
    tokens = [
        ('%s/collect_data.py', CODE_DIR),
        ('--gain_dir=%s', gain_dir),
        ('--max_neg=%f', config_map['max_neg']),
        ('--min_pos=%f', config_map['min_pos']),
        ('--feature_base_dir=%s', FEATURE_DIR),
        ('--feature_list=%s', feature_list),
        ('--feature_stats=%s', FEATURE_STATS_FILE),
        ('--min_date=%s', config_map['min_date']),
        ('--max_date=%s', config_map['max_date']),
        ('--window=%d', config_map['feature_window']),
        ('--min_feature_perc=%f', config_map['min_feature_perc']),
        ('--data_file=%s', data_file),
        ('--label_file=%s', label_file),
        ('--rlabel_file=%s', rlabel_file),
        ('--meta_file=%s', meta_file),
        ('--weight_power=%f', config_map['weight_power']),
        ('--weight_file=%s', weight_file),
        ('--date_file=%s', date_file),
    ]
    util.run(' '.join(template % (value,) for template, value in tokens))
コード例 #4
0
ファイル: run_experiment.py プロジェクト: galabing/qd2
def runExperiment(config_file):
  """Run every pipeline stage of the experiment described by config_file.

  The experiment name is the part of config_file after its last '/' with
  CONFIG_SUFFIX removed.  Each stage is keyed as '<experiment>_<stage>':
  a stage that util.checkDone() reports as done is skipped, and
  util.markDone() is called once a stage has returned.

  Args:
    config_file: path ending in CONFIG_SUFFIX whose last '/' is not the
      first character (both conditions are asserted).
  """
  assert config_file.endswith(CONFIG_SUFFIX)
  slash = config_file.rfind('/')
  assert slash > 0
  experiment = config_file[slash+1:-len(CONFIG_SUFFIX)]
  experiment_dir = '%s/%s' % (EXPERIMENT_BASE_DIR, experiment)
  util.maybeMakeDir(experiment_dir)
  config_map = getConfig(config_file)

  makeFeatureList(experiment_dir, config_map)

  def runStage(stage, action):
    # Invoke action() unless this stage is already recorded as done.
    # Stages are passed as thunks so their arguments are only evaluated
    # when the stage really runs.
    step = '%s_%s' % (experiment, stage)
    if util.checkDone(step):
      return
    action()
    util.markDone(step)

  runStage('collect_data', lambda: collectData(experiment_dir, config_map))
  runStage('collect_label', lambda: collectLabels(experiment_dir, config_map))

  data_dir = getDataDir(experiment_dir)
  # For classification, negative labels indicate gain between max_neg and
  # min_pos and should be removed from training (but not prediction).
  # Negative regression labels are not filtered out, hence no label file.
  label_file = (
      getLabelPath(data_dir) if config_map['use_classification'] else None)
  train_meta_file = getTrainingMetaPath(data_dir)
  predict_meta_file = getPredictionMetaPath(data_dir)

  runStage('filter_train', lambda: filterMetadata(
      experiment_dir, config_map, config_map['train_filter'],
      label_file, train_meta_file))
  runStage('filter_predict', lambda: filterMetadata(
      experiment_dir, config_map, config_map['predict_filter'],
      None, predict_meta_file))
  runStage('train_models', lambda: trainModels(
      experiment_dir, config_map, train_meta_file))
  runStage('predict', lambda: predict(
      experiment_dir, config_map, predict_meta_file))
  runStage('analyze', lambda: analyze(experiment_dir, config_map))
コード例 #5
0
def runExperiment(config_file):
    """Run every pipeline stage of the experiment described by config_file.

    The experiment name is the part of config_file after its last '/' with
    CONFIG_SUFFIX removed.  Each stage is keyed as '<experiment>_<stage>':
    a stage that util.checkDone() reports as done is skipped, and
    util.markDone() is called once a stage has returned.

    Args:
        config_file: path ending in CONFIG_SUFFIX whose last '/' is not the
            first character (both conditions are asserted).
    """
    assert config_file.endswith(CONFIG_SUFFIX)
    sep = config_file.rfind('/')
    assert sep > 0
    experiment = config_file[sep + 1:-len(CONFIG_SUFFIX)]
    experiment_dir = '%s/%s' % (EXPERIMENT_BASE_DIR, experiment)
    util.maybeMakeDir(experiment_dir)
    config_map = getConfig(config_file)

    makeFeatureList(experiment_dir, config_map)

    def runPending(stages):
        # Execute, in order, every (stage, thunk) not yet marked done.
        # Thunks keep argument evaluation deferred until the stage runs.
        for stage, action in stages:
            step = '%s_%s' % (experiment, stage)
            if not util.checkDone(step):
                action()
                util.markDone(step)

    runPending([
        ('collect_data', lambda: collectData(experiment_dir, config_map)),
        ('collect_label', lambda: collectLabels(experiment_dir, config_map)),
    ])

    data_dir = getDataDir(experiment_dir)
    # For classification, negative labels indicate gain between max_neg and
    # min_pos and should be removed from training (but not prediction).
    # Negative regression labels are not filtered out, hence no label file.
    label_file = None
    if config_map['use_classification']:
        label_file = getLabelPath(data_dir)
    train_meta_file = getTrainingMetaPath(data_dir)
    predict_meta_file = getPredictionMetaPath(data_dir)

    runPending([
        ('filter_train', lambda: filterMetadata(
            experiment_dir, config_map, config_map['train_filter'],
            label_file, train_meta_file)),
        ('filter_predict', lambda: filterMetadata(
            experiment_dir, config_map, config_map['predict_filter'],
            None, predict_meta_file)),
        ('train_models', lambda: trainModels(
            experiment_dir, config_map, train_meta_file)),
        ('predict', lambda: predict(
            experiment_dir, config_map, predict_meta_file)),
        ('analyze', lambda: analyze(experiment_dir, config_map)),
    ])
コード例 #6
0
ファイル: run_experiment_2.py プロジェクト: galabing/qd3
def analyze(experiment_dir, config_map):
    """Run analyze_all.py (with --skip_trans) on the experiment's results.

    Creates the experiment's analyze directory if needed and passes the
    result file, analyze directory and market gain file to analyze_all.py
    through util.run().

    Args:
        experiment_dir: experiment directory handed to the path helpers.
        config_map: experiment config, forwarded to getMarketGainPath().
    """
    market_gain_file = getMarketGainPath(config_map)
    analyze_dir = getAnalyzeDir(experiment_dir)
    util.maybeMakeDir(analyze_dir)
    result_file = getResultPath(getResultDir(experiment_dir))

    # One template token per line; joined with single spaces.
    template = ' '.join([
        '%s/analyze_all.py',
        '--result_file=%s',
        '--skip_trans',
        '--analyze_dir=%s',
        '--market_gain_file=%s',
    ])
    util.run(template % (
        CODE_DIR, result_file, analyze_dir, market_gain_file))
コード例 #7
0
ファイル: run_experiment.py プロジェクト: galabing/qd2
def analyze(experiment_dir, config_map):
  """Run analyze_all.py on the experiment's prediction results.

  Creates the experiment's analyze directory if needed and passes the
  result file, hold period, analyze directory and market gain file to
  analyze_all.py through util.run().

  Args:
    experiment_dir: experiment directory handed to the path helpers.
    config_map: experiment config; 'predict_window' is passed as
      --hold_period, and the map is forwarded to getMarketGainPath().
  """
  market_gain_file = getMarketGainPath(config_map)
  analyze_dir = getAnalyzeDir(experiment_dir)
  util.maybeMakeDir(analyze_dir)
  result_file = getResultPath(getResultDir(experiment_dir))

  # One template token per line; joined with single spaces.
  template = ' '.join([
      '%s/analyze_all.py',
      '--result_file=%s',
      '--hold_period=%d',
      '--analyze_dir=%s',
      '--market_gain_file=%s',
  ])
  util.run(template % (
      CODE_DIR, result_file, config_map['predict_window'],
      analyze_dir, market_gain_file))
コード例 #8
0
def analyze(experiment_dir, config_map):
    """Run analyze_all.py on the experiment's prediction results.

    Creates the experiment's analyze directory if needed and passes the
    result file, hold period, analyze directory and market gain file to
    analyze_all.py through util.run().

    Args:
        experiment_dir: experiment directory handed to the path helpers.
        config_map: experiment config; 'predict_window' is passed as
            --hold_period, and the map is forwarded to getMarketGainPath().
    """
    market_gain_file = getMarketGainPath(config_map)
    analyze_dir = getAnalyzeDir(experiment_dir)
    util.maybeMakeDir(analyze_dir)
    result_file = getResultPath(getResultDir(experiment_dir))

    # One (template, value) pair per command-line token, in argument order.
    tokens = [
        ('%s/analyze_all.py', CODE_DIR),
        ('--result_file=%s', result_file),
        ('--hold_period=%d', config_map['predict_window']),
        ('--analyze_dir=%s', analyze_dir),
        ('--market_gain_file=%s', market_gain_file),
    ]
    util.run(' '.join(template % (value,) for template, value in tokens))
コード例 #9
0
ファイル: run_experiment.py プロジェクト: galabing/qd2
def collectData(experiment_dir, config_map):
  """Run collect_data.py to build the experiment's data and meta files.

  Creates the experiment's data directory if needed and passes the gain
  directory, feature list and output file locations to collect_data.py
  through util.run().

  Args:
    experiment_dir: experiment directory handed to the path helpers.
    config_map: experiment config; reads 'label', 'min_date', 'max_date',
      'feature_window' and 'min_feature_perc'.
  """
  data_dir = getDataDir(experiment_dir)
  util.maybeMakeDir(data_dir)

  gain_dir = getLabelDir(config_map['label'])
  feature_list = getFeatureListPath(experiment_dir)
  data_file = getDataPath(data_dir)
  meta_file = getMetaPath(data_dir)

  # One (template, value) pair per command-line token, in argument order.
  tokens = [
      ('%s/collect_data.py', CODE_DIR),
      ('--gain_dir=%s', gain_dir),
      ('--feature_base_dir=%s', FEATURE_DIR),
      ('--feature_list=%s', feature_list),
      ('--feature_stats=%s', FEATURE_STATS_FILE),
      ('--min_date=%s', config_map['min_date']),
      ('--max_date=%s', config_map['max_date']),
      ('--window=%d', config_map['feature_window']),
      ('--min_feature_perc=%f', config_map['min_feature_perc']),
      ('--data_file=%s', data_file),
      ('--meta_file=%s', meta_file),
  ]
  util.run(' '.join(template % (value,) for template, value in tokens))
コード例 #10
0
def collectData(experiment_dir, config_map):
    """Run collect_data.py to build the experiment's data and meta files.

    Creates the experiment's data directory if needed and passes the gain
    directory, feature list and output file locations to collect_data.py
    through util.run().

    Args:
        experiment_dir: experiment directory handed to the path helpers.
        config_map: experiment config; reads 'label', 'min_date',
            'max_date', 'feature_window' and 'min_feature_perc'.
    """
    data_dir = getDataDir(experiment_dir)
    util.maybeMakeDir(data_dir)

    gain_dir = getLabelDir(config_map['label'])
    feature_list = getFeatureListPath(experiment_dir)
    data_file = getDataPath(data_dir)
    meta_file = getMetaPath(data_dir)

    # One template token per line; joined with single spaces.
    template = ' '.join([
        '%s/collect_data.py',
        '--gain_dir=%s',
        '--feature_base_dir=%s',
        '--feature_list=%s',
        '--feature_stats=%s',
        '--min_date=%s',
        '--max_date=%s',
        '--window=%d',
        '--min_feature_perc=%f',
        '--data_file=%s',
        '--meta_file=%s',
    ])
    arguments = (
        CODE_DIR,
        gain_dir,
        FEATURE_DIR,
        feature_list,
        FEATURE_STATS_FILE,
        config_map['min_date'],
        config_map['max_date'],
        config_map['feature_window'],
        config_map['min_feature_perc'],
        data_file,
        meta_file,
    )
    util.run(template % arguments)
コード例 #11
0
ファイル: run.py プロジェクト: galabing/qd3
# Each step below is executed only when logDo() returns a true value for
# its name; the step name is also passed to run() along with the command.

# project_yahoo.py: --raw_dir=YAHOO_SF1_DIR, --projected_dir=YAHOO_PROJECTED_DIR.
if logDo('project_yahoo'):
    cmd = ' '.join([
        '%s/project_yahoo.py',
        '--raw_dir=%s',
        '--trading_day_file=%s',
        '--projected_dir=%s',
    ]) % (
        CODE_DIR,
        YAHOO_SF1_DIR,
        YAHOO_TRADING_DAY_FILE,
        YAHOO_PROJECTED_DIR,
    )
    run(cmd, 'project_yahoo')

# adjust_yahoo.py: --yahoo_dir=YAHOO_PROJECTED_DIR, --output_dir=YAHOO_ADJUSTED_DIR.
if logDo('adjust_yahoo'):
    cmd = ' '.join([
        '%s/adjust_yahoo.py',
        '--yahoo_dir=%s',
        '--output_dir=%s',
    ]) % (CODE_DIR, YAHOO_PROJECTED_DIR, YAHOO_ADJUSTED_DIR)
    run(cmd, 'adjust_yahoo')

# Rolling mean (window VOLUMED_K) over <YAHOO_ADJUSTED_DIR>/volumed.
if logDo('compute_rolling_window_volumed'):
    output_dir = '%s/volumed_mean_%d' % (YAHOO_ADJUSTED_DIR, VOLUMED_K)
    util.maybeMakeDir(output_dir)
    cmd = ' '.join([
        '%s/compute_rolling_window_feature.py',
        '--input_dir=%s/volumed',
        '--window=%d',
        '--method=mean',
        '--output_dir=%s',
    ]) % (CODE_DIR, YAHOO_ADJUSTED_DIR, VOLUMED_K, output_dir)
    run(cmd, 'compute_rolling_window_volumed')

# compute_window_features.py is given compute_window_feature.py as --computer.
if logDo('compute_window_features'):
    cmd = ' '.join([
        '%s/compute_window_features.py',
        '--adjusted_dir=%s',
        '--feature_base_dir=%s',
        '--computer=%s/compute_window_feature.py',
    ]) % (CODE_DIR, YAHOO_ADJUSTED_DIR, FEATURE_DIR, CODE_DIR)
    run(cmd, 'compute_window_features')

if logDo('compute_basic_features'):
    cmd = ('%s/compute_basic_features.py --processed_dir=%s --ticker_file=%s '
           '--feature_base_dir=%s --info_dir=%s '
           '--computer=%s/compute_basic_feature.py') % (
コード例 #12
0
ファイル: run.py プロジェクト: galabing/qd2
# Each step below is executed only when logDo() returns a true value for
# its name.

# get_price_volume.py with --column=adjprice and --take_log.
if logDo('get_yahoo_logadjprice'):
  cmd = ' '.join([
      '%s/get_price_volume.py',
      '--processed_dir=%s',
      '--column=adjprice',
      '--take_log',
      '--output_dir=%s',
  ]) % (CODE_DIR, YAHOO_PROCESSED_DIR, YAHOO_LOGADJPRICE_DIR)
  run(cmd, 'get_yahoo_logadjprice')

# get_price_volume.py with --column=adjvolume and --take_log.
if logDo('get_yahoo_logadjvolume'):
  cmd = ' '.join([
      '%s/get_price_volume.py',
      '--processed_dir=%s',
      '--column=adjvolume',
      '--take_log',
      '--output_dir=%s',
  ]) % (CODE_DIR, YAHOO_PROCESSED_DIR, YAHOO_LOGADJVOLUME_DIR)
  run(cmd, 'get_yahoo_logadjvolume')

# The gain steps issue one command per k in GAIN_K_LIST, so run() is called
# without a step name and markDone() is called once after the loop.
if logDo('get_eod_gain_feature'):
  for k in GAIN_K_LIST:
    gain_dir = '%s/%d' % (EOD_GAIN_DIR, k)
    util.maybeMakeDir(gain_dir)
    cmd = ' '.join([
        '%s/compute_gain.py',
        '--price_dir=%s',
        '--k=%d',
        '--gain_dir=%s',
    ]) % (CODE_DIR, EOD_ADJPRICE_DIR, k, gain_dir)
    run(cmd)
  markDone('get_eod_gain_feature')

if logDo('get_yahoo_gain_feature'):
  for k in GAIN_K_LIST:
    gain_dir = '%s/%d' % (YAHOO_GAIN_DIR, k)
    util.maybeMakeDir(gain_dir)
    cmd = ' '.join([
        '%s/compute_gain.py',
        '--price_dir=%s',
        '--k=%d',
        '--gain_dir=%s',
    ]) % (CODE_DIR, YAHOO_ADJPRICE_DIR, k, gain_dir)
    run(cmd)
  markDone('get_yahoo_gain_feature')

if logDo('get_membership'):
コード例 #13
0
ファイル: run_experiment_2.py プロジェクト: galabing/qd3
def trainModels(experiment_dir, config_map, train_meta_file):
    """Train one model per training date and log classification metrics.

    Reads the dates listed (one per line) in the file located through
    config_map['train_date_file'], skips dates that sort before
    config_map['start_date'], and runs train_model.py once per remaining
    date.  For classification experiments every model file that was actually
    produced is evaluated and one tab-separated row is appended to the stats
    file; for regression only the header row is written.

    Args:
        experiment_dir: experiment directory handed to the path helpers.
        config_map: experiment config; reads 'train_date_file',
            'use_classification', 'use_weight', 'start_date',
            'train_window', 'model_spec', 'train_perc' and
            'imputer_strategy'.
        train_meta_file: value forwarded as --train_meta_file.
    """
    date_file = getDatePath(config_map['train_date_file'])
    with open(date_file, 'r') as fp:
        dates = sorted(fp.read().splitlines())

    data_dir = getDataDir(experiment_dir)
    data_file = getDataPath(data_dir)
    # Classification trains on the label file, regression on the rlabel file.
    if config_map['use_classification']:
        label_file = getLabelPath(data_dir)
    else:
        label_file = getRlabelPath(data_dir)
    meta_file = getMetaPath(data_dir)

    model_dir = getModelDir(experiment_dir)
    util.maybeMakeDir(model_dir)
    imputer_dir = getImputerDir(experiment_dir)
    util.maybeMakeDir(imputer_dir)

    stats_file = getStatsPath(experiment_dir, config_map)
    weight_args = ''
    if config_map['use_weight']:
        weight_args = '--weight_file=%s --tmp_weight_file=%s' % (
            getWeightPath(data_dir), TMP_WEIGHT_FILE)

    # Keep in sync with evaluateModel().
    # The header is derived from EVAL_PERCS, the same list the rows iterate
    # over, so header and row columns cannot drift apart.
    header = ['date', 'f1', 'auc']
    for perc in EVAL_PERCS:
        header.append('%dperc-precision' % perc)
        header.append('%dperc-recall' % perc)

    with open(stats_file, 'w') as fp:
        # fp.write() emits the same bytes as the Python-2-only
        # `print >> fp` statement and also works on Python 3.
        fp.write('\t'.join(header) + '\n')
        for date in dates:
            if date < config_map['start_date']:
                continue
            model_file = getModelPath(model_dir, date, config_map)
            imputer_file = getImputerPath(imputer_dir, date, config_map)
            cmd = (
                '%s/train_model.py --data_file=%s --label_file=%s --meta_file=%s %s '
                '--date=%s --months=%d --model_def="%s" --perc=%f --model_file=%s '
                '--train_meta_file=%s --tmp_data_file=%s --tmp_label_file=%s '
                '--imputer_strategy=%s --imputer_file=%s' %
                (CODE_DIR, data_file, label_file, meta_file, weight_args, date,
                 config_map['train_window'], config_map['model_spec'],
                 config_map['train_perc'], model_file, train_meta_file,
                 TMP_DATA_FILE, TMP_LABEL_FILE, config_map['imputer_strategy'],
                 imputer_file))
            util.run(cmd)
            # No model file was written for this date: nothing to evaluate.
            if not os.path.isfile(model_file):
                continue
            if config_map['use_classification']:
                result = evaluateModel(model_file, imputer_file, TMP_DATA_FILE,
                                       TMP_LABEL_FILE)
                # Keep in sync with evaluateModel().
                values = [date, '%.4f' % result['f1'], '%.4f' % result['auc']]
                for perc in EVAL_PERCS:
                    values.append('%.4f' % result['%dperc-precision' % perc])
                    values.append('%.4f' % result['%dperc-recall' % perc])
                fp.write('\t'.join(values) + '\n')
                # Flush per row so the stats file is current while training
                # is still running.
                fp.flush()
コード例 #14
0
ファイル: run_experiment.py プロジェクト: galabing/qd2
def trainModels(experiment_dir, config_map, train_meta_file):
  """Train one model per month and log classification metrics.

  Enumerates every 'YYYYMM' month from config_map['start_date'] through
  config_map['end_date'] (inclusive) and runs train_model.py once per
  month.  For classification experiments every model file that was actually
  produced is evaluated and one tab-separated row is appended to the stats
  file; for regression only the header row is written.

  Args:
    experiment_dir: experiment directory handed to the path helpers.
    config_map: experiment config; reads 'start_date', 'end_date',
      'use_classification', 'use_weight', 'train_window', 'model_spec',
      'train_perc' and 'imputer_strategy'.
    train_meta_file: value forwarded as --train_meta_file.
  """
  # Walk month by month; dates are 'YYYYMM' strings, so string comparison
  # orders them chronologically.
  dates = []
  date = config_map['start_date']
  while date <= config_map['end_date']:
    dates.append(date)
    year = int(date[:4])
    month = int(date[4:])
    if month < 12:
      month += 1
    else:
      month = 1
      year += 1
    date = '%04d%02d' % (year, month)

  data_dir = getDataDir(experiment_dir)
  data_file = getDataPath(data_dir)
  # Classification trains on the label file, regression on the rlabel file.
  if config_map['use_classification']:
    label_file = getLabelPath(data_dir)
  else:
    label_file = getRlabelPath(data_dir)
  meta_file = getMetaPath(data_dir)

  model_dir = getModelDir(experiment_dir)
  util.maybeMakeDir(model_dir)
  imputer_dir = getImputerDir(experiment_dir)
  util.maybeMakeDir(imputer_dir)

  stats_file = getStatsPath(experiment_dir, config_map)
  weight_args = ''
  if config_map['use_weight']:
    weight_args = '--weight_file=%s --tmp_weight_file=%s' % (
        getWeightPath(data_dir), TMP_WEIGHT_FILE)

  # Keep in sync with evaluateModel().
  # The header is derived from EVAL_PERCS, the same list the rows iterate
  # over, so header and row columns cannot drift apart.
  header = ['date', 'f1', 'auc']
  for perc in EVAL_PERCS:
    header.append('%dperc-precision' % perc)
    header.append('%dperc-recall' % perc)

  with open(stats_file, 'w') as fp:
    # fp.write() emits the same bytes as the Python-2-only `print >> fp`
    # statement and also works on Python 3.
    fp.write('\t'.join(header) + '\n')
    for date in dates:
      model_file = getModelPath(model_dir, date, config_map)
      imputer_file = getImputerPath(imputer_dir, date, config_map)
      cmd = ('%s/train_model.py --data_file=%s --label_file=%s --meta_file=%s %s '
             '--yyyymm=%s --months=%d --model_def="%s" --perc=%f --model_file=%s '
             '--train_meta_file=%s --tmp_data_file=%s --tmp_label_file=%s '
             '--imputer_strategy=%s --imputer_file=%s' % (
                CODE_DIR, data_file, label_file, meta_file, weight_args, date,
                config_map['train_window'], config_map['model_spec'],
                config_map['train_perc'], model_file, train_meta_file,
                TMP_DATA_FILE, TMP_LABEL_FILE,
                config_map['imputer_strategy'], imputer_file))
      util.run(cmd)
      # No model file was written for this month: nothing to evaluate.
      if not os.path.isfile(model_file):
        continue
      if config_map['use_classification']:
        result = evaluateModel(
            model_file, imputer_file, TMP_DATA_FILE, TMP_LABEL_FILE)
        # Keep in sync with evaluateModel().
        values = [date, '%.4f' % result['f1'], '%.4f' % result['auc']]
        for perc in EVAL_PERCS:
          values.append('%.4f' % result['%dperc-precision' % perc])
          values.append('%.4f' % result['%dperc-recall' % perc])
        fp.write('\t'.join(values) + '\n')
        # Flush per row so the stats file is current while training is
        # still running.
        fp.flush()