Example #1
def make_plots_dir(dirname, argvalue):
    if argvalue.endswith(".data"):
        basename = argvalue[:-5]
    else:
        basename = argvalue
    return utils.make_output_dir(dirname, DEFAULT_PLOTS_DIR, basename,
                                 LAST_PLOTS_SYMLINK)
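The utils.make_output_dir helper itself is not shown in any of these examples. Below is a minimal sketch of what the four-argument form used above might do, assuming it creates a named subdirectory (falling back to a default parent) and repoints a "last run" convenience symlink; every name and behavior in the sketch is an assumption, not the project's actual implementation.

import errno
import os


def make_output_dir(dirname, default_dir, basename, symlink_name):
    # Hypothetical sketch: fall back to the default parent directory if none was given.
    parent = dirname if dirname else default_dir
    output_dir = os.path.join(parent, basename)
    # Create the directory tree, tolerating an already-existing path.
    try:
        os.makedirs(output_dir)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise
    # Repoint a convenience symlink at the most recent output directory.
    symlink_path = os.path.join(parent, symlink_name)
    if os.path.islink(symlink_path):
        os.remove(symlink_path)
    os.symlink(output_dir, symlink_path)
    return output_dir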
Example #2
def output(train_df, test_df, models, model_params, feature_importance_df,
           train_preds, test_preds, scores, now, model_name):
    score = sum(scores) / len(scores)
    folder_path = make_output_dir(score, now, model_name)
    for i, m in enumerate(models):
        save2pkl('{0}/model_{1:0=2}.pkl'.format(folder_path, i), m)
    with open('{0}/model_params.json'.format(folder_path), 'w') as f:
        json.dump(model_params, f, indent=4)
    with open('{0}/model_valid_scores.json'.format(folder_path), 'w') as f:
        json.dump({i: s for i, s in enumerate(scores)}, f, indent=4)
    save_importances(feature_importance_df,
                     '{}/importances.png'.format(folder_path),
                     '{}/importance.csv'.format(folder_path))
    # The following section must be adjusted for each competition

    test_df.loc[:, 'target'] = test_preds
    test_df = test_df.reset_index()
    # Fill targets below a certain threshold with the outlier value
    #q = test_df['target'].quantile(.0003)
    #q = 3
    #test_df.loc[:,'target']=test_df['target'].apply(lambda x: x if abs(x) > q else x-0.0001)
    test_df[['card_id', 'target']].to_csv(
        '{0}/submit_{1:%Y-%m-%d-%H-%M-%S}_{2}.csv'.format(folder_path, now, score),
        index=False)
    train_df.loc[:, 'OOF_PRED'] = train_preds
    train_df = train_df.reset_index()
    train_df[['card_id',
              'OOF_PRED']].to_csv('{0}/oof.csv'.format(folder_path))
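Examples #2 and #4 rely on a save2pkl helper that is not defined in the snippets. A plausible minimal version, assuming it simply pickles an object to the given path (the helper's real behavior may differ), is:

import os
import pickle


def save2pkl(path, obj):
    # Hypothetical helper: ensure the parent directory exists, then pickle the object.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f)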
Example #3
def generate_output(
    stores: Iterable[Store],
    console: Console,
    generate_decklists: bool = False,
) -> None:
    output_path = make_output_dir()
    table = Table(box=box.SIMPLE)
    table.add_column("Store")
    table.add_column("Count", justify="right", style="cyan")
    table.add_column("CSV File", style="magenta")

    if generate_decklists:
        table.add_column("Decklist File", style="magenta")

    for store in stores:
        csv_path = store.write_csv(output_path)
        table_row = [store.name, f"{len(store)}", f"{Path(*csv_path.parts[-2:])}"]
        if generate_decklists:
            decklist_path = store.write_decklist(output_path)
            table_row.append(f"{Path(*decklist_path.parts[-2:])}")
        table.add_row(*table_row)

    console.print(table)
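Example #3 only relies on a small surface of each Store object: a name, a length, and the two writer methods. A hedged sketch of that assumed interface follows; the Protocol is illustrative only, not the project's actual class.

from pathlib import Path
from typing import Protocol


class StoreLike(Protocol):
    # Display name shown in the first table column.
    name: str

    def __len__(self) -> int:
        ...

    def write_csv(self, output_path: Path) -> Path:
        """Write the store's cards as CSV under output_path and return the file path."""
        ...

    def write_decklist(self, output_path: Path) -> Path:
        """Write a decklist file under output_path and return the file path."""
        ...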
Example #4
def output(train_df, test_df, models, model_params, feature_importance_df,
           train_preds, test_preds, scores, now, model_name):
    score = sum(scores) / len(scores)
    folder_path = make_output_dir(score, now, model_name)
    for i, m in enumerate(models):
        save2pkl('{0}/model_{1:0=2}.pkl'.format(folder_path, i), m)
    with open('{0}/model_params.json'.format(folder_path), 'w') as f:
        json.dump(model_params, f, indent=4)
    with open('{0}/model_valid_scores.json'.format(folder_path), 'w') as f:
        json.dump({i: s for i, s in enumerate(scores)}, f, indent=4)
    save_importances(feature_importance_df,
                     '{}/importances.png'.format(folder_path),
                     '{}/importance.csv'.format(folder_path))

    # The following section must be adjusted for each competition
    submission_file_name = '{0}/submit_{1:%Y-%m-%d-%H-%M-%S}_{2}.csv'.format(
        folder_path, now, score)

    test_df.loc[:, 'target'] = test_preds
    test_df.loc[:, 'Outlier_Likelyhood'] = test_preds_bin
    q = test_df['Outlier_Likelyhood'].quantile(.9999)  # 1.0930%
    test_df.loc[:, 'target'] = test_df['Outlier_Likelyhood'].apply(
        lambda x: 1 if x > q else x)
    test_df = test_df.reset_index()
    test_df[['card_id', 'target']].to_csv(submission_file_name, index=False)

    train_df.loc[:, 'OOF_PRED'] = train_preds
    train_df = train_df.reset_index()
    train_df[['card_id',
              'OOF_PRED']].to_csv('{0}/oof.csv'.format(folder_path))

    # Submit via the API
    if not is_debug:
        submit(competition_name,
               submission_file_name,
               comment='user02 cv: %.6f' % score)
Example #5
def make_results_dir(dirname):
    return utils.make_output_dir(dirname, DEFAULT_RESULTS_DIR, "results" + time.strftime("%Y%m%d-%H%M%S"), LAST_RESULTS_SYMLINK)
Example #6
    mtgjsondata = MtgjsonData()
    extractor = TokenExtractor()
    cards = list(mtgjsondata.load_cards(filterfunc=legal_card_filter))
    progress.update(task, total=len(cards))
    progress.start_task(task)
    test_cases = {}
    exceptions = []

    for card in cards:
        if card.name in test_cases:
            progress.advance(task)
            continue

        try:
            tokens = extractor.extract_from_card(card)
            test_cases[card.name] = tokens
        except Exception as err:
            # progress.console.print(f"[bold cyan]{card.name}")
            # progress.console.print(f"{card.text}")
            # progress.console.print(f"[red]{err}")
            # quit()
            exceptions.append((card.name, type(err).__name__))

        progress.advance(task)

    output_path = make_output_dir()
    csv_path = output_path / "exceptions.csv"
    with csv_path.open("w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(exceptions)
Example #7
def make_plots_dir(dirname, argvalue):
    if argvalue.endswith(".data"):
        basename = argvalue[:-5]
    else:
        basename = argvalue
    return utils.make_output_dir(dirname, DEFAULT_PLOTS_DIR, basename, LAST_PLOTS_SYMLINK)
Example #8
def make_results_dir(dirname):
    return utils.make_output_dir(dirname, DEFAULT_RESULTS_DIR,
                                 "results" + time.strftime("%Y%m%d-%H%M%S"),
                                 LAST_RESULTS_SYMLINK)
Example #9
def main(argv):
    del argv

    utils.make_output_dir(FLAGS.output_dir)
    data_processor = utils.DataProcessor()
    images = utils.get_train_dataset(data_processor, FLAGS.dataset,
                                     FLAGS.batch_size)

    logging.info('Learning rate: %g', FLAGS.learning_rate)

    # Construct optimizers.
    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)

    # Create the networks and models.
    generator = utils.get_generator(FLAGS.dataset)
    metric_net = utils.get_metric_net(FLAGS.dataset, FLAGS.num_measurements)

    model = cs.CS(metric_net, generator, FLAGS.num_z_iters, FLAGS.z_step_size,
                  FLAGS.z_project_method)
    prior = utils.make_prior(FLAGS.num_latents)
    generator_inputs = prior.sample(FLAGS.batch_size)

    model_output = model.connect(images, generator_inputs)
    optimization_components = model_output.optimization_components
    debug_ops = model_output.debug_ops
    reconstructions, _ = utils.optimise_and_sample(generator_inputs,
                                                   model,
                                                   images,
                                                   is_training=False)

    global_step = tf.train.get_or_create_global_step()
    update_op = optimizer.minimize(optimization_components.loss,
                                   var_list=optimization_components.vars,
                                   global_step=global_step)

    sample_exporter = file_utils.FileExporter(
        os.path.join(FLAGS.output_dir, 'reconstructions'))

    # Hooks.
    debug_ops['it'] = global_step
    # Abort training on Nans.
    nan_hook = tf.train.NanTensorHook(optimization_components.loss)
    # Step counter.
    step_counter_hook = tf.train.StepCounterHook()

    checkpoint_saver_hook = tf.train.CheckpointSaverHook(
        checkpoint_dir=utils.get_ckpt_dir(FLAGS.output_dir), save_secs=10 * 60)

    loss_summary_saver_hook = tf.train.SummarySaverHook(
        save_steps=FLAGS.summary_every_step,
        output_dir=os.path.join(FLAGS.output_dir, 'summaries'),
        summary_op=utils.get_summaries(debug_ops))

    hooks = [
        checkpoint_saver_hook, nan_hook, step_counter_hook,
        loss_summary_saver_hook
    ]

    if FLAGS.phase == 'train':
        # Start training.
        with tf.train.MonitoredSession(hooks=hooks) as sess:
            logging.info('starting training')

            for i in range(FLAGS.num_training_iterations):
                sess.run(update_op)

                if i % FLAGS.export_every == 0:
                    reconstructions_np, data_np = sess.run(
                        [reconstructions, images])
                    # Post-process the data and reconstructions before export.
                    data_np = data_processor.postprocess(data_np)
                    reconstructions_np = data_processor.postprocess(
                        reconstructions_np)
                    sample_exporter.save(reconstructions_np, 'reconstructions')
                    sample_exporter.save(data_np, 'data')
    else:
        saver = tf.train.Saver()
        # Start testing
        with tf.Session() as sess:

            init_op = tf.global_variables_initializer()
            sess.run(init_op)

            print(" [*] Reading checkpoint...")
            checkpoint_dir = utils.get_ckpt_dir(FLAGS.output_dir)

            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
                saver.restore(sess, os.path.join(checkpoint_dir, ckpt_name))

            reconstructions_np, data_np = sess.run([reconstructions, images])
            # Post-process the data and reconstructions before export.
            data_np = data_processor.postprocess(data_np)
            reconstructions_np = data_processor.postprocess(reconstructions_np)
            sample_exporter.save(reconstructions_np, 'reconstructions')
            sample_exporter.save(data_np, 'data')
Example #10
        ofn.close()    
#############################################################################
###  MAIN body ##############################################################
#############################################################################

if __name__ == '__main__':
    # hard-coded inputs

    QUANTILES = [.1, .9]

    starting_date = "1/1/2001"
    time_period_inYears = 1.0 # can be 1.33 if you like

    shift = -1  # if filled, transform_df_ASneeded
    in_dir = "\\\\neptune\\olenag\\Projects\\Prj01_Investigation\\data\\"
    out_dir = utils.make_output_dir("\\\\neptune\\olenag\\Projects\\Prj01_Investigation\\outputs")
    
    # main data structures
    STOCKS = {}  # keys are stock names; values are dict-like Date_Stock
    Stock_Names = []
    DF = []  # historical; not sure it is needed now
    Stocks = []  # historical, because some functions are written for DF and Stocks
    
    # functions on the inputs to conform them to what they should be
    starting_date = parser.parse(starting_date).date()
    print(starting_date)
    Dates = utils.form_date_segments(starting_date, time_period_inYears)

    # If you decide to look at the entire file, set Dates = []
    #Dates = []