Example #1
def launch_single(config: CN, run_type: str, ckpt_path: str, clear_only=False):
    if clear_only:
        check_exists(config.TENSORBOARD_DIR, preserve=False)
        check_exists(config.CHECKPOINT_DIR, preserve=False)
        check_exists(config.LOG_DIR, preserve=False)
        exit(0)
    if run_type == "train":
        if ckpt_path is not None:
            runner = Runner(config)
            runner.train(checkpoint_path=ckpt_path)
        else:
            if DO_PRESERVE_RUNS:
                if check_exists(config.TENSORBOARD_DIR) or \
                    check_exists(config.CHECKPOINT_DIR) or \
                    check_exists(config.LOG_DIR):
                    exit(1)
            else:
                check_exists(config.TENSORBOARD_DIR)
                check_exists(config.CHECKPOINT_DIR)
                check_exists(config.LOG_DIR)
            runner = Runner(config)
            runner.train()
    elif run_type == "eval":
        runner = Runner(config)
        runner.eval(checkpoint_path=ckpt_path)
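
A minimal sketch of how launch_single might be driven from the command line. The flag names and the yacs-based config loading are assumptions for illustration, not part of the example above.

import argparse
from yacs.config import CfgNode as CN  # matches the CN annotation above

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("config_path")
    parser.add_argument("--run-type", choices=["train", "eval"], default="train")
    parser.add_argument("--ckpt-path", default=None)
    parser.add_argument("--clear-only", action="store_true")
    args = parser.parse_args()

    # Hypothetical config loading; the real project may construct its CfgNode elsewhere.
    config = CN(new_allowed=True)
    config.merge_from_file(args.config_path)
    launch_single(config, args.run_type, args.ckpt_path, clear_only=args.clear_only)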
Example #2
def main():
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('read_path')
    arg_parser.add_argument('write_path')
    args = arg_parser.parse_args()

    with open(args.read_path, 'r') as source:
        text = source.read()

        lexer = Lexer(text)
        tokens = lexer.lex()

        parser = Parser(tokens)
        ast = parser.parse()

        symbolizer = Symbolizer(ast)
        symbolizer.symbolize()

        optimizer = Optimizer(ast)
        optimizer.optimize()

        grapher = Grapher(ast)
        grapher.graph()

        generator = Generator(ast)
        generator.generate(args.write_path)

        runner = Runner(ast)
        runner.run()
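
The stages above run strictly in sequence over one shared AST. A condensed sketch of the same flow; what each stage does to the AST is inferred from the names, not from the source.

def run_pipeline(text, write_path):
    tokens = Lexer(text).lex()
    ast = Parser(tokens).parse()
    Symbolizer(ast).symbolize()          # annotate the shared AST with symbols
    Optimizer(ast).optimize()            # rewrite the AST in place
    Grapher(ast).graph()                 # emit a graph of the AST
    Generator(ast).generate(write_path)  # write the generated output
    Runner(ast).run()                    # execute the result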
Example #3
def main():

    try:
        config = Config.load()

        init_logging(config)

        runner = Runner(config)
        runner.run()

        return 0

    except KeyboardInterrupt:
        _logger.info("aborted.")
        return 0

    except MessageException as ex:
        _logger.error(ex)
        _logger.error("aborted!")
        return 1

    except Exception as ex:
        _logger.exception(ex)
        _logger.error("aborted!")
        # no runner.close() to signal abnormal termination!
        return 1
Example #4
def main():
    server_socket = get_server_socket()
    while True:
        connection, address = server_socket.accept()
        connection.send('accepted'.encode())
        print('server start')
        Runner(ClientInfo(connection, address)).start()
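
A minimal client sketch to pair with the server above; the host and port are placeholders, since get_server_socket() is not shown.

import socket

def connect(host="localhost", port=8000):
    with socket.create_connection((host, port)) as conn:
        greeting = conn.recv(1024).decode()  # the server sends 'accepted' on connect
        print(greeting)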
Example #5
    def measurement__set_csv_writer(self, args):
        import os
        import logging
        from src.bank import Bank
        from src.household import Household
        from src.firm import Firm
        from src.environment import Environment
        from src.transaction import Transaction
        from src.market import Market
        from src.runner import Runner
        from src.measurement import Measurement

        text = "This test checks measurement.set_csv_writer \n"
        self.print_info(text)
        #
        # INITIALIZATION
        #
        environment_directory = str(args[0])
        identifier = str(args[1])
        log_directory = str(args[2])

        # Configure logging parameters so we get output while the program runs
        logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %H:%M:%S',
                            filename=log_directory + identifier + ".log", level=logging.INFO)
        logging.info('START logging for test measurement__set_csv_writer in run: %s',
                     environment_directory + identifier + ".xml")

        # Construct household filename
        environment = Environment(environment_directory,  identifier)

        # Construct a runner
        runner = Runner(environment)

        # generate a bank
        bank = Bank()
        bank.identifier = "test_bank"
        environment.banks.append(bank)

        # generate a firm
        firm = Firm()
        firm.identifier = "test_firm"
        environment.firms.append(firm)

        # generate a household
        household = Household()
        household.identifier = "test_household"
        environment.households.append(household)

        #
        # TESTING
        #
        import csv
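        # Note: the file below is opened read-only, so writing through this
        # csv.writer would raise io.UnsupportedOperation; the test only
        # exercises Measurement's getter and setter.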
        file_new = open("__init__.py", "r")
        csv_writer = csv.writer(file_new, lineterminator='\n')
        measurement = Measurement(environment, runner)
        print("Measurement's csv_writer:")
        print(measurement.get_csv_writer())
        measurement.set_csv_writer(csv_writer)
        print("Measurement's csv_writer:")
        print(measurement.get_csv_writer())
Example #6
    def initialize_shock(self, shock_config):
        from src.runner import Runner
        runner = Runner(self)

        from src.shock import Shock
        shock = Shock(self, runner)
        shock.read_xml_config_file(shock_config)
        self.shocks.append(shock)
Example #7
    def test_run_proc(self, cfg_read):
        with mock.patch('src.runner.multiprocessing',
                        autospec=True) as multi_process_mock:
            runner = Runner()
            procs = [runner.web_monitor_proc]
            runner.run_procs(procs, cfg_read)

            multi_process_mock.Process.assert_called_once_with(
                target=procs[0], args=(cfg_read, ))
            multi_process_mock.Process.return_value.join.assert_called_once()
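
A sketch of what Runner.run_procs likely does, inferred from the mock assertions above (one multiprocessing.Process per callable, joined after starting); this is not taken from src.runner itself.

import multiprocessing

def run_procs(procs, cfg):
    workers = [multiprocessing.Process(target=p, args=(cfg,)) for p in procs]
    for w in workers:
        w.start()
    for w in workers:
        w.join()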
Example #8
def main(is_debug, multi_gpu=False):
    """ Training Pipeline
    """
    with open("./config.yaml") as yf:
        config = yaml.safe_load(yf)

    # run single models
    for config_ in config["models"]:
        pprint.pprint(config_)
        runner = Runner(settings, AttrDict(config_))
        runner.run(is_debug=is_debug, multi_gpu=multi_gpu)
Example #9
def setup(params, epsilon_action_modifier, parallel_size):
    env_creator = EnvCreator(params.env_name,
                             parallel_size,
                             wrapper=params.env_wrapper,
                             seed=12)

    agent = DQNAgent(env_creator, params.network_fn, epsilon_action_modifier,
                     params.gamma, params.learning_rate,
                     params.target_net_sync, params.use_double_q)
    runner = Runner(env_creator, agent)
    return agent, runner
Example #10
    def test_web_monitor_proc(self, asyncio_mock, web_monitor_app_mock,
                              cfg_read):
        runner = Runner()
        runner.web_monitor_proc(cfg_read)

        asyncio_mock.get_event_loop.assert_called_once()
        loop_mock = asyncio_mock.get_event_loop.return_value
        loop_mock.stop.assert_called_once()
        loop_mock.run_until_complete.assert_called_once()

        web_monitor_app_mock.return_value.run.assert_called_once()
        web_monitor_app_mock.return_value.stop.assert_called_once()
Example #11
    def test_stats_consumer_proc(self, asyncio_mock, consumer_app_mock,
                                 db_mock, cfg_read):
        runner = Runner()
        runner.stats_consumer_proc(cfg_read)

        asyncio_mock.get_event_loop.assert_called_once()
        loop_mock = asyncio_mock.get_event_loop.return_value
        loop_mock.stop.assert_called_once()
        loop_mock.run_until_complete.assert_called_once()

        db_mock.return_value.clean_up.assert_called_once()

        consumer_app_mock.return_value.run.assert_called_once()
        consumer_app_mock.return_value.stop.assert_called_once()
Example #12
def main():
    args = parse_args()
    set_global_seeds(666)
    config = get_config(args.config)
    pprint(config)
    config['train_params']['name'] = f'{config["train_params"]["name"]}/fold{args.fold}'
    factory = Factory(config['train_params'])
    data_factory = DataFactory(config['data_params'], fold=args.fold)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    callbacks = create_callbacks(config['train_params']['name'],
                                 config['dumps'])
    trainer = Runner(stages=config['stages'],
                     factory=factory,
                     callbacks=callbacks,
                     device=device)
    trainer.fit(data_factory)
Example #13
def init(variant,
         ckpt="lve",
         base="",
         prefix="",
         graph_file=None,
         device=None):
    # Initialize model
    # If graph file is specified in config, that will be used
    # If config specifies directory, we'll use `graph_file` for the filename
    # If `graph_file` is None, the (alphabetically) first file will be used

    run_type = "eval"
    exp_config = osp.join("../configs", prefix, f"{variant}.yaml")
    if base != "":
        exp_config = [osp.join("../configs", f"{base}.yaml"), exp_config]
    ckpt_path = f"{variant}.{ckpt}.pth"

    config, ckpt_path = prepare_config(exp_config,
                                       run_type,
                                       ckpt_path, [
                                           "USE_TENSORBOARD",
                                           False,
                                           "SYSTEM.NUM_GPUS",
                                           1,
                                       ],
                                       suffix=prefix,
                                       graph_file=graph_file)
    if graph_file is None and osp.isdir(config.MODEL.GRAPH_FILE):
        config.defrost()
        graphs = sorted(f for f in os.listdir(config.MODEL.GRAPH_FILE)
                        if f.endswith('.edgelist'))
        graph = graphs[0]  # ! Oh shoot. I messed this up.
        config.MODEL.GRAPH_FILE = osp.join(config.MODEL.GRAPH_FILE, graph)
        graph_id = graph[:5]
        add_suffix(config, graph_id)
        ckpt_dir, ckpt_fn = osp.split(ckpt_path)
        ckpt_path = osp.join(ckpt_dir, graph_id, ckpt_fn)
        # Update relative path
        # Incorporate graph file into this loading. Currently, it will use the default one in the config.
        config.freeze()
    runner = Runner(config)
    runner.logger.clear_filehandlers()
    runner.load_device(device=device)
    return runner, ckpt_path
Example #14
    def initialize_shock(self, shock_config):
        from src.runner import Runner
        runner = Runner(self)

        from src.shock import Shock
        shock = Shock(self, runner)
        shock.read_xml_config_file(shock_config)
        self.shocks.append(shock)

        shock.measure_intitial_shock(self)
        for k, v in shock.legend.items():
            if shock.legend[k] != 0:
                self.shock_measure = (k, v)
                # df_shock = pd.DataFrame[]

        # You can use the code below to check that reading the shock worked.
        for key in shock.asset_returns:
            if shock.asset_returns[key] != 0.0:
                # print "0. ***ENV.PY*** When shock is initialised:  The asset class", key, "is shocked by", shock.asset_returns[key] * 100, "%"
                pass
Example #15
def start():
    try:
        path = "var/accounts/" + os.getenv('ACCOUNT_FILE_NAME')

        if os.path.isfile(path):

            logger.info('Found file ' + path)

            with open(path) as json_file:

                data = json.load(json_file)

                if not data[0]['username'] or not data[0]['password']:
                    logger.error('Username and password are required')

                for account in data:
                    runner = Runner(account, os.getenv('API_URL'), logger)
                    try:
                        runner.start()
                    except Exception:
                        runner.driver.quit()

        else:
            if not os.getenv('ACCOUNT_FILE_NAME'):

                logger.error('ACCOUNT_FILE_NAME environment variable not set')

            else:

                logger.error('Could not find file: ' + path)

    except Exception as error:
        just_the_string = traceback.format_exc()
        logger.error(just_the_string)
        logger.exception(error)

    return 'Finished'
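
start() reads two environment variables; a minimal setup sketch (the values are placeholders, not from the source):

import os

os.environ.setdefault("ACCOUNT_FILE_NAME", "accounts.json")
os.environ.setdefault("API_URL", "https://api.example.com")
start()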
Example #16
        'input_dropout': 0.05,
        'optimizer': {
            'lr': 0.0015,
            'type': 'adam'
        }
    }
    #tf-idf =          {'batch_norm': 'no', 'batch_size': 192.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.0, 'hidden_layers': 2.0, 'hidden_units': 192.0, 'input_dropout': 0.05, 'optimizer': {'lr': 0.0018, 'type': 'adam'}}
    #word2vec_mean =   {'batch_norm': 'before_act', 'batch_size': 256.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 2.0, 'hidden_units': 128.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00037, 'type': 'adam'}}
    #word2vec_max =    {'batch_norm': 'no', 'batch_size': 32.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.3, 'hidden_layers': 3.0, 'hidden_units': 160.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00016, 'type': 'adam'}}
    #word2vec_concat = {'batch_norm': 'before_act', 'batch_size': 32.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 2.0, 'hidden_units': 96.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00075, 'type': 'sgd'}}
    #word2vec_hier =   {'batch_norm': 'no', 'batch_size': 96.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 256.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0024, 'type': 'sgd'}}
    #fasttext_mean =   {'batch_norm': 'before_act', 'batch_size': 224.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.3, 'hidden_layers': 2.0, 'hidden_units': 192.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0032, 'type': 'sgd'}}
    #fasttext_max =    {'batch_norm': 'no', 'batch_size': 160.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 128.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.00016, 'type': 'adam'}}
    #fasttext_concat = {'batch_norm': 'before_act', 'batch_size': 192.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.6, 'hidden_layers': 2.0, 'hidden_units': 224.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00048, 'type': 'adam'}}
    #fasttext_hier =   {'batch_norm': 'no', 'batch_size': 64.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.3, 'hidden_layers': 2.0, 'hidden_units': 128.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.00025, 'type': 'adam'}}
    #doc2vec-dbow =    {'batch_norm': 'no', 'batch_size': 96.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 4.0, 'hidden_units': 160.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0017, 'type': 'sgd'}}
    #doc2vec-dmpv =    {'batch_norm': 'before_act', 'batch_size': 192.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.25, 'hidden_layers': 4.0, 'hidden_units': 224.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0040, 'type': 'sgd'}},
    #doc2vec-concat =  {'batch_norm': 'no', 'batch_size': 160.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 256.0, 'input_dropout': 0.05, 'optimizer': {'lr': 0.0025, 'type': 'sgd'}}
    #sdv =             {'batch_norm': 'before_act', 'batch_size': 192.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 256.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0029, 'type': 'sgd'}}

    params.update(bow)
    params_MLP = dict(params)

    # Predict with an MLP
    feature = "bow"
    runner = Runner(run_name='MLP1',
                    model_cls=ModelMLP,
                    features=feature,
                    params=params_MLP)
    runner.run_train_cv()
Example #17
import sys, os
sys.path.append('../')

import numpy as np
import pandas as pd
from src.runner import Runner
from src.model_NB import ModelMultinomialNB

if __name__ == '__main__':
    params = {
        'alpha' : 1.0,
        'fit_prior' : True,
        'class_prior' : None
    }

    #### Best Parameters
    bow =             { 'alpha' : 1.0 }
    #tf-idf =          { 'alpha' : 1.0 }
    #n-gram =          { 'alpha' : 1.0 }
    #ngram-tf-idf  =   { 'alpha' : 0.1 }

    params.update(bow)
    params_NB = dict(params)

    # Analysis with Naive Bayes
    feature = "bow"
    runner = Runner(run_name='NB1', model_cls=ModelMultinomialNB, features=feature, params=params_NB)
    runner.run_train_cv()
Example #18
def main():
    # =========================================
    # === Settings
    # =========================================
    # Get logger
    logger = get_logger(__name__)
    logger.info('Settings')

    # Get argument
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default='./configs/model_0.json')
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    logger.info(f'config: {args.config}')
    logger.info(f'debug: {args.debug}')

    # Get config
    config = json.load(open(args.config))
    config.update({'args': {'config': args.config, 'debug': args.debug}})

    if config["model"]["name"] == "lightgbm":
        config["model"]["model_params"]["nthread"] = cpu_count()

    # Create a directory for model output
    model_no = pathlib.Path(args.config).stem
    model_output_dir = (pathlib.Path(config['dataset']['output_directory']) /
                        model_no)
    if not model_output_dir.exists():
        model_output_dir.mkdir()

    logger.info(f'model_output_dir: {str(model_output_dir)}')
    logger.debug(f'model_output_dir exists: {model_output_dir.exists()}')
    config.update({'model_output_dir': str(model_output_dir)})

    # =========================================
    # === Loading data
    # =========================================
    logger.info('Loading data')

    # Get train and test
    input_dir = pathlib.Path(config['dataset']['input_directory'])
    train = pd.read_csv(input_dir / 'train.csv')
    test = pd.read_csv(input_dir / 'test.csv')

    # Get target values
    target_column = config['data_type']['target']
    y_train = train[target_column].values

    # =========================================
    # === Loading features
    # =========================================
    logger.info('Loading features')

    # Get features
    x_train, x_test = load_features(config)
    feature_name = x_test.columns
    logger.debug(f'number of features: {len(feature_name)}')

    # =========================================
    # === Adversarial Validation
    # =========================================
    logger.info("adversarial validation")
    train_adv = x_train
    test_adv = x_test
    train_adv['target'] = 0
    test_adv['target'] = 1
    train_test_adv = pd.concat([train_adv, test_adv], axis=0,
                               sort=False).reset_index(drop=True)
    target = train_test_adv['target'].values

    train_set, val_set = train_test_split(train_test_adv,
                                          test_size=0.33,
                                          random_state=71,
                                          shuffle=True)
    x_train_adv = train_set[feature_name]
    y_train_adv = train_set['target']
    x_val_adv = val_set[feature_name]
    y_val_adv = val_set['target']
    logger.debug(f'the number of train set: {len(x_train_adv)}')
    logger.debug(f'the number of valid set: {len(x_val_adv)}')

    train_lgb = lgb.Dataset(x_train_adv, label=y_train_adv)
    val_lgb = lgb.Dataset(x_val_adv, label=y_val_adv)
    lgb_model_params = config["adversarial_validation"]["lgb_model_params"]
    lgb_train_params = config["adversarial_validation"]["lgb_train_params"]
    clf = lgb.train(lgb_model_params,
                    train_lgb,
                    valid_sets=[train_lgb, val_lgb],
                    valid_names=['train', 'valid'],
                    **lgb_train_params)

    feature_imp = pd.DataFrame(sorted(
        zip(clf.feature_importance(importance_type='gain'), feature_name)),
                               columns=['value', 'feature'])
    plt.figure(figsize=(20, 10))
    sns.barplot(x='value',
                y='feature',
                data=feature_imp.sort_values(by='value',
                                             ascending=False).head(20))
    plt.title('LightGBM Features')
    plt.tight_layout()
    plt.savefig(model_output_dir / "feature_importance_adv.png")

    config.update({
        'adversarial_validation_result': {
            'score':
            clf.best_score,
            'feature_importances':
            feature_imp.set_index("feature").sort_values(
                by="value", ascending=False).head(20).to_dict()["value"]
        }
    })
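    # An adversarial-validation score near chance level means train and test
    # are hard to distinguish, i.e. their feature distributions are similar;
    # clf.best_score recorded in the config above holds the best metric per
    # validation set.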

    # =========================================
    # === Train model and predict
    # =========================================
    logger.info('Train model and predict')

    # Get features
    x_train, x_test = load_features(config)
    feature_name = x_test.columns
    logger.debug(f'number of features: {len(feature_name)}')

    # Get folds
    folds_ids = Fold(
        n_splits=config['cv']['n_splits'],
        shuffle=config['cv']['shuffle'],
        random_state=config['cv']['random_state']).get_stratifiedkfold(
            x_train, y_train)

    # Train and predict
    model_name = config['model']['name']
    model_cls = model_map[model_name]
    params = config['model']
    runner = Runner(model_cls, params, model_output_dir,
                    f'Train_{model_cls.__name__}')

    oof_preds, evals_result = runner.train_cv(x_train, y_train, folds_ids)
    config.update(evals_result)
    test_preds = runner.predict_cv(x_test)

    # =========================================
    # === Make submission file
    # =========================================
    sub = create_submission(test, test_preds, target_column)
    sub.to_csv(model_output_dir / 'submission.csv', index=False, header=True)

    # =========================================
    # === Save files
    # =========================================
    save_path = model_output_dir / 'output.json'
    json_dump(config, save_path)

    pd.DataFrame(oof_preds,
                 columns=["target"]).to_csv(model_output_dir / 'oof.csv',
                                            index=False,
                                            header=True)
Example #19
def main():
    # =========================================
    # === Settings
    # =========================================
    # Get logger
    logger = get_logger(__name__)
    logger.info('Settings')

    # Get argument
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config', default='model_lgb_hakubishin_20200317/configs/model_0.json')
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    logger.info(f'config: {args.config}')
    logger.info(f'debug: {args.debug}')

    # Get config
    config = json.load(open(args.config))
    config.update({
        'args': {
            'config': args.config,
            'debug': args.debug
        }
    })
    config["model"]["model_params"]["nthread"] = cpu_count()

    # Create a directory for model output
    model_no = pathlib.Path(args.config).stem
    model_output_dir = (
        pathlib.Path(config['model_dir_name']) /
        pathlib.Path(config['dataset']['output_directory']) / model_no
    )
    if not model_output_dir.exists():
        model_output_dir.mkdir()

    logger.info(f'model_output_dir: {str(model_output_dir)}')
    logger.debug(f'model_output_dir exists: {model_output_dir.exists()}')
    config.update({
        'model_output_dir': str(model_output_dir)
    })

    # =========================================
    # === Loading features
    # =========================================
    logger.info('Loading features')
    logger.info(f'targets: {config["target"]}')
    logger.info(f'features: {config["features"]}')

    # features
    x_train = FeatureLoader(
        data_type="training", debugging=args.debug
        ).load_features(config["features"])

    # targets
    y_train_set = FeatureLoader(
        data_type="training", debugging=args.debug
        ).load_features(config["target"])

    # folds
    folds_train = FeatureLoader(
        data_type="training", debugging=args.debug
        ).load_features(config["folds"])

    logger.debug(f'y_train_set: {y_train_set.shape}')
    logger.debug(f'x_train: {x_train.shape}')

    # =========================================
    # === Train model and predict
    # =========================================
    logger.info('Train model and predict')

    # Get target values
    y_train = y_train_set["Target_answered_correctly"].values

    # Get folds
    trn_idx = folds_train.query("Fold_val != 1").index
    val_idx = folds_train.query("Fold_val == 1").index
    folds_ids = [(trn_idx, val_idx)]
    logger.debug(f"n_trn={len(trn_idx)}, n_val={len(val_idx)}")
    logger.debug(f"trn_pos={y_train[trn_idx].sum()}, val_pos={y_train[val_idx].sum()}")

    # Train and predict
    model_cls = model_map[config['model']['name']]
    model_params = config['model']
    runner = Runner(
        model_cls, model_params, model_output_dir, f'{model_cls.__name__}', n_fold=1,
    )
    oof_preds, evals_result, importances = runner.train_cv(
        x_train, y_train, folds_ids)
    config.update(evals_result)

    # Save importances
    importances.mean(axis=1).reset_index().rename(
        columns={"index": "feature", 0: "value"}
    ).sort_values("value", ascending=False).to_csv(
        model_output_dir / "importances.csv", index=False
    )

    # Save oof-pred file
    oof_preds_file_name = "oof_pred"
    np.save(model_output_dir / oof_preds_file_name, oof_preds)
    logger.info(f'Save oof-pred file: {model_output_dir/ oof_preds_file_name}')

    # Save files (override)
    logger.info('Save files')
    save_path = model_output_dir / 'output.json'
    json_dump(config, save_path)
    logger.info(f'Save model log: {save_path}')

    # =========================================
    # === Upload to GCS
    # =========================================
    if not args.debug:
        logger.info('Upload to GCS')

        bucket_dir_name = config["model_dir_name"] + "/" + model_no
        logger.info(f'bucket_dir_name: {bucket_dir_name}')

        files = list(model_output_dir.iterdir())
        upload_to_gcs(bucket_dir_name, files)
Example #20
        'batch_norm': 'before_act',
        'optimizer': {
            'type': 'adam',
            'lr': 0.005
        },
        'batch_size': 100,
        'nb_epoch': 500,
        'embedding_model': None,
        'Bidirectional': False,
    }
    # Bidirectional LSTM
    #params = {

    #}

    # fasttext.bin is the vocabulary-compressed file produced by compress.py
    params['embedding_model'] = KeyedVectors.load_word2vec_format(
        './fasttext.bin', binary=True)
    params_LSTM = dict(params)

    # features must always be set to raw_text
    runner = Runner(run_name='LSTM1',
                    model_cls=ModelLSTM,
                    features="raw_text",
                    params=params_LSTM)

    # Run a single fold only
    # runner.train_fold(0)
    # Run with cross-validation
    runner.run_train_cv()
Example #21

#
# INITIALIZATION
#
    environment_directory = str(args[1])
    identifier = str(args[2])
    log_directory = str(args[3])

    # Configure logging parameters so we get output while the program runs
    logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %H:%M:%S',
                        filename=log_directory + identifier + ".log", level=logging.INFO)
    logging.info('START logging for run: %s',  environment_directory + identifier + ".xml")

    environment = Environment(environment_directory,  identifier)
    runner = Runner(environment)

#
# UPDATE STEP
#
    for i in range(int(environment.num_simulations)):
        logging.info('  STARTED with run %s',  str(i))
        environment.initialize(environment_directory,  identifier)
        runner.initialize(environment)
        # do the run
        runner.do_run(environment)
        logging.info('  DONE')

#
# MEASUREMENT AND LOGGING
#
Example #22
        'silent': 1,
        'random_state': 71,
        'num_boost_round': 10000,
        'early_stopping_rounds': 10,
        'n_estimator': 500
    }

    #### Best Parameters
    bow = {'num_leaves': 32, 'colsample_bytree': 0.466}
    #tf-idf =          { 'num_leaves' : 22, 'colsample_bytree' : 0.540 }
    #n-gram =          { 'num_leaves' : 34, 'colsample_bytree' : 0.689 }
    #ngram-tf-idf  =   { 'num_leaves' : 26, 'colsample_bytree' : 0.393 }
    #word2vec_mean =   { 'num_leaves' : 20, 'colsample_bytree' : 0.379 }
    #word2vec_max =    { 'num_leaves' : 22, 'colsample_bytree' : 0.387 }
    #word2vec_concat = { 'num_leaves' : 16, 'colsample_bytree' : 0.310 }
    #word2vec_hier =   { 'num_leaves' : 30, 'colsample_bytree' : 0.888 }
    #fasttext_mean =   { 'num_leaves' : 34, 'colsample_bytree' : 0.546, 'subsample' : 0.7725, 'learning_rate': 0.01 }
    #fasttext_max =    { 'num_leaves' : 28, 'colsample_bytree' : 0.447 }
    #fasttext_concat = { 'num_leaves' : 12, 'colsample_bytree' : 0.344 }
    #fasttext_hier =   { 'num_leaves' : 10, 'colsample_bytree' : 0.319 }
    #doc2vec-dbow =    { 'num_leaves' : 46, 'colsample_bytree' : 0.303, 'subsample' : 0.879, 'learning_rate': 0.01 }
    #doc2vec-dmpv =    { 'num_leaves' : 30, 'colsample_bytree' : 0.597, 'subsample' : 0.910, 'learning_rate': 0.01 }
    #doc2vec-concat =  { 'num_leaves' : 25, 'colsample_bytree' : 0.624, 'subsample' : 0.590, 'learning_rate': 0.05 }
    #sdv =             {'colsample_bytree': '0.539', 'learning_rate': 0.01, 'num_leaves': 56, 'subsample': 0.942}

    params_lgb.update(bow)
    params_lgb_all = dict(params_lgb)

    # Train and predict with LightGBM
    runner = Runner('lgb1', ModelLGB, "bow", params_lgb_all)
    runner.run_train_cv()
Example #23
        'gru_dropout': 0.3,
        'recurrent_dropout': 0.3,
        'hidden_layers': 3,
        'hidden_units': 128,
        'hidden_activation': 'relu',
        'hidden_dropout': 0.3,
        'batch_norm': 'before_act',
        'optimizer': {
            'type': 'adam',
            'lr': 0.001
        },
        'batch_size': 100,
        'nb_epoch': 500,
        'embedding_model': None
    }

    # fasttext.bin is the vocabulary-compressed file produced by compress.py
    params['embedding_model'] = KeyedVectors.load_word2vec_format(
        './fasttext.bin', binary=True)
    params_GRU = dict(params)

    # features must always be set to raw_text
    runner = Runner(run_name='GRU1',
                    model_cls=ModelGRU,
                    features="raw_text",
                    params=params_GRU)

    # Run a single fold only
    # runner.train_fold(0)
    # Run with cross-validation
    runner.run_train_cv()
Example #24
        'verbose' : 1,
        'warm_start' : False,
        'n_jobs' : None, 
        'l1_ratio' : None,
    }
    #### Best Parameters
    bow =             { 'C' : 0.001 }
    #tf-idf =          { 'C' : 1.0 }
    #n-gram =          { 'C' : 1.0 }
    #ngram-tf-idf =    { 'C' : 0.1 }
    #word2vec_mean =   { 'C' : 0.1 }
    #word2vec_max =    { 'C' : 0.1 }
    #word2vec_concat = { 'C' : 10.0 }
    #word2vec_hier =   { 'C' : 0.1 }
    #fasttext_mean =   { 'C' : 0.001 }
    #fasttext_max =    { 'C' : 0.001 }
    #fasttext_concat = { 'C' : 0.001 }
    #fasttext_hier =   { 'C' : 0.001 }
    #doc2vec-dbow =    { 'C' : 0.001 }
    #doc2vec-dmpv =    { 'C' : 0.1   }
    #doc2vec-concat =  { 'C' : 0.001 }
    #sdv =             { 'C' : 0.001 }

    params.update(bow)
    params_logistic = dict(params)

    # Predict with Logistic Regression
    feature = "bow"
    runner = Runner(run_name='logis', model_cls=ModelLogistic, features=feature, params=params_logistic)
    runner.run_train_cv()
Example #25
def main():
    # =========================================
    # === Settings
    # =========================================
    # Get logger
    logger = get_logger(__name__)
    logger.info('Settings')

    # Get argument
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default='./configs/model_1dcnn_0.json')
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    logger.info(f'config: {args.config}')
    logger.info(f'debug: {args.debug}')

    # Get config
    config = json.load(open(args.config))
    config.update({'args': {'config': args.config, 'debug': args.debug}})

    if config["model"]["name"] == "lightgbm":
        config["model"]["model_params"]["nthread"] = cpu_count()

    # Create a directory for model output
    model_no = pathlib.Path(args.config).stem
    model_output_dir = (pathlib.Path(config['dataset']['output_directory']) /
                        model_no)
    if not model_output_dir.exists():
        model_output_dir.mkdir()

    logger.info(f'model_output_dir: {str(model_output_dir)}')
    logger.debug(f'model_output_dir exists: {model_output_dir.exists()}')
    config.update({'model_output_dir': str(model_output_dir)})

    # =========================================
    # === Loading data
    # =========================================
    logger.info('Loading data')

    # Get train and test
    input_dir = pathlib.Path(config['dataset']['input_directory'])
    train = pd.read_csv(input_dir / 'train.csv')
    test = pd.read_csv(input_dir / 'test.csv')

    spectrum = pd.read_csv(input_dir / 'spectrum_stack.csv')
    spectrum_fitting = pd.read_csv(input_dir / 'spectrum_fitting_stack.csv')
    wv_cols = [f"wavelength_{i}" for i in range(512)]
    wv_fit_cols = [f"fitting_wavelength_{i}" for i in range(512)]

    train_spectrum = pd.merge(train,
                              spectrum,
                              on="spectrum_filename",
                              how="left")
    test_spectrum = pd.merge(test,
                             spectrum,
                             on="spectrum_filename",
                             how="left")
    train_spectrum = pd.merge(train_spectrum,
                              spectrum_fitting,
                              on="spectrum_filename",
                              how="left")
    test_spectrum = pd.merge(test_spectrum,
                             spectrum_fitting,
                             on="spectrum_filename",
                             how="left")

    train_std = np.std(train_spectrum[wv_cols].values, axis=1, keepdims=True)
    test_std = np.std(test_spectrum[wv_cols].values, axis=1, keepdims=True)
    train_spectrum[wv_cols] = train_spectrum[wv_cols].values / train_std
    test_spectrum[wv_cols] = test_spectrum[wv_cols].values / test_std

    spectrum_cols = wv_cols + wv_fit_cols
    train_spectrum = train_spectrum[spectrum_cols]
    test_spectrum = test_spectrum[spectrum_cols]

    # Get target values
    target_column = config['data_type']['target']
    y_train = train[target_column].values

    # =========================================
    # === Loading features
    # =========================================
    logger.info('Loading features')

    # Get features
    x_train, x_test = load_features(config)
    feature_name = x_test.columns
    logger.debug(f'number of features: {len(feature_name)}')

    # =========================================
    # === features preprocess
    # =========================================
    x_total = pd.concat([x_train, x_test]).reset_index(drop=True)
    remove_features = [c for c in x_total.columns if c.find("layout_x") != -1]
    remove_features += [c for c in x_total.columns if c.find("layout_y") != -1]
    x_total.drop(columns=remove_features, inplace=True)

    x_total = pd.get_dummies(
        x_total, columns=["LabelEncoding_exc_wl", "LabelEncoding_layout_a"])
    x_total.fillna(0, inplace=True)

    from sklearn.preprocessing import StandardScaler
    numeric_features = [
        c for c in x_total.columns if c.find("LabelEncoding_") == -1
    ]
    sc = StandardScaler()
    x_total[numeric_features] = sc.fit_transform(x_total[numeric_features])

    x_train = x_total.iloc[:len(train)]
    x_test = x_total.iloc[len(train):].reset_index(drop=True)

    x_train = pd.concat([x_train, train_spectrum], axis=1)
    x_test = pd.concat([x_test, test_spectrum], axis=1)
    logger.debug(f'number of features with spec in train: {x_train.shape}')
    logger.debug(f'number of features with spec in test: {x_test.shape}')

    # =========================================
    # === Train model and predict
    # =========================================
    logger.info('Train model and predict')

    # Get folds
    folds_ids = Fold(
        n_splits=config['cv']['n_splits'],
        shuffle=config['cv']['shuffle'],
        random_state=config['cv']['random_state']).get_stratifiedkfold(
            x_train, y_train)

    # Train and predict
    model_name = config['model']['name']
    model_cls = model_map[model_name]
    params = config['model']
    runner = Runner(model_cls, params, model_output_dir,
                    f'Train_{model_cls.__name__}')

    oof_preds, evals_result = runner.train_cv(x_train, y_train, folds_ids)
    config.update(evals_result)
    test_preds = runner.predict_cv(x_test)

    # =========================================
    # === Make submission file
    # =========================================
    sub = create_submission(test, test_preds, target_column)
    sub.to_csv(model_output_dir / 'submission.csv', index=False, header=True)

    # =========================================
    # === Save files
    # =========================================
    save_path = model_output_dir / 'output.json'
    json_dump(config, save_path)

    pd.DataFrame(oof_preds,
                 columns=["target"]).to_csv(model_output_dir / 'oof.csv',
                                            index=False,
                                            header=True)
Example #26
import sys, os
sys.path.append('../')

import numpy as np
import pandas as pd
from src.runner import Runner
from src.model_GaussNB import ModelGaussNB

if __name__ == '__main__':
    params = {'priors': None, 'var_smoothing': 1e-09}
    params_NB = dict(params)

    # Run with the specified feature
    feature = "bow"
    runner = Runner(run_name='GNB1',
                    model_cls=ModelGaussNB,
                    features=feature,
                    params=params_NB)

    # Run a single fold only
    # runner.train_fold(0)
    # Run with cross-validation
    runner.run_train_cv()
Example #27
def main():
    # =========================================
    # === Settings
    # =========================================
    # Get logger
    logger = get_logger(__name__)
    logger.info('Settings')

    # Get argument
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config', default='model_lgb_hakubishin_20200317/configs/model_0.json')
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    logger.info(f'config: {args.config}')
    logger.info(f'debug: {args.debug}')

    # Get config
    config = json.load(open(args.config))
    config.update({
        'args': {
            'config': args.config,
            'debug': args.debug
        }
    })
    config["model"]["model_params"]["nthread"] = cpu_count()

    # Create a directory for model output
    model_no = pathlib.Path(args.config).stem
    model_output_dir = (
        pathlib.Path(config['model_dir_name']) /
        pathlib.Path(config['dataset']['output_directory']) / model_no
    )
    if not model_output_dir.exists():
        model_output_dir.mkdir()

    logger.info(f'model_output_dir: {str(model_output_dir)}')
    logger.debug(f'model_output_dir exists: {model_output_dir.exists()}')
    config.update({
        'model_output_dir': str(model_output_dir)
    })

    # =========================================
    # === Loading features
    # =========================================
    logger.info('Loading features')
    logger.info(f'targets: {config["target"]}')
    logger.info(f'features: {config["features"]}')
    logger.info(f'keys: {config["key"]}')
    logger.info(f'folds: {config["folds"]}')

    # features
    x_train = FeatureLoader(
        data_type="training", debugging=args.debug
        ).load_features(config["features"])
    x_test = FeatureLoader(
        data_type=config["test_data_type"], debugging=args.debug
        ).load_features(config["features"])

    # targets
    y_train_set = FeatureLoader(
        data_type="training", debugging=args.debug
        ).load_features(config["target"])

    # keys
    key_test = FeatureLoader(
        data_type=config["test_data_type"], debugging=args.debug
        ).load_features(config["key"])

    # folds
    folds_train = FeatureLoader(
        data_type="training", debugging=args.debug
        ).load_features(config["folds"])

    logger.debug(f'test_data_type: {config["test_data_type"]}')
    logger.debug(f'y_train_set: {y_train_set.shape}')
    logger.debug(f'x_train: {x_train.shape}')
    logger.debug(f'x_test: {x_test.shape}')
    logger.debug(f'key_test: {key_test.shape}')


    # =========================================
    # === Train model and predict
    # =========================================
    logger.info('Train model and predict')

    # Modeling
    target_columns = [
        "reply_engagement",
        "retweet_engagement",
        "retweet_with_comment_engagement",
        "like_engagement",
    ]
    for cat in target_columns:
        logger.info(f'============= {cat} =============')

        # Get target values
        y_train = y_train_set[f"TargetCategories_{cat}"].values

        # Get folds
        folds_col = ["StratifiedGroupKFold_retweet_with_comment_engagement"]
        assert len(folds_col) == 1, "The number of fold column must be one"
        folds = folds_train[folds_col]
        n_fold = folds.max().values[0] + 1
        folds_ids = []

        logger.debug(f"total pos: {y_train.sum()}")
        for i in range(n_fold):
            trn_idx = folds[folds != i].dropna().index
            val_idx = folds[folds == i].dropna().index
            folds_ids.append((trn_idx, val_idx))
            logger.debug(f"{i+1}fold: n_trn={len(trn_idx)}, n_val={len(val_idx)}")
            logger.debug(f"{i+1}fold: trn_pos={y_train[trn_idx].sum()}, val_pos={y_train[val_idx].sum()}")

        # Train and predict
        model_cls = model_map[config['model']['name']]
        model_params = config['model']
        runner = Runner(
            model_cls, model_params, model_output_dir, f'Train_{model_cls.__name__}_{cat}'
        )
        oof_preds, test_preds, evals_result = runner.train_cv(
            x_train, y_train, x_test, folds_ids, config)

        evals_result[f"evals_result_{cat}"] = evals_result["evals_result"]
        evals_result.pop("evals_result")
        config.update(evals_result)

        # Save oof-pred file
        oof_preds_file_name = f"{cat}_oof_pred"
        np.save(model_output_dir / oof_preds_file_name, oof_preds)
        logger.info(f'Save oof-pred file: {model_output_dir/ oof_preds_file_name}')

        # Make submission file
        sub = pd.concat([key_test, pd.Series(test_preds).rename("pred")], axis=1)
        sub = sub[["KeyCategories_tweet_id", "KeyCategories_engaging_user_id", "pred"]]
        sub_file_name = f"{cat}_submission_{config['test_data_type']}.csv"
        sub.to_csv(model_output_dir/ sub_file_name, index=False, header=False)
        logger.info(f'Save submission file: {model_output_dir/ sub_file_name}')

        # Save files (override)
        logger.info('Save files')
        save_path = model_output_dir / 'output.json'
        json_dump(config, save_path)
        logger.info(f'Save model log: {save_path}')

    # =========================================
    # === Upload to GCS
    # =========================================
    if not args.debug:
        logger.info('Upload to GCS')

        bucket_dir_name = config["model_dir_name"] + "/" + model_no
        logger.info(f'bucket_dir_name: {bucket_dir_name}')

        files = list(model_output_dir.iterdir())
        upload_to_gcs(bucket_dir_name, files)
Example #28
# load neighbourhood data
with open('parameters/lock_down/neighbourhood_data.json') as json_file:
    neighbourhood_data = json.load(json_file)

# load age data
age_distribution = pd.read_csv('age_dist.csv', sep=';', index_col=0)
age_distribution_per_ward = dict(age_distribution.transpose())

# Monte Carlo simulation
for seed in range(parameters['monte_carlo_runs']):
    # make new folder for seed, if it does not exist
    if not os.path.exists('measurement/lockdown/seed{}'.format(seed)):
        os.makedirs('measurement/lockdown/seed{}'.format(seed))

    # initialization
    environment = EnvironmentNetwork(seed, parameters, neighbourhood_data, age_distribution_per_ward)

    # running the simulation
    runner = Runner()
    runner.lock_down(environment, seed)

    # save network
    if not parameters["high_performance"]:
        for idx, network in enumerate(environment.infection_states):
            for i, node in enumerate(network.nodes):
                network.nodes[i]['agent'] = network.nodes[i]['agent'].status

            idx_string = '{0:04}'.format(idx)
            nx.write_graphml_lxml(network, "measurement/lockdown/seed{}/network_time{}.graphml".format(seed, idx_string))
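
A read-back sketch for one saved snapshot; networkx's read_graphml is the counterpart of the write_graphml_lxml call above, and the path is a placeholder.

import networkx as nx

g = nx.read_graphml("measurement/lockdown/seed0/network_time0000.graphml")
print(g.number_of_nodes(), "nodes,", g.number_of_edges(), "edges")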
Example #29
from src.agent import KArmedBanditAgent
from src.environment import KArmedBanditEnvironment
from src.runner import Runner

k = 10
n_bandits = 3

env = KArmedBanditEnvironment(k=k, n_bandits=n_bandits)
agent = KArmedBanditAgent(k=k, n_bandits=n_bandits)
runner = Runner(env, agent, iterations=5000)
runner.run()
runner.plot_environment()
runner.plot_selected_actions()
runner.plot_value_function()