def launch_single(config: CN, run_type: str, ckpt_path: str, clear_only=False):
    """Launch one run: clear its output dirs, train (fresh or resumed), or eval.

    With `clear_only`, the run's tensorboard/checkpoint/log directories are
    cleared via check_exists(preserve=False) and the process exits with 0.
    Training without a checkpoint first consults the run directories: when
    DO_PRESERVE_RUNS is set, any existing directory aborts with exit code 1;
    otherwise the directories are passed through check_exists before training.
    """
    run_dirs = (config.TENSORBOARD_DIR, config.CHECKPOINT_DIR, config.LOG_DIR)

    if clear_only:
        for run_dir in run_dirs:
            check_exists(run_dir, preserve=False)
        exit(0)

    if run_type == "train":
        if ckpt_path is not None:
            # Resuming: no directory bookkeeping, continue from the checkpoint.
            Runner(config).train(checkpoint_path=ckpt_path)
        else:
            if DO_PRESERVE_RUNS:
                # Refuse to clobber an existing run (any() short-circuits on
                # the first directory found, like the original `or` chain).
                if any(check_exists(run_dir) for run_dir in run_dirs):
                    exit(1)
            else:
                for run_dir in run_dirs:
                    check_exists(run_dir)
            Runner(config).train()
    elif run_type == "eval":
        Runner(config).eval(checkpoint_path=ckpt_path)
def main():
    """Compile and run a source file.

    Pipeline: lex -> parse -> symbolize -> optimize -> graph -> generate
    (to write_path) -> run.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('read_path')
    cli.add_argument('write_path')
    opts = cli.parse_args()

    with open(opts.read_path, 'r') as source:
        program_text = source.read()

    tokens = Lexer(program_text).lex()
    tree = Parser(tokens).parse()
    Symbolizer(tree).symbolize()
    Optimizer(tree).optimize()
    Grapher(tree).graph()
    Generator(tree).generate(opts.write_path)
    Runner(tree).run()
def main():
    """Entry point: load config, run, and map outcomes to exit codes.

    Returns 0 on success or user interrupt, 1 on any error.
    """
    try:
        cfg = Config.load()
        init_logging(cfg)
        Runner(cfg).run()
    except KeyboardInterrupt:
        _logger.info("aborted.")
        return 0
    except MessageException as ex:
        # Known, user-facing error: message only, no traceback.
        _logger.error(ex)
        _logger.error("aborted!")
        return 1
    except Exception as ex:
        # Unexpected error: full traceback.
        _logger.exception(ex)
        _logger.error("aborted!")
        # no runner.close() to signal abnormal termination!
        return 1
    return 0
def main():
    """Accept TCP clients forever: ack each connection and hand it to a Runner."""
    server_socket = get_server_socket()
    while True:
        # Blocks until a client connects.
        connection, address = server_socket.accept()
        # Ack the client before spinning up its handler.
        connection.send('accepted'.encode())
        # NOTE(review): this is printed once per accepted connection, not once
        # at startup — confirm whether it was meant to sit before the loop.
        print('server start')
        # Runner exposes .start(), so it is presumably a thread-like worker
        # that owns this connection from here on — TODO confirm.
        Runner(ClientInfo(connection, address)).start()
def measurement__set_csv_writer(self, args):
    """Exercise Measurement.set_csv_writer / get_csv_writer.

    args: [environment_directory, identifier, log_directory] — all coerced
    to str. Builds a minimal environment (one bank, one firm, one household),
    swaps in a csv.writer, and prints the writer before and after.
    """
    import os
    from src.bank import Bank
    from src.household import Household
    from src.firm import Firm
    from src.environment import Environment
    from src.transaction import Transaction
    from src.market import Market
    from src.runner import Runner
    from src.measurement import Measurement
    text = "This test checks measurement.set_csv_writer \n"
    self.print_info(text)
    #
    # INITIALIZATION
    #
    environment_directory = str(args[0])
    identifier = str(args[1])
    log_directory = str(args[2])
    # Configure logging parameters so we get output while the program runs
    logging.basicConfig(format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        filename=log_directory + identifier + ".log",
                        level=logging.INFO)
    logging.info('START logging for test measurement__set_csv_writer in run: %s',
                 environment_directory + identifier + ".xml")
    # Construct household filename
    environment = Environment(environment_directory, identifier)
    # Construct a runner
    runner = Runner(environment)
    # generate a bank
    bank = Bank()
    bank.identifier = "test_bank"
    environment.banks.append(bank)
    # generate a firm
    firm = Firm()
    firm.identifier = "test_firm"
    environment.firms.append(firm)
    # generate a household
    household = Household()
    household.identifier = "test_household"
    environment.households.append(household)
    #
    # TESTING
    #
    import csv
    # NOTE(review): the file is opened read-only and never closed; the writer
    # is only stored, never written to, so this works — but a writable temp
    # file plus a close/with-block would be cleaner. TODO confirm intent.
    file_new = open("__init__.py", "r")
    csv_writer = csv.writer(file_new, lineterminator='\n')
    measurement = Measurement(environment, runner)
    print("Measurement's csv_writer:")
    print(measurement.get_csv_writer())
    measurement.set_csv_writer(csv_writer)
    print("Measurement's csv_writer:")
    print(measurement.get_csv_writer())
def initialize_shock(self, shock_config):
    """Build a Shock from an XML config file and register it on this object."""
    # Local imports: Runner/Shock depend on this module, so importing them
    # at module scope would be circular.
    from src.runner import Runner
    shock_runner = Runner(self)
    from src.shock import Shock
    new_shock = Shock(self, shock_runner)
    new_shock.read_xml_config_file(shock_config)
    self.shocks.append(new_shock)
def test_run_proc(self, cfg_read):
    """run_procs must spawn exactly one multiprocessing.Process per entry and join it."""
    # Patch the multiprocessing module as seen by src.runner so no real
    # process is ever started.
    with mock.patch('src.runner.multiprocessing', autospec=True) as multi_process_mock:
        runner = Runner()
        procs = [runner.web_monitor_proc]
        runner.run_procs(procs, cfg_read)
        # The proc becomes the Process target; cfg_read is its only argument.
        multi_process_mock.Process.assert_called_once_with(
            target=procs[0], args=(cfg_read, ))
        # NOTE(review): .start() is not asserted here, only .join() — confirm
        # whether start should be verified too.
        multi_process_mock.Process.return_value.join.assert_called_once()
def main(is_debug, multi_gpu=False):
    """Training pipeline: run every model listed in ./config.yaml.

    Args:
        is_debug: forwarded to Runner.run to enable debug (reduced) runs.
        multi_gpu: forwarded to Runner.run. New keyword with a safe default
            so existing ``main(is_debug)`` callers keep working.
    """
    with open("./config.yaml") as yf:
        config = yaml.safe_load(yf)

    # run single models
    for config_ in config["models"]:
        pprint.pprint(config_)
        runner = Runner(settings, AttrDict(config_))
        # BUG FIX: the original read a module-level `args` (args.debug /
        # args.multi_gpu), silently ignoring the `is_debug` parameter and
        # raising NameError when no global `args` existed.
        runner.run(is_debug=is_debug, multi_gpu=multi_gpu)
def setup(params, epsilon_action_modifier, parallel_size):
    """Wire up the environment factory, DQN agent, and runner for one experiment.

    Returns (agent, runner); both share the same EnvCreator (fixed seed 12).
    """
    envs = EnvCreator(params.env_name, parallel_size,
                      wrapper=params.env_wrapper, seed=12)
    dqn_agent = DQNAgent(envs, params.network_fn, epsilon_action_modifier,
                         params.gamma, params.learning_rate,
                         params.target_net_sync, params.use_double_q)
    return dqn_agent, Runner(envs, dqn_agent)
def test_web_monitor_proc(self, asyncio_mock, web_monitor_app_mock, cfg_read):
    """web_monitor_proc must run the monitor app on the event loop, then shut both down."""
    runner = Runner()
    runner.web_monitor_proc(cfg_read)
    # The proc must obtain the loop exactly once ...
    asyncio_mock.get_event_loop.assert_called_once()
    loop_mock = asyncio_mock.get_event_loop.return_value
    # ... drive it to completion, and stop it on the way out.
    loop_mock.stop.assert_called_once()
    loop_mock.run_until_complete.assert_called_once()
    # The app itself is both run and stopped (clean shutdown path).
    web_monitor_app_mock.return_value.run.assert_called_once()
    web_monitor_app_mock.return_value.stop.assert_called_once()
def test_stats_consumer_proc(self, asyncio_mock, consumer_app_mock, db_mock, cfg_read):
    """stats_consumer_proc must run the consumer app on the loop and clean up the DB."""
    runner = Runner()
    runner.stats_consumer_proc(cfg_read)
    # Loop lifecycle: obtained once, run to completion, stopped.
    asyncio_mock.get_event_loop.assert_called_once()
    loop_mock = asyncio_mock.get_event_loop.return_value
    loop_mock.stop.assert_called_once()
    loop_mock.run_until_complete.assert_called_once()
    # Database connection must be released on shutdown.
    db_mock.return_value.clean_up.assert_called_once()
    # Consumer app is both run and stopped (clean shutdown path).
    consumer_app_mock.return_value.run.assert_called_once()
    consumer_app_mock.return_value.stop.assert_called_once()
def main():
    """CLI entry point: configure a fold-specific training run and fit it."""
    cli_args = parse_args()
    set_global_seeds(666)

    cfg = get_config(cli_args.config)
    pprint(cfg)
    # Suffix the run name with the fold id so each fold writes its own dumps.
    base_name = cfg["train_params"]["name"]
    cfg['train_params']['name'] = f'{base_name}/fold{cli_args.fold}'

    model_factory = Factory(cfg['train_params'])
    loaders = DataFactory(cfg['data_params'], fold=cli_args.fold)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    callbacks = create_callbacks(cfg['train_params']['name'], cfg['dumps'])

    trainer = Runner(stages=cfg['stages'],
                     factory=model_factory,
                     callbacks=callbacks,
                     device=device)
    trainer.fit(loaders)
def init(variant, ckpt="lve", base="", prefix="", graph_file=None, device=None):
    """Build an eval-mode Runner for `variant` and return (runner, ckpt_path).

    Config resolution: ../configs/<prefix>/<variant>.yaml, optionally layered
    on top of ../configs/<base>.yaml. The checkpoint filename is
    "<variant>.<ckpt>.pth" (path finalized by prepare_config).
    """
    # Initialize model
    # If graph file is specified in config, that will be used
    # If config specifies directory, we'll use `graph_file` for the filename
    # If `graph_file` is None, the (alphabetically) first file will be used
    run_type = "eval"
    exp_config = osp.join("../configs", prefix, f"{variant}.yaml")
    if base != "":
        # Base config first so the variant's settings override it.
        exp_config = [osp.join("../configs", f"{base}.yaml"), exp_config]
    ckpt_path = f"{variant}.{ckpt}.pth"
    config, ckpt_path = prepare_config(exp_config, run_type, ckpt_path, [
        "USE_TENSORBOARD", False,
        "SYSTEM.NUM_GPUS", 1,
    ], suffix=prefix, graph_file=graph_file)
    if graph_file is None and osp.isdir(config.MODEL.GRAPH_FILE):
        config.defrost()
        graphs = sorted(f for f in os.listdir(config.MODEL.GRAPH_FILE)
                        if f.endswith('.edgelist'))
        # NOTE(review): original author flagged this line as wrong ("I messed
        # this up") — taking the alphabetically first .edgelist may not pick
        # the intended graph; verify against the experiment's expectations.
        graph = graphs[0]
        config.MODEL.GRAPH_FILE = osp.join(config.MODEL.GRAPH_FILE, graph)
        # Presumably the first 5 chars uniquely identify the graph — TODO confirm.
        graph_id = graph[:5]
        add_suffix(config, graph_id)
        ckpt_dir, ckpt_fn = osp.split(ckpt_path)
        ckpt_path = osp.join(ckpt_dir, graph_id, ckpt_fn)  # Update relative path
        # Incorporate graph file into this loading. Currently, it will use the default one in the config.
        config.freeze()
    runner = Runner(config)
    runner.logger.clear_filehandlers()
    runner.load_device(device=device)
    return runner, ckpt_path
def initialize_shock(self, shock_config):
    """Read a shock definition from XML, register it, and record its measure.

    Side effects:
        - appends the new Shock to self.shocks
        - sets self.shock_measure to the (key, value) of the last non-zero
          legend entry (unchanged behavior: later entries overwrite earlier).
    """
    # Local imports: Runner/Shock depend on this module, so importing them
    # at module scope would be circular.
    from src.runner import Runner
    runner = Runner(self)
    from src.shock import Shock
    shock = Shock(self, runner)
    shock.read_xml_config_file(shock_config)
    self.shocks.append(shock)
    # NOTE(review): 'intitial' typo is part of Shock's public API; keep the
    # call as-is until Shock itself is renamed.
    shock.measure_intitial_shock(self)
    # FIX: dict.iteritems() is Python 2 only — items() works on both 2 and 3.
    # Also test the already-unpacked value instead of re-indexing the dict.
    for k, v in shock.legend.items():
        if v != 0:
            self.shock_measure = (k, v)
    # df_shock = pd.DataFrame[]
    # The original also looped over shock.asset_returns purely to hold a
    # commented-out debug print; that loop was a no-op and has been removed.
    # To inspect the parsed shock, log shock.asset_returns here.
def start():
    """Run a Runner for every account in the ACCOUNT_FILE_NAME JSON file.

    Best-effort: a failing account closes its webdriver and processing
    continues with the next account. Always returns 'Finished'.
    """
    try:
        account_file = os.getenv('ACCOUNT_FILE_NAME')
        if not account_file:
            # FIX: check the env var BEFORE building the path. The original
            # computed "var/accounts/" + None first, which raised TypeError
            # and made this friendly message unreachable.
            logger.error('ACCOUNT_FILE_NAME environment variable not set')
            return 'Finished'
        path = "var/accounts/" + account_file
        if os.path.isfile(path):
            logger.info('Found file ' + path)
            with open(path) as json_file:
                data = json.load(json_file)
            if not data[0]['username'] or not data[0]['password']:
                logger.error('Username and password are required')
            for account in data:
                runner = Runner(account, os.getenv('API_URL'), logger)
                try:
                    runner.start()
                except Exception:
                    # FIX: narrowed from a bare `except:` so Ctrl-C still
                    # works; clean up the driver and continue with the next
                    # account (original best-effort behavior preserved).
                    logger.exception('Runner failed; closing driver')
                    runner.driver.quit()
        else:
            logger.error('Could not find file: ' + path)
    except Exception as error:
        logger.error(traceback.format_exc())
        logger.exception(error)
    return 'Finished'
'input_dropout': 0.05, 'optimizer': { 'lr': 0.0015, 'type': 'adam' } } #tf-idf = {'batch_norm': 'no', 'batch_size': 192.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.0, 'hidden_layers': 2.0, 'hidden_units': 192.0, 'input_dropout': 0.05, 'optimizer': {'lr': 0.0018, 'type': 'adam'}} #word2vec_mean = {'batch_norm': 'before_act', 'batch_size': 256.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 2.0, 'hidden_units': 128.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00037, 'type': 'adam'}} #word2vec_max = {'batch_norm': 'no', 'batch_size': 32.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.3, 'hidden_layers': 3.0, 'hidden_units': 160.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00016, 'type': 'adam'}} #word2vec_concat = {'batch_norm': 'before_act', 'batch_size': 32.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 2.0, 'hidden_units': 96.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00075, 'type': 'sgd'}} #word2vec_hier = {'batch_norm': 'no', 'batch_size': 96.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 256.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0024, 'type': 'sgd'}} #fasttext_mean = {'batch_norm': 'before_act', 'batch_size': 224.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.3, 'hidden_layers': 2.0, 'hidden_units': 192.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0032, 'type': 'sgd'}} #fasttex_max = {'batch_norm': 'no', 'batch_size': 160.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 128.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.00016, 'type': 'adam'}} #fasttext_concat = {'batch_norm': 'before_act', 'batch_size': 192.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.6, 'hidden_layers': 2.0, 'hidden_units': 224.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00048, 'type': 'adam'}} #fasttext_hier = {'batch_norm': 'no', 'batch_size': 64.0, 'hidden_activation': 'prelu', 'hidden_dropout': 
0.3, 'hidden_layers': 2.0, 'hidden_units': 128.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.00025, 'type': 'adam'}} #doc2vec-dbow = {'batch_norm': 'no', 'batch_size': 96.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 4.0, 'hidden_units': 160.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0017, 'type': 'sgd'}} #doc2vec-dmpv = {'batch_norm': 'before_act', 'batch_size': 192.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.25, 'hidden_layers': 4.0, 'hidden_units': 224.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0040, 'type': 'sgd'}}, #doc2vec-concat = {'batch_norm': 'no', 'batch_size': 160.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 256.0, 'input_dropout': 0.05, 'optimizer': {'lr': 0.0025, 'type': 'sgd'}} #sdv = {'batch_norm': 'before_act', 'batch_size': 192.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 256.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0029, 'type': 'sgd'}} params.update(bow) params_MLP = dict(params) # MLPで予測 feature = "bow" runner = Runner(run_name='MLP1', model_cls=ModelMLP, features=feature, params=params_MLP) runner.run_train_cv()
import sys, os
sys.path.append('../')
import numpy as np
import pandas as pd
from src.runner import Runner
from src.model_NB import ModelMultinomialNB

if __name__ == '__main__':
    # Default Multinomial Naive Bayes hyperparameters.
    params = {
        'alpha': 1.0,
        'fit_prior': True,
        'class_prior': None,
    }

    #### Best Parameters (per feature set; only one is active at a time)
    bow = {'alpha': 1.0}
    #tf-tdf = { 'alpha' : 1.0 }
    #n-gram = { 'alpha' : 1.0 }
    #ngram-tf-idf = { 'alpha' : 0.1 }

    params.update(bow)
    params_NB = dict(params)

    # Cross-validated training with Multinomial Naive Bayes on BoW features.
    feature = "bow"
    runner = Runner(run_name='NB1',
                    model_cls=ModelMultinomialNB,
                    features=feature,
                    params=params_NB)
    runner.run_train_cv()
def main():
    """End-to-end pipeline: load data/features, run adversarial validation,
    train with CV, and write submission/oof/feature-importance artifacts."""
    # =========================================
    # === Settings
    # =========================================
    # Get logger
    logger = get_logger(__name__)
    logger.info('Settings')

    # Get argument
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default='./configs/model_0.json')
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    logger.info(f'config: {args.config}')
    logger.info(f'debug: {args.debug}')

    # Get config
    # NOTE(review): open() without close/with leaks a file handle here.
    config = json.load(open(args.config))
    config.update({'args': {'config': args.config, 'debug': args.debug}})
    if config["model"]["name"] == "lightgbm":
        config["model"]["model_params"]["nthread"] = cpu_count()

    # Create a directory for model output
    model_no = pathlib.Path(args.config).stem
    model_output_dir = (pathlib.Path(config['dataset']['output_directory'])
                        / model_no)
    if not model_output_dir.exists():
        model_output_dir.mkdir()
    logger.info(f'model_output_dir: {str(model_output_dir)}')
    logger.debug(f'model_output_dir exists: {model_output_dir.exists()}')
    config.update({'model_output_dir': str(model_output_dir)})

    # =========================================
    # === Loading data
    # =========================================
    logger.info('Loading data')
    # Get train and test
    input_dir = pathlib.Path(config['dataset']['input_directory'])
    train = pd.read_csv(input_dir / 'train.csv')
    test = pd.read_csv(input_dir / 'test.csv')

    # Get target values
    target_column = config['data_type']['target']
    y_train = train[target_column].values

    # =========================================
    # === Loading features
    # =========================================
    logger.info('Loading features')
    # Get features
    x_train, x_test = load_features(config)
    feature_name = x_test.columns
    logger.debug(f'number of features: {len(feature_name)}')

    # =========================================
    # === Adversarial Validation
    # =========================================
    # Train a classifier to distinguish train rows (0) from test rows (1);
    # high separability indicates train/test distribution shift.
    logger.info("adversarial validation")
    # NOTE(review): train_adv/test_adv alias x_train/x_test, so adding the
    # 'target' column mutates them in place — the features are reloaded
    # below, which is why this does not contaminate the real training.
    train_adv = x_train
    test_adv = x_test
    train_adv['target'] = 0
    test_adv['target'] = 1
    train_test_adv = pd.concat([train_adv, test_adv], axis=0,
                               sort=False).reset_index(drop=True)
    target = train_test_adv['target'].values

    train_set, val_set = train_test_split(train_test_adv, test_size=0.33,
                                          random_state=71, shuffle=True)
    x_train_adv = train_set[feature_name]
    y_train_adv = train_set['target']
    x_val_adv = val_set[feature_name]
    y_val_adv = val_set['target']
    logger.debug(f'the number of train set: {len(x_train_adv)}')
    logger.debug(f'the number of valid set: {len(x_val_adv)}')

    train_lgb = lgb.Dataset(x_train_adv, label=y_train_adv)
    val_lgb = lgb.Dataset(x_val_adv, label=y_val_adv)
    lgb_model_params = config["adversarial_validation"]["lgb_model_params"]
    lgb_train_params = config["adversarial_validation"]["lgb_train_params"]
    clf = lgb.train(lgb_model_params, train_lgb,
                    valid_sets=[train_lgb, val_lgb],
                    valid_names=['train', 'valid'],
                    **lgb_train_params)

    # Plot the top-20 gain importances of the adversarial classifier.
    feature_imp = pd.DataFrame(sorted(
        zip(clf.feature_importance(importance_type='gain'), feature_name)),
        columns=['value', 'feature'])
    plt.figure(figsize=(20, 10))
    sns.barplot(x='value',
                y='feature',
                data=feature_imp.sort_values(by='value',
                                             ascending=False).head(20))
    plt.title('LightGBM Features')
    plt.tight_layout()
    plt.savefig(model_output_dir / "feature_importance_adv.png")

    config.update({
        'adversarial_validation_result': {
            'score': clf.best_score,
            'feature_importances':
                feature_imp.set_index("feature").sort_values(
                    by="value",
                    ascending=False).head(20).to_dict()["value"]
        }
    })

    # =========================================
    # === Train model and predict
    # =========================================
    logger.info('Train model and predict')
    # Get features (reloaded fresh — see aliasing note above)
    x_train, x_test = load_features(config)
    feature_name = x_test.columns
    logger.debug(f'number of features: {len(feature_name)}')

    # Get folds
    folds_ids = Fold(
        n_splits=config['cv']['n_splits'],
        shuffle=config['cv']['shuffle'],
        random_state=config['cv']['random_state']).get_stratifiedkfold(
            x_train, y_train)

    # Train and predict
    model_name = config['model']['name']
    model_cls = model_map[model_name]
    params = config['model']
    runner = Runner(model_cls, params, model_output_dir,
                    f'Train_{model_cls.__name__}')
    oof_preds, evals_result = runner.train_cv(x_train, y_train, folds_ids)
    config.update(evals_result)
    test_preds = runner.predict_cv(x_test)

    # =========================================
    # === Make submission file
    # =========================================
    sub = create_submission(test, test_preds, target_column)
    sub.to_csv(model_output_dir / 'submission.csv', index=False, header=True)

    # =========================================
    # === Save files
    # =========================================
    save_path = model_output_dir / 'output.json'
    json_dump(config, save_path)
    pd.DataFrame(oof_preds, columns=["target"]).to_csv(model_output_dir /
                                                       'oof.csv',
                                                       index=False,
                                                       header=True)
def main():
    """Single-fold training pipeline: load features/target/folds via
    FeatureLoader, train with CV, save importances/oof/log, upload to GCS."""
    # =========================================
    # === Settings
    # =========================================
    # Get logger
    logger = get_logger(__name__)
    logger.info('Settings')

    # Get argument
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config',
        default='model_lgb_hakubishin_20200317/configs/model_0.json')
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    logger.info(f'config: {args.config}')
    logger.info(f'debug: {args.debug}')

    # Get config
    # NOTE(review): open() without close/with leaks a file handle here.
    config = json.load(open(args.config))
    config.update({
        'args': {
            'config': args.config,
            'debug': args.debug
        }
    })
    config["model"]["model_params"]["nthread"] = cpu_count()

    # Create a directory for model output
    model_no = pathlib.Path(args.config).stem
    model_output_dir = (
        pathlib.Path(config['model_dir_name']) /
        pathlib.Path(config['dataset']['output_directory']) / model_no
    )
    if not model_output_dir.exists():
        model_output_dir.mkdir()
    logger.info(f'model_output_dir: {str(model_output_dir)}')
    logger.debug(f'model_output_dir exists: {model_output_dir.exists()}')
    config.update({
        'model_output_dir': str(model_output_dir)
    })

    # =========================================
    # === Loading features
    # =========================================
    logger.info('Loading features')
    logger.info(f'targets: {config["target"]}')
    logger.info(f'features: {config["features"]}')

    # features
    x_train = FeatureLoader(
        data_type="training", debugging=args.debug
    ).load_features(config["features"])

    # targets
    y_train_set = FeatureLoader(
        data_type="training", debugging=args.debug
    ).load_features(config["target"])

    # folds
    folds_train = FeatureLoader(
        data_type="training", debugging=args.debug
    ).load_features(config["folds"])

    logger.debug(f'y_train_set: {y_train_set.shape}')
    logger.debug(f'x_train: {x_train.shape}')

    # =========================================
    # === Train model and predict
    # =========================================
    logger.info('Train model and predict')

    # Get target values
    y_train = y_train_set["Target_answered_correctly"].values

    # Get folds: a single train/validation split taken from the Fold_val flag.
    trn_idx = folds_train.query("Fold_val != 1").index
    val_idx = folds_train.query("Fold_val == 1").index
    folds_ids = [(trn_idx, val_idx)]
    logger.debug(f"n_trn={len(trn_idx)}, n_val={len(val_idx)}")
    logger.debug(f"trn_pos={y_train[trn_idx].sum()}, val_pos={y_train[val_idx].sum()}")

    # Train and predict
    model_cls = model_map[config['model']['name']]
    model_params = config['model']
    runner = Runner(
        model_cls,
        model_params,
        model_output_dir,
        f'{model_cls.__name__}',
        n_fold=1,
    )
    oof_preds, evals_result, importances = runner.train_cv(
        x_train, y_train, folds_ids)
    config.update(evals_result)

    # Save importances (mean over folds, descending by gain value).
    importances.mean(axis=1).reset_index().rename(
        columns={"index": "feature", 0: "value"}
    ).sort_values("value", ascending=False).to_csv(
        model_output_dir / "importances.csv", index=False
    )

    # Save oof-pred file
    oof_preds_file_name = f"oof_pred"
    np.save(model_output_dir / oof_preds_file_name, oof_preds)
    logger.info(f'Save oof-pred file: {model_output_dir/ oof_preds_file_name}')

    # Save files (override)
    logger.info('Save files')
    save_path = model_output_dir / 'output.json'
    json_dump(config, save_path)
    logger.info(f'Save model log: {save_path}')

    # =========================================
    # === Upload to GCS
    # =========================================
    if not args.debug:
        logger.info('Upload to GCS')
        bucket_dir_name = config["model_dir_name"] + "/" + model_no
        logger.info(f'bucket_dir_name: {bucket_dir_name}')
        files = list(model_output_dir.iterdir())
        upload_to_gcs(bucket_dir_name, files)
'batch_norm': 'before_act', 'optimizer': { 'type': 'adam', 'lr': 0.005 }, 'batch_size': 100, 'nb_epoch': 500, 'embedding_model': None, 'Bidirectional': False, } # 双方向LSATM #params = { #} # fasttext.bin は compress.py でボキャブラリを圧縮したファイル params['embedding_model'] = KeyedVectors.load_word2vec_format( './fasttext.bin', binary=True) params_LSTM = dict(params) # features には必ず raw_textを指定 runner = Runner(run_name='LSTM1', model_cls=ModelLSTM, features="raw_text", params=params_LSTM) # 1回だけ実行 # runner.train_fold(0) # クロスバリデーションで実行 runner.run_train_cv()
# # INITIALIZATION # environment_directory = str(args[1]) identifier = str(args[2]) log_directory = str(args[3]) # Configure logging parameters so we get output while the program runs logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %H:%M:%S', filename=log_directory + identifier + ".log", level=logging.INFO) logging.info('START logging for run: %s', environment_directory + identifier + ".xml") environment = Environment(environment_directory, identifier) runner = Runner(environment) # # UPDATE STEP # for i in range(int(environment.num_simulations)): logging.info(' STARTED with run %s', str(i)) environment.initialize(environment_directory, identifier) runner.initialize(environment) # do the run runner.do_run(environment) logging.info(' DONE') # # MEASUREMENT AND LOGGING #
'silent': 1, 'random_state': 71, 'num_boost_round': 10000, 'early_stopping_rounds': 10, 'n_estimator': 500 } #### Best Parameters bow = {'num_leaves': 32, 'colsample_bytree': 0.466} #tf-idf = { 'num_leaves' : 22, 'colsample_bytree' : 0.540 } #n-gram = { 'num_leaves' : 34, 'colsample_bytree' : 0.689 } #ngram-tf-idf = { 'num_leaves' : 26, 'colsample_bytree' : 0.393 } #word2vec_mean = { 'num_leaves' : 20, 'colsample_bytree' : 0.379 } #word2vec_max = { 'num_leaves' : 22, 'colsample_bytree' : 0.387 } #word2vec_concat = { 'num_leaves' : 16, 'colsample_bytree' : 0.310 } #word2vec_hier = { 'num_leaves' : 30, 'colsample_bytree' : 0.888 } #fasttext_mean = { 'num_leaves' : 34, 'colsample_bytree' : 0.546, 'subsample' : 0.7725, 'learning_rate': 0.01 } #fasttex_max = { 'num_leaves' : 28, 'colsample_bytree' : 0.447 } #fasttext_concat = { 'num_leaves' : 12, 'colsample_bytree' : 0.344 } #fasttext_hier = { 'num_leaves' : 10, 'colsample_bytree' : 0.319 } #doc2vec-dbow = { 'num_leaves' : 46, 'colsample_bytree' : 0.303, 'subsample' : 0.879, 'learning_rate': 0.01 } #doc2vec-dmpv = { 'num_leaves' : 30, 'colsample_bytree' : 0.597, 'subsample' : 0.910, 'learning_rate': 0.01 } #doc2vec-concat = { 'num_leaves' : 25, 'colsample_bytree' : 0.624, 'subsample' : 0.590, 'learning_rate': 0.05 } #sdv = {'colsample_bytree': '0.539', 'learning_rate': 0.01, 'num_leaves': 56, 'subsample': 0.942} params_lgb.update(bow) params_lgb_all = dict(params_lgb) # LightGBMによる学習・予測 runner = Runner('lgb1', ModelLGB, "bow", params_lgb_all) runner.run_train_cv()
'gru_dropout': 0.3, 'recurrent_dropout': 0.3, 'hidden_layers': 3, 'hidden_units': 128, 'hidden_activation': 'relu', 'hidden_dropout': 0.3, 'batch_norm': 'before_act', 'optimizer': { 'type': 'adam', 'lr': 0.001 }, 'batch_size': 100, 'nb_epoch': 500, 'embedding_model': None } # fasttext.bin は compress.py でボキャブラリを圧縮したファイル params['embedding_model'] = KeyedVectors.load_word2vec_format( './fasttext.bin', binary=True) params_GRU = dict(params) # features には必ず raw_textを指定 runner = Runner(run_name='GRU1', model_cls=ModelGRU, features="raw_text", params=params_GRU) # 1回だけ実行 # runner.train_fold(0) # クロスバリデーションで実行 runner.run_train_cv()
'verbose' : 1, 'warm_start' : False, 'n_jobs' : None, 'l1_ratio' : None, } #### Best Parameters bow = { 'C' : 0.001 } #tf-idf = { 'C' : 1.0 } #n-gram = { 'C' : 1.0 } #ngram-tf-idf = { 'C' : 0.1 } #word2vec_mean = { 'C' : 0.1 } #word2vec_max = { 'C' : 0.1 } #word2vec_concat = { 'C' : 10.0 } #word2vec_hier = { 'C' : 0.1 } #fasttext_mean = { 'C' : 0.001 } #fasttex_max = { 'C' : 0.001 } #fasttext_concat = { 'C' : 0.001 } #fasttext_hier = { 'C' : 0.001 } #doc2vec-dbow = { 'C' : 0.001 } #doc2vec-dmpv = { 'C' : 0.1 } #doc2vec-concat = { 'C' : 0.001 } #sdv = { 'C' : 0.001 } params.update(bow) params_logistic = dict(params) # Logistic Regression での予測 feature = "bow" runner = Runner(run_name='logis', model_cls=ModelLogistic, features=feature, params=params_logistic) runner.run_train_cv()
def main():
    """1D-CNN pipeline: merge spectrum data onto tabular features, scale,
    train with CV, and write submission/oof artifacts."""
    # =========================================
    # === Settings
    # =========================================
    # Get logger
    logger = get_logger(__name__)
    logger.info('Settings')

    # Get argument
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default='./configs/model_1dcnn_0.json')
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    logger.info(f'config: {args.config}')
    logger.info(f'debug: {args.debug}')

    # Get config
    # NOTE(review): open() without close/with leaks a file handle here.
    config = json.load(open(args.config))
    config.update({'args': {'config': args.config, 'debug': args.debug}})
    if config["model"]["name"] == "lightgbm":
        config["model"]["model_params"]["nthread"] = cpu_count()

    # Create a directory for model output
    model_no = pathlib.Path(args.config).stem
    model_output_dir = (pathlib.Path(config['dataset']['output_directory'])
                        / model_no)
    if not model_output_dir.exists():
        model_output_dir.mkdir()
    logger.info(f'model_output_dir: {str(model_output_dir)}')
    logger.debug(f'model_output_dir exists: {model_output_dir.exists()}')
    config.update({'model_output_dir': str(model_output_dir)})

    # =========================================
    # === Loading data
    # =========================================
    logger.info('Loading data')
    # Get train and test
    input_dir = pathlib.Path(config['dataset']['input_directory'])
    train = pd.read_csv(input_dir / 'train.csv')
    test = pd.read_csv(input_dir / 'test.csv')
    spectrum = pd.read_csv(input_dir / 'spectrum_stack.csv')
    spectrum_fitting = pd.read_csv(input_dir / 'spectrum_fitting_stack.csv')

    # 512 raw + 512 fitted wavelength columns per spectrum file.
    wv_cols = [f"wavelength_{i}" for i in range(512)]
    wv_fit_cols = [f"fitting_wavelength_{i}" for i in range(512)]
    train_spectrum = pd.merge(train, spectrum,
                              on="spectrum_filename", how="left")
    test_spectrum = pd.merge(test, spectrum,
                             on="spectrum_filename", how="left")
    train_spectrum = pd.merge(train_spectrum, spectrum_fitting,
                              on="spectrum_filename", how="left")
    test_spectrum = pd.merge(test_spectrum, spectrum_fitting,
                             on="spectrum_filename", how="left")

    # Normalize each raw spectrum row by its own standard deviation.
    train_std = np.std(train_spectrum[wv_cols].values, axis=1, keepdims=True)
    test_std = np.std(test_spectrum[wv_cols].values, axis=1, keepdims=True)
    train_spectrum[wv_cols] = train_spectrum[wv_cols].values / train_std
    test_spectrum[wv_cols] = test_spectrum[wv_cols].values / test_std
    spectrum_cols = wv_cols + wv_fit_cols
    train_spectrum = train_spectrum[spectrum_cols]
    test_spectrum = test_spectrum[spectrum_cols]

    # Get target values
    target_column = config['data_type']['target']
    y_train = train[target_column].values

    # =========================================
    # === Loading features
    # =========================================
    logger.info('Loading features')
    # Get features
    x_train, x_test = load_features(config)
    feature_name = x_test.columns
    logger.debug(f'number of features: {len(feature_name)}')

    # =========================================
    # === features preprocess
    # =========================================
    # Fit dummies/scaler on train+test jointly, then split back by row count.
    x_total = x_train.append(x_test).reset_index(drop=True)
    remove_features = [c for c in x_total.columns if c.find("layout_x") != -1]
    remove_features += [c for c in x_total.columns if c.find("layout_y") != -1]
    x_total.drop(columns=remove_features, inplace=True)
    x_total = pd.get_dummies(
        x_total, columns=["LabelEncoding_exc_wl", "LabelEncoding_layout_a"])
    x_total.fillna(0, inplace=True)

    from sklearn.preprocessing import StandardScaler
    numeric_features = [
        c for c in x_total.columns if c.find("LabelEncoding_") == -1
    ]
    sc = StandardScaler()
    x_total[numeric_features] = sc.fit_transform(x_total[numeric_features])

    x_train = x_total.iloc[:len(train)]
    x_test = x_total.iloc[len(train):].reset_index(drop=True)
    # Append the normalized spectrum columns to the tabular features.
    x_train = pd.concat([x_train, train_spectrum], axis=1)
    x_test = pd.concat([x_test, test_spectrum], axis=1)
    logger.debug(f'number of features with spec in train: {x_train.shape}')
    logger.debug(f'number of features with spec in test: {x_test.shape}')

    # =========================================
    # === Train model and predict
    # =========================================
    logger.info('Train model and predict')

    # Get folds
    folds_ids = Fold(
        n_splits=config['cv']['n_splits'],
        shuffle=config['cv']['shuffle'],
        random_state=config['cv']['random_state']).get_stratifiedkfold(
            x_train, y_train)

    # Train and predict
    model_name = config['model']['name']
    model_cls = model_map[model_name]
    params = config['model']
    runner = Runner(model_cls, params, model_output_dir,
                    f'Train_{model_cls.__name__}')
    oof_preds, evals_result = runner.train_cv(x_train, y_train, folds_ids)
    config.update(evals_result)
    test_preds = runner.predict_cv(x_test)

    # =========================================
    # === Make submission file
    # =========================================
    sub = create_submission(test, test_preds, target_column)
    sub.to_csv(model_output_dir / 'submission.csv', index=False, header=True)

    # =========================================
    # === Save files
    # =========================================
    save_path = model_output_dir / 'output.json'
    json_dump(config, save_path)
    pd.DataFrame(oof_preds, columns=["target"]).to_csv(model_output_dir /
                                                       'oof.csv',
                                                       index=False,
                                                       header=True)
import sys, os
sys.path.append('../')
import numpy as np
import pandas as pd
from src.runner import Runner
from src.model_GaussNB import ModelGaussNB

if __name__ == '__main__':
    # Gaussian Naive Bayes defaults (mirrors sklearn's GaussianNB signature).
    params = {'priors': None, 'var_smoothing': 1e-09}
    params_NB = dict(params)

    # Run on the chosen feature set.
    feature = "bow"
    runner = Runner(run_name='GNB1',
                    model_cls=ModelGaussNB,
                    features=feature,
                    params=params_NB)
    # Single-fold run instead:
    # runner.train_fold(0)
    # Full cross-validation run:
    runner.run_train_cv()
def main():
    """Train one model per engagement target and write predictions.

    Pipeline: parse CLI args -> load JSON config -> load feature/target/key/
    fold tables -> for each engagement category, build CV folds, train via
    ``Runner.train_cv``, save OOF predictions, a submission CSV and the
    config/eval log -> optionally upload everything to GCS.

    Side effects: creates ``model_output_dir`` and writes files into it;
    mutates ``config`` in place with args, output dir and eval results.
    """
    # =========================================
    # === Settings
    # =========================================
    logger = get_logger(__name__)
    logger.info('Settings')

    # Get argument
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config',
        default='model_lgb_hakubishin_20200317/configs/model_0.json')
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    logger.info(f'config: {args.config}')
    logger.info(f'debug: {args.debug}')

    # Get config.  Use a context manager so the config file handle is
    # closed deterministically (the original `json.load(open(...))` leaked it).
    with open(args.config) as config_file:
        config = json.load(config_file)
    config.update({
        'args': {
            'config': args.config,
            'debug': args.debug
        }
    })
    config["model"]["model_params"]["nthread"] = cpu_count()

    # Create a directory for model output.  parents/exist_ok make this safe
    # when intermediate directories are missing or the dir already exists
    # (the old exists()-then-mkdir() was racy and failed on missing parents).
    model_no = pathlib.Path(args.config).stem
    model_output_dir = (
        pathlib.Path(config['model_dir_name']) /
        pathlib.Path(config['dataset']['output_directory']) / model_no
    )
    model_output_dir.mkdir(parents=True, exist_ok=True)
    logger.info(f'model_output_dir: {str(model_output_dir)}')
    logger.debug(f'model_output_dir exists: {model_output_dir.exists()}')
    config.update({
        'model_output_dir': str(model_output_dir)
    })

    # =========================================
    # === Loading features
    # =========================================
    logger.info('Loading features')
    logger.info(f'targets: {config["target"]}')
    logger.info(f'features: {config["features"]}')
    logger.info(f'keys: {config["key"]}')
    logger.info(f'folds: {config["folds"]}')

    # features
    x_train = FeatureLoader(
        data_type="training", debugging=args.debug
    ).load_features(config["features"])
    x_test = FeatureLoader(
        data_type=config["test_data_type"], debugging=args.debug
    ).load_features(config["features"])

    # targets
    y_train_set = FeatureLoader(
        data_type="training", debugging=args.debug
    ).load_features(config["target"])

    # keys
    key_test = FeatureLoader(
        data_type=config["test_data_type"], debugging=args.debug
    ).load_features(config["key"])

    # folds
    folds_train = FeatureLoader(
        data_type="training", debugging=args.debug
    ).load_features(config["folds"])

    logger.debug(f'test_data_type: {config["test_data_type"]}')
    logger.debug(f'y_train_set: {y_train_set.shape}')
    logger.debug(f'x_train: {x_train.shape}')
    logger.debug(f'x_test: {x_test.shape}')
    logger.debug(f'key_test: {key_test.shape}')

    # =========================================
    # === Train model and predict
    # =========================================
    logger.info('Train model and predict')

    # Modeling: one independent model per engagement category.
    target_columns = [
        "reply_engagement",
        "retweet_engagement",
        "retweet_with_comment_engagement",
        "like_engagement",
    ]
    for cat in target_columns:
        logger.info(f'============= {cat} =============')

        # Get target values
        y_train = y_train_set[f"TargetCategories_{cat}"].values

        # Get folds.  All targets share the same precomputed fold column;
        # fold value == i marks the validation rows of fold i.
        folds_col = ["StratifiedGroupKFold_retweet_with_comment_engagement"]
        assert len(folds_col) == 1, "The number of fold column must be one"
        folds = folds_train[folds_col]
        n_fold = folds.max().values[0] + 1
        folds_ids = []
        logger.debug(f"total pos: {y_train.sum()}")
        for i in range(n_fold):
            trn_idx = folds[folds != i].dropna().index
            val_idx = folds[folds == i].dropna().index
            folds_ids.append((trn_idx, val_idx))
            logger.debug(f"{i+1}fold: n_trn={len(trn_idx)}, n_val={len(val_idx)}")
            logger.debug(f"{i+1}fold: trn_pos={y_train[trn_idx].sum()}, val_pos={y_train[val_idx].sum()}")

        # Train and predict
        model_cls = model_map[config['model']['name']]
        model_params = config['model']
        runner = Runner(
            model_cls, model_params, model_output_dir,
            f'Train_{model_cls.__name__}_{cat}'
        )
        oof_preds, test_preds, evals_result = runner.train_cv(
            x_train, y_train, x_test, folds_ids, config)
        # Rename the per-category eval results so categories don't clobber
        # each other inside the shared config dict.
        evals_result[f"evals_result_{cat}"] = evals_result["evals_result"]
        evals_result.pop("evals_result")
        config.update(evals_result)

        # Save oof-pred file
        oof_preds_file_name = f"{cat}_oof_pred"
        np.save(model_output_dir / oof_preds_file_name, oof_preds)
        logger.info(f'Save oof-pred file: {model_output_dir/ oof_preds_file_name}')

        # Make submission file (header=False is intentional for the
        # submission format).
        sub = pd.concat([key_test, pd.Series(test_preds).rename("pred")], axis=1)
        sub = sub[["KeyCategories_tweet_id", "KeyCategories_engaging_user_id", "pred"]]
        sub_file_name = f"{cat}_submission_{config['test_data_type']}.csv"
        sub.to_csv(model_output_dir/ sub_file_name, index=False, header=False)
        logger.info(f'Save submission file: {model_output_dir/ sub_file_name}')

        # Save files (override): rewritten every iteration so the log always
        # reflects the categories trained so far.
        logger.info('Save files')
        save_path = model_output_dir / 'output.json'
        json_dump(config, save_path)
        logger.info(f'Save model log: {save_path}')

    # =========================================
    # === Upload to GCS
    # =========================================
    if not args.debug:
        logger.info('Upload to GCS')
        bucket_dir_name = config["model_dir_name"] + "/" + model_no
        logger.info(f'bucket_dir_name: {bucket_dir_name}')
        files = list(model_output_dir.iterdir())
        upload_to_gcs(bucket_dir_name, files)
# load neighbourhood data
with open('parameters/lock_down/neighbourhood_data.json') as json_file:
    neighbourhood_data = json.load(json_file)

# load age data (one column per ward; transpose -> {ward: age distribution})
age_distribution = pd.read_csv('age_dist.csv', sep=';', index_col=0)
age_distribution_per_ward = dict(age_distribution.transpose())

# Monte Carlo simulation: one independent run per seed
for seed in range(parameters['monte_carlo_runs']):
    # make new folder for seed, if it does not exist.
    # exist_ok avoids the exists()/makedirs() race of the original code.
    os.makedirs('measurement/lockdown/seed{}'.format(seed), exist_ok=True)

    # initialization
    environment = EnvironmentNetwork(seed, parameters, neighbourhood_data, age_distribution_per_ward)

    # running the simulation
    runner = Runner()
    runner.lock_down(environment, seed)

    # save network snapshots (skipped in high-performance mode)
    if not parameters["high_performance"]:
        for idx, network in enumerate(environment.infection_states):
            # Replace each agent object by its status so the graph is
            # serializable.  Index by the node key itself, not the enumerate
            # counter: the original `network.nodes[i]` was only correct when
            # node labels happened to be 0..n-1 in iteration order.
            for node in network.nodes:
                network.nodes[node]['agent'] = network.nodes[node]['agent'].status
            idx_string = '{0:04}'.format(idx)
            nx.write_graphml_lxml(network, "measurement/lockdown/seed{}/network_time{}.graphml".format(seed, idx_string))
from src.agent import KArmedBanditAgent
from src.environment import KArmedBanditEnvironment
from src.runner import Runner

# Problem size: arms per bandit, and how many bandit problems to run.
k = 10
n_bandits = 3

# Wire environment and agent together and drive the experiment.
environment = KArmedBanditEnvironment(k=k, n_bandits=n_bandits)
bandit_agent = KArmedBanditAgent(k=k, n_bandits=n_bandits)
experiment = Runner(environment, bandit_agent, iterations=5000)
experiment.run()

# Visualize the outcome of the run.
experiment.plot_environment()
experiment.plot_selected_actions()
experiment.plot_value_function()