Example #1
def main():
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('read_path')
    arg_parser.add_argument('write_path')
    args = arg_parser.parse_args()

    with open(args.read_path, 'r') as source:
        text = source.read()

        lexer = Lexer(text)
        tokens = lexer.lex()

        parser = Parser(tokens)
        ast = parser.parse()

        symbolizer = Symbolizer(ast)
        symbolizer.symbolize()

        optimizer = Optimizer(ast)
        optimizer.optimize()

        grapher = Grapher(ast)
        grapher.graph()

        generator = Generator(ast)
        generator.generate(args.write_path)

        runner = Runner(ast)
        runner.run()
Example #2
def main():

    try:
        config = Config.load()

        init_logging(config)

        runner = Runner(config)
        runner.run()

        return 0

    except KeyboardInterrupt:
        _logger.info("aborted.")
        return 0

    except MessageException as ex:
        _logger.error(ex)
        _logger.error("aborted!")
        return 1

    except Exception as ex:
        _logger.exception(ex)
        _logger.error("aborted!")
        # no runner.close() to signal abnormal termination!
        return 1
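
The closing comment hints at a shutdown protocol: an orderly exit calls runner.close(), so a missing close() marks the run as abnormal. A minimal sketch of that presumed contract (Runner.close() and the marker file are assumptions; the excerpt never shows them):

import os

class Runner:
    MARKER = "runner.dirty"  # hypothetical clean-shutdown marker

    def __init__(self, config):
        self.config = config

    def run(self):
        # drop a marker at startup; if it is still present on the next
        # start, the previous run never reached close()
        with open(self.MARKER, "w"):
            pass

    def close(self):
        # called only on orderly termination
        os.remove(self.MARKER)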
Example #3
    def test_run_proc(self, cfg_read):
        with mock.patch('src.runner.multiprocessing',
                        autospec=True) as multi_process_mock:
            runner = Runner()
            procs = [runner.web_monitor_proc]
            runner.run_procs(procs, cfg_read)

            multi_process_mock.Process.assert_called_once_with(
                target=procs[0], args=(cfg_read, ))
            multi_process_mock.Process.return_value.join.assert_called_once()
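
The assertions pin down what run_procs must do: create one multiprocessing.Process per target, pass the config as its only argument, and join it. A sketch of an implementation that would satisfy this test (the start() call is an assumption; the test does not check it):

import multiprocessing

class Runner:
    def run_procs(self, procs, cfg):
        workers = [multiprocessing.Process(target=proc, args=(cfg,))
                   for proc in procs]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()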
Example #4
def main(is_debug):
    """ Training Pipeline
    """
    with open("./config.yaml") as yf:
        config = yaml.safe_load(yf)

    # run single models
    for config_ in config["models"]:
        pprint.pprint(config_)
        runner = Runner(settings, AttrDict(config_))  # settings/args come from the elided surrounding module
        runner.run(is_debug=is_debug, multi_gpu=args.multi_gpu)
Example #5
    def test_web_monitor_proc(self, asyncio_mock, web_monitor_app_mock,
                              cfg_read):
        runner = Runner()
        runner.web_monitor_proc(cfg_read)

        asyncio_mock.get_event_loop.assert_called_once()
        loop_mock = asyncio_mock.get_event_loop.return_value
        loop_mock.stop.assert_called_once()
        loop_mock.run_until_complete.assert_called_once()

        web_monitor_app_mock.return_value.run.assert_called_once()
        web_monitor_app_mock.return_value.stop.assert_called_once()
Example #6
    def test_stats_consumer_proc(self, asyncio_mock, consumer_app_mock,
                                 db_mock, cfg_read):
        runner = Runner()
        runner.stats_consumer_proc(cfg_read)

        asyncio_mock.get_event_loop.assert_called_once()
        loop_mock = asyncio_mock.get_event_loop.return_value
        loop_mock.stop.assert_called_once()
        loop_mock.run_until_complete.assert_called_once()

        db_mock.return_value.clean_up.assert_called_once()

        consumer_app_mock.return_value.run.assert_called_once()
        consumer_app_mock.return_value.stop.assert_called_once()
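
Both proc tests assert the same lifecycle: fetch the event loop, run the app to completion, then stop everything (plus a database clean-up in the consumer case). A sketch of the shape they imply, written as a plain function; ConsumerApp and Database are hypothetical stand-ins for the classes being mocked:

import asyncio

def stats_consumer_proc(cfg):
    loop = asyncio.get_event_loop()
    db = Database(cfg)          # hypothetical
    app = ConsumerApp(cfg, db)  # hypothetical
    try:
        loop.run_until_complete(app.run())
    finally:
        app.stop()
        db.clean_up()
        loop.stop()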
Example #7
    def test_find_existing_file_or_new_mail_path_1(self):
        # test that file already exists!
        class DummyMail:
            def __init__(self):
                self.uid = 123
                self.date = datetime(2020, 9, 10, 18, 7, 6)
                self.subject = " Fwd: Re: Fwd: fwD:    re:  123 "
                self.to = (
                    "*****@*****.**",
                    "*****@*****.**",
                )
                self.from_ = "*****@*****.**"
                self.raw_data = bytearray(b"\x00\x11\x0F")  # no mail data

        mail = DummyMail()

        test_path = os.path.join(os.path.dirname(__file__), "../__test__/find")
        os.makedirs(test_path, exist_ok=True)

        mail_path = os.path.join(test_path, "orig.no-eml")
        with open(mail_path, "wb") as file:
            file.write(mail.raw_data)

        result = Runner.find_existing_file_or_new_mail_path(mail, mail_path)
        self.assertEqual(result, None)  # file exists already
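
The test pins down one behaviour: when the candidate path is already occupied, the method returns None instead of a path to write to. A sketch consistent with that (in the source it is a static method on Runner; any deduplication or renaming logic beyond the existing-file case is not shown by the test):

import os

def find_existing_file_or_new_mail_path(mail, candidate_path):
    # None tells the caller the mail is already stored
    if os.path.exists(candidate_path):
        return None
    return candidate_path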
Example #8
def main():
    server_socket = get_server_socket()
    print('server started')
    while True:
        connection, address = server_socket.accept()
        connection.send('accepted'.encode())
        print('connection accepted from', address)
        Runner(ClientInfo(connection, address)).start()
Example #9
    def measurement__set_csv_writer(self, args):
        import os
        from src.bank import Bank
        from src.household import Household
        from src.firm import Firm
        from src.environment import Environment
        from src.transaction import Transaction
        from src.market import Market
        from src.runner import Runner
        from src.measurement import Measurement

        text = "This test checks measurement.set_csv_writer \n"
        self.print_info(text)
        #
        # INITIALIZATION
        #
        environment_directory = str(args[0])
        identifier = str(args[1])
        log_directory = str(args[2])

        # Configure logging parameters so we get output while the program runs
        logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %H:%M:%S',
                            filename=log_directory + identifier + ".log", level=logging.INFO)
        logging.info('START logging for test measurement__set_csv_writer in run: %s',
                     environment_directory + identifier + ".xml")

        # Construct household filename
        environment = Environment(environment_directory,  identifier)

        # Construct a runner
        runner = Runner(environment)

        # generate a bank
        bank = Bank()
        bank.identifier = "test_bank"
        environment.banks.append(bank)

        # generate a firm
        firm = Firm()
        firm.identifier = "test_firm"
        environment.firms.append(firm)

        # generate a household
        household = Household()
        household.identifier = "test_household"
        environment.households.append(household)

        #
        # TESTING
        #
        import csv
        # the writer only needs a file handle; nothing is written to it in this test
        file_new = open("__init__.py", "r")
        csv_writer = csv.writer(file_new, lineterminator='\n')
        measurement = Measurement(environment, runner)
        print("Measurement's csv_writer:")
        print(measurement.get_csv_writer())
        measurement.set_csv_writer(csv_writer)
        print("Measurement's csv_writer:")
        print(measurement.get_csv_writer())
Example #10
    def initialize_shock(self, shock_config):
        from src.runner import Runner
        runner = Runner(self)

        from src.shock import Shock
        shock = Shock(self, runner)
        shock.read_xml_config_file(shock_config)
        self.shocks.append(shock)
Example #11
def main():
    args = parse_args()
    set_global_seeds(666)
    config = get_config(args.config)
    pprint(config)
    config['train_params']['name'] = (
        f"{config['train_params']['name']}/fold{args.fold}")
    factory = Factory(config['train_params'])
    data_factory = DataFactory(config['data_params'], fold=args.fold)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    callbacks = create_callbacks(config['train_params']['name'],
                                 config['dumps'])
    trainer = Runner(stages=config['stages'],
                     factory=factory,
                     callbacks=callbacks,
                     device=device)
    trainer.fit(data_factory)
Example #12
class TestRunner(unittest.TestCase):
    def setUp(self):
        self.logger = MockLogger()
        self.logger.record_results = mock.MagicMock()
        self.gdb_manager = MockGdbManager()
        self.gdb_manager.read_energy = mock.MagicMock()
        self.runner = Runner(self.logger, self.gdb_manager)

    def testLoggerExists(self):
        self.assertEqual(self.logger, self.runner.logger)

    def testGdbManagerExists(self):
        self.assertEqual(self.gdb_manager, self.runner.gdb_manager)

    def testRunBinaryCallsGdbManager(self):
        old_num_calls = len(self.gdb_manager.read_energy.call_args_list)
        self.runner.run_binary(None, None)
        self.assertEqual(1, len(self.gdb_manager.read_energy.call_args_list) - old_num_calls)
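
Read together, the tests specify the Runner contract: the constructor stores the logger and gdb_manager, and each run_binary call triggers exactly one energy reading. A sketch that would pass them (the read_energy arguments and the record_results call are assumptions):

class Runner:
    def __init__(self, logger, gdb_manager):
        self.logger = logger
        self.gdb_manager = gdb_manager

    def run_binary(self, binary, args):
        # exactly one reading per invocation, as testRunBinaryCallsGdbManager checks
        result = self.gdb_manager.read_energy(binary, args)
        self.logger.record_results(result)
        return result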
Example #13
def init(variant,
         ckpt="lve",
         base="",
         prefix="",
         graph_file=None,
         device=None):
    # Initialize model
    # If graph file is specified in config, that will be used
    # If config specifies directory, we'll use `graph_file` for the filename
    # If `graph_file` is None, the (alphabetically) first file will be used

    run_type = "eval"
    exp_config = osp.join("../configs", prefix, f"{variant}.yaml")
    if base != "":
        exp_config = [osp.join("../configs", f"{base}.yaml"), exp_config]
    ckpt_path = f"{variant}.{ckpt}.pth"

    config, ckpt_path = prepare_config(exp_config,
                                       run_type,
                                       ckpt_path, [
                                           "USE_TENSORBOARD",
                                           False,
                                           "SYSTEM.NUM_GPUS",
                                           1,
                                       ],
                                       suffix=prefix,
                                       graph_file=graph_file)
    if graph_file is None and osp.isdir(config.MODEL.GRAPH_FILE):
        config.defrost()
        graphs = sorted(f for f in os.listdir(config.MODEL.GRAPH_FILE)
                        if f.endswith('.edgelist'))
        graph = graphs[0]  # fall back to the alphabetically first graph file
        config.MODEL.GRAPH_FILE = osp.join(config.MODEL.GRAPH_FILE, graph)
        graph_id = graph[:5]
        add_suffix(config, graph_id)
        ckpt_dir, ckpt_fn = osp.split(ckpt_path)
        ckpt_path = osp.join(ckpt_dir, graph_id, ckpt_fn)
        # keep the checkpoint path next to the graph that was actually chosen
        config.freeze()
    runner = Runner(config)
    runner.logger.clear_filehandlers()
    runner.load_device(device=device)
    return runner, ckpt_path
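
The resolution rules from the opening comment, distilled into a standalone helper (a sketch only; in init above the same logic is spread across prepare_config and the isdir branch):

import os
import os.path as osp

def resolve_graph_file(configured_path, graph_file=None):
    # rule 1: a concrete file in the config wins
    if not osp.isdir(configured_path):
        return configured_path
    # rule 2: a directory plus an explicit filename
    if graph_file is not None:
        return osp.join(configured_path, graph_file)
    # rule 3: otherwise the alphabetically first .edgelist file
    graphs = sorted(f for f in os.listdir(configured_path)
                    if f.endswith('.edgelist'))
    return osp.join(configured_path, graphs[0])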
Example #14
    def runner__do_run(self, args):
        import os
        from src.bank import Bank
        from src.household import Household
        from src.firm import Firm
        from src.environment import Environment  # needed for the Directory
        from src.runner import Runner
        from src.measurement import Measurement

        text = "This test checks runner.do_run \n"
        self.print_info(text)
        #
        # INITIALIZATION
        #
        environment_directory = str(args[0])
        identifier = str(args[1])
        log_directory = str(args[2])

        # Configure logging parameters so we get output while the program runs
        logging.basicConfig(
            format="%(asctime)s %(message)s",
            datefmt="%m/%d/%Y %H:%M:%S",
            filename=log_directory + identifier + ".log",
            level=logging.INFO,
        )
        logging.info("START logging for test runner_do_run in run: %s", environment_directory + identifier + ".xml")

        # Construct bank filename
        environment = Environment(environment_directory, identifier)

        # get the bank_directory from the environment
        bank_directory = environment.bank_directory
        # and loop over all banks in the directory for testing purpose (just one bank)
        listing = os.listdir(bank_directory)
        bank_filename = bank_directory + listing[0]

        # making an instance of the Runner class
        runner = Runner(environment)

        #
        # TESTING
        #
        runner.do_run(environment)
Example #15
def setup(params, epsilon_action_modifier, parallel_size):
    env_creator = EnvCreator(params.env_name,
                             parallel_size,
                             wrapper=params.env_wrapper,
                             seed=12)

    agent = DQNAgent(env_creator, params.network_fn, epsilon_action_modifier,
                     params.gamma, params.learning_rate,
                     params.target_net_sync, params.use_double_q)
    runner = Runner(env_creator, agent)
    return agent, runner
Example #16
def start():
    try:
        path = "var/accounts/" + (os.getenv('ACCOUNT_FILE_NAME') or '')  # avoid a TypeError when the variable is unset

        if os.path.isfile(path):

            logger.info('Found file ' + path)

            with open(path) as json_file:

                data = json.load(json_file)

                if not data[0]['username'] or not data[0]['password']:
                    logger.error('Username and password are required')

                for account in data:
                    runner = Runner(account, os.getenv('API_URL'), logger)
                    try:
                        runner.start()
                    except Exception:  # close the browser on any failure
                        runner.driver.quit()

        else:
            if not os.getenv('ACCOUNT_FILE_NAME'):

                logger.error('ACCOUNT_FILE_NAME environment variable not set')

            else:

                logger.error('Could not find file: ' + path)

    except Exception as error:
        just_the_string = traceback.format_exc()
        logger.error(just_the_string)
        logger.exception(error)

    return 'Finished'
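
start() reads the account file as a JSON array and pulls username and password from each entry, so the file is presumably shaped like this (shown as the equivalent Python literal, placeholder values):

accounts = [
    {"username": "first.user", "password": "********"},
    {"username": "second.user", "password": "********"},
]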
Example #17
    def initialize_shock(self, shock_config):
        from src.runner import Runner
        runner = Runner(self)

        from src.shock import Shock
        shock = Shock(self, runner)
        shock.read_xml_config_file(shock_config)
        self.shocks.append(shock)

        shock.measure_intitial_shock(self)
        for k, v in shock.legend.items():  # dict.iteritems() is Python 2 only
            if v != 0:
                self.shock_measure = (k, v)
                # df_shock = pd.DataFrame()

        # the loop below can be used to check that the shock config was read correctly
        for key in shock.asset_returns:
            if shock.asset_returns[key] != 0.0:
                # print "0. ***ENV.PY*** When shock is initialised:  The asset class", key, "is shocked by", shock.asset_returns[key] * 100, "%"
                pass
Example #18
    ax.set_ylabel('Accuracy', size=14)
    ax.tick_params(labelsize=14)
    plt.savefig(f'../model/tuning/{NAME}-NB.png', dpi=300)


if __name__ == '__main__':
    base_params = {'alpha': 1.0, 'fit_prior': True, 'class_prior': None}
    params_NB = dict(base_params)
    param_grid_ = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}

    features = ["bow", "n-gram", "tf-idf", "n-gram-tf-idf"]

    results = []
    NAME = ":".join(features)
    for name in features:
        x = Runner.load_x_train(name)
        y = Runner.load_y_train()
        model = ModelMultinomialNB(name, **dict(params_NB))
        search = GridSearchCV(model,
                              cv=6,
                              param_grid=param_grid_,
                              return_train_score=True,
                              verbose=10,
                              refit=True)
        search.fit(x, y)
        results.append((search, name))
        logger.info(
            f'{name} - bestscore : {search.best_score_} - result :{search.cv_results_["mean_test_score"]}'
        )

    res = pd.DataFrame.from_dict(
Example #19
    params = {'kernel': 'linear', 'gamma': 0.001}
    params_SVC = dict(params)

    param_grid_ = [{
        'n_components': [10, 30, 50, 100],
        'n_iter': [8, 16],
        'C': [1, 10, 100, 1000]
    }, {
        'apply_svd': [False],
        'C': [1, 10, 100, 1000]
    }]

    feature = [["mfcc", "delta", "power"]]

    results = []
    x = Runner.load_x_train(feature)
    y = Runner.load_y_train()
    model = ModelSVC("SVC", **params_SVC)
    search = GridSearchCV(model,
                          cv=5,
                          param_grid=param_grid_,
                          return_train_score=True)
    search.fit(x, y)
    results.append((search, feature))
    logger.info(
        f'{feature} - bestscore : {search.best_score_} - result :{search.best_params_}'
    )

    for search, name in results:
        logger.info(f'{name} - bestscore : {search.best_score_}')
Example #20
# load neighbourhood data
with open('parameters/ineffective_lock_down/neighbourhood_data.json') as json_file:
    neighbourhood_data = json.load(json_file)

# load age data
age_distribution = pd.read_csv('age_dist.csv', sep=';', index_col=0)
age_distribution_per_ward = dict(age_distribution.transpose())

# Monte Carlo simulation
for seed in range(parameters['monte_carlo_runs']):
    # make new folder for seed, if it does not exist
    if not os.path.exists('measurement/inef_lockdown/seed{}'.format(seed)):
        os.makedirs('measurement/inef_lockdown/seed{}'.format(seed))

    # initialization
    environment = EnvironmentNetwork(seed, parameters, neighbourhood_data, age_distribution_per_ward)

    # running the simulation
    runner = Runner()
    runner.ineffective_lock_down(environment, seed)

    # save network
    if not parameters["high_performance"]:
        for idx, network in enumerate(environment.infection_states):
            for node in network.nodes:
                # graphml can only store primitive values, so keep the agent's status string
                network.nodes[node]['agent'] = network.nodes[node]['agent'].status

            idx_string = '{0:04}'.format(idx)
            nx.write_graphml_lxml(network, "measurement/inef_lockdown/seed{}/network_time{}.graphml".format(seed, idx_string))
Example #21
    environment_directory = str(args[1])
    identifier = str(args[2])
    log_directory = str(args[3])
    measurement_directory = str(args[4])

    # Configure logging parameters so we get output while the program runs
    logging.basicConfig(format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        filename=log_directory + identifier + ".log",
                        level=logging.INFO)
    logging.info('START logging for run: %s',
                 environment_directory + identifier + ".xml")

    environment = Environment()
    environment.initialize(environment_directory, identifier)
    runner = Runner()
    measurement = Measurement()

    #
    # UPDATE STEP
    #
    for i in range(environment.parameters.numSimulations):
        logging.info('  STARTED with run %s', str(i))
        environment.initialize(environment_directory, identifier)
        # check that environment file has been read correctly
        #environment.write_environment_file(identifier)
        runner.initialize(environment)
        measurement.initialize()  # clear the previous measurement

        # do the run
        runner.do_run(measurement, "info")
Example #22
# -------------------------------------------------------------------------
#
#  MAIN
#
# -------------------------------------------------------------------------
if __name__ == '__main__':

    from src.environment import Environment
    from src.runner import Runner

    args = ["configs/environment/", "environment_config", "log/"]

#
# INITIALIZATION
#
    environment_directory = str(args[0])
    identifier = str(args[1])
    log_directory = str(args[2])

    environment = Environment(environment_directory, identifier)
    runner = Runner(environment)

#
# UPDATE STEP
#
    # for i in range(int(environment.static_parameters['num_simulations'])):
    #     environment.initialize(environment_directory, identifier)
    #     runner.initialize(environment)
    #     # do the run
    #     runner.do_run(environment)
Example #23
    def setUp(self):
        self.logger = MockLogger()
        self.logger.record_results = mock.MagicMock()
        self.gdb_manager = MockGdbManager()
        self.gdb_manager.read_energy = mock.MagicMock()
        self.runner = Runner(self.logger, self.gdb_manager)
Example #24
import os, sys
sys.path.append(os.path.abspath('..'))
from src.runner import Runner

'''
Run this script to train and pickle models for app use. Pickled models get saved to the models directory.
Script also produces various visualizations of the data that can be found in the media directory.
'''

if __name__ == '__main__':
    runner = Runner()

    '''TF-IDF'''
    runner.run_naive_bayes_sentiment()
    runner.run_naive_bayes_topic()

    '''Doc2Vec'''
    runner.run_doc2vec_logreg()
    runner.run_doc2vec_naivebayes()

    '''EDA Plots of Data'''
    # runner.make_plots()
Example #25
        'input_dropout': 0.05,
        'optimizer': {
            'lr': 0.0015,
            'type': 'adam'
        }
    }
    #tf-idf =          {'batch_norm': 'no', 'batch_size': 192.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.0, 'hidden_layers': 2.0, 'hidden_units': 192.0, 'input_dropout': 0.05, 'optimizer': {'lr': 0.0018, 'type': 'adam'}}
    #word2vec_mean =   {'batch_norm': 'before_act', 'batch_size': 256.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 2.0, 'hidden_units': 128.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00037, 'type': 'adam'}}
    #word2vec_max =    {'batch_norm': 'no', 'batch_size': 32.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.3, 'hidden_layers': 3.0, 'hidden_units': 160.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00016, 'type': 'adam'}}
    #word2vec_concat = {'batch_norm': 'before_act', 'batch_size': 32.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 2.0, 'hidden_units': 96.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00075, 'type': 'sgd'}}
    #word2vec_hier =   {'batch_norm': 'no', 'batch_size': 96.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 256.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0024, 'type': 'sgd'}}
    #fasttext_mean =   {'batch_norm': 'before_act', 'batch_size': 224.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.3, 'hidden_layers': 2.0, 'hidden_units': 192.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0032, 'type': 'sgd'}}
    #fasttex_max =     {'batch_norm': 'no', 'batch_size': 160.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 128.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.00016, 'type': 'adam'}}
    #fasttext_concat = {'batch_norm': 'before_act', 'batch_size': 192.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.6, 'hidden_layers': 2.0, 'hidden_units': 224.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00048, 'type': 'adam'}}
    #fasttext_hier =   {'batch_norm': 'no', 'batch_size': 64.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.3, 'hidden_layers': 2.0, 'hidden_units': 128.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.00025, 'type': 'adam'}}
    #doc2vec-dbow =    {'batch_norm': 'no', 'batch_size': 96.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 4.0, 'hidden_units': 160.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0017, 'type': 'sgd'}}
    #doc2vec-dmpv =    {'batch_norm': 'before_act', 'batch_size': 192.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.25, 'hidden_layers': 4.0, 'hidden_units': 224.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0040, 'type': 'sgd'}},
    #doc2vec-concat =  {'batch_norm': 'no', 'batch_size': 160.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 256.0, 'input_dropout': 0.05, 'optimizer': {'lr': 0.0025, 'type': 'sgd'}}
    #sdv =             {'batch_norm': 'before_act', 'batch_size': 192.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 256.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0029, 'type': 'sgd'}}

    params.update(bow)
    params_MLP = dict(params)

    # predict with the MLP
    feature = "bow"
    runner = Runner(run_name='MLP1',
                    model_cls=ModelMLP,
                    features=feature,
                    params=params_MLP)
    runner.run_train_cv()
Example #26
import sys, os
sys.path.append('../')

import numpy as np
import pandas as pd
from src.runner import Runner
from src.model_NB import ModelMultinomialNB

if __name__ == '__main__':
    params = {
        'alpha' : 1.0,
        'fit_prior' : True,
        'class_prior' : None
    }

    #### Best Parameters
    bow =             { 'alpha' : 1.0 }
    #tf-idf =          { 'alpha' : 1.0 }
    #n-gram =          { 'alpha' : 1.0 }
    #ngram-tf-idf  =   { 'alpha' : 0.1 }

    params.update(bow)
    params_NB = dict(params)

    # analysis with Naive Bayes
    feature = "bow"
    runner = Runner(run_name='NB1', model_cls=ModelMultinomialNB, features=feature, params=params_NB)
    runner.run_train_cv()
Example #27
    def runner__init__(self, args):
        import os
        from src.bank import Bank
        from src.household import Household
        from src.firm import Firm
        from src.environment import Environment  # needed for the Directory
        from src.runner import Runner
        from src.helper import Helper  # Helper is used below; assumed to live in src.helper like the other classes

        text = "This test checks runner.__init__ \n"
        self.print_info(text)
        #
        # INITIALIZATION
        #
        environment_directory = str(args[0])
        identifier = str(args[1])
        log_directory = str(args[2])

        # Configure logging parameters so we get output while the program runs
        logging.basicConfig(
            format="%(asctime)s %(message)s",
            datefmt="%m/%d/%Y %H:%M:%S",
            filename=log_directory + identifier + ".log",
            level=logging.INFO,
        )
        logging.info("START logging for test runner__init__ in run: %s", environment_directory + identifier + ".xml")

        # Construct bank filename
        environment = Environment(environment_directory, identifier)

        # get the bank_directory from the environment
        bank_directory = environment.bank_directory
        # and loop over all banks in the directory for testing purpose (just one bank)
        listing = os.listdir(bank_directory)
        bank_filename = bank_directory + listing[0]

        # generate a household
        household = Household()
        household.identifier = "test_household"
        environment.households.append(household)

        # generate a firm
        firm = Firm()
        firm.identifier = "test_firm"
        environment.firms.append(firm)

        # generate the bank
        bank = Bank()
        environment.banks.append(bank)
        helper = Helper()
        helper.initialize_standard_bank(bank, environment)

        # making an instance of the Runner class
        runner = Runner(environment)
        #
        # TESTING
        #

        text = "Identifier: "
        text = text + runner.get_identifier()
        print(text)

        text = "Number of sweeps:"
        text = text + str(runner.num_sweeps)
        print(text)

        text = "Updater"
        text = text + str(runner.updater)
        print(text)
Example #28

#
# INITIALIZATION
#
    environment_directory = str(args[1])
    identifier = str(args[2])
    log_directory = str(args[3])

    # Configure logging parameters so we get output while the program runs
    logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %H:%M:%S',
                        filename=log_directory + identifier + ".log", level=logging.INFO)
    logging.info('START logging for run: %s',  environment_directory + identifier + ".xml")

    environment = Environment(environment_directory,  identifier)
    runner = Runner(environment)

#
# UPDATE STEP
#
    for i in range(int(environment.num_simulations)):
        logging.info('  STARTED with run %s',  str(i))
        environment.initialize(environment_directory,  identifier)
        runner.initialize(environment)
        # do the run
        runner.do_run(environment)
        logging.info('  DONE')

#
# MEASUREMENT AND LOGGING
#
Example #29
    identifier = str(args[2])
    log_directory = str(args[3])
    measurement_directory = str(args[4])

    # Configure logging parameters so we get output while the program runs
    logging.basicConfig(
        format="%(asctime)s %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        filename=log_directory + identifier + ".log",
        level=logging.INFO,
    )
    logging.info("START logging for run: %s", environment_directory + identifier + ".xml")

    environment = Environment(environment_directory, identifier)
    # environment.initialize(environment_directory,  identifier)
    runner = Runner()
    measurement = Measurement()

    #
    # UPDATE STEP
    #
    for i in range(environment.static_parameters["numSimulations"]):
        logging.info("  STARTED with run %s", str(i))
        environment.initialize(environment_directory, identifier)
        # check that environment file has been read correctly
        # environment.write_environment_file(identifier)
        runner.initialize(environment)
        measurement.initialize()  # clear the previous measurement

        # do the run
        runner.do_run(measurement, "info")
Example #30
                'lr': hp.loguniform('sgd_lr', np.log(0.00001), np.log(0.01))
            }]),
        'batch_size':
        hp.quniform('batch_size', 32, 128, 32)
    }

    features = ["tf-idf", "n-gram", "n-gram-tf-idf"]

    NAME = "-".join(features)
    result = {}

    # number of parameter combinations to try for each feature
    max_evals = 100
    # search for the best parameters for each feature in the features list
    for i, name in enumerate(features):
        train_x = Runner.load_x_train(name)
        train_y = Runner.load_y_train()
        skf = StratifiedKFold(n_splits=6, shuffle=True, random_state=71)
        tr_idx, va_idx = list(skf.split(train_x, train_y))[0]
        tr_x, va_x = train_x[tr_idx], train_x[va_idx]
        tr_y, va_y = train_y[tr_idx], train_y[va_idx]

        # run the hyperopt parameter search
        trials = Trials()
        history = []
        fmin(objective,
             param_space,
             algo=tpe.suggest,
             trials=trials,
             max_evals=max_evals)
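
The objective passed to fmin is elided. A minimal sketch of what it presumably does, closing over the fold split from the loop above; ModelMLP's train/predict interface and the log-loss metric are assumptions:

from hyperopt import STATUS_OK
from sklearn.metrics import log_loss

def objective(params):
    model = ModelMLP(name, **params)  # model wrapper from the elided imports
    model.train(tr_x, tr_y, va_x, va_y)
    score = log_loss(va_y, model.predict(va_x))
    history.append((params, score))
    return {'loss': score, 'status': STATUS_OK}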
Example #31
def main():
    # =========================================
    # === Settings
    # =========================================
    # Get logger
    logger = get_logger(__name__)
    logger.info('Settings')

    # Get argument
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config', default='model_lgb_hakubishin_20200317/configs/model_0.json')
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    logger.info(f'config: {args.config}')
    logger.info(f'debug: {args.debug}')

    # Get config
    with open(args.config) as f:
        config = json.load(f)
    config.update({
        'args': {
            'config': args.config,
            'debug': args.debug
        }
    })
    config["model"]["model_params"]["nthread"] = cpu_count()

    # Create a directory for model output
    model_no = pathlib.Path(args.config).stem
    model_output_dir = (
        pathlib.Path(config['model_dir_name']) /
        pathlib.Path(config['dataset']['output_directory']) / model_no
    )
    if not model_output_dir.exists():
        model_output_dir.mkdir(parents=True)  # parent folders may not exist yet

    logger.info(f'model_output_dir: {str(model_output_dir)}')
    logger.debug(f'model_output_dir exists: {model_output_dir.exists()}')
    config.update({
        'model_output_dir': str(model_output_dir)
    })

    # =========================================
    # === Loading features
    # =========================================
    logger.info('Loading features')
    logger.info(f'targets: {config["target"]}')
    logger.info(f'features: {config["features"]}')

    # features
    x_train = FeatureLoader(
        data_type="training", debugging=args.debug
        ).load_features(config["features"])

    # targets
    y_train_set = FeatureLoader(
        data_type="training", debugging=args.debug
        ).load_features(config["target"])

    # folds
    folds_train = FeatureLoader(
        data_type="training", debugging=args.debug
        ).load_features(config["folds"])

    logger.debug(f'y_train_set: {y_train_set.shape}')
    logger.debug(f'x_train: {x_train.shape}')

    # =========================================
    # === Train model and predict
    # =========================================
    logger.info('Train model and predict')

    # Get target values
    y_train = y_train_set["Target_answered_correctly"].values

    # Get folds
    trn_idx = folds_train.query("Fold_val != 1").index
    val_idx = folds_train.query("Fold_val == 1").index
    folds_ids = [(trn_idx, val_idx)]
    logger.debug(f"n_trn={len(trn_idx)}, n_val={len(val_idx)}")
    logger.debug(f"trn_pos={y_train[trn_idx].sum()}, val_pos={y_train[val_idx].sum()}")

    # Train and predict
    model_cls = model_map[config['model']['name']]
    model_params = config['model']
    runner = Runner(
        model_cls, model_params, model_output_dir, model_cls.__name__, n_fold=1,
    )
    oof_preds, evals_result, importances = runner.train_cv(
        x_train, y_train, folds_ids)
    config.update(evals_result)

    # Save importances
    importances.mean(axis=1).reset_index().rename(
        columns={"index": "feature", 0: "value"}
    ).sort_values("value", ascending=False).to_csv(
        model_output_dir / "importances.csv", index=False
    )

    # Save oof-pred file
    oof_preds_file_name = "oof_pred"
    np.save(model_output_dir / oof_preds_file_name, oof_preds)
    logger.info(f'Save oof-pred file: {model_output_dir/ oof_preds_file_name}')

    # Save files (override)
    logger.info('Save files')
    save_path = model_output_dir / 'output.json'
    json_dump(config, save_path)
    logger.info(f'Save model log: {save_path}')

    # =========================================
    # === Upload to GCS
    # =========================================
    if not args.debug:
        logger.info('Upload to GCS')

        bucket_dir_name = config["model_dir_name"] + "/" + model_no
        logger.info(f'bucket_dir_name: {bucket_dir_name}')

        files = list(model_output_dir.iterdir())
        upload_to_gcs(bucket_dir_name, files)
Example #32
import sys, os
sys.path.append('../')

import numpy as np
import pandas as pd
from src.runner import Runner
from src.model_GaussNB import ModelGaussNB

if __name__ == '__main__':
    params = {'priors': None, 'var_smoothing': 1e-09}
    params_NB = dict(params)

    # run with the specified feature
    feature = "bow"
    runner = Runner(run_name='GNB1',
                    model_cls=ModelGaussNB,
                    features=feature,
                    params=params_NB)

    # run a single fold only
    # runner.train_fold(0)
    # run with cross-validation
    runner.run_train_cv()
Example #33
def main():
    # =========================================
    # === Settings
    # =========================================
    # Get logger
    logger = get_logger(__name__)
    logger.info('Settings')

    # Get argument
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default='./configs/model_0.json')
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    logger.info(f'config: {args.config}')
    logger.info(f'debug: {args.debug}')

    # Get config
    with open(args.config) as f:
        config = json.load(f)
    config.update({'args': {'config': args.config, 'debug': args.debug}})

    if config["model"]["name"] == "lightgbm":
        config["model"]["model_params"]["nthread"] = cpu_count()

    # Create a directory for model output
    model_no = pathlib.Path(args.config).stem
    model_output_dir = (pathlib.Path(config['dataset']['output_directory']) /
                        model_no)
    if not model_output_dir.exists():
        model_output_dir.mkdir(parents=True)  # parent folders may not exist yet

    logger.info(f'model_output_dir: {str(model_output_dir)}')
    logger.debug(f'model_output_dir exists: {model_output_dir.exists()}')
    config.update({'model_output_dir': str(model_output_dir)})

    # =========================================
    # === Loading data
    # =========================================
    logger.info('Loading data')

    # Get train and test
    input_dir = pathlib.Path(config['dataset']['input_directory'])
    train = pd.read_csv(input_dir / 'train.csv')
    test = pd.read_csv(input_dir / 'test.csv')

    # Get target values
    target_column = config['data_type']['target']
    y_train = train[target_column].values

    # =========================================
    # === Loading features
    # =========================================
    logger.info('Loading features')

    # Get features
    x_train, x_test = load_features(config)
    feature_name = x_test.columns
    logger.debug(f'number of features: {len(feature_name)}')

    # =========================================
    # === Adversarial Validation
    # =========================================
    logger.info("adversarial validation")
    train_adv = x_train.copy()  # work on copies so the synthetic label never mutates x_train
    test_adv = x_test.copy()
    train_adv['target'] = 0
    test_adv['target'] = 1
    train_test_adv = pd.concat([train_adv, test_adv], axis=0,
                               sort=False).reset_index(drop=True)
    target = train_test_adv['target'].values

    train_set, val_set = train_test_split(train_test_adv,
                                          test_size=0.33,
                                          random_state=71,
                                          shuffle=True)
    x_train_adv = train_set[feature_name]
    y_train_adv = train_set['target']
    x_val_adv = val_set[feature_name]
    y_val_adv = val_set['target']
    logger.debug(f'the number of train set: {len(x_train_adv)}')
    logger.debug(f'the number of valid set: {len(x_val_adv)}')

    train_lgb = lgb.Dataset(x_train_adv, label=y_train_adv)
    val_lgb = lgb.Dataset(x_val_adv, label=y_val_adv)
    lgb_model_params = config["adversarial_validation"]["lgb_model_params"]
    lgb_train_params = config["adversarial_validation"]["lgb_train_params"]
    clf = lgb.train(lgb_model_params,
                    train_lgb,
                    valid_sets=[train_lgb, val_lgb],
                    valid_names=['train', 'valid'],
                    **lgb_train_params)

    feature_imp = pd.DataFrame(sorted(
        zip(clf.feature_importance(importance_type='gain'), feature_name)),
                               columns=['value', 'feature'])
    plt.figure(figsize=(20, 10))
    sns.barplot(x='value',
                y='feature',
                data=feature_imp.sort_values(by='value',
                                             ascending=False).head(20))
    plt.title('LightGBM Features')
    plt.tight_layout()
    plt.savefig(model_output_dir / "feature_importance_adv.png")

    config.update({
        'adversarial_validation_result': {
            'score':
            clf.best_score,
            'feature_importances':
            feature_imp.set_index("feature").sort_values(
                by="value", ascending=False).head(20).to_dict()["value"]
        }
    })

    # =========================================
    # === Train model and predict
    # =========================================
    logger.info('Train model and predict')

    # Get features
    x_train, x_test = load_features(config)
    feature_name = x_test.columns
    logger.debug(f'number of features: {len(feature_name)}')

    # Get folds
    folds_ids = Fold(
        n_splits=config['cv']['n_splits'],
        shuffle=config['cv']['shuffle'],
        random_state=config['cv']['random_state']).get_stratifiedkfold(
            x_train, y_train)

    # Train and predict
    model_name = config['model']['name']
    model_cls = model_map[model_name]
    params = config['model']
    runner = Runner(model_cls, params, model_output_dir,
                    f'Train_{model_cls.__name__}')

    oof_preds, evals_result = runner.train_cv(x_train, y_train, folds_ids)
    config.update(evals_result)
    test_preds = runner.predict_cv(x_test)

    # =========================================
    # === Make submission file
    # =========================================
    sub = create_submission(test, test_preds, target_column)
    sub.to_csv(model_output_dir / 'submission.csv', index=False, header=True)

    # =========================================
    # === Save files
    # =========================================
    save_path = model_output_dir / 'output.json'
    json_dump(config, save_path)

    pd.DataFrame(oof_preds,
                 columns=["target"]).to_csv(model_output_dir / 'oof.csv',
                                            index=False,
                                            header=True)
Example #34
# load neighbourhood data
with open('parameters/lock_down/neighbourhood_data.json') as json_file:
    neighbourhood_data = json.load(json_file)

# load age data
age_distribution = pd.read_csv('age_dist.csv', sep=';', index_col=0)
age_distribution_per_ward = dict(age_distribution.transpose())

# Monte Carlo simulation
for seed in range(parameters['monte_carlo_runs']):
    # make new folder for seed, if it does not exist
    if not os.path.exists('measurement/lockdown/seed{}'.format(seed)):
        os.makedirs('measurement/lockdown/seed{}'.format(seed))

    # initialization
    environment = EnvironmentNetwork(seed, parameters, neighbourhood_data, age_distribution_per_ward)

    # running the simulation
    runner = Runner()
    runner.lock_down(environment, seed)

    # save network
    if not parameters["high_performance"]:
        for idx, network in enumerate(environment.infection_states):
            for node in network.nodes:
                # graphml can only store primitive values, so keep the agent's status string
                network.nodes[node]['agent'] = network.nodes[node]['agent'].status

            idx_string = '{0:04}'.format(idx)
            nx.write_graphml_lxml(network, "measurement/lockdown/seed{}/network_time{}.graphml".format(seed, idx_string))
Example #35
# Dataset1: principal component analysis of the mfcc features
import os, sys
sys.path.append('../')

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

from src.runner import Runner

X = Runner.load_x_train(["mfcc"])
print(X.shape)

# standardize the features
scaler = StandardScaler()
scaler.fit(X)
standard_X = scaler.transform(X)

# reduce from 12 dimensions down to 6
dim = 6
params = {
    'n_components': dim,
    'random_state': 71,
}
# principal component analysis
clf = PCA(**params)
clf.fit(standard_X)
pca = clf.transform(standard_X)
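
A quick follow-up to check how much variance the six components retain:

# cumulative explained variance of the 6 principal components
print(clf.explained_variance_ratio_.cumsum())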