def parse_configs():
    env = Env()
    env.read_env()
    config.battle_bot_module = env("BATTLE_BOT", 'safest')
    config.save_replay = env.bool("SAVE_REPLAY", config.save_replay)
    config.use_relative_weights = env.bool("USE_RELATIVE_WEIGHTS", config.use_relative_weights)
    config.gambit_exe_path = env("GAMBIT_PATH", config.gambit_exe_path)
    config.search_depth = int(env("MAX_SEARCH_DEPTH", config.search_depth))
    config.greeting_message = env("GREETING_MESSAGE", config.greeting_message)
    config.battle_ending_message = env("BATTLE_OVER_MESSAGE", config.battle_ending_message)
    config.websocket_uri = env("WEBSOCKET_URI", "sim.smogon.com:8000")
    config.username = env("PS_USERNAME")
    config.password = env("PS_PASSWORD", "")
    config.bot_mode = env("BOT_MODE")
    config.team_name = env("TEAM_NAME", None)
    config.pokemon_mode = env("POKEMON_MODE", constants.DEFAULT_MODE)
    config.run_count = int(env("RUN_COUNT", 1))
    config.room_name = env("ROOM_NAME", config.room_name)
    config.data_directory = env("DATA_DIR", "dataset")
    config.data_collector = env.bool("DATA_COLLECTOR", False)
    config.team_dir = env("TEAM_DIR", None)
    if config.team_dir is not None:
        files = os.listdir(os.path.join("/showdown", "teams", "teams", config.team_dir))
        file = random.choice(files)
        config.team_name = os.path.join(config.team_dir, file)
        print(f"Selected team {file}")
    if config.bot_mode == constants.CHALLENGE_USER:
        config.user_to_challenge = env("USER_TO_CHALLENGE")
    init_logging(env("LOG_LEVEL", "DEBUG"))
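For context, a minimal sketch of the environs pattern the parse_configs variants above rely on. The variable names are illustrative, not taken from the bot's codebase; the calls themselves (Env, read_env, env(), env.bool, env.int) are the environs package's documented API:

from environs import Env

env = Env()
env.read_env()  # loads a .env file from the working directory if present

username = env("PS_USERNAME")          # required: raises EnvError if unset
password = env("PS_PASSWORD", "")      # optional, with a default
save_replay = env.bool("SAVE_REPLAY", False)   # parsed to bool
run_count = env.int("RUN_COUNT", 1)            # typed parse instead of int(env(...))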
def setup(in_service=False):
    config.init(sys.argv[1:])
    config.init_logging()
    sys.excepthook = exceptionLogger
    port = config.getPort()
    httpd = httpserver.TivoHTTPServer(('', int(port)), httpserver.TivoHTTPHandler)

    logger = logging.getLogger('pyTivo')
    logger.info('Last modified: ' + last_date())
    logger.info('Python: ' + platform.python_version())
    logger.info('System: ' + platform.platform())

    for section, settings in config.getShares():
        httpd.add_container(section, settings)

    b = beacon.Beacon()
    b.add_service('TiVoMediaServer:%s/http' % port)
    b.start()
    if 'listen' in config.getBeaconAddresses():
        b.listen()

    httpd.set_beacon(b)
    httpd.set_service_status(in_service)

    logger.info('pyTivo is ready.')
    return httpd
def parse_configs():
    env = Env()
    env.read_env()
    config.battle_bot_module = env("BATTLE_BOT", 'safest')
    config.save_replay = env.bool("SAVE_REPLAY", config.save_replay)
    config.use_relative_weights = env.bool("USE_RELATIVE_WEIGHTS", config.use_relative_weights)
    config.gambit_exe_path = env("GAMBIT_PATH", config.gambit_exe_path)
    config.search_depth = int(env("MAX_SEARCH_DEPTH", config.search_depth))
    config.greeting_message = env("GREETING_MESSAGE", config.greeting_message)
    config.battle_ending_message = env("BATTLE_OVER_MESSAGE", config.battle_ending_message)
    config.websocket_uri = env("WEBSOCKET_URI", "sim.smogon.com:8000")
    config.username = env("PS_USERNAME")
    config.password = env("PS_PASSWORD", "")
    config.bot_mode = env("BOT_MODE")
    config.team_name = env("TEAM_NAME", None)
    config.pokemon_mode = env("POKEMON_MODE", constants.DEFAULT_MODE)
    config.run_count = int(env("RUN_COUNT", 1))
    config.room_name = env("ROOM_NAME", config.room_name)
    if config.bot_mode == constants.CHALLENGE_USER:
        config.user_to_challenge = env("USER_TO_CHALLENGE")
    init_logging(env("LOG_LEVEL", "DEBUG"))
def setup(in_service=False):
    config.init(sys.argv[1:], in_service)
    config.init_logging()
    sys.excepthook = exceptionLogger
    port = config.getPort()
    httpd = httpserver.TivoHTTPServer(('', int(port)), httpserver.TivoHTTPHandler)

    logger = logging.getLogger('pyTivo')
    logger.info('Last modified: ' + last_date())
    logger.info('Python: ' + platform.python_version())
    logger.info('System: ' + platform.platform())

    for section, settings in config.getShares():
        httpd.add_container(section, settings)

    b = beacon.Beacon()
    b.add_service('TiVoMediaServer:%s/http' % port)
    b.start()
    if 'listen' in config.getBeaconAddresses():
        b.listen()

    httpd.set_beacon(b)
    httpd.set_service_status(in_service)

    logger.info('pyTivo is ready.')
    return httpd
def SvcDoRun(self):
    config.init([])
    config.init_logging()

    p = os.path.dirname(__file__)
    f = open(os.path.join(p, 'log.txt'), 'w')
    sys.stdout = f
    sys.stderr = f

    port = config.getPort()
    httpd = httpserver.TivoHTTPServer(('', int(port)), httpserver.TivoHTTPHandler)

    for section, settings in config.getShares():
        httpd.add_container(section, settings)

    b = beacon.Beacon()
    b.add_service('TiVoMediaServer:%s/http' % port)
    b.start()
    if 'listen' in config.getBeaconAddresses():
        b.listen()

    httpd.set_beacon(b)

    while 1:
        sys.stdout.flush()
        (rx, tx, er) = select.select((httpd,), (), (), 5)
        for sck in rx:
            sck.handle_request()
        rc = win32event.WaitForSingleObject(self.stop_event, 5)
        if rc == win32event.WAIT_OBJECT_0:
            b.stop()
            break
def __init__(self):
    self._offset = (0, 0)
    self._monkeys_collection_path = None
    self._game_counter = 0
    self._wait_location = None
    pag.PAUSE = config.PYAUTOGUI_SLEEP_SECONDS_BETWEEN_CALLS
    config.init_logging()
    logging.info('Bloons TD6 bot initialising')
    logging.debug('OpenCV loaded from {}'.format(cv2.data.haarcascades))
def main():
    # load settings for training
    parser = argparse.ArgumentParser(
        description='train.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    opt = parser.parse_args()

    if opt.seed > 0:
        torch.manual_seed(opt.seed)

    print(opt.gpuid)
    if torch.cuda.is_available() and not opt.gpuid:
        opt.gpuid = 0

    if hasattr(opt, 'copy_model') and opt.copy_model:
        opt.exp += '.copy'
    if hasattr(opt, 'bidirectional'):
        if opt.bidirectional:
            opt.exp += '.bi-directional'
        else:
            opt.exp += '.uni-directional'

    # fill time into the name
    if opt.exp_path.find('%s') > 0:
        opt.exp_path = opt.exp_path % (opt.exp, opt.timemark)
        opt.save_path = opt.save_path % (opt.exp, opt.timemark)

    if not os.path.exists(opt.exp_path):
        os.makedirs(opt.exp_path)
    if not os.path.exists(opt.save_path):
        os.makedirs(opt.save_path)

    config.init_logging(opt.exp_path + '/output.log')

    logging.info('Parameters:')
    [logging.info('%s : %s' % (k, str(v))) for k, v in opt.__dict__.items()]

    try:
        train_data_loader, valid_data_loader, test_data_loader, word2id, id2word, vocab = load_data_vocab(opt)
        model = init_model(opt)
        optimizer, criterion = init_optimizer_criterion(model, opt)
        train_model(model, optimizer, criterion, train_data_loader, valid_data_loader, test_data_loader, opt)
    except Exception as e:
        logging.exception("message")
def init_app():
    config.init_logging()
    if config.INFLUXDB and config.INFLUXDB['host']:
        stats.db.init(**config.INFLUXDB)
    import handlers
    app = handlers.base.app
    app.config['SQLALCHEMY_DATABASE_URI'] = config.SQLALCHEMY_DATABASE_URI
    models.base.init_db(app)
    models.recover.recover()
    return app, config.DEBUG == 1
def main():
    config.init_logging()
    app = handlers.base.app
    app.config['SQLALCHEMY_DATABASE_URI'] = config.SQLALCHEMY_DATABASE_URI
    models.base.init_db(app)
    with app.app_context():
        for lock in models.base.db.session.query(TaskLock).all():
            lock.step_id = None
            models.base.db.session.add(lock)
        models.base.db.session.commit()
def main():
    config.init_logging()
    app = handlers.base.app
    app.config['SQLALCHEMY_DATABASE_URI'] = config.SQLALCHEMY_DATABASE_URI
    models.base.init_db(app)
    if config.OPEN_FALCON and config.OPEN_FALCON['host_query']:
        stats.init(**config.OPEN_FALCON)
    algalon_client = (AlgalonClient(**config.ALGALON)
                      if config.ALGALON and config.ALGALON['dsn'] else None)
    run(config.POLL_INTERVAL, algalon_client, app)
def main():
    config.init_logging()
    app = handlers.base.app
    app.config['SQLALCHEMY_DATABASE_URI'] = config.SQLALCHEMY_DATABASE_URI
    import daemonutils.node_polling
    models.base.init_db(app)
    if config.INFLUXDB and config.INFLUXDB['host']:
        stats.db.init(**config.INFLUXDB)
    algalon_client = (AlgalonClient(**config.ALGALON)
                      if config.ALGALON and config.ALGALON['dsn'] else None)
    run(config.POLL_INTERVAL, algalon_client, app)
def init_app():
    conf = config.load('config.yaml' if len(sys.argv) == 1 else sys.argv[1])
    config.init_logging(conf)
    if 'influxdb' in conf:
        stats.db.init(**conf['influxdb'])
    import handlers
    app = handlers.base.app
    app.config['SQLALCHEMY_DATABASE_URI'] = config.mysql_uri(conf)
    models.base.init_db(app)
    models.recover.recover()
    return app, conf.get('debug', 0) == 1
def main():
    print('************ Initialize logging **********')
    config.init_logging(False)

    # Generate the seeds to be used by the rest of the toolflow
    print('************ Generating seeds **********')
    support.generate_seeds(config.nr_of_measurements, config.root_seed)

    # Create the subdirectories
    print('************ Creating subdirectories **********')
    if not os.path.exists(config.reports_dir):
        os.mkdir(config.reports_dir)
    if not os.path.exists(config.tmp_dir):
        os.mkdir(config.tmp_dir)
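The exists-then-mkdir pairs above leave a race window between the check and the creation. On Python 3.2+ the same intent is a single call; a sketch, assuming the same config module:

import os

os.makedirs(config.reports_dir, exist_ok=True)  # no error if it already exists
os.makedirs(config.tmp_dir, exist_ok=True)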
def main():
    # load settings for training
    parser = argparse.ArgumentParser(
        description='train.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    opt = parser.parse_args()

    if opt.seed > 0:
        torch.manual_seed(opt.seed)

    print(opt.gpuid)
    if torch.cuda.is_available() and not opt.gpuid:
        opt.gpuid = 0

    if hasattr(opt, 'copy_model') and opt.copy_model:
        opt.exp += '.copy'
    if hasattr(opt, 'bidirectional'):
        if opt.bidirectional:
            opt.exp += '.bi-directional'
        else:
            opt.exp += '.uni-directional'

    # fill time into the name
    if opt.exp_path.find('%s') > 0:
        opt.exp_path = opt.exp_path % (opt.exp, opt.timemark)
        opt.save_path = opt.save_path % (opt.exp, opt.timemark)

    if not os.path.exists(opt.exp_path):
        os.makedirs(opt.exp_path)
    if not os.path.exists(opt.save_path):
        os.makedirs(opt.save_path)

    config.init_logging(opt.exp_path + '/output.log')

    logging.info('Parameters:')
    [logging.info('%s : %s' % (k, str(v))) for k, v in opt.__dict__.items()]

    try:
        train_data_loader, valid_data_loader, test_data_loader, word2id, id2word, vocab = load_data_vocab(opt)
        model = init_model(opt)
        optimizer, criterion = init_optimizer_criterion(model, opt)
        train_model(model, optimizer, criterion, train_data_loader, valid_data_loader, test_data_loader, opt)
    except Exception as e:
        logging.exception("message")
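A minimal sketch of what a file-plus-stdout init_logging helper like the ones called in these snippets typically looks like. This is an assumption about its shape, not any of these projects' actual implementation:

import logging
import sys

def init_logging(log_file, stdout=True, level=logging.INFO):
    # One handler for the log file, optionally another for the console.
    handlers = [logging.FileHandler(log_file)]
    if stdout:
        handlers.append(logging.StreamHandler(sys.stdout))
    logging.basicConfig(
        level=level,
        format='%(asctime)s %(levelname)s %(name)s: %(message)s',
        handlers=handlers)
    return logging.getLogger()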
def setup(in_service=False):
    config.init(sys.argv[1:])
    config.init_logging()
    sys.excepthook = exceptionLogger
    port = config.getPort()
    httpd = httpserver.TivoHTTPServer(('', int(port)), httpserver.TivoHTTPHandler)

    logger = logging.getLogger('pyTivo')
    logger.info('Last modified: ' + last_date())
    logger.info('Python: ' + platform.python_version())
    logger.info('System: ' + platform.platform())

    for section, settings in config.getShares():
        httpd.add_container(section, settings)

        # Precaching of files: does a recursive list of base path
        if settings.get('precache', 'False').lower() == 'true':
            plugin = GetPlugin(settings.get('type'))
            if hasattr(plugin, 'pre_cache'):
                logger.info('Pre-caching the ' + section + ' share.')
                pre_cache_filter = getattr(plugin, 'pre_cache')

                def build_recursive_list(path):
                    try:
                        for f in os.listdir(path):
                            f = os.path.join(path, f)
                            if os.path.isdir(f):
                                build_recursive_list(f)
                            else:
                                pre_cache_filter(f)
                    except:
                        pass

                build_recursive_list(settings.get('path'))

    b = beacon.Beacon()
    b.add_service('TiVoMediaServer:%s/http' % port)
    b.start()
    if 'listen' in config.getBeaconAddresses():
        b.listen()

    httpd.set_beacon(b)
    httpd.set_service_status(in_service)

    logger.info('pyTivo is ready.')
    return httpd
def main():
    config.init_logging()
    if config.OPEN_FALCON and config.OPEN_FALCON['host_query']:
        stats.init(**config.OPEN_FALCON)
    import handlers
    app = handlers.base.app
    app.config['SQLALCHEMY_DATABASE_URI'] = config.SQLALCHEMY_DATABASE_URI
    models.base.init_db(app)

    import file_ipc
    file_ipc.write_nodes_proxies_from_db()

    debug = config.DEBUG == 1
    app.debug = debug
    app.run(host='127.0.0.1' if debug else '0.0.0.0', port=config.SERVER_PORT)
def setup(opts=None):
    """Parse options. If none given, opts is set to sys.argv"""
    if opts is None:
        opts = sys.argv[1:]
    config.bootstrap(opts)
    config.init_logging()
    config.load()
    config.parse_options(opts)
    ensure_dir_exists(app_globals.OPTIONS['output_path'])
    log_start()
    if app_globals.OPTIONS['report_pid']:
        proctl.report_pid()
        exit(0)
    config.check()
    proctl.ensure_singleton_process()
    init_signals()
def main():
    # load settings for training
    opt = init_opt(description='train.py')
    logging = init_logging(logger_name='train.py', log_file=opt.log_file, redirect_to_stdout=False)

    logging.info('EXP_PATH : ' + opt.exp_path)
    logging.info('Parameters:')
    [logging.info('%s : %s' % (k, str(v))) for k, v in opt.__dict__.items()]

    logging.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available():
        if isinstance(opt.device_ids, int):
            opt.device_ids = [opt.device_ids]
        logging.info('Running on %s! devices=%s' % ('MULTIPLE GPUs' if len(opt.device_ids) > 1 else '1 GPU', str(opt.device_ids)))
    else:
        logging.info('Running on CPU!')

    try:
        train_data_loader, valid_data_loader, test_data_loader, word2id, id2word, vocab = load_data_vocab(opt)
        model = init_model(opt)
        optimizer_ml, optimizer_rl, criterion = init_optimizer_criterion(model, opt)
        train_model(model, optimizer_ml, optimizer_rl, criterion, train_data_loader, valid_data_loader, test_data_loader, opt)
    except Exception as e:
        logging.error(e, exc_info=True)
        raise
def main():
    # load settings for training
    parser = argparse.ArgumentParser(
        description='train.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    opt = parser.parse_args()
    opt = process_opt(opt)
    opt.input_feeding = False
    opt.copy_input_feeding = False

    logging = config.init_logging(logger_name=None, log_file=opt.exp_path + '/output.log', stdout=True)
    logging.info('Parameters:')
    [logging.info('%s : %s' % (k, str(v))) for k, v in opt.__dict__.items()]

    try:
        train_data_loader, valid_data_loader, test_data_loader, word2id, id2word, vocab = load_data_vocab(opt)
        model = init_model(opt)
        optimizer_ml, optimizer_rl, criterion = init_optimizer_criterion(model, opt)
        train_model(model, optimizer_ml, optimizer_rl, criterion, train_data_loader, valid_data_loader, test_data_loader, opt)
    except Exception as e:
        logging.exception("message")
def main():
    # load settings for training
    parser = argparse.ArgumentParser(
        description='train.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    config.transformer_opts(parser)
    opt = parser.parse_args()
    opt = process_opt(opt)
    opt.input_feeding = False
    opt.copy_input_feeding = False

    logging = config.init_logging(logger_name=None, log_file=opt.exp_path + '/output.log', stdout=True)

    try:
        # print(opt.bidirectional)
        # exit(0)
        # opt.train_from = 'model/kp20k.ml.copy.uni-directional.20180817-021054/kp20k.ml.copy.uni-directional.epoch=6.batch=6735.total_batch=57300.model'
        train_data_loader, word2id, id2word, vocab, eval_dataloader = load_data_vocab(opt)
        model = init_model(opt)
        optimizer_ml, _, criterion = init_optimizer_criterion(model, opt)
        train_model(model, optimizer_ml, _, criterion, train_data_loader, opt, eval_dataloader)
    except Exception as e:
        logging.exception("message")
def main():
    config.init_logging()
    global INTERVAL
    global algalon_client
    global session

    engine = db.create_engine(config.SQLALCHEMY_DATABASE_URI)
    Base.metadata.bind = engine
    Base.metadata.create_all()
    session = scoped_session(sessionmaker(bind=engine))()

    INTERVAL = config.POLL_INTERVAL
    if config.INFLUXDB and config.INFLUXDB['host']:
        stats.db.init(**config.INFLUXDB)
    if config.ALGALON and config.ALGALON['dsn']:
        algalon_client = AlgalonClient(**config.ALGALON)
    run()
def main():
    # load settings for training
    parser = argparse.ArgumentParser(
        description='predict.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    config.transformer_opts(parser)
    opt = parser.parse_args()

    if opt.seed > 0:
        torch.manual_seed(opt.seed)
    # print(opt.gpuid)
    if torch.cuda.is_available() and not opt.gpuid:
        opt.gpuid = 0

    opt.exp = 'predict.' + opt.exp
    if hasattr(opt, 'copy_model') and opt.copy_model:
        opt.exp += '.copy'
    if hasattr(opt, 'bidirectional'):
        if opt.bidirectional:
            opt.exp += '.bi-directional'
        else:
            opt.exp += '.uni-directional'

    # fill time into the name
    if opt.exp_path.find('%s') > 0:
        opt.exp_path = opt.exp_path % (opt.exp, opt.timemark)
        opt.pred_path = opt.pred_path % (opt.exp, opt.timemark)

    if not os.path.exists(opt.exp_path):
        os.makedirs(opt.exp_path)
    if not os.path.exists(opt.pred_path):
        os.makedirs(opt.pred_path)

    logging = config.init_logging(logger_name=None, log_file=opt.exp_path + '/output.log', stdout=True)

    try:
        opt.train_from = 'model/kp20k.ml.copy.bi-directional.20180908-054257/kp20k.ml.copy.bi-directional.epoch=9.batch=2932.model'
        test_data_loader, word2id, id2word, vocab = load_data_vocab(opt, load_train=False)
        model = init_model(opt)
        generator = SequenceGenerator(model, opt,
                                      eos_id=opt.word2id[pykp.io.EOS_WORD],
                                      beam_size=opt.beam_size,
                                      max_sequence_length=opt.max_sent_length)
        evaluate_beam_search(generator, test_data_loader, opt, title='predict',
                             save_path=opt.pred_path + '/[epoch=%d,batch=%d,total_batch=%d]test_result.csv' % (0, 0, 0))
    except Exception as e:
        logging.exception("message")
def setup(in_service=False):
    config.init(sys.argv[1:])
    config.init_logging()
    sys.excepthook = exceptionLogger
    port = config.getPort()
    httpd = httpserver.TivoHTTPServer(("", int(port)), httpserver.TivoHTTPHandler)
    logger = logging.getLogger("pyTivo")

    for section, settings in config.getShares():
        httpd.add_container(section, settings)

        # Precaching of files: does a recursive list of base path
        if settings.get("precache", "False").lower() == "true":
            plugin = GetPlugin(settings.get("type"))
            if hasattr(plugin, "pre_cache"):
                logger.info("Pre-caching the " + section + " share.")
                pre_cache_filter = getattr(plugin, "pre_cache")

                def build_recursive_list(path):
                    try:
                        for f in os.listdir(path):
                            f = os.path.join(path, f)
                            if os.path.isdir(f):
                                build_recursive_list(f)
                            else:
                                pre_cache_filter(f)
                    except:
                        pass

                build_recursive_list(settings.get("path"))

    b = beacon.Beacon()
    b.add_service("TiVoMediaServer:%s/http" % port)
    b.start()
    if "listen" in config.getBeaconAddresses():
        b.listen()

    httpd.set_beacon(b)
    httpd.set_service_status(in_service)

    logger.info("pyTivo is ready.")
    return httpd
def main():
    conf = config.load('config.yaml' if len(sys.argv) == 1 else sys.argv[1])
    config.init_logging(conf)
    global INTERVAL, _send_to_influxdb
    global algalon_client
    global session

    engine = db.create_engine(config.mysql_uri(conf))
    Base.metadata.bind = engine
    Base.metadata.create_all()
    session = scoped_session(sessionmaker(bind=engine))()

    INTERVAL = int(conf.get('interval', INTERVAL))
    if 'influxdb' in conf:
        stats.db.init(**conf['influxdb'])
    else:
        _send_to_influxdb = lambda _: None
    if 'algalon' in conf:
        algalon_client = AlgalonClient(**conf['algalon'])
    run()
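A hypothetical sketch of the shape the parsed config.yaml takes for the two loaders above. The key names come from this excerpt; the values and nested kwargs are placeholders (the keys consumed by config.mysql_uri are not shown here, so none are guessed):

conf = {
    'debug': 1,                                        # read by init_app()
    'interval': 10,                                    # poll interval; units assumed to be seconds
    'influxdb': {'host': '127.0.0.1', 'port': 8086},   # unpacked into stats.db.init(**...)
    'algalon': {},                                     # unpacked into AlgalonClient(**...)
}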
def main():
    opt = config.init_opt(description='predict_keyphrase.py')
    logger = config.init_logging('predict_keyphrase', opt.exp_path + '/output.log', redirect_to_stdout=False)

    logger.info('EXP_PATH : ' + opt.exp_path)
    logger.info('Parameters:')
    [logger.info('%s : %s' % (k, str(v))) for k, v in opt.__dict__.items()]

    logger.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available():
        if isinstance(opt.device_ids, int):
            opt.device_ids = [opt.device_ids]
        logger.info('Running on %s! devices=%s' % ('MULTIPLE GPUs' if len(opt.device_ids) > 1 else '1 GPU', str(opt.device_ids)))
    else:
        logger.info('Running on CPU!')

    try:
        one2one, one2many = generate_dataset()
        test_data_loaders, word2id, id2word, vocab = load_vocab_and_testsets(opt, one2one, one2many)
        model = init_model(opt)
        generator = SequenceGenerator(model,
                                      eos_id=opt.word2id[pykp.io.EOS_WORD],
                                      beam_size=opt.beam_size,
                                      max_sequence_length=opt.max_sent_length)
        for testset_name, test_data_loader in zip(['kp20k'], test_data_loaders):
            logger.info('Evaluating %s' % testset_name)
            output = predict_beam_search(generator, test_data_loader, opt,
                                         title='test_%s' % testset_name,
                                         predict_save_path=None)  # opt.pred_path + '/%s_test_result/' % (testset_name)
            print(output)
    except Exception as e:
        logger.error(e, exc_info=True)
def main():
    # load settings for training
    opt = init_opt(description='train.py')
    logging = init_logging(logger_name='train.py', log_file=opt.log_file, redirect_to_stdout=False)

    logging.info('EXP_PATH : ' + opt.exp_path)
    logging.info('Parameters:')
    [logging.info('%s : %s' % (k, str(v))) for k, v in opt.__dict__.items()]

    logging.info('====================== Checking GPU Availability =========================')
    # was `logger.info(...)`, an undefined name in this function
    logging.info("torch.cuda.is_available()={}".format(torch.cuda.is_available()))
    if torch.cuda.is_available():
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logging.info('Running on %s! devices=%s' % ('MULTIPLE GPUs' if len(opt.gpuid) > 1 else '1 GPU', str(opt.gpuid)))
    else:
        logging.info('Running on CPU!')

    try:
        train_data_loader, valid_data_loader, _, _, _, _ = load_data_vocab_for_training(opt)
        # ignore the previous test_data_loader
        valid_data_loaders, _, _, _ = load_vocab_and_datasets_for_testing(
            dataset_names=opt.test_dataset_names, type='valid', opt=opt)
        test_data_loaders, _, _, _ = load_vocab_and_datasets_for_testing(
            dataset_names=opt.test_dataset_names, type='test', opt=opt)
        model = init_model(opt)
        optimizer_ml, optimizer_rl, criterion = init_optimizer_criterion(model, opt)
        train_model(model, optimizer_ml, optimizer_rl, criterion,
                    train_data_loader, valid_data_loaders, test_data_loaders, opt)
    except Exception as e:
        logging.error(e, exc_info=True)
        raise
def main():
    # load settings for training
    parser = argparse.ArgumentParser(
        description='train.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    config.transformer_opts(parser)
    opt = parser.parse_args()
    opt = process_opt(opt)
    opt.input_feeding = False
    opt.copy_input_feeding = False

    logging = config.init_logging(logger_name=None, log_file=opt.exp_path + '/output.log', stdout=True)

    try:
        # opt.train_from = 'model/kp20k.ml.copy.bi-directional.20180901-025437/kp20k.ml.copy.bi-directional.epoch=9.batch=938.model'
        train_data_loader, word2id, id2word, vocab, eval_dataloader = load_data_vocab(opt)
        model = init_model(opt)
        # embedding = make_embedding(word2id, id2word)
        embedding = torch.load('embedding50004.pt')
        model.init_embedding(embedding)
        opt.learning_rate = 0.001
        optimizer_ml, criterion = init_optimizer_criterion(model, opt)
        train_model(model, optimizer_ml, criterion, train_data_loader, opt, eval_dataloader)
    except Exception as e:
        logging.exception("message")
parser.add_argument('-dh', '--dbhost', default='127.0.0.1', help='Set the hostname')
parser.add_argument('-p', '--dbport', type=int, default=27017, help='Set the db port')
parser.add_argument('-rs', '--rabbitserver', default='localhost', help='RabbitMQ Server name')
parser.add_argument('-ru', '--rabbituser', default='guest', help='RabbitMQ user name')
parser.add_argument('-pwd', '--rabbitpwd', default='guest', help='RabbitMQ password')
parser.add_argument('-ll', '--loglevel', default='INFO',
                    choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Logging level')
parser.add_argument('-log', '--logfile', default=None, help='Set log file')
parser.add_argument('-thd', '--threads', type=int, default=1, help='Number of processes to spawn. Default=1')
parser.add_argument('-exp', '--expiration', type=int, default=12, help='Hours until the content training expires')
parser.add_argument('-noauto', '--noauto', default=False, help='Disable automatic training')
parser.add_argument('-maxmem', '--maxmem', type=int, default=200, help='Maximum memory size (in MB) before flushing cache')
parser.add_argument('-nocache', '--nocache', default=False, help='Do not employ a cache to save training')
parser.parse_args(namespace=args)

config.init_logging(log_level=args.loglevel, logfile=args.logfile)

print('Starting Meshfire Content Recommender')
print(' courtesy of scikit learn')
print(' options:')
print(' database host: %s' % args.dbhost)
print(' database port: %d' % args.dbport)
print(' database name: %s' % args.dbname)
print(' rabbit host: %s' % args.rabbitserver)
print(' rabbit user: %s' % args.rabbituser)
print(' rabbit pwd: *********')
print(' recommender training expiration time: %d hours' % args.expiration)
print(' logging level: %s' % args.loglevel)
print(' logging file: %s' % args.logfile)
print(' threads: %d' % args.threads)
print(' Max memory (MB) allowed: %d' % args.maxmem)
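The parse_args(namespace=args) call above fills an existing module-level object in place instead of returning a fresh Namespace, which is how later code can read args.dbhost without an assignment. A self-contained sketch of that argparse feature; the names here are illustrative:

import argparse

class Args(object):
    pass

args = Args()
parser = argparse.ArgumentParser()
parser.add_argument('-dh', '--dbhost', default='127.0.0.1')
# parse_args writes attributes onto the supplied namespace object
parser.parse_args(['-dh', 'db.local'], namespace=args)
print(args.dbhost)  # -> db.local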
def evaluate_beam_search(generator, data_loader, opt, title='', epoch=1, predict_save_path=None):
    logger = config.init_logging(title, predict_save_path + '/%s.log' % title, redirect_to_stdout=False)
    progbar = Progbar(logger=logger, title=title, target=len(data_loader),
                      batch_size=data_loader.batch_size,
                      total_examples=len(data_loader.dataset))

    topk_range = [5, 10]
    score_names = ['precision', 'recall', 'f_score']

    example_idx = 0
    score_dict = {}  # {'precision@5':[],'recall@5':[],'f1score@5':[], 'precision@10':[],'recall@10':[],'f1score@10':[]}

    for i, batch in enumerate(data_loader):
        if i > 5:
            break
        one2many_batch, one2one_batch = batch
        src_list, src_len, trg_list, _, trg_copy_target_list, src_oov_map_list, oov_list, src_str_list, trg_str_list = one2many_batch

        if torch.cuda.is_available():
            src_list = src_list.cuda()
            src_oov_map_list = src_oov_map_list.cuda()

        print("batch size - %s" % str(src_list.size(0)))
        print("src size - %s" % str(src_list.size()))
        print("target size - %s" % len(trg_copy_target_list))

        try:
            pred_seq_list = generator.beam_search(src_list, src_len, src_oov_map_list, oov_list, opt.word2id)
        except RuntimeError as re:
            logging.exception('Encountered OOM RuntimeError, now trying to predict one by one')
            raise re

        ''' process each example in current batch '''
        for src, src_str, trg, trg_str_seqs, trg_copy, pred_seq, oov in zip(
                src_list, src_str_list, trg_list, trg_str_list, trg_copy_target_list, pred_seq_list, oov_list):
            logger.info('====================== %d =========================' % (i))
            print_out = ''
            print_out += '[Source][%d]: %s \n' % (len(src_str), ' '.join(src_str))
            src = src.cpu().data.numpy() if torch.cuda.is_available() else src.data.numpy()
            print_out += '\nSource Input: \n %s\n' % (' '.join([opt.id2word[x] for x in src[:len(src_str) + 5]]))
            print_out += 'Real Target String [%d] \n\t\t%s \n' % (len(trg_str_seqs), trg_str_seqs)
            print_out += 'Real Target Input: \n\t\t%s \n' % str([[opt.id2word[x] for x in t] for t in trg])
            print_out += 'Real Target Copy: \n\t\t%s \n' % str(
                [[opt.id2word[x] if x < opt.vocab_size else oov[x - opt.vocab_size] for x in t] for t in trg_copy])

            trg_str_is_present_flags, _ = if_present_duplicate_phrases(src_str, trg_str_seqs)

            # ignore the cases that there's no present phrases
            if opt.must_appear_in_src and np.sum(trg_str_is_present_flags) == 0:
                logger.error('found no present targets')
                continue

            print_out += '[GROUND-TRUTH] #(present)/#(all targets)=%d/%d\n' % (
                sum(trg_str_is_present_flags), len(trg_str_is_present_flags))
            print_out += '\n'.join(['\t\t[%s]' % ' '.join(phrase) if is_present else '\t\t%s' % ' '.join(phrase)
                                    for phrase, is_present in zip(trg_str_seqs, trg_str_is_present_flags)])
            print_out += '\noov_list: \n\t\t%s \n' % str(oov)

            # 1st filtering
            pred_is_valid_flags, processed_pred_seqs, processed_pred_str_seqs, processed_pred_score = process_predseqs(
                pred_seq, oov, opt.id2word, opt)

            # 2nd filtering: if filter out phrases that don't appear in text, and keep unique ones after stemming
            if opt.must_appear_in_src:
                pred_is_present_flags, _ = if_present_duplicate_phrases(src_str, processed_pred_str_seqs)
                filtered_trg_str_seqs = np.asarray(trg_str_seqs)[trg_str_is_present_flags]
            else:
                pred_is_present_flags = [True] * len(processed_pred_str_seqs)
                filtered_trg_str_seqs = trg_str_seqs  # no presence filtering: keep all targets
            valid_and_present = np.asarray(pred_is_valid_flags) * np.asarray(pred_is_present_flags)

            match_list = get_match_result(true_seqs=filtered_trg_str_seqs, pred_seqs=processed_pred_str_seqs)
            print_out += '[PREDICTION] #(valid)=%d, #(present)=%d, #(retained&present)=%d, #(all)=%d\n' % (
                sum(pred_is_valid_flags), sum(pred_is_present_flags), sum(valid_and_present), len(pred_seq))

            ''' Iterate every prediction, print and export predictions '''
            preds_out = ''
            for p_id, (seq, word, score, match, is_valid, is_present) in enumerate(
                    zip(processed_pred_seqs, processed_pred_str_seqs, processed_pred_score,
                        match_list, pred_is_valid_flags, pred_is_present_flags)):
                preds_out += '%s\n' % (' '.join(word))
                if is_present:
                    print_phrase = '[%s]' % ' '.join(word)
                else:
                    print_phrase = ' '.join(word)
                if is_valid:
                    print_phrase = '*%s' % print_phrase
                if match == 1.0:
                    correct_str = '[correct!]'
                else:
                    correct_str = ''
                if any([t >= opt.vocab_size for t in seq.sentence]):
                    copy_str = '[copied!]'
                else:
                    copy_str = ''
                print_out += '\t\t[%.4f]\t%s \t %s %s%s\n' % (-score, print_phrase, str(seq.sentence), correct_str, copy_str)

            ''' Evaluate predictions w.r.t different filterings and metrics '''
            processed_pred_seqs = np.asarray(processed_pred_seqs)[valid_and_present]
            filtered_processed_pred_str_seqs = np.asarray(processed_pred_str_seqs)[valid_and_present]
            filtered_processed_pred_score = np.asarray(processed_pred_score)[valid_and_present]

            # 3rd round filtering (one-word phrases)
            num_oneword_seq = -1
            filtered_pred_seq, filtered_pred_str_seqs, filtered_pred_score = post_process_predseqs(
                (processed_pred_seqs, filtered_processed_pred_str_seqs, filtered_processed_pred_score), num_oneword_seq)

            match_list_exact = get_match_result(true_seqs=filtered_trg_str_seqs, pred_seqs=filtered_pred_str_seqs, type='exact')
            match_list_soft = get_match_result(true_seqs=filtered_trg_str_seqs, pred_seqs=filtered_pred_str_seqs, type='partial')
            assert len(filtered_pred_seq) == len(filtered_pred_str_seqs) == len(filtered_pred_score) \
                == len(match_list_exact) == len(match_list_soft)

            print_out += "\n ======================================================="
            print_pred_str_seqs = [" ".join(item) for item in filtered_pred_str_seqs]
            print_trg_str_seqs = [" ".join(item) for item in filtered_trg_str_seqs]
            # print_out += "\n PREDICTION: " + " / ".join(print_pred_str_seqs)
            # print_out += "\n GROUND TRUTH: " + " / ".join(print_trg_str_seqs)

            for topk in topk_range:
                results_exact = evaluate(match_list_exact, filtered_pred_str_seqs, filtered_trg_str_seqs, topk=topk)
                for k, v in zip(score_names, results_exact):
                    if '%s@%d_exact' % (k, topk) not in score_dict:
                        score_dict['%s@%d_exact' % (k, topk)] = []
                    score_dict['%s@%d_exact' % (k, topk)].append(v)
                print_out += "\n ------------------------------------------------- EXACT, k=%d" % (topk)
                print_out += "\n --- batch precision, recall, fscore: " + \
                    str(results_exact[0]) + " , " + str(results_exact[1]) + " , " + str(results_exact[2])
                print_out += "\n --- total precision, recall, fscore: " + \
                    str(np.average(score_dict['precision@%d_exact' % (topk)])) + " , " + \
                    str(np.average(score_dict['recall@%d_exact' % (topk)])) + " , " + \
                    str(np.average(score_dict['f_score@%d_exact' % (topk)]))

            for topk in topk_range:
                results_soft = evaluate(match_list_soft, filtered_pred_str_seqs, filtered_trg_str_seqs, topk=topk)
                for k, v in zip(score_names, results_soft):
                    if '%s@%d_soft' % (k, topk) not in score_dict:
                        score_dict['%s@%d_soft' % (k, topk)] = []
                    score_dict['%s@%d_soft' % (k, topk)].append(v)
                print_out += "\n ------------------------------------------------- SOFT, k=%d" % (topk)
                print_out += "\n --- batch precision, recall, fscore: " + \
                    str(results_soft[0]) + " , " + str(results_soft[1]) + " , " + str(results_soft[2])
                print_out += "\n --- total precision, recall, fscore: " + \
                    str(np.average(score_dict['precision@%d_soft' % (topk)])) + " , " + \
                    str(np.average(score_dict['recall@%d_soft' % (topk)])) + " , " + \
                    str(np.average(score_dict['f_score@%d_soft' % (topk)]))

            print_out += "\n ======================================================="
            logger.info(print_out)

            ''' write predictions to disk '''
            if predict_save_path:
                if not os.path.exists(os.path.join(predict_save_path, title + '_detail')):
                    os.makedirs(os.path.join(predict_save_path, title + '_detail'))
                # write print-out
                with open(os.path.join(predict_save_path, title + '_detail', str(example_idx) + '_print.txt'), 'w') as f_:
                    f_.write(print_out)
                # write original predictions
                with open(os.path.join(predict_save_path, title + '_detail', str(example_idx) + '_prediction.txt'), 'w') as f_:
                    f_.write(preds_out)

                out_dict = {}
                out_dict['src_str'] = src_str
                out_dict['trg_str'] = trg_str_seqs
                out_dict['trg_present_flag'] = trg_str_is_present_flags
                out_dict['pred_str'] = processed_pred_str_seqs
                out_dict['pred_score'] = [float(s) for s in processed_pred_score]
                out_dict['present_flag'] = pred_is_present_flags
                out_dict['valid_flag'] = pred_is_valid_flags
                out_dict['match_flag'] = [float(m) for m in match_list]

                for k, v in out_dict.items():
                    out_dict[k] = list(v)

                assert len(out_dict['trg_str']) == len(out_dict['trg_present_flag'])
                assert len(out_dict['pred_str']) == len(out_dict['present_flag']) \
                    == len(out_dict['valid_flag']) == len(out_dict['match_flag']) == len(out_dict['pred_score'])

                with open(os.path.join(predict_save_path, title + '_detail', str(example_idx) + '.json'), 'w') as f_:
                    f_.write(json.dumps(out_dict))

            progbar.update(epoch, example_idx, [
                ('f_score@5_exact', np.average(score_dict['f_score@5_exact'])),
                ('f_score@5_soft', np.average(score_dict['f_score@5_soft'])),
                ('f_score@10_exact', np.average(score_dict['f_score@10_exact'])),
                ('f_score@10_soft', np.average(score_dict['f_score@10_soft']))])

            example_idx += 1

    logger.info('#(f_score@5_exact)=%d, sum=%f' % (len(score_dict['f_score@5_exact']), sum(score_dict['f_score@5_exact'])))
    logger.info('#(f_score@10_exact)=%d, sum=%f' % (len(score_dict['f_score@10_exact']), sum(score_dict['f_score@10_exact'])))

    # Write score summary to disk. Each row is scores (precision, recall and f-score)
    if predict_save_path:
        with open(predict_save_path + os.path.sep + title + '_result.csv', 'w') as result_csv:
            csv_lines = []
            for mode in ["exact", "soft"]:
                for topk in topk_range:
                    csv_line = ""
                    for k in score_names:
                        csv_line += ',%f' % np.average(score_dict['%s@%d_%s' % (k, topk, mode)])
                    csv_lines.append(csv_line + '\n')
            result_csv.writelines(csv_lines)

    for k, v in score_dict.items():
        logger.info('#(%s) = %d' % (k, len(v)))

    return score_dict
def evaluate_beam_search(generator, data_loader, opt, title='', epoch=1, save_path=None):
    logging = config.init_logging(title, save_path + '/%s.log' % title)
    progbar = Progbar(logger=logging, title=title, target=len(data_loader) / opt.beam_batch,
                      batch_size=opt.beam_batch,
                      total_examples=len(data_loader) / opt.beam_batch)

    example_idx = 0
    score_dict = {}  # {'precision@5':[],'recall@5':[],'f1score@5':[], 'precision@10':[],'recall@10':[],'f1score@10':[]}

    if opt.report_file:
        f = open(opt.report_file, 'w')

    for i, batch in enumerate(data_loader):
        one2many_batch = batch
        src_list, src_len, trg_list, _, trg_copy_target_list, src_oov_map_list, \
            oov_list, src_str_list, trg_str_list = one2many_batch

        if torch.cuda.is_available() and opt.use_gpu:
            src_list = src_list.cuda()
            src_oov_map_list = src_oov_map_list.cuda()

        pred_seq_list = generator.beam_search(src_list, src_len, src_oov_map_list, oov_list, opt.word2id)

        ''' process each example in current batch '''
        for src, src_str, trg, trg_str_seqs, trg_copy, pred_seq, oov \
                in zip(src_list, src_str_list, trg_list, trg_str_list, trg_copy_target_list, pred_seq_list, oov_list):
            trg_str_is_present = if_present_duplicate_phrase(src_str, trg_str_seqs)  # TODO

            # 1st filtering
            pred_is_valid, processed_pred_seqs, processed_pred_str_seqs, \
                processed_pred_score = process_predseqs(pred_seq, oov, opt.id2word, opt)

            # 2nd filtering: if filter out phrases that don't appear in text, and keep unique ones after stemming
            if opt.must_appear_in_src is True:
                pred_is_present = if_present_duplicate_phrase(src_str, processed_pred_str_seqs)
                trg_str_seqs = np.asarray(trg_str_seqs)[trg_str_is_present]
            else:
                pred_is_present = [True] * len(processed_pred_str_seqs)

            valid_and_present = np.asarray(pred_is_valid) * np.asarray(pred_is_present)
            match_list = get_match_result(true_seqs=trg_str_seqs, pred_seqs=processed_pred_str_seqs)

            ''' Evaluate predictions w.r.t different filterings and metrics '''
            num_oneword_range = [-1, 1]
            topk_range = [5, 10]
            score_names = ['precision', 'recall', 'f_score']

            processed_pred_seqs = np.asarray(processed_pred_seqs)[valid_and_present]
            processed_pred_str_seqs = np.asarray(processed_pred_str_seqs)[valid_and_present]
            processed_pred_score = np.asarray(processed_pred_score)[valid_and_present]

            try:
                data = {}
                data['src_str'] = src_str
                data['trg_str_seqs'] = trg_str_seqs
                data['pred'] = list(processed_pred_str_seqs)
                f.write(json.dumps(data) + '\n')
            except:
                pass

            for num_oneword_seq in num_oneword_range:
                # 3rd round filtering (one-word phrases)
                filtered_pred_seq, filtered_pred_str_seqs, filtered_pred_score = \
                    post_process_predseqs((processed_pred_seqs, processed_pred_str_seqs, processed_pred_score), num_oneword_seq)
                match_list = get_match_result(true_seqs=trg_str_seqs, pred_seqs=filtered_pred_str_seqs)
                assert len(filtered_pred_seq) == len(filtered_pred_str_seqs) == len(filtered_pred_score) == len(match_list)

                for topk in topk_range:
                    results = evaluate(match_list, filtered_pred_seq, trg_str_seqs, topk=topk)
                    for k, v in zip(score_names, results):
                        if '%s@%d#oneword=%d' % (k, topk, num_oneword_seq) not in score_dict:
                            score_dict['%s@%d#oneword=%d' % (k, topk, num_oneword_seq)] = []
                        score_dict['%s@%d#oneword=%d' % (k, topk, num_oneword_seq)].append(v)

            if example_idx % 10 == 0:
                print('#(precision@5#oneword=-1)=%d, avg=%f' % (
                    len(score_dict['precision@5#oneword=-1']), np.average(score_dict['precision@5#oneword=-1'])))
                print('#(precision@10#oneword=-1)=%d, avg=%f' % (
                    len(score_dict['precision@10#oneword=-1']), np.average(score_dict['precision@10#oneword=-1'])))
                print('#(recall@5#oneword=-1)=%d, avg=%f' % (
                    len(score_dict['recall@5#oneword=-1']), np.average(score_dict['recall@5#oneword=-1'])))
                print('#(recall@10#oneword=-1)=%d, avg=%f' % (
                    len(score_dict['recall@10#oneword=-1']), np.average(score_dict['recall@10#oneword=-1'])))
                x, y = np.average(score_dict['f_score@5#oneword=-1']), np.average(score_dict['f_score@10#oneword=-1'])
                print('#(f_score@5#oneword=-1)=%d, avg=%f' % (len(score_dict['f_score@5#oneword=-1']), x))
                print('#(f_score@10#oneword=-1)=%d, avg=%f' % (len(score_dict['f_score@10#oneword=-1']), y))
                progbar.update(epoch, example_idx, [('f_score@5#oneword=-1', x), ('f_score@10#oneword=-1', y)])
                print('*' * 50)

            example_idx += 1

    print('#(f_score@5#oneword=-1)=%d, avg=%f' % (
        len(score_dict['f_score@5#oneword=-1']), np.average(score_dict['f_score@5#oneword=-1'])))
    print('#(f_score@10#oneword=-1)=%d, avg=%f' % (
        len(score_dict['f_score@10#oneword=-1']), np.average(score_dict['f_score@10#oneword=-1'])))

    if save_path:
        # export scores. Each row is scores (precision, recall and f-score) of different
        # ways of filtering predictions (how many one-word predictions to keep)
        with open(save_path + os.path.sep + title + '_result.csv', 'w') as result_csv:
            csv_lines = []
            for num_oneword_seq in num_oneword_range:
                for topk in topk_range:
                    csv_line = '#oneword=%d,@%d' % (num_oneword_seq, topk)
                    for k in score_names:
                        csv_line += ',%f' % np.average(score_dict['%s@%d#oneword=%d' % (k, topk, num_oneword_seq)])
                    csv_lines.append(csv_line + '\n')
            result_csv.writelines(csv_lines)

    return score_dict
    app.register_blueprint(geo_app, url_prefix='/geo')
    geo_app.init_app(app)

    app.logger.setLevel(Config.LOG_LEVEL)
    app.config.dba = PostgreSQLEngine(**app.config)
    return app


if __name__ == "__main__":
    from werkzeug.serving import run_simple

    build_parser('server.conf').parse_args(namespace=args)
    init_logging(args, flask.logging.default_handler, logging.FileHandler('server.log'))
    Config.LOG_LEVEL = args.log_level
    Config.CONFIG_FILE = args.config_file
    app = create_app(Config)

    @app.after_request
    def add_headers(response):
        origin = request.headers.get('Origin')
        if origin and (origin == app.config['CORS_ALLOW_ORIGIN'] or origin.startswith('http://localhost')):
            response.headers['Access-Control-Allow-Origin'] = origin
            response.headers['Access-Control-Allow-Credentials'] = 'true'
            response.headers['Access-Control-Allow-Headers'] = 'Content-Type'  # allow application/json
def main():
    # load settings for training
    parser = argparse.ArgumentParser(
        description='predict.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    opt = parser.parse_args()

    if opt.seed > 0:
        torch.manual_seed(opt.seed)

    print(opt.gpuid)
    if torch.cuda.is_available() and not opt.gpuid:
        opt.gpuid = 0

    opt.exp = 'predict.' + opt.exp
    if hasattr(opt, 'copy_model') and opt.copy_model:
        opt.exp += '.copy'
    if hasattr(opt, 'bidirectional'):
        if opt.bidirectional:
            opt.exp += '.bi-directional'
        else:
            opt.exp += '.uni-directional'

    # fill time into the name
    if opt.exp_path.find('%s') > 0:
        opt.exp_path = opt.exp_path % (opt.exp, opt.timemark)
        opt.pred_path = opt.pred_path % (opt.exp, opt.timemark)

    if not os.path.exists(opt.exp_path):
        os.makedirs(opt.exp_path)
    if not os.path.exists(opt.pred_path):
        os.makedirs(opt.pred_path)

    logging = config.init_logging('train', opt.exp_path + '/output.log')
    logging.info('Parameters:')
    [logging.info('%s : %s' % (k, str(v))) for k, v in opt.__dict__.items()]

    try:
        train_data_loader, valid_data_loader, test_data_loader, word2id, id2word, vocab = load_data_vocab(opt, load_train=False)
        model = init_model(opt)
        # optimizer, criterion = init_optimizer_criterion(model, opt)
        generator = SequenceGenerator(model,
                                      eos_id=opt.word2id[pykp.io.EOS_WORD],
                                      beam_size=opt.beam_size,
                                      max_sequence_length=opt.max_sent_length)
        # import time
        # start_time = time.time()
        evaluate_beam_search(generator, test_data_loader, opt, title='predict',
                             save_path=opt.pred_path + '/[epoch=%d,batch=%d,total_batch=%d]test_result.csv' % (0, 0, 0))
        # print("--- %s seconds --- Complete Beam Search" % (time.time() - start_time))
        # predict_greedy(model, test_data_loader, test_examples, opt)
    except Exception as e:
        logging.exception("message")
parser = argparse.ArgumentParser(
    description='train.py',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
config.my_own_opts(parser)
config.vocab_opts(parser)
config.model_opts(parser)
config.train_opts(parser)
opt = parser.parse_args()
opt = process_opt(opt)
opt.input_feeding = False
opt.copy_input_feeding = False

if torch.cuda.is_available():
    if not opt.gpuid:
        opt.gpuid = 0
    opt.device = torch.device("cuda:%d" % opt.gpuid)
else:
    opt.device = torch.device("cpu")
    opt.gpuid = -1
    print("CUDA is not available, fall back to CPU.")

logging = config.init_logging(log_file=opt.model_path + '/output.log', stdout=True)
logging.info('Parameters:')
[logging.info('%s : %s' % (k, str(v))) for k, v in opt.__dict__.items()]

main(opt)
import config

# from utils import init_logging
# init_logging()
config.init_logging()
"""Model used to classify an image as having been triggered by motion """ import logging import os import numpy as np import cv2 import utils import config LOGGER = logging.getLogger(__name__) CONF = config.load_config() config.init_logging() class MotionModel(): def __init__(self): """Initialize the MotionModel class """ self.min_area = CONF['min_area'] self.model = self.load_model() self.person_class = 15 # index of the person class of the pre-trained model def load_model(self): proto_path = os.path.join( config.MODEL_DIR, 'MobileNetSSD_deploy.prototxt.txt') model_path = os.path.join( config.MODEL_DIR, 'MobileNetSSD_deploy.caffemodel') net = cv2.dnn.readNetFromCaffe(proto_path, model_path)
def evaluate_beam_search(generator, data_loader, opt, title='', epoch=1, save_path=None):
    logging = config.init_logging(title, save_path + '/%s.log' % title)
    progbar = Progbar(logger=logging, title=title, target=len(data_loader.dataset.examples),
                      batch_size=data_loader.batch_size,
                      total_examples=len(data_loader.dataset.examples))

    example_idx = 0
    score_dict = {}  # {'precision@5':[],'recall@5':[],'f1score@5':[], 'precision@10':[],'recall@10':[],'f1score@10':[]}

    for i, batch in enumerate(data_loader):
        one2many_batch, one2one_batch = batch
        src_list, src_len, trg_list, _, trg_copy_target_list, src_oov_map_list, oov_list, src_str_list, trg_str_list = one2many_batch

        if torch.cuda.is_available():
            src_list = src_list.cuda()
            src_oov_map_list = src_oov_map_list.cuda()

        print("batch size - %s" % str(src_list.size(0)))

        pred_seq_list = generator.beam_search(src_list, src_len, src_oov_map_list, oov_list, opt.word2id)

        ''' process each example in current batch '''
        for src, src_str, trg, trg_str_seqs, trg_copy, pred_seq, oov in zip(
                src_list, src_str_list, trg_list, trg_str_list, trg_copy_target_list, pred_seq_list, oov_list):
            print_out = ''
            print_out += '[Source][%d]: %s \n' % (len(src_str), ' '.join(src_str))

            trg_str_is_present = if_present_duplicate_phrase(src_str, trg_str_seqs)
            print_out += '[GROUND-TRUTH] #(present)/#(all targets)=%d/%d\n' % (
                sum(trg_str_is_present), len(trg_str_is_present))
            print_out += '\n'.join(['\t\t[%s]' % ' '.join(phrase) if is_present else '\t\t%s' % ' '.join(phrase)
                                    for phrase, is_present in zip(trg_str_seqs, trg_str_is_present)])
            print_out += '\noov_list: \n\t\t%s \n' % str(oov)

            # 1st filtering
            pred_is_valid, processed_pred_seqs, processed_pred_str_seqs, processed_pred_score = process_predseqs(
                pred_seq, oov, opt.id2word, opt)

            # 2nd filtering: if filter out phrases that don't appear in text, and keep unique ones after stemming
            if opt.must_appear_in_src:
                pred_is_present = if_present_duplicate_phrase(src_str, processed_pred_str_seqs)
                trg_str_seqs = np.asarray(trg_str_seqs)[trg_str_is_present]
            else:
                pred_is_present = [True] * len(processed_pred_str_seqs)
            valid_and_present = np.asarray(pred_is_valid) * np.asarray(pred_is_present)

            match_list = get_match_result(true_seqs=trg_str_seqs, pred_seqs=processed_pred_str_seqs)
            print_out += '[PREDICTION] #(valid)=%d, #(present)=%d, #(retained&present)=%d, #(all)=%d\n' % (
                sum(pred_is_valid), sum(pred_is_present), sum(valid_and_present), len(pred_seq))

            ''' Print and export predictions '''
            preds_out = ''
            for p_id, (seq, word, score, match, is_valid, is_present) in enumerate(
                    zip(processed_pred_seqs, processed_pred_str_seqs, processed_pred_score,
                        match_list, pred_is_valid, pred_is_present)):
                preds_out += '%s\n' % (' '.join(word))
                if is_present:
                    print_phrase = '[%s]' % ' '.join(word)
                else:
                    print_phrase = ' '.join(word)
                if is_valid:
                    print_phrase = '*%s' % print_phrase
                if match == 1.0:
                    correct_str = '[correct!]'
                else:
                    correct_str = ''
                if any([t >= opt.vocab_size for t in seq.sentence]):
                    copy_str = '[copied!]'
                else:
                    copy_str = ''
                # print_out += '\t\t[%.4f]\t%s \t %s %s%s\n' % (-score, print_phrase, str(seq.sentence), correct_str, copy_str)

            ''' Evaluate predictions w.r.t different filterings and metrics '''
            num_oneword_range = [-1, 1]
            topk_range = [5, 10]
            score_names = ['precision', 'recall', 'f_score']

            processed_pred_seqs = np.asarray(processed_pred_seqs)[valid_and_present]
            processed_pred_str_seqs = np.asarray(processed_pred_str_seqs)[valid_and_present]
            processed_pred_score = np.asarray(processed_pred_score)[valid_and_present]

            for num_oneword_seq in num_oneword_range:
                # 3rd round filtering (one-word phrases)
                filtered_pred_seq, filtered_pred_str_seqs, filtered_pred_score = post_process_predseqs(
                    (processed_pred_seqs, processed_pred_str_seqs, processed_pred_score), num_oneword_seq)
                match_list = get_match_result(true_seqs=trg_str_seqs, pred_seqs=filtered_pred_str_seqs)
                assert len(filtered_pred_seq) == len(filtered_pred_str_seqs) == len(filtered_pred_score) == len(match_list)

                for topk in topk_range:
                    results = evaluate(match_list, filtered_pred_seq, trg_str_seqs, topk=topk)
                    for k, v in zip(score_names, results):
                        if '%s@%d#oneword=%d' % (k, topk, num_oneword_seq) not in score_dict:
                            score_dict['%s@%d#oneword=%d' % (k, topk, num_oneword_seq)] = []
                        score_dict['%s@%d#oneword=%d' % (k, topk, num_oneword_seq)].append(v)
                        print_out += '\t%s@%d#oneword=%d = %f\n' % (k, topk, num_oneword_seq, v)

            # logging.info(print_out)

            if save_path:
                if not os.path.exists(os.path.join(save_path, title + '_detail')):
                    os.makedirs(os.path.join(save_path, title + '_detail'))
                with open(os.path.join(save_path, title + '_detail', str(example_idx) + '_print.txt'), 'w') as f_:
                    f_.write(print_out)
                with open(os.path.join(save_path, title + '_detail', str(example_idx) + '_prediction.txt'), 'w') as f_:
                    f_.write(preds_out)

            progbar.update(epoch, example_idx, [
                ('f_score@5#oneword=-1', np.average(score_dict['f_score@5#oneword=-1'])),
                ('f_score@10#oneword=-1', np.average(score_dict['f_score@10#oneword=-1']))])

            example_idx += 1

    print('#(f_score@5#oneword=-1)=%d, sum=%f' % (
        len(score_dict['f_score@5#oneword=-1']), sum(score_dict['f_score@5#oneword=-1'])))
    print('#(f_score@10#oneword=-1)=%d, sum=%f' % (
        len(score_dict['f_score@10#oneword=-1']), sum(score_dict['f_score@10#oneword=-1'])))
    print('#(f_score@5#oneword=1)=%d, sum=%f' % (
        len(score_dict['f_score@5#oneword=1']), sum(score_dict['f_score@5#oneword=1'])))
    print('#(f_score@10#oneword=1)=%d, sum=%f' % (
        len(score_dict['f_score@10#oneword=1']), sum(score_dict['f_score@10#oneword=1'])))

    if save_path:
        # export scores. Each row is scores (precision, recall and f-score) of different
        # ways of filtering predictions (how many one-word predictions to keep)
        with open(save_path + os.path.sep + title + '_result.csv', 'w') as result_csv:
            csv_lines = []
            for num_oneword_seq in num_oneword_range:
                for topk in topk_range:
                    csv_line = '#oneword=%d,@%d' % (num_oneword_seq, topk)
                    for k in score_names:
                        csv_line += ',%f' % np.average(score_dict['%s@%d#oneword=%d' % (k, topk, num_oneword_seq)])
                    csv_lines.append(csv_line + '\n')
            result_csv.writelines(csv_lines)

    return score_dict
from config import init_logging

BASE_URL = "http://182.92.81.159"
HEADERS = {"Content-Type": "application/json"}
USER_ID = None

init_logging()
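A hypothetical caller of the constants above; the endpoint path and payload are illustrative, and the requests package is assumed to be available:

import requests

resp = requests.post(BASE_URL + "/api/login",
                     json={"user": "demo"},
                     headers=HEADERS)
print(resp.status_code)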
    opt.gpuid = 0
    # if opt.gpuid:
    #     cuda.set_device(0)

    # fill time into the name
    if opt.exp_path.find('%s') > 0:
        opt.exp_path = opt.exp_path % (opt.exp, opt.timemark)
        opt.save_path = opt.save_path % (opt.exp, opt.timemark)

    if not os.path.exists(opt.exp_path):
        os.makedirs(opt.exp_path)
    if not os.path.exists(opt.save_path):
        os.makedirs(opt.save_path)

    config.init_logging(opt.exp_path + '/output.log')

    logging.info('Parameters:')
    [logging.info('%s : %s' % (k, str(v))) for k, v in opt.__dict__.items()]


@time_usage
def _valid(data_loader, model, criterion, optimizer, epoch, opt, is_train=False):
    progbar = Progbar(title='Validating', target=len(data_loader), batch_size=opt.batch_size,
                      total_examples=len(data_loader.dataset))
    if is_train:
        model.train()
    else:
        model.eval()

    losses = []
    opt.gpuid = 0
    # if opt.gpuid:
    #     cuda.set_device(0)

    # fill time into the name
    if opt.exp_path.find('%s') > 0:
        opt.exp_path = opt.exp_path % (opt.exp, opt.timemark)
        opt.save_path = opt.save_path % (opt.exp, opt.timemark)

    if not os.path.exists(opt.exp_path):
        os.makedirs(opt.exp_path)
    if not os.path.exists(opt.save_path):
        os.makedirs(opt.save_path)

    config.init_logging(opt.exp_path + '/output.log')

    logging.info('Parameters:')
    [logging.info('%s : %s' % (k, str(v))) for k, v in opt.__dict__.items()]


@time_usage
def _valid(data_loader, model, criterion, optimizer, epoch, opt, is_train=False):
    progbar = Progbar(title='Validating', target=len(data_loader),
def main():
    opt = config.init_opt(description='predict.py')
    logger = config.init_logging('predict', opt.exp_path + '/output.log', redirect_to_stdout=False)

    logger.info('EXP_PATH : ' + opt.exp_path)
    logger.info('Parameters:')
    [logger.info('%s : %s' % (k, str(v))) for k, v in opt.__dict__.items()]

    logger.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available():
        if isinstance(opt.device_ids, int):
            opt.device_ids = [opt.device_ids]
        logger.info('Running on %s! devices=%s' % ('MULTIPLE GPUs' if len(opt.device_ids) > 1 else '1 GPU', str(opt.device_ids)))
    else:
        logger.info('Running on CPU!')

    try:
        valid_data_loaders, word2id, id2word, vocab = load_vocab_and_datasets_for_testing(
            dataset_names=opt.test_dataset_names, type='valid', opt=opt)
        test_data_loaders, _, _, _ = load_vocab_and_datasets_for_testing(
            dataset_names=opt.test_dataset_names, type='test', opt=opt)
        opt.word2id = word2id
        opt.id2word = id2word
        opt.vocab = vocab

        model = init_model(opt)
        generator = SequenceGenerator(model,
                                      eos_id=opt.word2id[pykp.io.EOS_WORD],
                                      beam_size=opt.beam_size,
                                      max_sequence_length=opt.max_sent_length)

        valid_score_dict = evaluate_multiple_datasets(generator, valid_data_loaders, opt,
                                                      title='valid', predict_save_path=opt.pred_path)
        test_score_dict = evaluate_multiple_datasets(generator, test_data_loaders, opt,
                                                     title='test', predict_save_path=opt.pred_path)

        # test_data_loaders, word2id, id2word, vocab = load_vocab_and_datasets(opt)
        # for testset_name, test_data_loader in zip(opt.test_dataset_names, test_data_loaders):
        #     logger.info('Evaluating %s' % testset_name)
        #     evaluate_beam_search(generator, test_data_loader, opt,
        #                          title='test_%s' % testset_name,
        #                          predict_save_path=opt.pred_path + '/%s_test_result/' % (testset_name))
    except Exception as e:
        logger.error(e, exc_info=True)
def main():
    # load settings for training
    opt = init_opt(description='train.py')
    opt.useGpu = 1
    opt.encoder_type = 'bert'
    opt.useCLF = True
    opt.data = 'data/kp20k/kp20k'
    opt.vocab = 'data/kp20k/kp20k.vocab.pt'

    if opt.encoder_type == 'transformer':
        opt.batch_size = 32
        opt.d_inner = 2048
        opt.enc_n_layers = 4
        opt.dec_n_layers = 2
        opt.n_head = 8
        opt.d_k = 64
        opt.d_v = 64
        opt.d_model = 512
        opt.word_vec_size = 512
        opt.run_valid_every = 5000000
        opt.save_model_every = 20000
        opt.decode_old = True
        # opt.copy_attention = False
    elif opt.encoder_type == 'bert':
        opt.useOnlyTwo = False
        opt.avgHidden = False
        opt.useZeroDecodeHidden = True
        opt.useSameEmbeding = False
        opt.batch_size = 10
        opt.max_sent_length = 10
        opt.run_valid_every = 40000
        opt.decode_old = False
        opt.beam_search_batch_size = 10
        opt.bert_model = 'bert-base-uncased'
        opt.tokenizer = BertTokenizer.from_pretrained(opt.bert_model)
    else:
        opt.enc_layers = 2
        opt.bidirectional = True
        opt.decode_old = True
        opt.run_valid_every = 2

    opt.onlyTest = False
    if opt.onlyTest:
        opt.train_ml = False
        opt.run_valid_every = 5
        opt.beam_size = 64
        opt.beam_search_batch_size = 128
        # opt.train_from = 'exp/kp20k.ml.copy.20181129-193506/model/kp20k.ml.copy.epoch=1.batch=20000.total_batch=20000.model'
        opt.train_from = 'exp/kp20k.ml.copy.20181128-153121/model/kp20k.ml.copy.epoch=2.batch=15495.total_batch=38000.model'
        # opt.train_from = 'exp/kp20k.ml.copy.20181117-190121/model/kp20k.ml.copy.epoch=3.batch=14172.total_batch=56000.model'

    logging = init_logging(logger_name='train.py', log_file=opt.log_file, redirect_to_stdout=False)

    logging.info('EXP_PATH : ' + opt.exp_path)
    logging.info('Parameters:')
    [logging.info('%s : %s' % (k, str(v))) for k, v in opt.__dict__.items()]

    logging.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available() and opt.useGpu:
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logging.info('Running on %s! devices=%s' % ('MULTIPLE GPUs' if len(opt.gpuid) > 1 else '1 GPU', str(opt.gpuid)))
    else:
        logging.info('Running on CPU!')

    try:
        train_data_loader, valid_data_loader, test_data_loader, word2id, id2word, vocab = load_data_vocab(opt)
        model = init_model(opt)
        if torch.cuda.is_available() and opt.useGpu:
            model.cuda()
        print("model:")
        print(model)
        print()
        optimizer_ml, optimizer_rl, criterion = init_optimizer_criterion(model, opt)
        if torch.cuda.is_available() and opt.useGpu:
            criterion.cuda()
        train_model(model, optimizer_ml, optimizer_rl, criterion,
                    train_data_loader, valid_data_loader, test_data_loader, opt)
    except Exception as e:
        logging.error(e, exc_info=True)
        raise