class ClassBase(object):
    """Common ancestor providing shared utilities to its subclasses.

    Bundles the features every service class needs: a named logger, an AES
    cipher factory and a readable timestamp helper.
    """

    def __init__(self, logger_name, logging_level):
        self._logger = Logger(logger_name, logging_level)

    def log(self, level, text):
        """Forward a message to this instance's logger.

        :param level: String - level of log message
        :param text: String - message itself
        :return: None
        """
        self._logger.log(level, text)

    def get_cipher(self):
        """Build a fresh AESCipher wrapper for AES encryption processing.

        :return: AESCipher class instance
        """
        return AESCipher()

    def get_current_datetime_string(self):
        """Format the current moment as a human-readable timestamp.

        :return: Timestamp string
        """
        return datetime.now().strftime(get_time_formatter())
def MatchFiles(checkerFile, c1File, targetArch, debuggableMode):
    """Run every applicable test case in `checkerFile` against `c1File`.

    Cases targeting a different architecture or debuggable mode are skipped.
    When a pass is missing or a statement fails to match, the whole CFG file
    is dumped at error level and the failure is reported via the Logger.
    """
    for testCase in checkerFile.testCases:
        if testCase.testArch not in [None, targetArch]:
            continue
        if testCase.forDebuggable != debuggableMode:
            continue

        # TODO: Currently does not handle multiple occurrences of the same group
        # name, e.g. when a pass is run multiple times. It will always try to
        # match a check group against the first output group of the same name.
        c1Pass = c1File.findPass(testCase.name)
        if c1Pass is None:
            # BUG FIX: `file()` was removed in Python 3; `open()` is the
            # portable spelling and the `with` guarantees the handle closes.
            with open(c1File.fileName) as cfgFile:
                Logger.log(''.join(cfgFile), Logger.Level.Error)
            Logger.fail("Test case not found in the CFG file",
                        testCase.fileName, testCase.startLineNo, testCase.name)

        Logger.startTest(testCase.name)
        try:
            MatchTestCase(testCase, c1Pass, c1File.instructionSetFeatures)
            Logger.testPassed()
        except MatchFailedException as e:
            lineNo = c1Pass.startLineNo + e.lineNo
            if e.statement.variant == TestStatement.Variant.Not:
                msg = "NOT statement matched line {}"
            else:
                msg = "Statement could not be matched starting from line {}"
            msg = msg.format(lineNo)
            # BUG FIX: same `file()` -> `open()` substitution as above.
            with open(c1File.fileName) as cfgFile:
                Logger.log(''.join(cfgFile), Logger.Level.Error)
            Logger.testFailed(msg, e.statement, e.variables)
class BatchCrawler():
    """Pulls batches of uncrawled documents from the doc-raw store, fetches
    each page and writes the (UTF-8 re-encoded) result back.

    NOTE: uses the `unicode` builtin, so this code targets Python 2.
    """

    # Batch size requested from the adapter per round; a short batch means
    # the backlog is drained and the loop exits.
    MAX_DOCS_NUM = 100

    def __init__(self, database_config_path, source_name, domain, encode, request_interval):
        """
        database_config_path: config handed to the DocRawAdapter.
        source_name: logical source the adapter operates on.
        domain: crawled domain, also used as the logger tag.
        encode: source page encoding; pages are re-encoded to UTF-8.
        request_interval: seconds to sleep between requests (politeness delay).
        """
        self.logger = Logger("crawler", domain)
        self.adapter = DocRawAdapter(database_config_path, source_name, self.logger)
        self.domain = domain
        self.encode = encode
        self.request_interval = request_interval

    def run(self):
        """Crawl until a batch comes back smaller than MAX_DOCS_NUM."""
        while True:
            count = 0
            try:
                for url_hash, url in self.adapter.load_uncrawled_docs(BatchCrawler.MAX_DOCS_NUM):
                    count += 1
                    self.logger.log("crawling url %s" % url, 2)
                    page = common_utils.page_crawl(url)
                    # BUG FIX: identity comparison with None (was `page == None`).
                    if page is None:
                        self.adapter.update_doc_raw_as_crawled_failed(url_hash)
                        continue
                    if self.encode != "utf-8":
                        page = unicode(page, self.encode).encode("utf-8")
                    self.adapter.update_doc_raw_with_crawled_page(url_hash, "utf-8", page)
                    time.sleep(float(self.request_interval))
                if count < BatchCrawler.MAX_DOCS_NUM:
                    break
            except Exception:
                # BUG FIX: a bare `except:` also swallowed KeyboardInterrupt /
                # SystemExit, making the loop impossible to interrupt.
                self.logger.log("mongo error")
class BatchCrawler():
    """Batch crawler: repeatedly loads uncrawled docs, fetches each URL and
    stores the page back as UTF-8.

    NOTE: relies on the Python 2 `unicode` builtin.
    """

    # Maximum documents fetched per adapter call; a smaller batch signals
    # that the backlog is exhausted.
    MAX_DOCS_NUM = 100

    def __init__(self, database_config_path, source_name, domain, encode, request_interval):
        """
        database_config_path: configuration path for the DocRawAdapter.
        source_name: source collection the adapter reads/writes.
        domain: domain being crawled; doubles as the logger tag.
        encode: declared encoding of fetched pages.
        request_interval: seconds slept between consecutive requests.
        """
        self.logger = Logger("crawler", domain)
        self.adapter = DocRawAdapter(database_config_path, source_name, self.logger)
        self.domain = domain
        self.encode = encode
        self.request_interval = request_interval

    def run(self):
        """Crawl batch after batch until one returns fewer than MAX_DOCS_NUM docs."""
        while True:
            count = 0
            try:
                for url_hash, url in self.adapter.load_uncrawled_docs(
                        BatchCrawler.MAX_DOCS_NUM):
                    count += 1
                    self.logger.log("crawling url %s" % url, 2)
                    page = common_utils.page_crawl(url)
                    # BUG FIX: compare to None with `is` (was `page == None`).
                    if page is None:
                        self.adapter.update_doc_raw_as_crawled_failed(url_hash)
                        continue
                    if self.encode != "utf-8":
                        page = unicode(page, self.encode).encode("utf-8")
                    self.adapter.update_doc_raw_with_crawled_page(
                        url_hash, "utf-8", page)
                    time.sleep(float(self.request_interval))
                if count < BatchCrawler.MAX_DOCS_NUM:
                    break
            except Exception:
                # BUG FIX: narrowed from a bare `except:` so that
                # KeyboardInterrupt/SystemExit still terminate the loop.
                self.logger.log("mongo error")
def fetch(config, subreddit):
    """Create the pushshift/crawler/praw loaders for `subreddit` and run them.

    Loaders currently run sequentially in the foreground (threaded mode is a
    TODO). KeyboardInterrupt stops every loader and propagates; any other
    error is logged and swallowed.
    """
    logger = Logger('main', 'fetch', plain=True)
    loaders = []

    try:
        # pushshift
        pushshift = Pushshift(root, config, subreddit)
        loaders.append(pushshift)

        # crawler
        crawler = Crawler(root, config, subreddit)
        loaders.append(crawler)

        # praw
        praw = Praw(root, config, subreddit)
        loaders.append(praw)

        # start loader threads
        background = False  # TODO thread implementation
        for loader in loaders:
            if background:
                loader.start()
            else:
                loader.run()

        # wait until abort
        while background:
            Sleep(1)
    except KeyboardInterrupt:
        for loader in loaders:
            loader.stop(1)
        # BUG FIX: re-raise the caught exception instead of constructing a
        # new KeyboardInterrupt, preserving the original traceback.
        raise
    except Exception as e:
        logger.log(f'...fetch error {repr(e)}')
def match_files(checker_file, c1_file, target_arch, debuggable_mode, print_cfg):
    """Match every applicable checker test case against the parsed C1 file.

    A case is applicable when its architecture is unset or equals
    `target_arch` and its debuggable flag equals `debuggable_mode`. Missing
    passes abort with a dump of the CFG; match failures are reported per
    case, optionally with the CFG dump when `print_cfg` is set.
    """
    for case in checker_file.test_cases:
        # Guard clauses: skip cases that do not apply to this configuration.
        if case.test_arch not in (None, target_arch):
            continue
        if case.for_debuggable != debuggable_mode:
            continue

        # TODO: Currently does not handle multiple occurrences of the same group
        # name, e.g. when a pass is run multiple times. It will always try to
        # match a check group against the first output group of the same name.
        compiler_pass = c1_file.find_pass(case.name)
        if compiler_pass is None:
            with open(c1_file.full_file_name) as cfg_file:
                Logger.log("".join(cfg_file), Logger.Level.ERROR)
            Logger.fail("Test case not found in the CFG file",
                        c1_file.full_file_name, case.start_line_no, case.name)

        Logger.start_test(case.name)
        try:
            match_test_case(case, compiler_pass, c1_file.instruction_set_features)
            Logger.test_passed()
        except MatchFailedException as err:
            failed_line = compiler_pass.start_line_no + err.line_no
            if err.statement.variant == TestStatement.Variant.NOT:
                template = "NOT statement matched line {}"
            else:
                template = "Statement could not be matched starting from line {}"
            if print_cfg:
                with open(c1_file.full_file_name) as cfg_file:
                    Logger.log("".join(cfg_file), Logger.Level.ERROR)
            Logger.test_failed(template.format(failed_line),
                               err.statement, err.variables)
def test(idx, args, T, shared_net, path):
    """Evaluation worker: roll out the shared A3C-LSTM policy and log rewards.

    idx: worker index, used to derive the RNG seed.
    args: parsed command-line arguments (env, seed, cuda, num_timesteps, ...).
    T: shared global timestep counter; the loop runs until it reaches
       args.num_timesteps.
    shared_net: globally shared model whose weights are copied in at the
       start of each real episode.
    path: directory the Logger writes logs/checkpoints to.
    """
    # BUG FIX: the device now honours args.cuda instead of unconditionally
    # requiring a GPU (the seed branch below already checked the flag).
    device = torch.device("cuda" if args.cuda else "cpu")
    torch.manual_seed(args.seed + idx)
    if args.cuda:
        torch.cuda.manual_seed(args.seed + idx)

    env = make_env(args.env,
                   stack_frames=args.stacked_frames,
                   max_episode_steps=args.max_episode_steps,
                   episodic_life=True,
                   reward_clipping=False)
    env.seed(args.seed + idx)

    state = env.reset()
    state_v = torch.from_numpy(state).float().to(device)
    # LSTM hidden/cell state, reset on every real game end.
    hx = torch.zeros(1, 512).to(device)
    cx = torch.zeros(1, 512).to(device)
    info = True  # game is real done, not end of a life (EpisodicLife)

    net = A3C_LSTM(env.observation_space.shape[0], env.action_space.n).to(device)
    net.eval()

    logger = Logger(name="test", path=path, model=shared_net,
                    start_time=time.time(), print_log=True, save_model=True)

    while T.value < args.num_timesteps:
        # Synchronize thread-specific parameters at each real episode start.
        if info:
            net.load_state_dict(shared_net.state_dict())

        # Perform action according to policy (sampled, no gradients needed).
        with torch.no_grad():
            value_v, logit_v, (hx, cx) = net(state_v.unsqueeze(0), (hx, cx))
            prob_v = F.softmax(logit_v, dim=1)
            action_v = torch.multinomial(prob_v, num_samples=1)
            action = int(action_v.item())

        # Receive reward and new state
        state, reward, done, info = env.step(action)
        state_v = torch.from_numpy(state).float().to(device)
        logger.log(T.value, reward, info)

        if done:
            state = env.reset()
            state_v = torch.from_numpy(state).float().to(device)
            hx = torch.zeros(1, 512).to(device)
            cx = torch.zeros(1, 512).to(device)
def DumpPass(outputFilename, passName):
    """Pretty-print the body of compiler pass `passName`, one numbered line
    at a time, or fail if the pass is absent from the parsed output.
    """
    # BUG FIX: the file handle was opened and never closed; `with` guarantees
    # closure. NOTE(review): assumes ParseC1visualizerStream consumes the
    # stream eagerly before the handle closes -- confirm against the parser.
    with open(outputFilename, "r") as outputFile:
        c1File = ParseC1visualizerStream(os.path.basename(outputFilename), outputFile)
    compiler_pass = c1File.findPass(passName)
    if compiler_pass:
        # Width of the "NN:" prefix column, padded for readability.
        maxLineNo = compiler_pass.startLineNo + len(compiler_pass.body)
        lenLineNo = len(str(maxLineNo)) + 2
        curLineNo = compiler_pass.startLineNo
        for line in compiler_pass.body:
            Logger.log((str(curLineNo) + ":").ljust(lenLineNo) + line)
            curLineNo += 1
    else:
        Logger.fail("Pass \"" + passName + "\" not found in the output")
def dump_pass(output_filename, pass_name):
    """Print compiler pass `pass_name` with per-line numbering, or fail if
    the pass is not present in the parsed C1visualizer output.
    """
    # BUG FIX: the opened file was never closed; `with` guarantees closure.
    # NOTE(review): assumes parse_c1_visualizer_stream reads the stream
    # eagerly before the handle closes -- confirm against the parser.
    with open(output_filename, "r") as output_file:
        c1_file = parse_c1_visualizer_stream(output_filename, output_file)
    compiler_pass = c1_file.find_pass(pass_name)
    if compiler_pass:
        # Width of the "NN:" prefix column, padded for readability.
        max_line_no = compiler_pass.start_line_no + len(compiler_pass.body)
        len_line_no = len(str(max_line_no)) + 2
        cur_line_no = compiler_pass.start_line_no
        for line in compiler_pass.body:
            Logger.log((str(cur_line_no) + ":").ljust(len_line_no) + line)
            cur_line_no += 1
    else:
        Logger.fail('Pass "{}" not found in the output'.format(pass_name))
def check_stop(self):
    """Return True (after marking the task stopped) when this scan run has
    been superseded, i.e. the task's current start_id no longer matches the
    one this worker started with; otherwise return False.

    NOTE: uses the Python 2 `unicode` builtin.
    """
    # check stop scan
    task = self.web_api.get_scan(self.new_task)
    current_id = task["start_id"]
    """Check stopped"""
    if current_id != self.start_id:
        # A different start_id means another run took over this task.
        # NOTE(review): status 3 presumably means "stopped" -- confirm
        # against the task status enumeration.
        self.new_task["status"] = 3
        self.web_api.put(self.new_task)
        # Notify the backend so the UI reflects the stop.
        self.web_api.post_notify(
            message=unicode(u"Stopped scanning task : " + str(self.new_task["id"])),
            status=1)
        Logger.log("Stopped task " + str(self.new_task["id"]))
        return True
    return False
class Testset:
    """Top class for testset, a collection of testcases."""

    def __init__(self, working_dir : str = None, testcases : list = None,
                 runner : str = None, logger : Logger = None, testset_name=""):
        """Testset constructor.

        BUG FIX: `testcases` previously defaulted to a shared mutable list
        (`[]`), so every Testset built without an explicit list appended into
        the *same* object. `None` now stands in for "fresh empty list";
        passing a list explicitly behaves exactly as before.
        """
        self.working_dir = working_dir if working_dir is not None else ""
        self.testcases = testcases if testcases is not None else []
        self.logger = Logger() if logger is None else logger
        self.testset_name = testset_name

    def append(self, testcase : Testcase):
        """Appends a testcase to the testset."""
        self.testcases.append(testcase)

    def run(self):
        """Run all testcases in testset; returns the list of pass booleans."""
        logname = os.path.join("", *[self.working_dir, self.testset_name + ".log"])
        self.logger.open_file_log(filename=logname)
        self.results = []
        n_testcases = len(self.testcases)
        i = 1
        for testcase in self.testcases:
            testcase.set_logger(self.logger)
            print("Running test %d / %d ..." % (i, n_testcases))
            i += 1
            # A testcase expected to fail counts as a pass when it fails.
            if testcase.expected_to_fail:
                self.results.append(not testcase.run())
            else:
                self.results.append(testcase.run())
        self.log_results()
        if False in self.results:
            print("%d / %d testcase(s) FAILED" % (self.results.count(False), n_testcases))
        else:
            print("All %d testcase(s) PASSED" % (n_testcases))
        self.logger.close()
        return self.results

    def log_results(self, results=None):
        """Logs all testcases in testset and their pass / fail status."""
        results = results if results is not None else self.results
        # Width of the longest testcase name, for column alignment.
        testcase_max_length = len(max(self.testcases,
                                      key=lambda t : len(t.testcase_name)).testcase_name)
        result_fmt_str = " %%-%ds : %%s" % (testcase_max_length)
        self.logger.log("Results:")
        # BUG FIX: iterate the resolved `results` (previously hard-wired to
        # self.results, silently ignoring an explicitly passed list).
        for test_result in zip(self.testcases, results):
            self.logger.log(result_fmt_str % (test_result[0].testcase_name,
                                              "PASSED" if test_result[1] is True else "FAILED"))
def publish(interval, kaggle):
    """Upload the exported datapackage to Kaggle at most once per `interval`
    seconds. A falsy `interval` disables publishing entirely; any failure is
    logged rather than raised.
    """
    logger = Logger('main', 'publish', plain=True)
    export_path = os.path.join('data', 'export')

    try:
        # upload disabled
        if not interval:
            return

        # refresh datapackage metadata before deciding whether to upload
        kaggle.update(export_path)

        # guard: bail out until enough time has elapsed since the last upload
        seconds_elapsed = kaggle.timer.stop(run=False) / 1000
        if seconds_elapsed <= interval:
            return

        logger.log(f'\n{"-"*45}{"UPLOADING":^15}{"-"*45}\n')
        kaggle.upload(export_path)
        logger.log(f'\n{"-"*45}{"PUBLISHED":^15}{"-"*45}\n')
        kaggle.timer.reset()
    except Exception as e:
        logger.log(f'...publish error {repr(e)}')
# NOTE(review): fragment of a two-player self-play training script; the
# enclosing loop header lies outside this view, so the indentation of the
# trailing "final save" block relative to the loop should be confirmed
# against the full file.
print('loss Player2 %s' % all_losses2[-1])
print("---------------------------")

# Wall-clock bookkeeping: estimate remaining runtime from the duration of
# the last 100-update window.
timer.update(time.time())
timediff = timer.getTimeDiff()
total_time = timer.getTotalTime()
loopstogo = (num_frames - i_update) / 100
estimatedtimetogo = timer.getTimeToGo(loopstogo)
logger.printDayFormat("runntime last epochs: ", timediff)
logger.printDayFormat("total runtime: ", total_time)
logger.printDayFormat("estimated time to run: ", estimatedtimetogo)
print("######## {0} ########".format(sys.argv[1]))

rollout1.after_update()  # player1
rollout2.after_update()  # player2

# Periodic snapshot every 1000 updates: rewards, losses and both agents'
# weights. `swich_variable` (sic) alternates 0/1 so two snapshot
# generations are kept side by side instead of overwriting one file.
if i_update % 1000 == 0 and i_update > 0:
    logger.log(all_rewards1, "Data/", "all_rewards_p1_{0}_{1}.txt".format(sys.argv[1], swich_variable))
    logger.log(all_losses1, "Data/", "all_losses_p1_{0}_{1}.txt".format(sys.argv[1], swich_variable))
    logger.log_state_dict(agent1.state_dict(), "Data/agents/agent1_{0}_{1}".format(sys.argv[1], swich_variable))
    logger.log(all_rewards2, "Data/", "all_rewards_p2_{0}_{1}.txt".format(sys.argv[1], swich_variable))
    logger.log(all_losses2, "Data/", "all_losses_p2_{0}_{1}.txt".format(sys.argv[1], swich_variable))
    logger.log_state_dict(agent2.state_dict(), "Data/agents/agent2_{0}_{1}".format(sys.argv[1], swich_variable))
    # Flip to the other snapshot slot for the next save.
    swich_variable += 1
    swich_variable %= 2

# Final save: same artefacts as the periodic snapshot, written into the
# currently selected slot. NOTE(review): presumably runs once after the
# training loop completes.
logger.log(all_rewards1, "Data/", "all_rewards_p1_{0}_{1}.txt".format(sys.argv[1], swich_variable))
logger.log(all_losses1, "Data/", "all_losses_p1_{0}_{1}.txt".format(sys.argv[1], swich_variable))
logger.log_state_dict(agent1.state_dict(), "Data/agents/agent1_{0}_{1}".format(sys.argv[1], swich_variable))
logger.log(all_rewards2, "Data/", "all_rewards_p2_{0}_{1}.txt".format(sys.argv[1], swich_variable))
logger.log(all_losses2, "Data/", "all_losses_p2_{0}_{1}.txt".format(sys.argv[1], swich_variable))
logger.log_state_dict(agent2.state_dict(), "Data/agents/agent2_{0}_{1}".format(sys.argv[1], swich_variable))
class Service(ServiceBase):
    """ Siterummage Page Store microservice class """

    ## Title text logged during initialisation.
    title_text = 'Site Rummagge Page Store Microservice'

    ## Copyright text logged on initialisation etc.
    copyright_text = 'Copyright 2021 Site Rummage'

    ## License text logged on initialisation etc.
    license_text = 'All Rights Reserved. Proprietary and confidential'

    def __init__(self, new_instance):
        """Store the Quart instance and reset all service state; real
        start-up work happens later in _initialise()."""
        super().__init__()
        ## Quart application instance the API endpoints attach to.
        self._quart = new_instance
        ## Instance of the logging wrapper class
        self._logger = Logger()
        ## _is_initialised is inherited from parent class ServiceThread
        self._is_initialised = False
        ## Parsed configuration; populated by _initialise().
        self._configuration = None
        ## Database access layer; created after configuration is loaded.
        self._db_interface = None
        ## Health-check endpoint is available immediately.
        self._api_health = ApiHealth(self._quart)
        ## Web page API; created once the database connection is validated.
        self._api_webpage = None

    def _initialise(self) -> bool:
        """One-off start-up: configure logging, load the configuration file
        named by SITERUMMAGE_PAGESTORE_CONFIG, log the settings, connect to
        the database and create the web page API endpoints.

        Returns False when configuration parsing or the database connection
        fails, True on success.
        """
        self._logger.write_to_console = True
        self._logger.initialise()

        self._logger.log(LogType.Info,
                         f'{self.title_text} {VERSION} (Core Version {CORE_VERSION})')
        self._logger.log(LogType.Info, self.copyright_text)
        self._logger.log(LogType.Info, self.license_text)

        config_mgr = ConfigurationManager()
        config_file = os.getenv('SITERUMMAGE_PAGESTORE_CONFIG')
        self._configuration = config_mgr.parse_config_file(config_file)

        if not self._configuration:
            self._logger.log(LogType.Error, config_mgr.last_error_msg)
            return False

        # Echo the effective database settings for operator visibility.
        self._logger.log(LogType.Info, '+=== Configuration Settings ===+')
        self._logger.log(LogType.Info, '+==============================+')
        db_config = self._configuration.db_settings
        self._logger.log(LogType.Info, '+== Database Settings :->')
        self._logger.log(LogType.Info, f'+= database : {db_config.database}')
        self._logger.log(LogType.Info, f'+= host : {db_config.host}')
        self._logger.log(LogType.Info, f'+= username : {db_config.username}')
        self._logger.log(LogType.Info, f'+= port : {db_config.port}')
        self._logger.log(LogType.Info, f'+= pool_name : {db_config.pool_name}')
        self._logger.log(LogType.Info, f'+= pool_size : {db_config.pool_size}')
        self._logger.log(LogType.Info, '+==============================+')

        self._db_interface = DatabaseInterface(self._logger, self._configuration)
        if not self._db_interface.database_connection_valid():
            return False

        self._api_webpage = ApiWebpage(self._quart, self._db_interface,
                                       self._configuration)

        self._is_initialised = True
        return True

    async def _main_loop(self):
        """Idle main loop — all work is event-driven via the API endpoints."""
        # if not self._master_thread_class.initialise():
        #     return False
        pass

    def _shutdown(self):
        """Log that the service is stopping."""
        self._logger.log(LogType.Info, 'Shutting down...')
class SpiderBase():
    """Base class for site spiders: pulls unparsed raw documents, delegates
    parsing to a subclass override of parse(), and persists the resulting
    features, images and child links through the data adapters.
    """

    def __init__(self, data_adapter_config_path, source_name, encode="utf-8",
                 parse_try_limit=3):
        """
        data_adapter_config_path: config path handed to all adapters.
        source_name: logical source this spider works on (also the log tag).
        encode: character encoding of the crawled pages.
        parse_try_limit: parse attempts before a doc is marked as failed.
        """
        self.logger = Logger("spider", source_name)
        self.doc_raw_adapter = DocRawAdapter(data_adapter_config_path, source_name, self.logger)
        self.data_raw_adapter = DataRawAdapter(data_adapter_config_path, source_name, self.logger)
        self.image_store_adapter = ImageStoreAdapter(data_adapter_config_path, self.logger)
        self.source_name = source_name
        self.encode = encode
        self.parse_try_limit = parse_try_limit
        self.exploring_times = 0

    def url_exists_in_doc_raw(self, url):
        """True when `url` (hashed) is already present in the doc_raw store."""
        url_hash = common_utils.gen_url_hash(url)
        return self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash)

    def url_hash_exists_in_data_raw(self, url_hash):
        """True when the hash is already present in the data_raw store."""
        return self.data_raw_adapter.has_data_raw_by_url_hash(url_hash)

    def parse(self, url_hash, page, encode, stage, context, created_at, page_crawled_at):
        '''
        you must override this function
        '''
        self.logger.log("what the hell!!!you have to override to implement parse logic!!!")
        # Demonstration return values showing the expected shapes.
        features = {}
        images = []
        images.append({"name": "test_image_name",
                       "url": "test_image_url",
                       "image_format": "jpg"})
        next_update_time = None
        children = []
        children.append({"url": "test_url",
                         "stage": "test_stage",
                         "context": "test_context",
                         "operation_flag": SpiderChildNodeOperationFlag.NEW_ADD})
        return features, images, next_update_time, children

    def explore_child(self, father_url_hash, url, url_hash, stage, context, operation_flag):
        """Create or update a child doc_raw node according to `operation_flag`."""
        if operation_flag == SpiderChildNodeOperationFlag.NEW_ADD:
            if not self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash):
                self.doc_raw_adapter.create_doc_raw(url_hash, url, stage, context, father_url_hash)
                self.logger.log("child [%s] %s new added." % (url_hash, url))
        else:
            if self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash):
                if operation_flag == SpiderChildNodeOperationFlag.UPDATE_INFO_ONLY:
                    self.doc_raw_adapter.update_doc_raw_with_node_info(
                        url_hash, stage=stage, context=context, father=father_url_hash)
                    self.logger.log("child [%s]'s info is updated." % (url_hash))
                elif operation_flag == SpiderChildNodeOperationFlag.FORCE_TO_REPARSE:
                    # Reset to PAGE_CRAWLED so only the parse step re-runs.
                    self.doc_raw_adapter.update_doc_raw_with_node_info(
                        url_hash, stage=stage, context=context, father=father_url_hash,
                        status_flag=DocRawStatus.PAGE_CRAWLED)
                    self.logger.log("child [%s] is set to reparse data." % (url_hash))
                elif operation_flag == SpiderChildNodeOperationFlag.FORCE_TO_RECRAWL:
                    # Reset to NEW_ADDED so the crawl step re-runs as well.
                    self.doc_raw_adapter.update_doc_raw_with_node_info(
                        url_hash, stage=stage, context=context, father=father_url_hash,
                        status_flag=DocRawStatus.NEW_ADDED)
                    self.logger.log("child [%s]'s is set to recrawled page." % (url_hash))

    def spider_run(self):
        """Main work loop: parse every unparsed doc and persist the results.

        BUG FIX: handlers now use `except Exception as e` (was the Python-2
        only `except BaseException, e`, which also swallowed
        KeyboardInterrupt/SystemExit).
        """
        for url_hash, url, stage, page, encode, context, created_at, page_crawled_at \
                in self.doc_raw_adapter.load_unparsed_doc_raw():
            try:
                self.logger.log("parsing [%s]." % (url_hash))
                features, images, next_update_time, children = self.parse(
                    url_hash, page, encode, stage, context, created_at, page_crawled_at)
                # Register any newly discovered images with the image store.
                if images is not None:
                    for i in range(0, len(images)):
                        try:
                            image_id = common_utils.gen_url_hash(images[i]["url"])
                            if not self.image_store_adapter.has_image_index_by_image_id(image_id):
                                images[i]["image_id"] = image_id
                                self.image_store_adapter.create_image_index(
                                    image_id, images[i]["image_format"], images[i]["url"])
                                self.logger.log("image [%s] created for [%s]." % (image_id, url_hash))
                        except Exception as e:
                            self.logger.log("Error occured when creating image index: %s" % (e))
                # Insert or refresh the parsed feature record.
                if features is not None:
                    if not self.url_hash_exists_in_data_raw(url_hash):
                        self.data_raw_adapter.create_data_raw(url_hash, url, features, images)
                        self.logger.log("features for [%s] is added." % (url_hash))
                    else:
                        self.data_raw_adapter.update_data_raw(url_hash, features, images)
                        self.logger.log("features for [%s] is updated." % (url_hash))
                # Explore child links; each failure is isolated per child.
                children_url_hashes = None
                if children is not None:
                    children_url_hashes = []
                    for child in children:
                        try:
                            url_new = child["url"]
                            url_hash_new = common_utils.gen_url_hash(child["url"])
                            stage_new = child["stage"]
                            context_new = child["context"]
                            operation_flag = child["operation_flag"]
                            self.explore_child(url_hash, url_new, url_hash_new,
                                               stage_new, context_new, operation_flag)
                            children_url_hashes.append(url_hash_new)
                        except Exception as e:
                            self.logger.log("Error occured when exploring child: %s" % (e))
                self.doc_raw_adapter.update_doc_raw_with_node_info(
                    url_hash, next_update_time=next_update_time,
                    children=children_url_hashes, status_flag=DocRawStatus.DATA_PARSED)
            except Exception as e:
                self.logger.log("Error occured in main spider_run: %s" % (e))
                if url_hash is not None:
                    parse_try_times = self.doc_raw_adapter.get_doc_raw_parse_try_times(url_hash)
                    if parse_try_times + 1 >= self.parse_try_limit:
                        self.doc_raw_adapter.update_doc_raw_with_node_info(
                            url_hash, status_flag=DocRawStatus.ERROR_FAILED_TO_PARSED)
                    else:
                        # BUG FIX: timedelta(86400) meant 86400 *days* (the
                        # first positional argument is days); the intended
                        # retry delay is one day, i.e. 86400 seconds.
                        self.doc_raw_adapter.update_doc_raw_with_node_info(
                            url_hash,
                            next_update_time=datetime.datetime.now() + datetime.timedelta(seconds=86400),
                            parse_try_times=parse_try_times + 1,
                            status_flag=DocRawStatus.NEW_ADDED)
class Defender(Trainer):
    """ Perform various adversarial attacks and defense on a pretrained model
    Scheme generates Tensor, not Variable
    """

    def __init__(self, val_loader, args, **kwargs):
        # val_loader: validation data iterator yielding (images, labels).
        # args: parsed CLI arguments (model/attack/defense selection etc.).
        # kwargs: extra options forwarded to the attack/defense schemes.
        self.val_loader = val_loader
        self.args = args
        self.model = get_model(args)
        self.step = 0
        self.cuda = self.args.cuda
        # Timestamped experiment directory; get_dirname derives the final name.
        self.log_path = (
            PROJECT_ROOT / Path("experiments") /
            Path(datetime.now().strftime("%Y%m%d%H%M%S") + "-")).as_posix()
        self.log_path = Path(self.get_dirname(self.log_path, args))
        if not Path.exists(self.log_path):
            Path(self.log_path).mkdir(parents=True, exist_ok=True)
        self.logger = Logger("defense", self.log_path, args.verbose)
        self.logger.log("Checkpoint files will be saved in {}".format(self.log_path))
        # Custom log levels for the distinct phases of a defense run.
        self.logger.add_level("ATTACK", 21, 'yellow')
        self.logger.add_level("DEFENSE", 22, 'cyan')
        self.logger.add_level("TEST", 23, 'white')
        self.logger.add_level("DIST", 11, 'white')
        self.kwargs = kwargs
        if args.domain_restrict:
            # Optionally restrict attacks to a model-specific artifact region.
            self.artifact = get_artifact(self.model, val_loader, args)
            self.kwargs['artifact'] = self.artifact

    def defend(self):
        """Evaluate clean accuracy, attack and defense over the whole
        validation set, logging per-phase metrics and Lp perturbation sizes."""
        self.model.eval()
        defense_scheme = getattr(defenses, self.args.defense)(self.model, self.args,
                                                              **self.kwargs)
        source = self.model
        if self.args.source is not None and (self.args.ckpt_name != self.args.ckpt_src):
            # Transfer attack: adversarial examples are generated on a
            # different (source) checkpoint than the model being defended.
            target = self.args.ckpt_name
            self.args.model = self.args.source
            self.args.ckpt_name = self.args.ckpt_src
            source = get_model(self.args)
            self.logger.log("Transfer attack from {} -> {}".format(
                self.args.ckpt_src, target))
        attack_scheme = getattr(attacks, self.args.attack)(source, self.args,
                                                           **self.kwargs)

        # Running averages: clean inference, defended-clean inference,
        # attacked and defended accuracy, and perturbation distances.
        eval_metrics = EvaluationMetrics(['Test/Acc', 'Test/Top5', 'Test/Time'])
        eval_def_metrics = EvaluationMetrics(['Def-Test/Acc', 'Def-Test/Top5', 'Def-Test/Time'])
        attack_metrics = EvaluationMetrics(['Attack/Acc', 'Attack/Top5', 'Attack/Time'])
        defense_metrics = EvaluationMetrics(['Defense/Acc', 'Defense/Top5', 'Defense/Time'])
        dist_metrics = EvaluationMetrics(['L0', 'L1', 'L2', 'Li'])

        for i, (images, labels) in enumerate(self.val_loader):
            self.step += 1
            if self.cuda:
                images = images.cuda()
                labels = labels.cuda()
            if self.args.half:
                images = images.half()

            # Inference on clean images (top-1 / top-5 accuracy + latency).
            st = time.time()
            outputs = self.model(self.to_var(images, self.cuda, True))
            outputs = outputs.float()
            _, preds = torch.topk(outputs, 5)
            acc = (labels == preds.data[:, 0]).float().mean()
            top5 = torch.sum(
                (labels.unsqueeze(1).repeat(1, 5) == preds.data).float(),
                dim=1).mean()
            eval_metrics.update('Test/Acc', float(acc), labels.size(0))
            eval_metrics.update('Test/Top5', float(top5), labels.size(0))
            eval_metrics.update('Test/Time', time.time() - st, labels.size(0))

            # Attacker: generate adversarial images for this batch.
            st = time.time()
            adv_images, adv_labels = attack_scheme.generate(images, labels)
            if isinstance(adv_images, Variable):
                adv_images = adv_images.data
            attack_metrics.update('Attack/Time', time.time() - st, labels.size(0))

            # Lp distance between the denormalized clean and adversarial images.
            diff = torch.abs(
                denormalize(adv_images, self.args.dataset) -
                denormalize(images, self.args.dataset))
            # L0 counts spatial positions perturbed beyond a small tolerance.
            L0 = torch.sum((torch.sum(diff, dim=1) > 1e-3).float().view(
                labels.size(0), -1), dim=1).mean()
            diff = diff.view(labels.size(0), -1)
            L1 = torch.norm(diff, p=1, dim=1).mean()
            L2 = torch.norm(diff, p=2, dim=1).mean()
            Li = torch.max(diff, dim=1)[0].mean()
            dist_metrics.update('L0', float(L0), labels.size(0))
            dist_metrics.update('L1', float(L1), labels.size(0))
            dist_metrics.update('L2', float(L2), labels.size(0))
            dist_metrics.update('Li', float(Li), labels.size(0))

            # Defender: attempt to purify/defend the adversarial images.
            st = time.time()
            def_images, def_labels = defense_scheme.generate(adv_images, adv_labels)
            if isinstance(
                    def_images, Variable):  # FIXME - Variable in Variable out for all methods
                def_images = def_images.data
            defense_metrics.update('Defense/Time', time.time() - st, labels.size(0))

            self.calc_stats('Attack', adv_images, images, adv_labels, labels,
                            attack_metrics)
            self.calc_stats('Defense', def_images, images, def_labels, labels,
                            defense_metrics)

            # Defense-Inference for shift of original image
            # (how much does the defense itself degrade clean accuracy?)
            st = time.time()
            def_images_org, _ = defense_scheme.generate(images, labels)
            if isinstance(
                    def_images_org, Variable):  # FIXME - Variable in Variable out for all methods
                def_images_org = def_images_org.data
            outputs = self.model(self.to_var(def_images_org, self.cuda, True))
            outputs = outputs.float()
            _, preds = torch.topk(outputs, 5)
            acc = (labels == preds.data[:, 0]).float().mean()
            top5 = torch.sum(
                (labels.unsqueeze(1).repeat(1, 5) == preds.data).float(),
                dim=1).mean()
            eval_def_metrics.update('Def-Test/Acc', float(acc), labels.size(0))
            eval_def_metrics.update('Def-Test/Top5', float(top5), labels.size(0))
            eval_def_metrics.update('Def-Test/Time', time.time() - st, labels.size(0))

            if self.step % self.args.log_step == 0 or self.step == len(self.val_loader):
                self.logger.scalar_summary(eval_metrics.avg, self.step, 'TEST')
                self.logger.scalar_summary(eval_def_metrics.avg, self.step, 'TEST')
                self.logger.scalar_summary(attack_metrics.avg, self.step, 'ATTACK')
                self.logger.scalar_summary(defense_metrics.avg, self.step, 'DEFENSE')
                self.logger.scalar_summary(dist_metrics.avg, self.step, 'DIST')

                # Defense rate = share of the attack-induced accuracy drop that
                # the defense recovered; guarded against a zero denominator.
                defense_rate = eval_metrics.avg[
                    'Test/Acc'] - defense_metrics.avg['Defense/Acc']
                if eval_metrics.avg['Test/Acc'] - attack_metrics.avg['Attack/Acc']:
                    defense_rate /= eval_metrics.avg[
                        'Test/Acc'] - attack_metrics.avg['Attack/Acc']
                else:
                    defense_rate = 0
                defense_rate = 1 - defense_rate

                defense_top5 = eval_metrics.avg[
                    'Test/Top5'] - defense_metrics.avg['Defense/Top5']
                if eval_metrics.avg['Test/Top5'] - attack_metrics.avg['Attack/Top5']:
                    defense_top5 /= eval_metrics.avg[
                        'Test/Top5'] - attack_metrics.avg['Attack/Top5']
                else:
                    defense_top5 = 0
                defense_top5 = 1 - defense_top5

                self.logger.log(
                    "Defense Rate Top1: {:5.3f} | Defense Rate Top5: {:5.3f}".format(
                        defense_rate, defense_top5), 'DEFENSE')

            if self.step % self.args.img_log_step == 0:
                # Log the first sample of the batch in each of its variants.
                image_dict = {
                    'Original':
                    to_np(denormalize(images, self.args.dataset))[0],
                    'Attacked':
                    to_np(denormalize(adv_images, self.args.dataset))[0],
                    'Defensed':
                    to_np(denormalize(def_images, self.args.dataset))[0],
                    'Perturbation':
                    to_np(denormalize(images - adv_images, self.args.dataset))[0]
                }
                self.logger.image_summary(image_dict, self.step)

    def calc_stats(self, method, gen_images, images, gen_labels, labels, metrics):
        """gen_images: Generated from attacker or defender
        Currently just calculating acc and artifact
        """
        success_rate = 0
        if not isinstance(gen_images, Variable):
            gen_images = self.to_var(gen_images.clone(), self.cuda, True)
        gen_outputs = self.model(gen_images)
        gen_outputs = gen_outputs.float()
        # Top-5 predictions on the generated (attacked/defended) images.
        _, gen_preds = torch.topk(F.softmax(gen_outputs, dim=1), 5)
        if isinstance(gen_preds, Variable):
            gen_preds = gen_preds.data
        gen_acc = (labels == gen_preds[:, 0]).float().mean()
        gen_top5 = torch.sum(
            (labels.unsqueeze(1).repeat(1, 5) == gen_preds).float(),
            dim=1).mean()
        metrics.update('{}/Acc'.format(method), float(gen_acc), labels.size(0))
        metrics.update('{}/Top5'.format(method), float(gen_top5), labels.size(0))

    def to_var(self, x, cuda, volatile=False):
        """For CPU inference manual cuda setting is needed
        """
        if cuda:
            x = x.cuda()
        return torch.autograd.Variable(x, volatile=volatile)
sTestCaseDir = cfg.sResultDir + sTestName + '/'
sSampleDir = sTestCaseDir + 'samples/'
sCheckpointDir = sTestCaseDir + 'checkpoint/'

# Create the full result tree (checkpoints, samples, code copy) up front.
for _dir in (sCheckpointDir, sSampleDir, sTestCaseDir + 'code/'):
    makedirs(_dir)

from common.logger import Logger

# Case-scoped logger: every run artefact goes under the test-case directory.
logger = Logger()
logger.set_dir(sTestCaseDir)
logger.set_casename(sTestName)
logger.log(sTestCaseDir)

# Reconstruct and record the launching command line (the trailing space
# matches the historic log format produced by the old concat loop).
commandline = ' '.join(['CUDA_VISIBLE_DEVICES="0" python3'] + sys.argv) + ' '
logger.log(commandline)
logger.log(str_flags(cfg.__flags))
copydir(cfg.sSourceDir, sTestCaseDir + 'code')

############################################################################################################################################

tf.logging.set_verbosity(tf.logging.ERROR)
config = tf.ConfigProto(allow_soft_placement=True,
                        log_device_placement=False,
                        intra_op_parallelism_threads=0,
                        inter_op_parallelism_threads=0)
from client.config import GENERAL
from common.logger import Logger, LogLevel

# Entry-point logger; verbosity is taken from the client configuration.
logger = Logger("main", logging_level=GENERAL["logging_level"])
logger.log(LogLevel.INFO, "Starting up")
"""
We mimic the Node-like architecture by separating parts of app into self-contained modules.
Initialization is performed during module import (in __init__.py files).
"""
# Service imports. Used to initialize modules and their respective processes/variables. DO NOT REMOVE THEM!
# NOTE: the imports are deliberately interleaved with log calls because each
# import triggers that module's initialisation side effects; order matters.
logger.log(LogLevel.INFO, "Starting GPIO adapter")
import client.gpio
logger.log(LogLevel.INFO, "Starting Device client")
import client.socket_connector
logger.log(LogLevel.INFO, "Starting Device controller module")
import client.board_controller
# End of service imports

logger.log(LogLevel.INFO, "Start-up complete")
class Service(ServiceBase):
    """ Siterummage Big Broker microservice class """

    ## Title text logged during initialisation.
    title_text = 'Site Rummagge Big Broker Microservice'

    ## Copyright text logged on initialisation etc.
    copyright_text = 'Copyright 2021 Site Rummage'

    ## License text logged on initialisation etc.
    license_text = 'All Rights Reserved. Proprietary and confidential'

    def __init__(self, new_instance):
        """Store the Quart instance and reset all service state; real
        start-up work happens later in _initialise()."""
        super().__init__()
        ## Quart application instance the API endpoints attach to.
        self._quart = new_instance
        ## Instance of the logging wrapper class
        self._logger = Logger()
        ## _is_initialised is inherited from parent class ServiceThread
        self._is_initialised = False
        ## Parsed configuration; populated by _initialise().
        self._configuration = None
        ## Scheduling API; created after configuration is loaded.
        self._api_schedule = None
        ## Node-management API; created after configuration is loaded.
        self._api_node_management = None
        ## Registry of scrape nodes managed by this broker.
        self._scrape_node_list = ScrapeNodeList()

    def _initialise(self) -> bool:
        """One-off start-up: configure logging, load the configuration file
        named by SITERUMMAGE_BIGBROKER_CONFIG, log the settings and create
        the schedule and node-management API endpoints.

        Returns False when configuration parsing fails, True on success.
        """
        self._logger.write_to_console = True
        self._logger.initialise()

        self._logger.log(LogType.Info,
                         f'{self.title_text} {VERSION} (Core Version {CORE_VERSION})')
        self._logger.log(LogType.Info, self.copyright_text)
        self._logger.log(LogType.Info, self.license_text)

        config_mgr = ConfigurationManager()
        config_file = os.getenv('SITERUMMAGE_BIGBROKER_CONFIG')
        self._logger.log(LogType.Info, f'Configuration file: {config_file}')
        self._configuration = config_mgr.parse_config_file(config_file)

        if not self._configuration:
            self._logger.log(LogType.Error, config_mgr.last_error_msg)
            return False

        # Echo the effective API endpoint settings for operator visibility.
        self._logger.log(LogType.Info, '+=== Configuration Settings ===+')
        self._logger.log(LogType.Info, '+==============================+')
        page_store_cfg = self._configuration.page_store_api
        self._logger.log(LogType.Info, '+== Page Store Api :->')
        self._logger.log(LogType.Info, f'+= host : {page_store_cfg.host}')
        self._logger.log(LogType.Info, f'+= port : {page_store_cfg.port}')
        processing_queue_cfg = self._configuration.processing_queue_api
        # NOTE(review): header says "Page Store Api" but these are the
        # processing-queue settings -- looks like a copy/paste label.
        self._logger.log(LogType.Info, '+== Page Store Api :->')
        self._logger.log(LogType.Info, f'+= host : {processing_queue_cfg.host}')
        self._logger.log(LogType.Info, f'+= port : {processing_queue_cfg.port}')
        self._logger.log(LogType.Info, '+==============================+')

        self._api_schedule = ApiSchedule(self._quart, self._configuration)
        self._api_node_management = ApiNodeManagement(self._quart,
                                                      self._configuration,
                                                      self._scrape_node_list)

        self._is_initialised = True
        return True

    async def _main_loop(self):
        """Idle main loop — all work is event-driven via the API endpoints."""
        # if not self._master_thread_class.initialise():
        #     return False
        pass

    def _shutdown(self):
        """Log that the service is stopping."""
        self._logger.log(LogType.Info, 'Shutting down...')
sTestName = cfg.sDataSet + ('_' + sResultTag if len(sResultTag) else '')
sTestCaseDir = cfg.sResultDir + sTestName + '/'
sSampleDir = sTestCaseDir + '/sample/'
sCheckpointDir = sTestCaseDir + 'checkpoint/'

# Create the result tree: samples, checkpoints and the source snapshot dirs.
for _dir in (sSampleDir, sCheckpointDir,
             sTestCaseDir + 'source/code/', sTestCaseDir + 'source/common/'):
    makedirs(_dir)

# Case-scoped logger; everything for this run lands under sTestCaseDir.
logger = Logger()
logger.set_dir(sTestCaseDir)
logger.set_casename(sTestName)
logger.linebreak()
logger.log(sTestCaseDir)

# Record the launching command line (trailing space kept to match the log
# format the old concatenation loop produced).
commandline = ' '.join(['python3'] + sys.argv) + ' '
logger.log('\n' + commandline + '\n')
logger.log(str_flags(cfg.__flags))
logger.log('Using GPU%d\n' % GPU_ID)

config = tf.ConfigProto(allow_soft_placement=True,
                        log_device_placement=False,
                        intra_op_parallelism_threads=0,
                        inter_op_parallelism_threads=0)
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# NOTE(review): fragment of an actor-critic training loop; the enclosing
# loop header is outside this view, so the indentation of the trailing save
# block relative to the loop should be confirmed against the full file.

# Advantage actor-critic losses: the critic regresses the advantage toward
# zero; the actor ascends log-probability weighted by the detached advantage.
value_loss = advantages.pow(2).mean()
action_loss = -(advantages.data * action_log_probs).mean()

optimizer.zero_grad()
# Entropy bonus encourages exploration; the coefficients weight the terms.
loss = value_loss * value_loss_coef + action_loss - entropy * entropy_coef
loss.backward()
# Clip gradients to stabilise training.
nn.utils.clip_grad_norm_(actor_critic.parameters(), max_grad_norm)
optimizer.step()

# Every 100 updates: record metrics and print progress plus an ETA.
if i_update % 100 == 0:
    all_rewards.append(final_rewards.mean())
    all_losses.append(loss.item())
    timer.update(time.time())
    loopstogo = (num_frames - i_update) / 100
    estimatedtimetogo = timer.getTimeToGo(loopstogo)
    print('epoch %s. reward: %s' % (i_update, np.mean(all_rewards[-10:])))
    print('loss %s' % all_losses[-1])
    logger.printDayFormat("estimated time to run: ", estimatedtimetogo)
    print("######## AC_Pacman_{0} ########".format(mode))

rollout.after_update()

# Persist metrics and the model weights.
logger.log(all_rewards, "Data/", "all_rewards_{0}.txt".format(mode))
logger.log(all_losses, "Data/", "all_losses_{0}.txt".format(mode))
logger.log_state_dict(actor_critic.state_dict(), "Data/actor_critic_{0}".format(mode))
from common.logger import Logger, LogLevel
from server.config import GENERAL

# Application entry point: configure logging, then bring services up by importing them.
logger = Logger("main", logging_level=GENERAL["logging_level"])
logger.log(LogLevel.INFO, "Starting up")

"""
We mimic the Node-like architecture by separating parts of app into self-contained modules.
Initialization is performed during module import (in __init__.py files).
"""
# Service imports. Used to initialize modules and their respective processes/variables. DO NOT REMOVE THEM!
# NOTE: import ORDER is load-bearing — each import runs that service's initialisation side effects.
logger.log(LogLevel.INFO, "Starting Web and Database service")
import server.web
logger.log(LogLevel.INFO, "Starting Device server")
import server.socket_server
# End of service imports
logger.log(LogLevel.INFO, "Start-up complete")
print("---------------------------") timer.update(time.time()) timediff = timer.getTimeDiff() total_time = timer.getTotalTime() loopstogo = (num_frames - i_update) / 100 estimatedtimetogo = timer.getTimeToGo(loopstogo) logger.printDayFormat("runntime last epochs: ", timediff) logger.printDayFormat("total runtime: ", total_time) logger.printDayFormat("estimated time to run: ", estimatedtimetogo) print("######## AC_KeyCollect ########") rollout.after_update() # snapshot of weights, data and optimzer every 1000 epochs if i_update % 1000 == 0 and i_update > 0: logger.log(all_rewards, "Data/", "all_rewards_KeyCollect.txt") logger.log(all_losses, "Data/", "all_losses_KeyCollect.txt") logger.log_state_dict(actor_critic.state_dict(), "Data/actor_critic_KeyCollect") logger.log_state_dict(optimizer.state_dict(), "Data/actor_critic_optimizer_KeyCollect") # final save logger.log(all_rewards, "Data/", "all_rewards_KeyCollect.txt") logger.log(all_losses, "Data/", "all_losses_KeyCollect.txt") logger.log_state_dict(actor_critic.state_dict(), "Data/actor_critic_KeyCollect") logger.log_state_dict(optimizer.state_dict(), "Data/actor_critic_optimizer_KeyCollect")
class SpiderBase():
    """Base class for site spiders.

    Wires a spider to its storage adapters (raw documents, parsed data, images)
    and drives the parse loop via spider_run(). Subclasses must override parse().
    """

    def __init__(self, data_adapter_config_path, source_name, encode="utf-8", parse_try_limit=3):
        """
        :param data_adapter_config_path: path to the data-adapter configuration.
        :param source_name: name of the crawled source; also tags the logger.
        :param encode: default page encoding.
        :param parse_try_limit: how many failed parses before a doc is marked failed.
        """
        self.logger = Logger("spider", source_name)
        self.doc_raw_adapter = DocRawAdapter(data_adapter_config_path, source_name, self.logger)
        self.data_raw_adapter = DataRawAdapter(data_adapter_config_path, source_name, self.logger)
        self.image_store_adapter = ImageStoreAdapter(data_adapter_config_path, self.logger)
        self.source_name = source_name
        self.encode = encode
        self.parse_try_limit = parse_try_limit
        self.exploring_times = 0

    def url_exists_in_doc_raw(self, url):
        """Return True if the URL (by hash) already exists in the doc_raw store."""
        url_hash = common_utils.gen_url_hash(url)
        return self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash)

    def url_hash_exists_in_data_raw(self, url_hash):
        """Return True if parsed data already exists for this URL hash."""
        return self.data_raw_adapter.has_data_raw_by_url_hash(url_hash)

    def parse(self, url_hash, page, encode, stage, context, created_at, page_crawled_at):
        '''
        you must override this function
        '''
        # Placeholder implementation returning example-shaped data so the
        # spider_run() contract is documented by construction.
        self.logger.log("what the hell!!!you have to override to implement parse logic!!!")
        features = {}
        images = [{"name": "test_image_name", "url": "test_image_url", "image_format": "jpg"}]
        next_update_time = None
        children = [{"url": "test_url", "stage": "test_stage", "context": "test_context",
                     "operation_flag": SpiderChildNodeOperationFlag.NEW_ADD}]
        return features, images, next_update_time, children

    def explore_child(self, father_url_hash, url, url_hash, stage, context, operation_flag):
        """Create or update a child doc_raw node according to operation_flag."""
        if operation_flag == SpiderChildNodeOperationFlag.NEW_ADD:
            # Only create the child if it is genuinely new.
            if not self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash):
                self.doc_raw_adapter.create_doc_raw(url_hash, url, stage, context, father_url_hash)
                self.logger.log("child [%s] %s new added."%(url_hash, url))
        else:
            # Update variants only apply to children that already exist.
            if self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash):
                if operation_flag == SpiderChildNodeOperationFlag.UPDATE_INFO_ONLY:
                    self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash, stage=stage, context=context, father=father_url_hash)
                    self.logger.log("child [%s]'s info is updated."%(url_hash))
                elif operation_flag == SpiderChildNodeOperationFlag.FORCE_TO_REPARSE:
                    # Reset to PAGE_CRAWLED so the parse stage runs again.
                    self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash, stage=stage, context=context, father=father_url_hash, status_flag=DocRawStatus.PAGE_CRAWLED)
                    self.logger.log("child [%s] is set to reparse data."%(url_hash))
                elif operation_flag == SpiderChildNodeOperationFlag.FORCE_TO_RECRAWL:
                    # Reset to NEW_ADDED so the page is fetched again from scratch.
                    self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash, stage=stage, context=context, father=father_url_hash, status_flag=DocRawStatus.NEW_ADDED)
                    self.logger.log("child [%s]'s is set to recrawled page."%(url_hash))

    def spider_run(self):
        """Parse every unparsed doc_raw entry: store features/images, explore children.

        On failure the doc is retried up to parse_try_limit times (one day apart),
        then marked ERROR_FAILED_TO_PARSED.
        """
        for url_hash, url, stage, page, encode, context, created_at, page_crawled_at in self.doc_raw_adapter.load_unparsed_doc_raw():
            try:
                self.logger.log("parsing [%s]."%(url_hash))
                features, images, next_update_time, children = self.parse(url_hash, page, encode, stage, context, created_at, page_crawled_at)
                if images is not None:
                    for image in images:
                        try:
                            image_id = common_utils.gen_url_hash(image["url"])
                            if not self.image_store_adapter.has_image_index_by_image_id(image_id):
                                image["image_id"] = image_id
                                self.image_store_adapter.create_image_index(image_id, image["image_format"], image["url"])
                                self.logger.log("image [%s] created for [%s]."%(image_id, url_hash))
                        # Narrowed from BaseException: do not swallow KeyboardInterrupt/SystemExit.
                        except Exception as e:
                            self.logger.log("Error occured when creating image index: %s"%(e))
                if features is not None:
                    if not self.url_hash_exists_in_data_raw(url_hash):
                        self.data_raw_adapter.create_data_raw(url_hash, url, features, images)
                        self.logger.log("features for [%s] is added."%(url_hash))
                    else:
                        self.data_raw_adapter.update_data_raw(url_hash, features, images)
                        self.logger.log("features for [%s] is updated."%(url_hash))
                children_url_hashes = None
                if children is not None:
                    children_url_hashes = []
                    for child in children:
                        try:
                            url_new = child["url"]
                            url_hash_new = common_utils.gen_url_hash(child["url"])
                            stage_new = child["stage"]
                            context_new = child["context"]
                            operation_flag = child["operation_flag"]
                            self.explore_child(url_hash, url_new, url_hash_new, stage_new, context_new, operation_flag)
                            children_url_hashes.append(url_hash_new)
                        except Exception as e:
                            self.logger.log("Error occured when exploring child: %s"%(e))
                self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash, next_update_time=next_update_time, children=children_url_hashes, status_flag=DocRawStatus.DATA_PARSED)
            except Exception as e:
                self.logger.log("Error occured in main spider_run: %s"%(e))
                if url_hash is not None:
                    parse_try_times = self.doc_raw_adapter.get_doc_raw_parse_try_times(url_hash)
                    if parse_try_times + 1 >= self.parse_try_limit:
                        self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash, status_flag=DocRawStatus.ERROR_FAILED_TO_PARSED)
                    else:
                        # BUG FIX: timedelta(86400) meant 86400 DAYS (first positional
                        # arg is days); the intended retry delay is one day in seconds.
                        self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash, next_update_time=datetime.datetime.now() + datetime.timedelta(seconds=86400), parse_try_times=parse_try_times + 1, status_flag=DocRawStatus.NEW_ADDED)
# Build a per-test-case result directory, wire up the logger, snapshot source code,
# and prepare a TF session config.
# NOTE(review): line structure arrived collapsed; indentation below is a reconstruction.
# NOTE(review): sTestName is defined earlier in the original file — not visible here.
sTestCaseDir = cfg.sResultDir + sTestName + '/'
# NOTE(review): leading '/' produces double slashes ('.../samples/'); harmless on POSIX.
sSampleDir = sTestCaseDir + '/samples/'
sCheckpointDir = sTestCaseDir + '/checkpoint/'
# Create the full output tree (results root, test case, samples, checkpoints, code snapshot).
makedirs(cfg.sResultDir)
makedirs(sTestCaseDir)
makedirs(sSampleDir)
makedirs(sCheckpointDir)
makedirs(sTestCaseDir + '/code/')
# Logger writes into the test-case directory under the test-case name.
logger = Logger()
logger.set_dir(sTestCaseDir)
logger.set_casename(sTestName)
logger.log(sTestCaseDir)
# Record the exact command line used to launch this run (trailing space retained).
commandline = ''
for arg in ['CUDA_VISIBLE_DEVICES="0" python3'] + sys.argv:
    commandline += arg + ' '
logger.log(commandline)
# Record all configuration flags.
logger.log(str_flags(cfg.__flags))
# Snapshot the source tree so the run is reproducible.
copydir(SOURCE_DIR + "code/", sTestCaseDir + '/source/code/')
copydir(SOURCE_DIR + "common/", sTestCaseDir + '/source/common/')
# Silence TF info/warning chatter; allow_growth avoids grabbing all GPU memory up front.
tf.logging.set_verbosity(tf.logging.ERROR)
config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
config.gpu_options.allow_growth = True
def ListPasses(outputFilename):
  """Log the name of every compiler pass found in a C1visualizer output file.

  :param outputFilename: path to the compiler's .cfg output file.
  """
  # BUG FIX: the original passed a bare open() handle that was never closed;
  # a context manager closes it deterministically. Parsing consumes the whole
  # stream before the with-block exits, so behavior is otherwise unchanged.
  with open(outputFilename, "r") as stream:
    c1File = ParseC1visualizerStream(os.path.basename(outputFilename), stream)
  for compiler_pass in c1File.passes:
    Logger.log(compiler_pass.name)
class Trainer:
    """ Train and Validation with single GPU """
    # NOTE(review): SOURCE arrived with line structure collapsed; the indentation
    # below is a conservative reconstruction — confirm against the original file.

    def __init__(self, train_loader, val_loader, args):
        """
        :param train_loader: iterable of (images, labels) training batches.
        :param val_loader: iterable of (images, labels) validation batches.
        :param args: parsed CLI/config namespace (epochs, learning_rate, cuda, half, ...).
        """
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.args = args
        self.model = get_model(args)
        self.epochs = args.epochs
        # Total optimizer steps over the whole run (steps per epoch * epochs).
        self.total_step = len(train_loader) * args.epochs
        self.step = 0
        self.epoch = 0
        self.start_epoch = 1
        self.lr = args.learning_rate
        self.best_acc = 0
        # Log: checkpoint/summary directory named by timestamp + dataset/seed/model.
        self.log_path = (
            PROJECT_ROOT / Path(SAVE_DIR) / Path(datetime.now().strftime("%Y%m%d%H%M%S") + "-")
        ).as_posix()
        self.log_path = Path(self.get_dirname(self.log_path, args))
        if not Path.exists(self.log_path):
            Path(self.log_path).mkdir(parents=True, exist_ok=True)
        self.logger = Logger("train", self.log_path, args.verbose)
        self.logger.log("Checkpoint files will be saved in {}".format(self.log_path))
        # Custom log levels so step/epoch/eval summaries are color-coded.
        self.logger.add_level('STEP', 21, 'green')
        self.logger.add_level('EPOCH', 22, 'cyan')
        self.logger.add_level('EVAL', 23, 'yellow')
        self.criterion = nn.CrossEntropyLoss()
        if self.args.cuda:
            self.criterion = self.criterion.cuda()
        # Half precision: both model and loss must be converted together.
        if args.half:
            self.model.half()
            self.criterion.half()
        params = self.model.parameters()
        self.optimizer = get_optimizer(args.optimizer, params, args)

    def train(self):
        """Run the full training loop: eval once up front, then train+eval per epoch."""
        self.eval()
        for self.epoch in range(self.start_epoch, self.args.epochs+1):
            # Step-decay LR at 1/2 and 3/4 of total epochs.
            self.adjust_learning_rate([int(self.args.epochs/2), int(self.args.epochs*3/4)], factor=0.1)
            self.train_epoch()
            self.eval()
        # Export all recorded scalars for offline inspection, then close the writer.
        self.logger.writer.export_scalars_to_json(
            self.log_path.as_posix() + "/scalars-{}-{}-{}.json".format(
                self.args.model, self.args.seed, self.args.activation
            )
        )
        self.logger.writer.close()

    def train_epoch(self):
        """Train for one epoch, logging step metrics and parameter histograms."""
        self.model.train()
        eval_metrics = EvaluationMetrics(['Loss', 'Acc', 'Time'])
        for i, (images, labels) in enumerate(self.train_loader):
            st = time.time()
            self.step += 1
            images = torch.autograd.Variable(images)
            labels = torch.autograd.Variable(labels)
            if self.args.cuda:
                images = images.cuda()
                labels = labels.cuda()
            # Only inputs need converting; labels stay integral for CrossEntropyLoss.
            if self.args.half:
                images = images.half()
            outputs, loss = self.compute_loss(images, labels)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            # Back to fp32 for metric computation (relevant under --half).
            outputs = outputs.float()
            loss = loss.float()
            elapsed_time = time.time() - st
            _, preds = torch.max(outputs, 1)
            accuracy = (labels == preds.squeeze()).float().mean()
            batch_size = labels.size(0)
            eval_metrics.update('Loss', float(loss), batch_size)
            eval_metrics.update('Acc', float(accuracy), batch_size)
            eval_metrics.update('Time', elapsed_time, batch_size)
            if self.step % self.args.log_step == 0:
                self.logger.scalar_summary(eval_metrics.val, self.step, 'STEP')
                # Histogram of parameters
                for tag, p in self.model.named_parameters():
                    # "layer.weight" -> "Train_layer/weight" for writer grouping.
                    tag = tag.split(".")
                    tag = "Train_{}".format(tag[0]) + "/" + "/".join(tag[1:])
                    try:
                        self.logger.writer.add_histogram(tag, p.clone().cpu().data.numpy(), self.step)
                        self.logger.writer.add_histogram(tag+'/grad', p.grad.clone().cpu().data.numpy(), self.step)
                    except Exception as e:
                        # Parameters never used in forward() have no grad — skip them.
                        print("Check if variable {} is not used: {}".format(tag, e))
        self.logger.scalar_summary(eval_metrics.avg, self.step, 'EPOCH')

    def eval(self):
        """Evaluate on the validation set; checkpoint when best accuracy improves."""
        self.model.eval()
        eval_metrics = EvaluationMetrics(['Loss', 'Acc', 'Time'])
        for i, (images, labels) in enumerate(self.val_loader):
            st = time.time()
            images = torch.autograd.Variable(images)
            labels = torch.autograd.Variable(labels)
            if self.args.cuda:
                images = images.cuda()
                labels = labels.cuda()
            if self.args.half:
                images = images.half()
            outputs, loss = self.compute_loss(images, labels)
            outputs = outputs.float()
            loss = loss.float()
            elapsed_time = time.time() - st
            _, preds = torch.max(outputs, 1)
            accuracy = (labels == preds.squeeze()).float().mean()
            batch_size = labels.size(0)
            eval_metrics.update('Loss', float(loss), batch_size)
            eval_metrics.update('Acc', float(accuracy), batch_size)
            eval_metrics.update('Time', elapsed_time, batch_size)
        # Save best model
        if eval_metrics.avg['Acc'] > self.best_acc:
            self.save()
            self.logger.log("Saving best model: epoch={}".format(self.epoch))
            self.best_acc = eval_metrics.avg['Acc']
            # Keep only the single most recent checkpoint on disk.
            self.maybe_delete_old_pth(log_path=self.log_path.as_posix(), max_to_keep=1)
        self.logger.scalar_summary(eval_metrics.avg, self.step, 'EVAL')

    def get_dirname(self, path, args):
        """Append dataset-seed-model to *path* to form the run directory name."""
        path += "{}-".format(getattr(args, 'dataset'))
        path += "{}-".format(getattr(args, 'seed'))
        path += "{}".format(getattr(args, 'model'))
        return path

    def save(self, filename=None):
        """Serialize model/optimizer state plus run metadata to *filename*."""
        if filename is None:
            filename = os.path.join(self.log_path, 'model-{}.pth'.format(self.epoch))
        torch.save({
            'model': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            # NOTE(review): saves start_epoch, not the current epoch — looks like
            # a bug (resume would restart from epoch 1); confirm intended.
            'epoch': self.start_epoch,
            'best_acc': self.best_acc,
            'args': self.args
        }, filename)

    def load(self, filename=None):
        """Restore model/optimizer/run state from a checkpoint file."""
        if filename is None:
            filename = self.log_path
        # map_location keeps CPU-only loading working for GPU-saved checkpoints.
        S = torch.load(filename) if self.args.cuda else torch.load(filename, map_location=lambda storage, location: storage)
        self.model.load_state_dict(S['model'])
        self.optimizer.load_state_dict(S['optimizer'])
        self.epoch = S['epoch']
        self.best_acc = S['best_acc']
        self.args = S['args']

    def maybe_delete_old_pth(self, log_path, max_to_keep):
        """Model filename must end with xxx-xxx-[epoch].pth
        """
        # filename and time
        pths = [(f, int(f[:-4].split("-")[-1])) for f in os.listdir(log_path) if f.endswith('.pth')]
        if len(pths) > max_to_keep:
            # Sort ascending by epoch; delete the oldest until max_to_keep remain.
            sorted_pths = sorted(pths, key=lambda tup: tup[1])
            for i in range(len(pths) - max_to_keep):
                os.remove(os.path.join(log_path, sorted_pths[i][0]))

    def show_current_model(self):
        """Print args, model source, and a per-parameter shape/count table."""
        print("\n".join("{}: {}".format(k, v) for k, v in sorted(vars(self.args).items())))
        model_parameters = filter(lambda p: p.requires_grad, self.model.parameters())
        total_params = np.sum([np.prod(p.size()) for p in model_parameters])
        print('%s\n\n'%(type(self.model)))
        print('%s\n\n'%(inspect.getsource(self.model.__init__)))
        print('%s\n\n'%(inspect.getsource(self.model.forward)))
        # Total 95
        print("*"*40 + "%10s" % self.args.model + "*"*45)
        print("*"*40 + "PARAM INFO" + "*"*45)
        print("-"*95)
        print("| %40s | %25s | %20s |" % ("Param Name", "Shape", "Number of Params"))
        print("-"*95)
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                print("| %40s | %25s | %20d |" % (name, list(param.size()), np.prod(param.size())))
        print("-"*95)
        print("Total Params: %d" % (total_params))
        print("*"*95)

    def adjust_learning_rate(self, milestone, factor=0.1):
        """Multiply LR by *factor* when the current epoch is in *milestone*."""
        if self.epoch in milestone:
            self.lr *= factor
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = self.lr

    def compute_loss(self, images, labels):
        """Forward pass; returns (raw model outputs, criterion loss)."""
        outputs = self.model(images)
        loss = self.criterion(outputs, labels)
        return outputs, loss
help='run loaders periodically in background') argp.add_argument('-publish', type=int, default=None, help='publish datasets to kaggle every x seconds') argp.add_argument('-pause', type=int, default=None, help='pause x seconds after fetching a subreddit') args = argp.parse_args() # handle process termination signal.signal(signal.SIGTERM, terminate) try: logger.log(f'\n{"-"*45}{"ENVIRONMENT":^15}{"-"*45}\n') logger.log(Env.init()) logger.log(f'\n{"-"*45}{"STARTED":^15}{"-"*45}\n') # load config root = os.path.abspath(os.path.dirname(__file__)) with open(os.path.join(root, args.config)) as f: config = json.load(f) # kaggle client kaggle = Kaggle(config=os.path.join('config', 'kaggle.json')) # start background tasks while not terminated: for subreddit in args.subreddits:
class ImageCrawler:
    """Multiprocess image crawler.

    One producer process feeds undownloaded image (id, link) pairs into a
    JoinableQueue; NUM_PROCESSES consumer processes fetch and store them.
    """
    # How many pending images to fetch from storage per producer pass.
    NUM_PER_FETCH = 100
    # Number of consumer processes.
    NUM_PROCESSES = 10

    def __init__(self, database_config_path):
        """
        :param database_config_path: path to the image-store database config.
        """
        self.queue = JoinableQueue()
        self.logger = Logger("image_crawler")
        self.adapter = ImageStoreAdapter(database_config_path, self.logger)

    def produce(self):
        """Refill the queue from storage whenever it drains; poll every 10s."""
        while True:
            if self.queue.empty():
                for image_id, link in self.adapter.load_undownloaded_images(
                        self.NUM_PER_FETCH):
                    self.logger.log("Producer: add new image to crawl:" + image_id + " " + link)
                    self.queue.put((image_id, link))
            # NOTE(review): sleep placement reconstructed from collapsed source —
            # confirm whether the original slept only when the queue was empty.
            time.sleep(10)

    def consume(self, process_id):
        """Worker loop: pop a (image_id, link), download it, persist or mark failed."""
        while True:
            self.logger.log("Consumer process:" + str(process_id) + " fetch new image from queue")
            if not self.queue.empty():
                image_id, link = self.queue.get()
                self.logger.log("Consumer process:" + str(process_id) + " start crawling " + str(link))
                image = common_utils.page_crawl(link)
                if image is not None:
                    # BUG FIX: original message lacked the separating space
                    # ('<link>crawled successfully').
                    self.logger.log(link + " crawled successfully")
                    self.adapter.store_image(image_id, image)
                else:
                    self.logger.log(link + " failed at crawling")
                    self.adapter.update_image_status(
                        image_id, ImageIndexStatus.DOWNLOAD_FAILED)
                self.queue.task_done()
                time.sleep(1)
            else:
                self.logger.log("Queue empty")
                time.sleep(10)

    def run(self):
        """Start producer + consumers and block until all work is done."""
        producer = Process(target=self.produce)
        producer.start()
        consumers = []
        for i in range(self.NUM_PROCESSES):
            consumer = Process(target=self.consume, args=(i, ))
            consumers.append(consumer)
            consumer.start()
        for consumer in consumers:
            consumer.join()
        producer.join()
        self.queue.join()
def list_passes(output_filename):
    """Log the name of every compiler pass found in a C1visualizer output file.

    :param output_filename: path to the compiler's .cfg output file.
    """
    # BUG FIX: the original passed a bare open() handle that was never closed;
    # a context manager closes it deterministically. Parsing consumes the whole
    # stream before the with-block exits, so behavior is otherwise unchanged.
    with open(output_filename, "r") as stream:
        c1_file = parse_c1_visualizer_stream(output_filename, stream)
    for compiler_pass in c1_file.passes:
        Logger.log(compiler_pass.name)
class Service(ServiceBase):
    """ Siterummage Processing Queue microservice class """
    #pylint: disable=too-many-instance-attributes

    ## Title text logged during initialisation.
    # NOTE(review): 'Rummagge' looks like a typo for 'Rummage' — confirm before changing,
    # as this string is emitted to logs.
    title_text = 'Site Rummagge Processing Queue Microservice'

    ## Copyright text logged on initialisation etc.
    copyright_text = 'Copyright 2021 Site Rummage'

    ## License text logged on initialisation etc.
    license_text = 'All Rights Reserved. Proprietary and confidential'

    def __init__(self, new_instance):
        # new_instance: the Quart application instance this service attaches its APIs to.
        super().__init__()
        self._quart = new_instance
        ## Instance of the logging wrapper class
        self._logger = Logger()
        ## _is_initialised is inherited from parent class ServiceThread
        self._is_initialised = False
        # Populated during _initialise(); None until then.
        self._configuration = None
        self._db_interface = None
        self._api_queue = None
        self._queue_cache = None
        self._processing_queue = None

    def _initialise(self) -> bool:
        """One-shot service initialisation: logging, config, database, queue, APIs.

        Returns True on success; False aborts service start-up.
        """
        self._logger.write_to_console = True
        self._logger.initialise()
        # Banner: title, version and license info.
        self._logger.log(
            LogType.Info,
            f'{self.title_text} {VERSION} (Core Version {CORE_VERSION})')
        self._logger.log(LogType.Info, self.copyright_text)
        self._logger.log(LogType.Info, self.license_text)
        # Configuration file location comes from the environment.
        config_mgr = ConfigurationManager()
        config_file = os.getenv('SITERUMMAGE_PROCESSINGQUEUE_CONFIG')
        self._configuration = config_mgr.parse_config_file(config_file)
        if not self._configuration:
            self._logger.log(LogType.Error, config_mgr.last_error_msg)
            return False
        # Echo the effective configuration for operator visibility.
        self._logger.log(LogType.Info, '+=== Configuration Settings ===+')
        self._logger.log(LogType.Info, '+==============================+')
        db_config = self._configuration.db_settings
        self._logger.log(LogType.Info, '+== Database Settings :->')
        self._logger.log(LogType.Info, f'+= Cache Size : {db_config.cache_size}')
        self._logger.log(
            LogType.Info, f'+= DB Filename : {db_config.database_file}')
        self._logger.log(
            LogType.Info,
            f'+= Fail On No Database : {db_config.fail_on_no_database}')
        self._logger.log(LogType.Info, '+== Api Settings :->')
        # Auth key is deliberately masked in the log.
        self._logger.log(LogType.Info, '+= Auth Key : ******')
        self._logger.log(LogType.Info, '+==============================+')
        # Open (or create) the database, honouring fail_on_no_database.
        self._db_interface = DbInterface(db_config.database_file)
        if not self._db_interface.database_exists():
            if self._configuration.db_settings.fail_on_no_database:
                self._logger.log(LogType.Error,
                                 "DB doesn't exist and fail on create is set")
                return False
            if not self._db_interface.build_database():
                self._logger.log(LogType.Error,
                                 self._db_interface.last_error_message)
                return False
            self._logger.log(LogType.Info, 'Database created successfully')
        if not self._db_interface.open():
            self._logger.log(LogType.Error, self._db_interface.last_error_message)
            return False
        # Wire up the in-memory processing queue, its DB-backed cache, and the API.
        self._processing_queue = UrlsBeingProcessed()
        self._queue_cache = QueueCache(self._db_interface, self._configuration,
                                       self._logger, self._processing_queue)
        self._api_queue = ApiQueue(self._quart, self._db_interface,
                                   self._configuration, self._processing_queue,
                                   self._queue_cache)
        self._is_initialised = True
        return True

    async def _main_loop(self):
        # No periodic work: the service is entirely API-driven.
        ...

    def _shutdown(self):
        """Close the database connection cleanly on service shutdown."""
        self._logger.log(LogType.Info, 'Shutting down...')
        if self._db_interface.is_connected:
            self._db_interface.close()
            self._logger.log(LogType.Info, '|-> Database connection closed')
class ImageCrawler:
    """Multiprocess image crawler.

    A producer process keeps the JoinableQueue stocked with undownloaded
    (image_id, link) pairs; NUM_PROCESSES consumers download and store them.
    """
    # How many pending images to fetch from storage per producer pass.
    NUM_PER_FETCH = 100
    # Number of consumer processes.
    NUM_PROCESSES = 10

    def __init__(self, database_config_path):
        """
        :param database_config_path: path to the image-store database config.
        """
        self.queue = JoinableQueue()
        self.logger = Logger("image_crawler")
        self.adapter = ImageStoreAdapter(database_config_path, self.logger)

    def produce(self):
        """Refill the queue from storage whenever it drains; poll every 10s."""
        while True:
            if self.queue.empty():
                for image_id, link in self.adapter.load_undownloaded_images(self.NUM_PER_FETCH):
                    self.logger.log("Producer: add new image to crawl:" + image_id + " " + link)
                    self.queue.put((image_id, link))
            # NOTE(review): sleep placement reconstructed from collapsed source —
            # confirm whether the original slept only when the queue was empty.
            time.sleep(10)

    def consume(self, process_id):
        """Worker loop: pop a (image_id, link), download it, persist or mark failed."""
        while True:
            self.logger.log("Consumer process:" + str(process_id) + " fetch new image from queue")
            if not self.queue.empty():
                image_id, link = self.queue.get()
                self.logger.log("Consumer process:"+ str(process_id) + " start crawling " + str(link))
                image = common_utils.page_crawl(link)
                if image is not None:
                    # BUG FIX: original message lacked the separating space
                    # ('<link>crawled successfully').
                    self.logger.log(link + " crawled successfully")
                    self.adapter.store_image(image_id, image)
                else:
                    self.logger.log(link + " failed at crawling")
                    self.adapter.update_image_status(image_id, ImageIndexStatus.DOWNLOAD_FAILED)
                self.queue.task_done()
                time.sleep(1)
            else:
                self.logger.log("Queue empty")
                time.sleep(10)

    def run(self):
        """Start producer + consumers and block until all work is done."""
        producer = Process(target=self.produce)
        producer.start()
        consumers = []
        for i in range(self.NUM_PROCESSES):
            consumer = Process(target=self.consume, args=(i,))
            consumers.append(consumer)
            consumer.start()
        for consumer in consumers:
            consumer.join()
        producer.join()
        self.queue.join()