Example #1
0
class ClassBase(object):
    """
    This class serves as the inheritance base for every other class that requires universal features, such as a
    logger or a decryptor.
    """
    def __init__(self, logger_name, logging_level):
        self._logger = Logger(logger_name, logging_level)

    def log(self, level, text):
        """
        A simple bridge method used to pass values to the instance's logger.

        :param level: String - level of log message
        :param text: String - message itself
        :return: None
        """
        self._logger.log(level, text)

    def get_cipher(self):
        """
        Creates and returns an instance of the AESCipher class, a wrapper for AES encryption processing.
        :return: AESCipher class instance
        """
        return AESCipher()

    def get_current_datetime_string(self):
        """
        Returns the current datetime stamp, formatted as a readable string
        :return: Timestamp string
        """
        return datetime.now().strftime(get_time_formatter())
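A minimal usage sketch for ClassBase, not part of the original snippet: it assumes the same Logger and AESCipher helpers referenced above, that AESCipher exposes an encrypt() method, and that the Logger accepts a plain "INFO" level string; DeviceClient is purely illustrative.

class DeviceClient(ClassBase):
    """Hypothetical subclass that inherits the universal features."""

    def __init__(self):
        # "INFO" as a logging level is an assumption about the Logger wrapper.
        super().__init__("device_client", "INFO")

    def send(self, payload):
        # encrypt() on AESCipher is assumed; the base class only documents get_cipher().
        encrypted = self.get_cipher().encrypt(payload)
        self.log("INFO", "payload sent at " + self.get_current_datetime_string())
        return encrypted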
Example #2
0
def MatchFiles(checkerFile, c1File, targetArch, debuggableMode):
  for testCase in checkerFile.testCases:
    if testCase.testArch not in [None, targetArch]:
      continue
    if testCase.forDebuggable != debuggableMode:
      continue

    # TODO: Currently does not handle multiple occurrences of the same group
    # name, e.g. when a pass is run multiple times. It will always try to
    # match a check group against the first output group of the same name.
    c1Pass = c1File.findPass(testCase.name)
    if c1Pass is None:
      with file(c1File.fileName) as cfgFile:
        Logger.log(''.join(cfgFile), Logger.Level.Error)
      Logger.fail("Test case not found in the CFG file",
                  testCase.fileName, testCase.startLineNo, testCase.name)

    Logger.startTest(testCase.name)
    try:
      MatchTestCase(testCase, c1Pass, c1File.instructionSetFeatures)
      Logger.testPassed()
    except MatchFailedException as e:
      lineNo = c1Pass.startLineNo + e.lineNo
      if e.statement.variant == TestStatement.Variant.Not:
        msg = "NOT statement matched line {}"
      else:
        msg = "Statement could not be matched starting from line {}"
      msg = msg.format(lineNo)
      with file(c1File.fileName) as cfgFile:
        Logger.log(''.join(cfgFile), Logger.Level.Error)
      Logger.testFailed(msg, e.statement, e.variables)
Example #3
0
class BatchCrawler():
    
    MAX_DOCS_NUM = 100
    
    def __init__(self, database_config_path, source_name, domain, encode, request_interval):
        self.logger = Logger("crawler", domain)
        self.adapter = DocRawAdapter(database_config_path, source_name, self.logger)
        self.domain = domain
        self.encode = encode 
        self.request_interval = request_interval
    
    def run(self):
        while True:
            count = 0
            try:
                for url_hash, url in self.adapter.load_uncrawled_docs(BatchCrawler.MAX_DOCS_NUM):
                    count += 1
                    self.logger.log("crawling url %s"%url, 2)
                    page = common_utils.page_crawl(url)
                    if page == None:
                        self.adapter.update_doc_raw_as_crawled_failed(url_hash)
                        continue
                    if self.encode != "utf-8":
                        page = unicode(page, self.encode).encode("utf-8")

                    self.adapter.update_doc_raw_with_crawled_page(url_hash, "utf-8", page)
                    time.sleep(float(self.request_interval))
                if count < BatchCrawler.MAX_DOCS_NUM:
                    break
            except:
                self.logger.log("mongo error")
Example #4
0
class BatchCrawler():

    MAX_DOCS_NUM = 100

    def __init__(self, database_config_path, source_name, domain, encode,
                 request_interval):
        self.logger = Logger("crawler", domain)
        self.adapter = DocRawAdapter(database_config_path, source_name,
                                     self.logger)
        self.domain = domain
        self.encode = encode
        self.request_interval = request_interval

    def run(self):
        while True:
            count = 0
            try:
                for url_hash, url in self.adapter.load_uncrawled_docs(
                        BatchCrawler.MAX_DOCS_NUM):
                    count += 1
                    self.logger.log("crawling url %s" % url, 2)
                    page = common_utils.page_crawl(url)
                    if page == None:
                        self.adapter.update_doc_raw_as_crawled_failed(url_hash)
                        continue
                    if self.encode != "utf-8":
                        page = unicode(page, self.encode).encode("utf-8")

                    self.adapter.update_doc_raw_with_crawled_page(
                        url_hash, "utf-8", page)
                    time.sleep(float(self.request_interval))
                if count < BatchCrawler.MAX_DOCS_NUM:
                    break
            except:
                self.logger.log("mongo error")
Example #5
0
def fetch(config, subreddit):
    logger = Logger('main', 'fetch', plain=True)

    loaders = []
    try:
        # pushshift
        pushshift = Pushshift(root, config, subreddit)
        loaders.append(pushshift)

        # crawler
        crawler = Crawler(root, config, subreddit)
        loaders.append(crawler)

        # praw
        praw = Praw(root, config, subreddit)
        loaders.append(praw)

        # start loader threads
        background = False  # TODO thread implementation
        for loader in loaders:
            if background:
                loader.start()
            else:
                loader.run()

        # wait until abort
        while background:
            Sleep(1)

    except KeyboardInterrupt:
        for loader in loaders:
            loader.stop(1)
        raise KeyboardInterrupt()
    except Exception as e:
        logger.log(f'...fetch error {repr(e)}')
Example #6
0
def match_files(checker_file, c1_file, target_arch, debuggable_mode,
                print_cfg):
    for test_case in checker_file.test_cases:
        if test_case.test_arch not in [None, target_arch]:
            continue
        if test_case.for_debuggable != debuggable_mode:
            continue

        # TODO: Currently does not handle multiple occurrences of the same group
        # name, e.g. when a pass is run multiple times. It will always try to
        # match a check group against the first output group of the same name.
        c1_pass = c1_file.find_pass(test_case.name)
        if c1_pass is None:
            with open(c1_file.full_file_name) as cfg_file:
                Logger.log("".join(cfg_file), Logger.Level.ERROR)
            Logger.fail("Test case not found in the CFG file",
                        c1_file.full_file_name, test_case.start_line_no,
                        test_case.name)

        Logger.start_test(test_case.name)
        try:
            match_test_case(test_case, c1_pass,
                            c1_file.instruction_set_features)
            Logger.test_passed()
        except MatchFailedException as e:
            line_no = c1_pass.start_line_no + e.line_no
            if e.statement.variant == TestStatement.Variant.NOT:
                msg = "NOT statement matched line {}"
            else:
                msg = "Statement could not be matched starting from line {}"
            msg = msg.format(line_no)
            if print_cfg:
                with open(c1_file.full_file_name) as cfg_file:
                    Logger.log("".join(cfg_file), Logger.Level.ERROR)
            Logger.test_failed(msg, e.statement, e.variables)
Example #7
0
def test(idx, args, T, shared_net, path):
    device = torch.device("cuda")

    torch.manual_seed(args.seed + idx)
    if args.cuda:
        torch.cuda.manual_seed(args.seed + idx)

    env = make_env(args.env,
                   stack_frames=args.stacked_frames,
                   max_episode_steps=args.max_episode_steps,
                   episodic_life=True,
                   reward_clipping=False)
    env.seed(args.seed + idx)

    state = env.reset()
    state_v = torch.from_numpy(state).float().to(device)
    hx = torch.zeros(1, 512).to(device)
    cx = torch.zeros(1, 512).to(device)

    info = True  # True when the game is really done, not just the end of a life (EpisodicLife)

    net = A3C_LSTM(env.observation_space.shape[0],
                   env.action_space.n).to(device)
    net.eval()

    logger = Logger(name="test",
                    path=path,
                    model=shared_net,
                    start_time=time.time(),
                    print_log=True,
                    save_model=True)

    while T.value < args.num_timesteps:
        # Synchronize thread-specific parameters
        if info:
            net.load_state_dict(shared_net.state_dict())

        # Perform action according to policy
        with torch.no_grad():
            value_v, logit_v, (hx, cx) = net(state_v.unsqueeze(0), (hx, cx))
        prob_v = F.softmax(logit_v, dim=1)
        action_v = torch.multinomial(prob_v, num_samples=1)
        action = int(action_v.item())

        # Receive reward and new state
        state, reward, done, info = env.step(action)
        state_v = torch.from_numpy(state).float().to(device)

        logger.log(T.value, reward, info)

        if done:
            state = env.reset()
            state_v = torch.from_numpy(state).float().to(device)
            hx = torch.zeros(1, 512).to(device)
            cx = torch.zeros(1, 512).to(device)
Example #8
0
def DumpPass(outputFilename, passName):
  c1File = ParseC1visualizerStream(os.path.basename(outputFilename), open(outputFilename, "r"))
  compiler_pass = c1File.findPass(passName)
  if compiler_pass:
    maxLineNo = compiler_pass.startLineNo + len(compiler_pass.body)
    lenLineNo = len(str(maxLineNo)) + 2
    curLineNo = compiler_pass.startLineNo
    for line in compiler_pass.body:
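      # Pad "NN:" to the width of the widest line number plus two spaces so the dumped body stays aligned.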
      Logger.log((str(curLineNo) + ":").ljust(lenLineNo) + line)
      curLineNo += 1
  else:
    Logger.fail("Pass \"" + passName + "\" not found in the output")
Example #10
0
def dump_pass(output_filename, pass_name):
    c1_file = parse_c1_visualizer_stream(output_filename,
                                         open(output_filename, "r"))
    compiler_pass = c1_file.find_pass(pass_name)
    if compiler_pass:
        max_line_no = compiler_pass.start_line_no + len(compiler_pass.body)
        len_line_no = len(str(max_line_no)) + 2
        cur_line_no = compiler_pass.start_line_no
        for line in compiler_pass.body:
            Logger.log((str(cur_line_no) + ":").ljust(len_line_no) + line)
            cur_line_no += 1
    else:
        Logger.fail('Pass "{}" not found in the output'.format(pass_name))
Example #11
0
 def check_stop(self):
     # check stop scan
     task = self.web_api.get_scan(self.new_task)
     current_id = task["start_id"]
     """Check stopped"""
     if current_id != self.start_id:
         self.new_task["status"] = 3
         self.web_api.put(self.new_task)
         self.web_api.post_notify(
             message=unicode(u"Stopped scanning task : " +
                             str(self.new_task["id"])),
             status=1)
         Logger.log("Stopped task " + str(self.new_task["id"]))
         return True
     return False
Example #12
0
class Testset:
  """Top class for testset, a collection of testcases."""

  def __init__(self, working_dir: str = None, testcases: list = None,
               runner: str = None, logger: Logger = None, testset_name=""):
    """Testset constructor."""
    self.working_dir = working_dir if working_dir is not None else ""
    # Avoid the mutable-default-argument pitfall: a fresh list per instance.
    self.testcases = testcases if testcases is not None else []
    self.logger = Logger() if logger is None else logger
    self.testset_name = testset_name

  def append(self, testcase : Testcase):
    """Appends a testcase to the testset."""
    self.testcases.append(testcase)

  def run(self):
    """Run all testcases in testset."""
    logname = os.path.join(self.working_dir, self.testset_name + ".log")
    self.logger.open_file_log(filename=logname)
    self.results = []
    n_testcases = len(self.testcases)
    i = 1
    for testcase in self.testcases:
      testcase.set_logger(self.logger)
      print("Running test %d / %d ..." % (i, n_testcases))
      i += 1
      if testcase.expected_to_fail:
        self.results.append(not testcase.run())
      else:
        self.results.append(testcase.run())
    self.log_results()
    if False in self.results:
      print("%d / %d testcase(s) FAILED" % (self.results.count(False), n_testcases))
    else:
      print("All %d testcase(s) PASSED" % (n_testcases))
    self.logger.close()
    return self.results

  def log_results(self, results=None):
    """Logs all testcases in testset and their pass / fail status."""
    results = results if results is not None else self.results
    # Go over all testcase names and find the length of the longest one.
    testcase_max_length = len(max(self.testcases, key=lambda t: len(t.testcase_name)).testcase_name)
    result_fmt_str = "  %%-%ds : %%s" % (testcase_max_length)
    self.logger.log("Results:")
    for testcase, result in zip(self.testcases, results):
      self.logger.log(result_fmt_str % (testcase.testcase_name, "PASSED" if result else "FAILED"))
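A short usage sketch for Testset, not part of the original snippet; it assumes the same Logger class the snippet uses is importable, and DummyTestcase is hypothetical, implementing only the members Testset.run() actually touches (testcase_name, expected_to_fail, set_logger and run).

class DummyTestcase:
  """Hypothetical stand-in exposing only what Testset.run() needs."""

  def __init__(self, name, should_pass=True, expected_to_fail=False):
    self.testcase_name = name
    self.expected_to_fail = expected_to_fail
    self._should_pass = should_pass

  def set_logger(self, logger):
    self._logger = logger

  def run(self):
    return self._should_pass


testset = Testset(working_dir="/tmp",
                  testcases=[DummyTestcase("smoke"),
                             DummyTestcase("known_bug", should_pass=False, expected_to_fail=True)],
                  testset_name="example")
results = testset.run()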
Example #13
0
def publish(interval, kaggle):
    logger = Logger('main', 'publish', plain=True)

    path = os.path.join('data', 'export')
    try:
        # upload disabled
        if not interval:
            return

        # update datapackage
        kaggle.update(path)

        # start upload
        elapsed = kaggle.timer.stop(run=False) / 1000
        if elapsed > interval:
            logger.log(f'\n{"-"*45}{"UPLOADING":^15}{"-"*45}\n')
            kaggle.upload(path)
            logger.log(f'\n{"-"*45}{"PUBLISHED":^15}{"-"*45}\n')
            kaggle.timer.reset()

    except Exception as e:
        logger.log(f'...publish error {repr(e)}')
Example #14
0
            print('loss Player2 %s' % all_losses2[-1])
            print("---------------------------")
            
            timer.update(time.time())            
            timediff = timer.getTimeDiff()
            total_time = timer.getTotalTime()
            loopstogo = (num_frames - i_update) / 100
            estimatedtimetogo = timer.getTimeToGo(loopstogo)
            logger.printDayFormat("runntime last epochs: ", timediff)
            logger.printDayFormat("total runtime: ", total_time)
            logger.printDayFormat("estimated time to run: ", estimatedtimetogo)           
            print("######## {0} ########".format(sys.argv[1]))
        rollout1.after_update() # player1
        rollout2.after_update() # player2

        if i_update % 1000 == 0 and i_update > 0:
            logger.log(all_rewards1, "Data/", "all_rewards_p1_{0}_{1}.txt".format(sys.argv[1], swich_variable))  
            logger.log(all_losses1, "Data/", "all_losses_p1_{0}_{1}.txt".format(sys.argv[1], swich_variable))      
            logger.log_state_dict(agent1.state_dict(), "Data/agents/agent1_{0}_{1}".format(sys.argv[1], swich_variable))    
            logger.log(all_rewards2, "Data/", "all_rewards_p2_{0}_{1}.txt".format(sys.argv[1], swich_variable))  
            logger.log(all_losses2, "Data/", "all_losses_p2_{0}_{1}.txt".format(sys.argv[1], swich_variable))      
            logger.log_state_dict(agent2.state_dict(), "Data/agents/agent2_{0}_{1}".format(sys.argv[1], swich_variable))
            swich_variable += 1
            swich_variable %= 2

    logger.log(all_rewards1, "Data/", "all_rewards_p1_{0}_{1}.txt".format(sys.argv[1], swich_variable))  
    logger.log(all_losses1, "Data/", "all_losses_p1_{0}_{1}.txt".format(sys.argv[1], swich_variable))      
    logger.log_state_dict(agent1.state_dict(), "Data/agents/agent1_{0}_{1}".format(sys.argv[1], swich_variable))    
    logger.log(all_rewards2, "Data/", "all_rewards_p2_{0}_{1}.txt".format(sys.argv[1], swich_variable))  
    logger.log(all_losses2, "Data/", "all_losses_p2_{0}_{1}.txt".format(sys.argv[1], swich_variable))      
    logger.log_state_dict(agent2.state_dict(), "Data/agents/agent2_{0}_{1}".format(sys.argv[1], swich_variable))
Example #15
0
class Service(ServiceBase):
    """ Siterummage Page Store microservice class """

    ## Title text logged during initialisation.
    title_text = 'Site Rummage Page Store Microservice'

    ## Copyright text logged on initialisation etc.
    copyright_text = 'Copyright 2021 Site Rummage'

    ## License text logged on initialisation etc.
    license_text = 'All Rights Reserved. Proprietary and confidential'

    def __init__(self, new_instance):
        super().__init__()

        self._quart = new_instance

        ## Instance of the logging wrapper class
        self._logger = Logger()

        ## _is_initialised is inherited from parent class ServiceThread
        self._is_initialised = False

        self._configuration = None

        self._db_interface = None

        self._api_health = ApiHealth(self._quart)
        self._api_webpage = None

    def _initialise(self) -> bool:
        self._logger.write_to_console = True
        self._logger.initialise()

        self._logger.log(
            LogType.Info,
            f'{self.title_text} {VERSION} (Core Version {CORE_VERSION})')
        self._logger.log(LogType.Info, self.copyright_text)
        self._logger.log(LogType.Info, self.license_text)

        config_mgr = ConfigurationManager()

        config_file = os.getenv('SITERUMMAGE_PAGESTORE_CONFIG')

        self._configuration = config_mgr.parse_config_file(config_file)
        if not self._configuration:
            self._logger.log(LogType.Error, config_mgr.last_error_msg)
            return False

        self._logger.log(LogType.Info, '+=== Configuration Settings ===+')
        self._logger.log(LogType.Info, '+==============================+')
        db_config = self._configuration.db_settings
        self._logger.log(LogType.Info, '+== Database Settings :->')
        self._logger.log(LogType.Info, f'+= database  : {db_config.database}')
        self._logger.log(LogType.Info, f'+= host      : {db_config.host}')
        self._logger.log(LogType.Info, f'+= username  : {db_config.username}')
        self._logger.log(LogType.Info, f'+= port      : {db_config.port}')
        self._logger.log(LogType.Info, f'+= pool_name : {db_config.pool_name}')
        self._logger.log(LogType.Info, f'+= pool_size : {db_config.pool_size}')
        self._logger.log(LogType.Info, '+==============================+')

        self._db_interface = DatabaseInterface(self._logger,
                                               self._configuration)

        if not self._db_interface.database_connection_valid():
            return False

        self._api_webpage = ApiWebpage(self._quart, self._db_interface,
                                       self._configuration)

        self._is_initialised = True

        return True

    async def _main_loop(self):
        # if not self._master_thread_class.initialise():
        #     return False
        pass

    def _shutdown(self):
        self._logger.log(LogType.Info, 'Shutting down...')
Example #16
0
class SpiderBase():
    def __init__(self,
                 data_adapter_config_path,
                 source_name,
                 encode="utf-8",
                 parse_try_limit=3):
        self.logger = Logger("spider", source_name)

        self.doc_raw_adapter = DocRawAdapter(data_adapter_config_path,
                                             source_name, self.logger)
        self.data_raw_adapter = DataRawAdapter(data_adapter_config_path,
                                               source_name, self.logger)
        self.image_store_adapter = ImageStoreAdapter(data_adapter_config_path,
                                                     self.logger)
        self.source_name = source_name
        self.encode = encode
        self.parse_try_limit = parse_try_limit
        self.exploring_times = 0

    def url_exists_in_doc_raw(self, url):
        url_hash = common_utils.gen_url_hash(url)
        return self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash)

    def url_hash_exists_in_data_raw(self, url_hash):
        return self.data_raw_adapter.has_data_raw_by_url_hash(url_hash)

    def parse(self, url_hash, page, encode, stage, context, created_at,
              page_crawled_at):
        '''
        you must override this function
        '''
        self.logger.log(
            "parse() must be overridden to implement the parsing logic!")

        features = {}

        images = []
        images.append({
            "name": "test_image_name",
            "url": "test_image_url",
            "image_format": "jpg"
        })

        next_update_time = None

        children = []
        children.append({
            "url": "test_url",
            "stage": "test_stage",
            "context": "test_context",
            "operation_flag": SpiderChildNodeOperationFlag.NEW_ADD
        })

        return features, images, next_update_time, children

    def explore_child(self, father_url_hash, url, url_hash, stage, context,
                      operation_flag):
        if operation_flag == SpiderChildNodeOperationFlag.NEW_ADD:
            if not self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash):
                self.doc_raw_adapter.create_doc_raw(url_hash, url, stage,
                                                    context, father_url_hash)
                self.logger.log("child [%s] %s new added." % (url_hash, url))

        else:
            if self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash):
                if operation_flag == SpiderChildNodeOperationFlag.UPDATE_INFO_ONLY:
                    self.doc_raw_adapter.update_doc_raw_with_node_info(
                        url_hash,
                        stage=stage,
                        context=context,
                        father=father_url_hash)
                    self.logger.log("child [%s]'s info is updated." %
                                    (url_hash))
                elif operation_flag == SpiderChildNodeOperationFlag.FORCE_TO_REPARSE:
                    self.doc_raw_adapter.update_doc_raw_with_node_info(
                        url_hash,
                        stage=stage,
                        context=context,
                        father=father_url_hash,
                        status_flag=DocRawStatus.PAGE_CRAWLED)
                    self.logger.log("child [%s] is set to reparse data." %
                                    (url_hash))
                elif operation_flag == SpiderChildNodeOperationFlag.FORCE_TO_RECRAWL:
                    self.doc_raw_adapter.update_doc_raw_with_node_info(
                        url_hash,
                        stage=stage,
                        context=context,
                        father=father_url_hash,
                        status_flag=DocRawStatus.NEW_ADDED)
                    self.logger.log("child [%s]'s is set to recrawled page." %
                                    (url_hash))

    def spider_run(self):
        for url_hash, url, stage, page, encode, context, created_at, page_crawled_at in self.doc_raw_adapter.load_unparsed_doc_raw(
        ):
            try:
                self.logger.log("parsing [%s]." % (url_hash))
                features, images, next_update_time, children = self.parse(
                    url_hash, page, encode, stage, context, created_at,
                    page_crawled_at)
                if images != None:
                    for i in range(0, len(images)):
                        try:
                            image_id = common_utils.gen_url_hash(
                                images[i]["url"])
                            if not self.image_store_adapter.has_image_index_by_image_id(
                                    image_id):
                                images[i]["image_id"] = image_id
                                self.image_store_adapter.create_image_index(
                                    image_id, images[i]["image_format"],
                                    images[i]["url"])
                                self.logger.log(
                                    "image [%s] created for [%s]." %
                                    (image_id, url_hash))
                        except BaseException as e:
                            self.logger.log(
                                "Error occurred when creating image index: %s" %
                                (e))

                if features != None:
                    if not self.url_hash_exists_in_data_raw(url_hash):
                        self.data_raw_adapter.create_data_raw(
                            url_hash, url, features, images)
                        self.logger.log("features for [%s] is added." %
                                        (url_hash))
                    else:
                        self.data_raw_adapter.update_data_raw(
                            url_hash, features, images)
                        self.logger.log("features for [%s] is updated." %
                                        (url_hash))

                children_url_hashes = None
                if children != None:
                    children_url_hashes = []
                    for child in children:
                        try:
                            url_new = child["url"]
                            url_hash_new = common_utils.gen_url_hash(
                                child["url"])
                            stage_new = child["stage"]
                            context_new = child["context"]
                            operation_flag = child["operation_flag"]

                            self.explore_child(url_hash, url_new, url_hash_new,
                                               stage_new, context_new,
                                               operation_flag)

                            children_url_hashes.append(url_hash_new)
                        except BaseException as e:
                            self.logger.log(
                                "Error occurred when exploring child: %s" % (e))

                self.doc_raw_adapter.update_doc_raw_with_node_info(
                    url_hash,
                    next_update_time=next_update_time,
                    children=children_url_hashes,
                    status_flag=DocRawStatus.DATA_PARSED)

            except BaseException as e:
                self.logger.log("Error occurred in main spider_run: %s" % (e))
                if url_hash != None:
                    parse_try_times = self.doc_raw_adapter.get_doc_raw_parse_try_times(
                        url_hash)
                    if parse_try_times + 1 >= self.parse_try_limit:
                        self.doc_raw_adapter.update_doc_raw_with_node_info(
                            url_hash,
                            status_flag=DocRawStatus.ERROR_FAILED_TO_PARSED)
                    else:
                        self.doc_raw_adapter.update_doc_raw_with_node_info(
                            url_hash,
                            next_update_time=datetime.datetime.now() +
                            datetime.timedelta(seconds=86400),
                            parse_try_times=parse_try_times + 1,
                            status_flag=DocRawStatus.NEW_ADDED)
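Since parse() is documented as must-override, here is a minimal, hypothetical subclass sketch (not from the original code); the feature keys and the child URL are made up, while the return shape and SpiderChildNodeOperationFlag.NEW_ADD mirror the base class.

class ExampleSpider(SpiderBase):
    """Hypothetical spider overriding parse() as the base class requires."""

    def parse(self, url_hash, page, encode, stage, context, created_at,
              page_crawled_at):
        # Extract whatever features this source needs from the raw page.
        features = {"title": "parsed title", "raw_length": len(page)}

        # No images and no scheduled re-parse in this sketch.
        images = []
        next_update_time = None

        # Queue one made-up child URL for crawling.
        children = [{
            "url": "http://example.com/next",
            "stage": stage,
            "context": context,
            "operation_flag": SpiderChildNodeOperationFlag.NEW_ADD
        }]
        return features, images, next_update_time, children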
Example #17
0
class Defender(Trainer):
    """ Perform various adversarial attacks and defense on a pretrained model
    Scheme generates Tensor, not Variable
    """
    def __init__(self, val_loader, args, **kwargs):
        self.val_loader = val_loader
        self.args = args
        self.model = get_model(args)
        self.step = 0
        self.cuda = self.args.cuda

        self.log_path = (
            PROJECT_ROOT / Path("experiments") /
            Path(datetime.now().strftime("%Y%m%d%H%M%S") + "-")).as_posix()
        self.log_path = Path(self.get_dirname(self.log_path, args))
        if not Path.exists(self.log_path):
            Path(self.log_path).mkdir(parents=True, exist_ok=True)
        self.logger = Logger("defense", self.log_path, args.verbose)
        self.logger.log("Checkpoint files will be saved in {}".format(
            self.log_path))

        self.logger.add_level("ATTACK", 21, 'yellow')
        self.logger.add_level("DEFENSE", 22, 'cyan')
        self.logger.add_level("TEST", 23, 'white')
        self.logger.add_level("DIST", 11, 'white')

        self.kwargs = kwargs
        if args.domain_restrict:
            self.artifact = get_artifact(self.model, val_loader, args)
            self.kwargs['artifact'] = self.artifact

    def defend(self):
        self.model.eval()
        defense_scheme = getattr(defenses,
                                 self.args.defense)(self.model, self.args,
                                                    **self.kwargs)
        source = self.model
        if self.args.source is not None and (self.args.ckpt_name !=
                                             self.args.ckpt_src):
            target = self.args.ckpt_name
            self.args.model = self.args.source
            self.args.ckpt_name = self.args.ckpt_src
            source = get_model(self.args)
            self.logger.log("Transfer attack from {} -> {}".format(
                self.args.ckpt_src, target))
        attack_scheme = getattr(attacks, self.args.attack)(source, self.args,
                                                           **self.kwargs)

        eval_metrics = EvaluationMetrics(
            ['Test/Acc', 'Test/Top5', 'Test/Time'])
        eval_def_metrics = EvaluationMetrics(
            ['Def-Test/Acc', 'Def-Test/Top5', 'Def-Test/Time'])
        attack_metrics = EvaluationMetrics(
            ['Attack/Acc', 'Attack/Top5', 'Attack/Time'])
        defense_metrics = EvaluationMetrics(
            ['Defense/Acc', 'Defense/Top5', 'Defense/Time'])
        dist_metrics = EvaluationMetrics(['L0', 'L1', 'L2', 'Li'])

        for i, (images, labels) in enumerate(self.val_loader):
            self.step += 1
            if self.cuda:
                images = images.cuda()
                labels = labels.cuda()
            if self.args.half: images = images.half()

            # Inference
            st = time.time()
            outputs = self.model(self.to_var(images, self.cuda, True))
            outputs = outputs.float()
            _, preds = torch.topk(outputs, 5)

            acc = (labels == preds.data[:, 0]).float().mean()
            top5 = torch.sum(
                (labels.unsqueeze(1).repeat(1, 5) == preds.data).float(),
                dim=1).mean()
            eval_metrics.update('Test/Acc', float(acc), labels.size(0))
            eval_metrics.update('Test/Top5', float(top5), labels.size(0))
            eval_metrics.update('Test/Time', time.time() - st, labels.size(0))

            # Attacker
            st = time.time()
            adv_images, adv_labels = attack_scheme.generate(images, labels)
            if isinstance(adv_images, Variable):
                adv_images = adv_images.data
            attack_metrics.update('Attack/Time',
                                  time.time() - st, labels.size(0))

            # Lp distance
            diff = torch.abs(
                denormalize(adv_images, self.args.dataset) -
                denormalize(images, self.args.dataset))
            L0 = torch.sum((torch.sum(diff, dim=1) > 1e-3).float().view(
                labels.size(0), -1),
                           dim=1).mean()
            diff = diff.view(labels.size(0), -1)
            L1 = torch.norm(diff, p=1, dim=1).mean()
            L2 = torch.norm(diff, p=2, dim=1).mean()
            Li = torch.max(diff, dim=1)[0].mean()
            dist_metrics.update('L0', float(L0), labels.size(0))
            dist_metrics.update('L1', float(L1), labels.size(0))
            dist_metrics.update('L2', float(L2), labels.size(0))
            dist_metrics.update('Li', float(Li), labels.size(0))

            # Defender
            st = time.time()
            def_images, def_labels = defense_scheme.generate(
                adv_images, adv_labels)
            if isinstance(
                    def_images, Variable
            ):  # FIXME - Variable in Variable out for all methods
                def_images = def_images.data
            defense_metrics.update('Defense/Time',
                                   time.time() - st, labels.size(0))
            self.calc_stats('Attack', adv_images, images, adv_labels, labels,
                            attack_metrics)
            self.calc_stats('Defense', def_images, images, def_labels, labels,
                            defense_metrics)

            # Defense-Inference for shift of original image
            st = time.time()
            def_images_org, _ = defense_scheme.generate(images, labels)
            if isinstance(
                    def_images_org, Variable
            ):  # FIXME - Variable in Variable out for all methods
                def_images_org = def_images_org.data
            outputs = self.model(self.to_var(def_images_org, self.cuda, True))
            outputs = outputs.float()
            _, preds = torch.topk(outputs, 5)

            acc = (labels == preds.data[:, 0]).float().mean()
            top5 = torch.sum(
                (labels.unsqueeze(1).repeat(1, 5) == preds.data).float(),
                dim=1).mean()
            eval_def_metrics.update('Def-Test/Acc', float(acc), labels.size(0))
            eval_def_metrics.update('Def-Test/Top5', float(top5),
                                    labels.size(0))
            eval_def_metrics.update('Def-Test/Time',
                                    time.time() - st, labels.size(0))

            if self.step % self.args.log_step == 0 or self.step == len(
                    self.val_loader):
                self.logger.scalar_summary(eval_metrics.avg, self.step, 'TEST')
                self.logger.scalar_summary(eval_def_metrics.avg, self.step,
                                           'TEST')
                self.logger.scalar_summary(attack_metrics.avg, self.step,
                                           'ATTACK')
                self.logger.scalar_summary(defense_metrics.avg, self.step,
                                           'DEFENSE')
                self.logger.scalar_summary(dist_metrics.avg, self.step, 'DIST')

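                # Defense rate = 1 - (Test/Acc - Defense/Acc) / (Test/Acc - Attack/Acc):
                # the share of the attack-induced accuracy drop that the defense
                # recovers; if the attack caused no drop the rate defaults to 1.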
                defense_rate = eval_metrics.avg[
                    'Test/Acc'] - defense_metrics.avg['Defense/Acc']
                if eval_metrics.avg['Test/Acc'] - attack_metrics.avg[
                        'Attack/Acc']:
                    defense_rate /= eval_metrics.avg[
                        'Test/Acc'] - attack_metrics.avg['Attack/Acc']
                else:
                    defense_rate = 0
                defense_rate = 1 - defense_rate

                defense_top5 = eval_metrics.avg[
                    'Test/Top5'] - defense_metrics.avg['Defense/Top5']
                if eval_metrics.avg['Test/Top5'] - attack_metrics.avg[
                        'Attack/Top5']:
                    defense_top5 /= eval_metrics.avg[
                        'Test/Top5'] - attack_metrics.avg['Attack/Top5']
                else:
                    defense_top5 = 0
                defense_top5 = 1 - defense_top5

                self.logger.log(
                    "Defense Rate Top1: {:5.3f} | Defense Rate Top5: {:5.3f}".
                    format(defense_rate, defense_top5), 'DEFENSE')

            if self.step % self.args.img_log_step == 0:
                image_dict = {
                    'Original':
                    to_np(denormalize(images, self.args.dataset))[0],
                    'Attacked':
                    to_np(denormalize(adv_images, self.args.dataset))[0],
                    'Defensed':
                    to_np(denormalize(def_images, self.args.dataset))[0],
                    'Perturbation':
                    to_np(denormalize(images - adv_images,
                                      self.args.dataset))[0]
                }
                self.logger.image_summary(image_dict, self.step)

    def calc_stats(self, method, gen_images, images, gen_labels, labels,
                   metrics):
        """gen_images: Generated from attacker or defender
        Currently just calculating acc and artifact
        """
        success_rate = 0

        if not isinstance(gen_images, Variable):
            gen_images = self.to_var(gen_images.clone(), self.cuda, True)
        gen_outputs = self.model(gen_images)
        gen_outputs = gen_outputs.float()
        _, gen_preds = torch.topk(F.softmax(gen_outputs, dim=1), 5)

        if isinstance(gen_preds, Variable):
            gen_preds = gen_preds.data
        gen_acc = (labels == gen_preds[:, 0]).float().mean()
        gen_top5 = torch.sum(
            (labels.unsqueeze(1).repeat(1, 5) == gen_preds).float(),
            dim=1).mean()

        metrics.update('{}/Acc'.format(method), float(gen_acc), labels.size(0))
        metrics.update('{}/Top5'.format(method), float(gen_top5),
                       labels.size(0))

    def to_var(self, x, cuda, volatile=False):
        """For CPU inference manual cuda setting is needed
        """
        if cuda:
            x = x.cuda()
        return torch.autograd.Variable(x, volatile=volatile)
Example #18
0
sTestCaseDir = cfg.sResultDir + sTestName + '/'
sSampleDir = sTestCaseDir + 'samples/'
sCheckpointDir = sTestCaseDir + 'checkpoint/'

makedirs(sCheckpointDir)
makedirs(sSampleDir)
makedirs(sTestCaseDir + 'code/')

from common.logger import Logger

logger = Logger()
logger.set_dir(sTestCaseDir)
logger.set_casename(sTestName)

logger.log(sTestCaseDir)

commandline = ''
for arg in ['CUDA_VISIBLE_DEVICES="0" python3'] + sys.argv:
    commandline += arg + ' '
logger.log(commandline)

logger.log(str_flags(cfg.__flags))

copydir(cfg.sSourceDir, sTestCaseDir + 'code')

############################################################################################################################################

tf.logging.set_verbosity(tf.logging.ERROR)

config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False, intra_op_parallelism_threads=0, inter_op_parallelism_threads=0)
Example #19
0
from client.config import GENERAL
from common.logger import Logger, LogLevel

logger = Logger("main", logging_level=GENERAL["logging_level"])
logger.log(LogLevel.INFO, "Starting up")
"""
We mimic the Node-like architecture by separating parts of the app into self-contained modules.
Initialization is performed during module import (in __init__.py files).
"""

# Service imports. Used to initialize modules and their respective processes/variables. DO NOT REMOVE THEM!
logger.log(LogLevel.INFO, "Starting GPIO adapter")
import client.gpio
logger.log(LogLevel.INFO, "Starting Device client")
import client.socket_connector
logger.log(LogLevel.INFO, "Starting Device controller module")
import client.board_controller
# End of service imports

logger.log(LogLevel.INFO, "Start-up complete")
Example #20
0
class Service(ServiceBase):
    """ Siterummage Big Broker microservice class """

    ## Title text logged during initialisation.
    title_text = 'Site Rummage Big Broker Microservice'

    ## Copyright text logged on initialisation etc.
    copyright_text = 'Copyright 2021 Site Rummage'

    ## License text logged on initialisation etc.
    license_text = 'All Rights Reserved. Proprietary and confidential'

    def __init__(self, new_instance):
        super().__init__()

        self._quart = new_instance

        ## Instance of the logging wrapper class
        self._logger = Logger()

        ## _is_initialised is inherited from parent class ServiceThread
        self._is_initialised = False

        self._configuration = None

        self._api_schedule = None

        self._api_node_management = None

        self._scrape_node_list = ScrapeNodeList()

    def _initialise(self) -> bool:
        self._logger.write_to_console = True
        self._logger.initialise()

        self._logger.log(LogType.Info,
                         f'{self.title_text} {VERSION} (Core Version {CORE_VERSION})')
        self._logger.log(LogType.Info, self.copyright_text)
        self._logger.log(LogType.Info, self.license_text)

        config_mgr = ConfigurationManager()

        config_file = os.getenv('SITERUMMAGE_BIGBROKER_CONFIG')

        self._logger.log(LogType.Info, f'Configuration file: {config_file}')

        self._configuration = config_mgr.parse_config_file(config_file)
        if not self._configuration:
            self._logger.log(LogType.Error, config_mgr.last_error_msg)
            return False

        self._logger.log(LogType.Info, '+=== Configuration Settings ===+')
        self._logger.log(LogType.Info, '+==============================+')
        page_store_cfg = self._configuration.page_store_api
        self._logger.log(LogType.Info, '+== Page Store Api :->')
        self._logger.log(LogType.Info, f'+= host : {page_store_cfg.host}')
        self._logger.log(LogType.Info, f'+= port : {page_store_cfg.port}')
        processing_queue_cfg = self._configuration.processing_queue_api
        self._logger.log(LogType.Info, '+== Processing Queue Api :->')
        self._logger.log(LogType.Info, f'+= host : {processing_queue_cfg.host}')
        self._logger.log(LogType.Info, f'+= port : {processing_queue_cfg.port}')
        self._logger.log(LogType.Info, '+==============================+')

        self._api_schedule = ApiSchedule(self._quart, self._configuration)

        self._api_node_management = ApiNodeManagement(self._quart,
                                                      self._configuration,
                                                      self._scrape_node_list)

        self._is_initialised = True

        return True

    async def _main_loop(self):
        # if not self._master_thread_class.initialise():
        #     return False
        pass

    def _shutdown(self):
        self._logger.log(LogType.Info, 'Shutting down...')
Example #21
0
sTestName = cfg.sDataSet + ('_' + sResultTag if len(sResultTag) else '')
sTestCaseDir = cfg.sResultDir + sTestName + '/'
sSampleDir = sTestCaseDir + '/sample/'
sCheckpointDir = sTestCaseDir + 'checkpoint/'

makedirs(sSampleDir)
makedirs(sCheckpointDir)
makedirs(sTestCaseDir + 'source/code/')
makedirs(sTestCaseDir + 'source/common/')

logger = Logger()
logger.set_dir(sTestCaseDir)
logger.set_casename(sTestName)

logger.linebreak()
logger.log(sTestCaseDir)

commandline = ''
for arg in ['python3'] + sys.argv:
    commandline += arg + ' '
logger.log('\n' + commandline + '\n')

logger.log(str_flags(cfg.__flags))
logger.log('Using GPU%d\n' % GPU_ID)

config = tf.ConfigProto(allow_soft_placement=True,
                        log_device_placement=False,
                        intra_op_parallelism_threads=0,
                        inter_op_parallelism_threads=0)
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
Example #22
0
        value_loss = advantages.pow(2).mean()
        action_loss = -(advantages.data * action_log_probs).mean()

        optimizer.zero_grad()        
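        # Actor-critic objective: critic MSE on the advantages, a policy-gradient
        # term weighted by the detached advantages, minus an entropy bonus that
        # encourages exploration.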
        loss = value_loss * value_loss_coef + action_loss - entropy * entropy_coef
        
        loss.backward()
        nn.utils.clip_grad_norm_(actor_critic.parameters(), max_grad_norm)
        optimizer.step()
    
        if i_update % 100 == 0:            
            all_rewards.append(final_rewards.mean())
            all_losses.append(loss.item())
            timer.update(time.time())
            loopstogo = (num_frames - i_update) / 100
            estimatedtimetogo = timer.getTimeToGo(loopstogo)
            
            print('epoch %s. reward: %s' % (i_update, np.mean(all_rewards[-10:])))            
            print('loss %s' % all_losses[-1])
            logger.printDayFormat("estimated time to run: ", estimatedtimetogo)
            print("######## AC_Pacman_{0} ########".format(mode))                        
        rollout.after_update()
        
    logger.log(all_rewards, "Data/", "all_rewards_{0}.txt".format(mode))  
    logger.log(all_losses, "Data/", "all_losses_{0}.txt".format(mode))      
    logger.log_state_dict(actor_critic.state_dict(), "Data/actor_critic_{0}".format(mode))    

    

Example #23
0
from common.logger import Logger, LogLevel
from server.config import GENERAL

logger = Logger("main", logging_level=GENERAL["logging_level"])
logger.log(LogLevel.INFO, "Starting up")
"""
We mimic the Node-like architecture by separating parts of the app into self-contained modules.
Initialization is performed during module import (in __init__.py files).
"""

# Service imports. Used to initialize modules and their respective processes/variables. DO NOT REMOVE THEM!
logger.log(LogLevel.INFO, "Starting Web and Database service")
import server.web
logger.log(LogLevel.INFO, "Starting Device server")
import server.socket_server
# End of service imports

logger.log(LogLevel.INFO, "Start-up complete")
Example #24
0
            print("---------------------------")
            
            timer.update(time.time())            
            timediff = timer.getTimeDiff()
            total_time = timer.getTotalTime()
            loopstogo = (num_frames - i_update) / 100
            estimatedtimetogo = timer.getTimeToGo(loopstogo)
            logger.printDayFormat("runntime last epochs: ", timediff)
            logger.printDayFormat("total runtime: ", total_time)
            logger.printDayFormat("estimated time to run: ", estimatedtimetogo)                       
            print("######## AC_KeyCollect ########")
        
        rollout.after_update()
        
        # snapshot of weights, data and optimizer every 1000 epochs
        if i_update % 1000 == 0 and i_update > 0:
            logger.log(all_rewards, "Data/", "all_rewards_KeyCollect.txt")            
            logger.log(all_losses, "Data/", "all_losses_KeyCollect.txt")                        
            logger.log_state_dict(actor_critic.state_dict(), "Data/actor_critic_KeyCollect")
            logger.log_state_dict(optimizer.state_dict(), "Data/actor_critic_optimizer_KeyCollect")            

    # final save        
    logger.log(all_rewards, "Data/", "all_rewards_KeyCollect.txt")    
    logger.log(all_losses, "Data/", "all_losses_KeyCollect.txt")        
    logger.log_state_dict(actor_critic.state_dict(), "Data/actor_critic_KeyCollect")
    logger.log_state_dict(optimizer.state_dict(), "Data/actor_critic_optimizer_KeyCollect")            




Example #25
0
class SpiderBase():

    def __init__(self, data_adapter_config_path, source_name, encode = "utf-8", parse_try_limit = 3):
        self.logger = Logger("spider", source_name)  
        
        self.doc_raw_adapter = DocRawAdapter(data_adapter_config_path, source_name, self.logger)
        self.data_raw_adapter = DataRawAdapter(data_adapter_config_path, source_name, self.logger)
        self.image_store_adapter = ImageStoreAdapter(data_adapter_config_path, self.logger)
        self.source_name = source_name
        self.encode = encode 
        self.parse_try_limit = parse_try_limit
        self.exploring_times = 0
    
    
    def url_exists_in_doc_raw(self, url):
        url_hash = common_utils.gen_url_hash(url)
        return self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash)
        
    def url_hash_exists_in_data_raw(self, url_hash):
        return self.data_raw_adapter.has_data_raw_by_url_hash(url_hash)

    def parse(self, url_hash, page, encode, stage, context, created_at, page_crawled_at):
        '''
        you must override this function
        '''
        self.logger.log("what the hell!!!you have to override to implement parse logic!!!")
        
        features = {} 
        
        images = []
        images.append({"name" : "test_image_name", "url" : "test_image_url", "image_format" : "jpg"})
        
        next_update_time = None
        
        children = []
        children.append({"url" : "test_url", "stage" : "test_stage", "context" : "test_context", "operation_flag" : SpiderChildNodeOperationFlag.NEW_ADD}) 

        return features, images, next_update_time, children 

    def explore_child(self, father_url_hash, url, url_hash, stage, context, operation_flag):
        if operation_flag == SpiderChildNodeOperationFlag.NEW_ADD:
            if not self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash):
                self.doc_raw_adapter.create_doc_raw(url_hash, url, stage, context, father_url_hash)
                self.logger.log("child [%s] %s new added."%(url_hash, url))

        else:
            if self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash):
                if operation_flag == SpiderChildNodeOperationFlag.UPDATE_INFO_ONLY:
                    self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash, 
                                                                       stage = stage, 
                                                                       context = context, 
                                                                       father = father_url_hash) 
                    self.logger.log("child [%s]'s info is updated."%(url_hash))
                elif operation_flag == SpiderChildNodeOperationFlag.FORCE_TO_REPARSE:
                    self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash, 
                                                                       stage = stage, 
                                                                       context = context, 
                                                                       father = father_url_hash, 
                                                                       status_flag = DocRawStatus.PAGE_CRAWLED)
                    self.logger.log("child [%s] is set to reparse data."%(url_hash))
                elif operation_flag == SpiderChildNodeOperationFlag.FORCE_TO_RECRAWL:
                    self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash, 
                                                                       stage = stage, 
                                                                       context = context, 
                                                                       father = father_url_hash, 
                                                                       status_flag = DocRawStatus.NEW_ADDED)
                    self.logger.log("child [%s]'s is set to recrawled page."%(url_hash))
                

    def spider_run(self):
        for url_hash, url, stage, page, encode, context, created_at, page_crawled_at in self.doc_raw_adapter.load_unparsed_doc_raw():
            try:
                self.logger.log("parsing [%s]."%(url_hash))
                features, images, next_update_time, children = self.parse(url_hash, page, encode, stage, context, created_at, page_crawled_at)
                if images != None:
                    for i in range(0, len(images)):
                        try:
                            image_id = common_utils.gen_url_hash(images[i]["url"])
                            if not self.image_store_adapter.has_image_index_by_image_id(image_id):
                                images[i]["image_id"] = image_id
                                self.image_store_adapter.create_image_index(image_id, images[i]["image_format"], images[i]["url"])
                                self.logger.log("image [%s] created for [%s]."%(image_id, url_hash))
                        except BaseException as e:
                            self.logger.log("Error occurred when creating image index: %s"%(e))
                
                if features != None:
                    if not self.url_hash_exists_in_data_raw(url_hash):
                        self.data_raw_adapter.create_data_raw(url_hash, url, features, images)
                        self.logger.log("features for [%s] is added."%(url_hash))
                    else:
                        self.data_raw_adapter.update_data_raw(url_hash, features, images)
                        self.logger.log("features for [%s] is updated."%(url_hash))

                children_url_hashes = None 
                if children != None:
                    children_url_hashes = []
                    for child in children:
                        try:
                            url_new = child["url"]
                            url_hash_new = common_utils.gen_url_hash(child["url"])
                            stage_new = child["stage"]
                            context_new = child["context"]
                            operation_flag = child["operation_flag"]
                            
                            self.explore_child(url_hash, url_new, url_hash_new, stage_new, context_new, operation_flag)
                            
                            children_url_hashes.append(url_hash_new)
                        except BaseException as e:
                            self.logger.log("Error occurred when exploring child: %s"%(e))
                
                self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash, 
                                                                   next_update_time = next_update_time, 
                                                                   children = children_url_hashes,
                                                                   status_flag = DocRawStatus.DATA_PARSED)
             
            except BaseException as e:
                self.logger.log("Error occurred in main spider_run: %s"%(e))
                if url_hash != None:
                    parse_try_times = self.doc_raw_adapter.get_doc_raw_parse_try_times(url_hash)
                    if parse_try_times + 1 >= self.parse_try_limit:
                        self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash, 
                                                                           status_flag = DocRawStatus.ERROR_FAILED_TO_PARSED)
                    else:
                        self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash,
                                                                           next_update_time = datetime.datetime.now() + datetime.timedelta(seconds=86400),
                                                                           parse_try_times = parse_try_times + 1,
                                                                           status_flag = DocRawStatus.NEW_ADDED)
Example #26
0
sTestCaseDir = cfg.sResultDir + sTestName + '/'
sSampleDir = sTestCaseDir + '/samples/'
sCheckpointDir = sTestCaseDir + '/checkpoint/'

makedirs(cfg.sResultDir)
makedirs(sTestCaseDir)
makedirs(sSampleDir)
makedirs(sCheckpointDir)
makedirs(sTestCaseDir + '/code/')

logger = Logger()
logger.set_dir(sTestCaseDir)
logger.set_casename(sTestName)

logger.log(sTestCaseDir)

commandline = ''
for arg in ['CUDA_VISIBLE_DEVICES="0" python3'] + sys.argv:
    commandline += arg + ' '
logger.log(commandline)

logger.log(str_flags(cfg.__flags))

copydir(SOURCE_DIR + "code/", sTestCaseDir + '/source/code/')
copydir(SOURCE_DIR + "common/", sTestCaseDir + '/source/common/')

tf.logging.set_verbosity(tf.logging.ERROR)

config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
config.gpu_options.allow_growth = True
Example #27
0
def ListPasses(outputFilename):
  c1File = ParseC1visualizerStream(os.path.basename(outputFilename), open(outputFilename, "r"))
  for compiler_pass in c1File.passes:
    Logger.log(compiler_pass.name)
Example #28
0
class Trainer:
    """ Train and Validation with single GPU """
    def __init__(self, train_loader, val_loader, args):
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.args = args
        self.model = get_model(args)
        self.epochs = args.epochs
        self.total_step = len(train_loader) * args.epochs
        self.step = 0
        self.epoch = 0
        self.start_epoch = 1
        self.lr = args.learning_rate
        self.best_acc = 0

        # Log
        self.log_path = (
                PROJECT_ROOT / Path(SAVE_DIR) /
                Path(datetime.now().strftime("%Y%m%d%H%M%S") + "-")
                ).as_posix()
        self.log_path = Path(self.get_dirname(self.log_path, args))
        # exist_ok=True makes a separate existence check unnecessary.
        self.log_path.mkdir(parents=True, exist_ok=True)
        self.logger = Logger("train", self.log_path, args.verbose)
        self.logger.log("Checkpoint files will be saved in {}".format(self.log_path))

        self.logger.add_level('STEP', 21, 'green')
        self.logger.add_level('EPOCH', 22, 'cyan')
        self.logger.add_level('EVAL', 23, 'yellow')

        self.criterion = nn.CrossEntropyLoss()
        if self.args.cuda:
            self.criterion = self.criterion.cuda()
        if args.half:
            self.model.half()
            self.criterion.half()

        params = self.model.parameters()
        self.optimizer = get_optimizer(args.optimizer, params, args)

    def train(self):
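        """Evaluate once up front, then alternate one training epoch with one validation pass."""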
        self.eval()
        for self.epoch in range(self.start_epoch, self.args.epochs+1):
            self.adjust_learning_rate([int(self.args.epochs/2), int(self.args.epochs*3/4)], factor=0.1)
            self.train_epoch()
            self.eval()

        self.logger.writer.export_scalars_to_json(
            self.log_path.as_posix() + "/scalars-{}-{}-{}.json".format(
                self.args.model,
                self.args.seed,
                self.args.activation
            )
        )
        self.logger.writer.close()

    def train_epoch(self):
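        """Run a single pass over the training set, logging step- and epoch-level metrics."""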
        self.model.train()
        eval_metrics = EvaluationMetrics(['Loss', 'Acc', 'Time'])

        for i, (images, labels) in enumerate(self.train_loader):
            st = time.time()
            self.step += 1
            images = torch.autograd.Variable(images)
            labels = torch.autograd.Variable(labels)
            if self.args.cuda:
                images = images.cuda()
                labels = labels.cuda()
            if self.args.half: images = images.half()

            outputs, loss = self.compute_loss(images, labels)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            outputs = outputs.float()
            loss = loss.float()
            elapsed_time = time.time() - st

            _, preds = torch.max(outputs, 1)
            accuracy = (labels == preds.squeeze()).float().mean()

            batch_size = labels.size(0)
            eval_metrics.update('Loss', float(loss), batch_size)
            eval_metrics.update('Acc', float(accuracy), batch_size)
            eval_metrics.update('Time', elapsed_time, batch_size)

            if self.step % self.args.log_step == 0:
                self.logger.scalar_summary(eval_metrics.val, self.step, 'STEP')

        # Histogram of parameters
        for tag, p in self.model.named_parameters():
            tag = tag.split(".")
            tag = "Train_{}".format(tag[0]) + "/" + "/".join(tag[1:])
            try:
                self.logger.writer.add_histogram(tag, p.clone().cpu().data.numpy(), self.step)
                self.logger.writer.add_histogram(tag+'/grad', p.grad.clone().cpu().data.numpy(), self.step)
            except Exception as e:
                print("Check if variable {} is not used: {}".format(tag, e))

        self.logger.scalar_summary(eval_metrics.avg, self.step, 'EPOCH')


    def eval(self):
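        """Run a validation pass and checkpoint the model whenever accuracy improves."""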
        self.model.eval()
        eval_metrics = EvaluationMetrics(['Loss', 'Acc', 'Time'])

        for i, (images, labels) in enumerate(self.val_loader):
            st = time.time()
            images = torch.autograd.Variable(images)
            labels = torch.autograd.Variable(labels)
            if self.args.cuda:
                images = images.cuda()
                labels = labels.cuda()
            if self.args.half: images = images.half()

            outputs, loss = self.compute_loss(images, labels)

            outputs = outputs.float()
            loss = loss.float()
            elapsed_time = time.time() - st

            _, preds = torch.max(outputs, 1)
            accuracy = (labels == preds.squeeze()).float().mean()

            batch_size = labels.size(0)
            eval_metrics.update('Loss', float(loss), batch_size)
            eval_metrics.update('Acc', float(accuracy), batch_size)
            eval_metrics.update('Time', elapsed_time, batch_size)

        # Save best model
        if eval_metrics.avg['Acc'] > self.best_acc:
            self.save()
            self.logger.log("Saving best model: epoch={}".format(self.epoch))
            self.best_acc = eval_metrics.avg['Acc']
            self.maybe_delete_old_pth(log_path=self.log_path.as_posix(), max_to_keep=1)

        self.logger.scalar_summary(eval_metrics.avg, self.step, 'EVAL')

    def get_dirname(self, path, args):
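        """Compose the run directory name from the dataset, seed, and model arguments."""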
        path += "{}-".format(getattr(args, 'dataset'))
        path += "{}-".format(getattr(args, 'seed'))
        path += "{}".format(getattr(args, 'model'))
        return path

    def save(self, filename=None):
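        """Write model/optimizer state plus training metadata to a .pth checkpoint."""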
        if filename is None:
            filename = os.path.join(self.log_path, 'model-{}.pth'.format(self.epoch))
        torch.save({
            'model': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'epoch': self.epoch,  # the epoch being saved, so load() restores progress
            'best_acc': self.best_acc,
            'args': self.args
        }, filename)

    def load(self, filename=None):
        """Load a checkpoint and restore model, optimizer, and training state."""
        if filename is None:
            filename = self.log_path
        # Map tensors onto the CPU when CUDA is unavailable.
        checkpoint = (torch.load(filename) if self.args.cuda else
                      torch.load(filename, map_location=lambda storage, location: storage))
        self.model.load_state_dict(checkpoint['model'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.epoch = checkpoint['epoch']
        self.start_epoch = self.epoch + 1  # assumption: resume from the following epoch
        self.best_acc = checkpoint['best_acc']
        self.args = checkpoint['args']

    def maybe_delete_old_pth(self, log_path, max_to_keep):
        """Model filename must end with xxx-xxx-[epoch].pth
        """
        # filename and time
        pths = [(f, int(f[:-4].split("-")[-1])) for f in os.listdir(log_path) if f.endswith('.pth')]
        if len(pths) > max_to_keep:
            sorted_pths = sorted(pths, key=lambda tup: tup[1])
            for i in range(len(pths) - max_to_keep):
                os.remove(os.path.join(log_path, sorted_pths[i][0]))

    def show_current_model(self):
        print("\n".join("{}: {}".format(k, v) for k, v in sorted(vars(self.args).items())))

        model_parameters = filter(lambda p: p.requires_grad, self.model.parameters())
        total_params = np.sum([np.prod(p.size()) for p in model_parameters])

        print('%s\n\n'%(type(self.model)))
        print('%s\n\n'%(inspect.getsource(self.model.__init__)))
        print('%s\n\n'%(inspect.getsource(self.model.forward)))

        # Total 95
        print("*"*40 + "%10s" % self.args.model + "*"*45)
        print("*"*40 + "PARAM INFO" + "*"*45)
        print("-"*95)
        print("| %40s | %25s | %20s |" % ("Param Name", "Shape", "Number of Params"))
        print("-"*95)
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                print("| %40s | %25s | %20d |" % (name, list(param.size()), np.prod(param.size())))
        print("-"*95)
        print("Total Params: %d" % (total_params))
        print("*"*95)

    def adjust_learning_rate(self, milestone, factor=0.1):
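        """Decay the learning rate by `factor` each time the current epoch hits a milestone."""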
        if self.epoch in milestone:
            self.lr *= factor
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = self.lr

    def compute_loss(self, images, labels):
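        """Forward the batch through the model and apply the criterion; returns (outputs, loss)."""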
        outputs = self.model(images)
        loss = self.criterion(outputs, labels)
        return outputs, loss
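
# A minimal usage sketch (hypothetical: assumes an argparse namespace `args` with the
# fields referenced above, plus train/val DataLoaders built elsewhere):
# trainer = Trainer(train_loader, val_loader, args)
# trainer.show_current_model()
# trainer.train()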
Example #29
0
                      help='run loaders periodically in background')
    argp.add_argument('-publish',
                      type=int,
                      default=None,
                      help='publish datasets to kaggle every x seconds')
    argp.add_argument('-pause',
                      type=int,
                      default=None,
                      help='pause x seconds after fetching a subreddit')
    args = argp.parse_args()

    # handle process termination
    signal.signal(signal.SIGTERM, terminate)

    try:
        logger.log(f'\n{"-"*45}{"ENVIRONMENT":^15}{"-"*45}\n')
        logger.log(Env.init())
        logger.log(f'\n{"-"*45}{"STARTED":^15}{"-"*45}\n')

        # load config
        root = os.path.abspath(os.path.dirname(__file__))
        with open(os.path.join(root, args.config)) as f:
            config = json.load(f)

        # kaggle client
        kaggle = Kaggle(config=os.path.join('config', 'kaggle.json'))

        # start background tasks
        while not terminated:

            for subreddit in args.subreddits:
Example #30
0
class ImageCrawler:

    NUM_PER_FETCH = 100
    NUM_PROCESSES = 10

    def __init__(self, database_config_path):
        self.queue = JoinableQueue()
        self.logger = Logger("image_crawler")
        self.adapter = ImageStoreAdapter(database_config_path, self.logger)

    def produce(self):
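        """Periodically refill the queue with images the store has not yet downloaded."""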
        while True:
            if self.queue.empty():
                for image_id, link in self.adapter.load_undownloaded_images(
                        self.NUM_PER_FETCH):
                    self.logger.log("Producer: add new image to crawl:" +
                                    image_id + " " + link)
                    self.queue.put((image_id, link))
            time.sleep(10)

    def consume(self, process_id):
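        """Worker loop: pull (image_id, link) pairs off the queue and crawl each link."""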
        while True:
            self.logger.log("Consumer process:" + str(process_id) +
                            " fetch new image from queue")
            if not self.queue.empty():
                image_id, link = self.queue.get()
                self.logger.log("Consumer process:" + str(process_id) +
                                " start crawling " + str(link))
                image = common_utils.page_crawl(link)
                if image is not None:
                    self.logger.log(link + " crawled successfully")
                    self.adapter.store_image(image_id, image)
                else:
                    self.logger.log(link + " failed at crawling")
                    self.adapter.update_image_status(
                        image_id, ImageIndexStatus.DOWNLOAD_FAILED)
                self.queue.task_done()
                time.sleep(1)
            else:
                self.logger.log("Queue empty")
                time.sleep(10)

    def run(self):
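        """Spawn one producer and NUM_PROCESSES consumer processes, then wait on them."""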
        producer = Process(target=self.produce)
        producer.start()
        consumers = []
        for i in range(self.NUM_PROCESSES):
            consumer = Process(target=self.consume, args=(i, ))
            consumers.append(consumer)
            consumer.start()

        for consumer in consumers:
            consumer.join()
        producer.join()
        self.queue.join()
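
# Hypothetical entry point; the config path below is an assumption, not part of the example:
# if __name__ == '__main__':
#     ImageCrawler('config/database.json').run()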
Example #31
0
def list_passes(output_filename):
    c1_file = parse_c1_visualizer_stream(output_filename,
                                         open(output_filename, "r"))
    for compiler_pass in c1_file.passes:
        Logger.log(compiler_pass.name)
Example #32
0
class Service(ServiceBase):
    """ Siterummage Processing Queue microservice class """
    #pylint: disable=too-many-instance-attributes

    ## Title text logged during initialisation.
    title_text = 'Site Rummage Processing Queue Microservice'

    ## Copyright text logged on initialisation etc.
    copyright_text = 'Copyright 2021 Site Rummage'

    ## License text logged on initialisation etc.
    license_text = 'All Rights Reserved. Proprietary and confidential'

    def __init__(self, new_instance):
        super().__init__()

        self._quart = new_instance

        ## Instance of the logging wrapper class
        self._logger = Logger()

        ## _is_initialised is inherited from the parent class ServiceBase
        self._is_initialised = False

        self._configuration = None

        self._db_interface = None

        self._api_queue = None

        self._queue_cache = None

        self._processing_queue = None

    def _initialise(self) -> bool:
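        """One-shot start-up: configure logging, parse configuration, and wire up the database and queues."""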
        self._logger.write_to_console = True
        self._logger.initialise()

        self._logger.log(
            LogType.Info,
            f'{self.title_text} {VERSION} (Core Version {CORE_VERSION})')
        self._logger.log(LogType.Info, self.copyright_text)
        self._logger.log(LogType.Info, self.license_text)

        config_mgr = ConfigurationManager()

        config_file = os.getenv('SITERUMMAGE_PROCESSINGQUEUE_CONFIG')

        self._configuration = config_mgr.parse_config_file(config_file)
        if not self._configuration:
            self._logger.log(LogType.Error, config_mgr.last_error_msg)
            return False

        self._logger.log(LogType.Info, '+=== Configuration Settings ===+')
        self._logger.log(LogType.Info, '+==============================+')
        db_config = self._configuration.db_settings
        self._logger.log(LogType.Info, '+== Database Settings :->')
        self._logger.log(LogType.Info,
                         f'+= Cache Size          : {db_config.cache_size}')
        self._logger.log(
            LogType.Info,
            f'+= DB Filename         : {db_config.database_file}')
        self._logger.log(
            LogType.Info,
            f'+= Fail On No Database : {db_config.fail_on_no_database}')
        self._logger.log(LogType.Info, '+== Api Settings :->')
        self._logger.log(LogType.Info, '+= Auth Key : ******')
        self._logger.log(LogType.Info, '+==============================+')

        self._db_interface = DbInterface(db_config.database_file)

        if not self._db_interface.database_exists():
            if self._configuration.db_settings.fail_on_no_database:
                self._logger.log(LogType.Error,
                                 "DB doesn't exist and fail on create is set")
                return False

            if not self._db_interface.build_database():
                self._logger.log(LogType.Error,
                                 self._db_interface.last_error_message)
                return False

            self._logger.log(LogType.Info, 'Database created successfully')

        if not self._db_interface.open():
            self._logger.log(LogType.Error,
                             self._db_interface.last_error_message)
            return False

        self._processing_queue = UrlsBeingProcessed()

        self._queue_cache = QueueCache(self._db_interface, self._configuration,
                                       self._logger, self._processing_queue)

        self._api_queue = ApiQueue(self._quart, self._db_interface,
                                   self._configuration, self._processing_queue,
                                   self._queue_cache)

        self._is_initialised = True

        return True

    async def _main_loop(self):
        ...

    def _shutdown(self):
        self._logger.log(LogType.Info, 'Shutting down...')

        if self._db_interface.is_connected:
            self._db_interface.close()
            self._logger.log(LogType.Info, '|-> Database connection closed')