def log_all(msg):
    # Emit the same message repeatedly at every log level
    # (useful for exercising handlers and formatters).
    for _ in range(0, 11):
        logger.debug(msg)
        logger.info(msg)
        logger.warning(msg)
        logger.error(msg)
        logger.critical(msg)
 def parseHtml(cls, doc, highlight=None, parser='html.parser', source='unknown', target_source=None, item=None, id=None, title=None, link=None, abstract=None, details=None):
     records = []
     try:
         if doc:
             soup = BeautifulSoup(doc, parser)
             log.debug(f"Got results from '{source}'.")
             elements = cls.__select(soup, item)
             for elt in elements:
                 _source = source
                 _target_source = target_source if target_source else source
                 _id = cls.__select(elt, id)
                 _title = cls.__select(elt, title)
                 _link = cls.__select(elt, link)
                 _abstract = cls.__select(elt, abstract)
                 _details = cls.__select(elt, details)
                 rec = cls.record(_source, _target_source, _id, _title,
                                  _link, _abstract, _details).to_dict()
                 rec = Highlighter.highlight(
                     rec, ['title', 'abstract'], highlight)
                 log.info(f"Record: {rec}")
                 records.append(rec)
     except Exception as ex:
         log.error(ex)
         traceback.print_exc(file=stdout)
     finally:
         return records
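# A minimal, self-contained sketch of the BeautifulSoup pattern parseHtml()
# builds on: select each result item with a CSS selector, then pull the
# per-field values out of that item. The HTML and the selectors below are
# invented for illustration; real callers pass them via the item/title/link/
# abstract arguments.
from bs4 import BeautifulSoup

_sample_html = """
<div class="result">
  <a class="title" href="/doc/1">First hit</a>
  <p class="abstract">Short summary.</p>
</div>
"""
_soup = BeautifulSoup(_sample_html, 'html.parser')
for _elt in _soup.select('div.result'):                 # the `item` selector
    print({
        'title': _elt.select_one('a.title').text,       # the `title` selector
        'link': _elt.select_one('a.title')['href'],     # the `link` selector
        'abstract': _elt.select_one('p.abstract').text  # the `abstract` selector
    })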
    def test_non_str_logging(self):
        logger.info(10)
        self.assertIn("10", self.last_line())

        # These calls should not raise any error.
        logger.debug([10, 20, 30])
        logger.critical({})
        logger.warning(set([-1, 4]))
def initChangeTracker():
    log.debug("Initialze change tracker")
    from modules.change_tracker import ChangeTracker
    changeTracker = ChangeTracker(
        __get_observe_database_uri(),
        [coll.strip() for coll in environ.get('CT_COLLECTIONS').split(',')],
        [field.strip() for field in environ.get('CT_FIELDS').split(',')])
    changeTracker.start()
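# Hedged sketch of the environment initChangeTracker() expects: CT_COLLECTIONS
# and CT_FIELDS are read as comma-separated lists (the variable names come
# from the code above; the example values are invented).
from os import environ

environ.setdefault('CT_COLLECTIONS', 'users, orders')
environ.setdefault('CT_FIELDS', 'status, shipping_address')
print([coll.strip() for coll in environ.get('CT_COLLECTIONS').split(',')])   # ['users', 'orders']
print([field.strip() for field in environ.get('CT_FIELDS').split(',')])      # ['status', 'shipping_address']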
 def test_message_is_logged(self):
     logger.debug("message 1")
     self.assertIn("message 1", self.last_line())
     logger.info("message 2")
     self.assertIn("message 2", self.last_line())
     logger.warning("message 3")
     self.assertIn("message 3", self.last_line())
     logger.error("message 4")
     self.assertIn("message 4", self.last_line())
     logger.critical("message 5")
     self.assertIn("message 5", self.last_line())
 def test_filepath_is_logged(self):
     logger.debug("message 1")
     self.assertIn("test_colored_logger.py", self.last_line())
     logger.info("message 2")
     self.assertIn("test_colored_logger.py", self.last_line())
     logger.warning("message 3")
     self.assertIn("test_colored_logger.py", self.last_line())
     logger.error("message 4")
     self.assertIn("test_colored_logger.py", self.last_line())
     logger.critical("message 5")
     self.assertIn("test_colored_logger.py", self.last_line())
 def test_level_is_logged(self):
     logger.debug("message 1")
     self.assertIn("DEBUG", self.last_line())
     logger.info("message 2")
     self.assertIn("INFO", self.last_line())
     logger.warning("message 3")
     self.assertIn("WARNING", self.last_line())
     logger.error("message 4")
     self.assertIn("ERROR", self.last_line())
     logger.critical("message 5")
     self.assertIn("CRITICAL", self.last_line())
 def run(self):
     try:
         log.debug(f"[{self.id}] Flusher started.")
         self.running = True
         while self.running:
             self.__handleQ()
             time.sleep(ChangeTracker.CHANGES_TIMEOUT)
         log.debug(f"[{self.id}] Flusher stopped.")
     except Exception as ex:
         log.error(ex)
         traceback.print_exc(file=sys.stdout)
 def get(url: str, **kwargs):
     log.info(f"Request url={url} args={kwargs}")
     try:
         headers = {
             'User-Agent':
             'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'
         }
         res = requests.get(url, params=kwargs, timeout=5, headers=headers)
         log.debug(f"Response code={res.status_code}")
         if res.ok:
             with open(f"{url.replace('/', '_')}.html", 'w') as f:
                 f.write(res.text)
             return res.text
         else:
             log.error(res.text)
             return None
     except requests.RequestException as ex:
         log.error(ex)
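# Hedged usage sketch for the get() wrapper above (invoked elsewhere in this
# code as Requester.get): keyword arguments become query-string parameters,
# failures return None, and a successful response body is also dumped to a
# local "<url with '/' replaced by '_'>.html" file. The URL and parameters
# are placeholders.
html = Requester.get('https://example.com/search', q='python', page=1)
if html is None:
    print('Request failed or returned a non-2xx status; see the error log.')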
 def get(cls, urls: list):
     if not isinstance(urls, list):
         raise TypeError('urls is not of type list')
     response = None
     for url in urls:
         params = url['params']
         for param, value in params.items():
             match = cls.RE_PARSE_PARAM.match(str(value))
             if match:
                 action = match.group('action')
                 attributes = match.group('attributes')
                 content = match.group('content')
                 log.debug(
                     f"params: action={action}, attributes={attributes}, content={content}"
                 )
                 if action == 'eval':
                     if attributes == 'json':
                         if response:
                             try:
                                 log.debug(f"response={response}")
                                 json = loads(response)
                                 log.debug(f"json={json}")
                                 content = eval(content)
                             except Exception as ex:
                                 log.warning(
                                     f"Cannot evaluate {content}: {ex}")
                                 traceback.print_exc(file=stdout)
                                 content = ''
                 params[param] = content  # only substitute params that carried a parse instruction
         response = Requester.get(url['url'], **params)
         if not response:
             break
     return response
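# Hedged sketch of the `urls` structure this classmethod iterates over: each
# entry is a dict with a 'url' and a 'params' dict, requested in order, and a
# parameter value matching RE_PARSE_PARAM can be rewritten from the previous
# response. The regex is not shown here, so the "eval ... json ..." spelling
# below is a guess at its action/attributes/content groups, not confirmed
# syntax.
urls = [
    {'url': 'https://api.example.com/token', 'params': {'user': 'demo'}},
    {'url': 'https://api.example.com/data',
     # assumed instruction: evaluate json['token'] against the previous JSON response
     'params': {'token': "eval(json):json['token']"}},
]
# response = SomeCrawler.get(urls)  # hypothetical owning class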
 def __select(cls, elt, selector):
     ret = None
     if selector is not None:
         log.debug(f"Try selector: '{selector}'")
         parts = selector.split('|')
         selector = parts[0]
         filters = None
         if len(parts) > 1:
             filters = parts[1:]
         match = cls.RE_PARSE_INSTRUCTION.match(selector)
         if match:
             action = match.group('action')
             attributes = match.group('attributes')
             selector = match.group('selector')
             log.debug(
                 f"action={action} attributes={attributes} selector={selector}")
             try:
                 if action == 'attr':
                     ret = elt.select(selector)[0][attributes]
                 elif action == 'first':
                     ret = elt.select_one(selector)
                 else:
                     ret = elt.select(selector)
                 if filters:
                     for flt in filters:
                         log.debug(f"Apply filter {flt}")
                         if not isinstance(ret, str):
                             ret = ret.text
                         ret = eval(flt.replace('$', 'str(ret)'))
             except Exception as ex:
                 log.error(ex)
                 traceback.print_exc(file=stdout)
             finally:
                 log.debug(f"Found {len(ret) if ret else 0} entries.")
                 log.debug("---")
     if not ret:
         ret = ''
     return ret
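# Self-contained illustration of the '|' filter mechanism used by __select()
# above: everything after the first '|' is an expression in which '$' is
# replaced by str(ret) and evaluated with eval(). The action prefix parsed by
# RE_PARSE_INSTRUCTION is not shown here, so only the plain-CSS + filter case
# is demonstrated.
from bs4 import BeautifulSoup

_elt = BeautifulSoup('<a class="title">  Hello  </a>', 'html.parser')
ret = _elt.select_one('a.title')                    # like action == 'first'
if not isinstance(ret, str):
    ret = ret.text                                  # filters act on the text
ret = eval('$.strip()'.replace('$', 'str(ret)'))    # selector 'a.title|$.strip()'
print(ret)  # 'Hello'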
 def log_all():
     logger.debug("message 1")
     logger.info("message 2")
     logger.warning("message 3")
     logger.error("message 4")
     logger.critical("message 5")
        def run(self):
            if self.mongo_uri is None:
                return
            db = pymongo.MongoClient(self.mongo_uri).get_database()
            pipeline = [{
                "$match": {
                    "ns.coll": {
                        "$in": self.collections
                    }
                }
            }, {
                "$set": {
                    "timestamp": "$clusterTime",
                    "user": f"$fullDocument.{environ.get('CT_USER_FIELD')}",
                    "db": "$ns.db",
                    "coll": "$ns.coll",
                    "doc_id": "$fullDocument._id",
                    "type": "$operationType",
                    "updatedFields": "$updateDescription.updatedFields",
                    "removedFields": "$updateDescription.removedFields",
                    "fullDocument": "$fullDocument"
                }
            }, {
                "$project": {
                    "timestamp": 1,
                    "user": 1,
                    "db": 1,
                    "coll": 1,
                    "type": 1,
                    "doc_id": 1,
                    "updatedFields": 1,
                    "removedFields": 1,
                    "fullDocument": 1
                }
            }]
            resume_token = None
            self.running = True
            for flusher in self.flushers:
                flusher.start()
            self.__status = 'running'
            while self.running:
                try:
                    with db.watch(pipeline,
                                  full_document='updateLookup',
                                  resume_after=resume_token) as stream:
                        if not self.running:
                            log.debug("Closeing stream...")
                            stream.close()
                        for change in stream:
                            if not self.running:
                                break
                            createDoc = False
                            ignoredFields = []

                            # General changes
                            change['timestamp'] = change[
                                'timestamp'].as_datetime().strftime(
                                    '%Y-%m-%dT%H:%M:%S.%f')
                            if 'user' not in change:
                                change['user'] = '******'

                            # Type specific changes
                            if change['type'] == 'insert':
                                createDoc = True
                                if environ.get('CT_DEBUG'):
                                    log.debug(
                                        "{timestamp}: user={user} db={db} coll={coll} type={type} doc_id={doc_id}"
                                        .format(**change))
                            elif change['type'] == 'update':
                                updatedFields = {}
                                removedFields = []
                                for field, value in change['updatedFields'].items():
                                    if self.__match(field):
                                        # json_value = json.loads(value)
                                        if isinstance(value, (dict, list)):
                                            flat_value = flatten_json(value)
                                            for _field, _value in flat_value.items():
                                                updatedFields[f"{field}.{_field}"] = _value
                                        else:
                                            updatedFields[field] = value
                                        createDoc = True
                                    else:
                                        ignoredFields.append(field)
                                for field in change['removedFields']:
                                    if self.__match(field):
                                        removedFields.append(field)
                                        createDoc = True
                                    else:
                                        ignoredFields.append(field)

                                change['updatedFields'] = updatedFields
                                change['removedFields'] = removedFields
                                del change['fullDocument']
                                if environ.get('CT_DEBUG'):
                                    log_msg = "{timestamp}: user={user} db={db} coll={coll} type={type} doc_id={doc_id} updatedFields={updatedFields} removedFields={removedFields}".format(
                                        **change)
                                    log_msg = (
                                        log_msg[:500] + '...'
                                    ) if len(log_msg) > 500 else log_msg
                                    log.debug(log_msg)

                            # If we need to create a change entry
                            if createDoc:
                                self.__add(change)
                            else:
                                if change['type'] in ['insert', 'update']:
                                    log.debug(
                                        "Not tracking change for: {timestamp}: user={user} db={db} coll={coll} type={type} doc_id={doc_id} ignoredFields={ignoredFields}"
                                        .format(**change,
                                                ignoredFields=ignoredFields))
                                else:
                                    log.warning(
                                        "Not tracking change for: {0}".format(
                                            change))
                            resume_token = stream.resume_token
                except Exception as ex:
                    self.__status = 'error'
                    log.error(ex)
                    traceback.print_exc(file=sys.stdout)
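# A minimal, self-contained sketch of the pymongo change-stream pattern run()
# is built around: watch the database with an aggregation pipeline, keep the
# resume token, and reopen the stream after errors. The connection string and
# collection name are placeholders, and a replica set (or mongos) is required
# for change streams.
import pymongo
from pymongo.errors import PyMongoError

client = pymongo.MongoClient('mongodb://localhost:27017/?replicaSet=rs0')
db = client.get_database('demo')
pipeline = [{'$match': {'ns.coll': {'$in': ['users']}}}]
resume_token = None
while True:
    try:
        with db.watch(pipeline,
                      full_document='updateLookup',
                      resume_after=resume_token) as stream:
            for change in stream:
                print(change['operationType'], change.get('documentKey'))
                resume_token = stream.resume_token
    except PyMongoError as ex:
        print(f'change stream error, retrying: {ex}')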