Ejemplo n.º 1
0
 def log_all(msg):
     """Log ``msg`` once at every level, repeated for eleven rounds.

     Each round emits the message at debug, info, warning, error and
     critical level, in that order.
     """
     level_names = ("debug", "info", "warning", "error", "critical")
     for _ in range(11):
         for level_name in level_names:
             getattr(logger, level_name)(msg)
Ejemplo n.º 2
0
 def parseHtml(cls, doc, highlight=None, parser='html.parser', source='unknown', target_source=None, item=None, id=None, title=None, link=None, abstract=None, details=None):
     """Parse ``doc`` into a list of highlighted record dictionaries.

     Args:
         doc: Markup to parse. A falsy value yields an empty list.
         highlight: Passed to ``Highlighter.highlight`` for the ``title``
             and ``abstract`` fields of every record.
         parser: Parser name handed to ``BeautifulSoup``.
         source: Source label stored on every record.
         target_source: Target source label; falls back to ``source`` when
             falsy.
         item: Passed to ``cls.__select`` on the whole document to obtain
             the elements to iterate over.
         id, title, link, abstract, details: Passed to ``cls.__select`` on
             each element to obtain the corresponding record field.

     Returns:
         The records collected so far. If an ``Exception`` is raised while
         parsing, it is logged, its traceback is printed to stdout, and the
         records gathered before the failure are returned.
     """
     records = []
     if not doc:
         return records

     # Loop-invariant: the fallback does not depend on the element.
     effective_target = target_source if target_source else source
     try:
         soup = BeautifulSoup(doc, parser)
         log.debug(f"Got results from '{source}'.")
         for elt in cls.__select(soup, item):
             _id = cls.__select(elt, id)
             _title = cls.__select(elt, title)
             _link = cls.__select(elt, link)
             _abstract = cls.__select(elt, abstract)
             _details = cls.__select(elt, details)
             rec = cls.record(source, effective_target, _id, _title,
                              _link, _abstract, _details).to_dict()
             rec = Highlighter.highlight(
                 rec, ['title', 'abstract'], highlight)
             log.info(f"Record: {rec}")
             records.append(rec)
     except Exception as ex:
         log.error(ex)
         traceback.print_exc(file=stdout)
     # Deliberately NOT ``finally: return``: returning from ``finally`` would
     # also discard KeyboardInterrupt/SystemExit and any error raised by the
     # handler above.
     return records
 def destroy(cls):
     """Kill every registered tracker and delete its lock file if one exists."""
     for tracker_key in cls.trackers:
         log.info(f"Killing tracker {tracker_key}.")
         cls.trackers[tracker_key].kill()
         lock_path = Path(join('temp', tracker_key + '.lock'))
         if not lock_path.is_file():
             continue
         lock_path.unlink()
Ejemplo n.º 4
0
    def test_non_str_logging(self):
        """Non-string payloads can be logged; an int shows up in the output."""
        logger.info(10)
        self.assertIn("10", self.last_line())

        # The remaining calls only have to complete without raising.
        logger.debug([10, 20, 30])
        logger.critical(dict())
        logger.warning({-1, 4})
 def kill(self):
     """Stop this tracker and perform one last flush.

     Clears ``running``, waits for ``self.join()`` to return and then calls
     ``__handleQ(True)``. ``__status`` is set to ``'dieing'`` at the start
     and to ``'dead'`` once the final flush has returned.
     """
     # NOTE(review): presumably the worker loop checks ``running`` and exits
     # once it is False - confirm against the run loop.
     self.running = False
     self.__status = 'dieing'
     log.info(f"[{self.id}] Waiting for flush...")
     # Order matters: wait for join() to return before the final flush.
     self.join()
     # True is the ``empty`` flag; presumably it drains the whole queue
     # regardless of the bulk-size limit - verify against __handleQ.
     self.__handleQ(True)
     log.info(f"[{self.id}] Final flush done.")
     self.__status = 'dead'
Ejemplo n.º 6
0
 def test_message_is_logged(self):
     """The text passed to each level appears in the latest output line."""
     level_names = ("debug", "info", "warning", "error", "critical")
     for number, level_name in enumerate(level_names, start=1):
         text = f"message {number}"
         getattr(logger, level_name)(text)
         self.assertIn(text, self.last_line())
Ejemplo n.º 7
0
 def test_filepath_is_logged(self):
     """The latest output line names this test file at every level."""
     level_names = ("debug", "info", "warning", "error", "critical")
     for number, level_name in enumerate(level_names, start=1):
         getattr(logger, level_name)(f"message {number}")
         self.assertIn("test_colored_logger.py", self.last_line())
Ejemplo n.º 8
0
 def test_level_is_logged(self):
     """The upper-case level name appears in the latest output line."""
     level_names = ("debug", "info", "warning", "error", "critical")
     for number, level_name in enumerate(level_names, start=1):
         getattr(logger, level_name)(f"message {number}")
         self.assertIn(level_name.upper(), self.last_line())
 def __handleQ(self, empty=False):
     """Pull queued changes and insert them in one bulk operation.

     Does nothing when the queue is empty. Otherwise entries are taken
     from ``changesQ`` until it is empty; unless ``empty`` is true, pulling
     also stops once the pull counter exceeds ``ChangeTracker.BULK_SIZE``.
     ``__status`` is ``'flushing'`` during the insert and ``'idle'`` after.
     """
     if self.changesQ.empty():
         return

     batch = []
     pulled = 0
     while not self.changesQ.empty():
         # Each queue entry is itself an iterable of changes.
         batch.extend(self.changesQ.get())
         if not empty and pulled > ChangeTracker.BULK_SIZE:
             break
         pulled += 1

     self.__status = 'flushing'
     log.info(f"[{self.id}] Flushing {len(batch)} changes...")
     Change.objects.insert(batch)
     log.info(f"[{self.id}] Flushed {len(batch)} changes.")
     self.__status = 'idle'
Ejemplo n.º 10
0
 def get(url: str, **kwargs):
     """Fetch ``url`` with a GET request and return the response body.

     ``kwargs`` are sent as query-string parameters. The request carries a
     desktop Chrome User-Agent and times out after 5 seconds.

     On an OK response the body is also written to
     ``<url with '/' replaced by '_'>.html`` in the current working
     directory before it is returned.

     Returns:
         The response text, or ``None`` when the response is not OK or a
         ``requests.RequestException`` was raised (both cases are logged).
     """
     log.info(f"Request url={url} args={kwargs}")
     headers = {
         'User-Agent':
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'
     }
     try:
         res = requests.get(url, params=kwargs, timeout=5, headers=headers)
     except requests.RequestException as ex:
         log.error(ex)
         return None

     log.debug(f"Response code={res.status_code}")
     if not res.ok:
         log.error(res.text)
         return None

     # Explicit UTF-8: with the platform default encoding, a body containing
     # characters outside that code page raised UnicodeEncodeError, which
     # the RequestException handler above does not catch.
     with open(f"{url.replace('/', '_')}.html", 'w', encoding='utf-8') as f:
         f.write(res.text)
     return res.text
 def __new__(cls, mongo_uri, collections, fields):
     """Return the change tracker for ``mongo_uri`` and ``collections``.

     Trackers are keyed by the MD5 hex digest of the URI concatenated with
     the collection names. A tracker already registered in ``cls.trackers``
     is returned as is. Otherwise a lock file ``temp/<key>.lock`` guards
     against starting the same tracker twice: when the file already exists
     a warning is logged and a placeholder built with ``None`` arguments is
     returned; when it does not, the file is created and a new tracker is
     started and registered.

     Args:
         mongo_uri: Connection string; logged through ``cls.__hide_pw``.
         collections: Iterable of collection names (strings).
         fields: Iterable of field names (strings).
     """
     tracker_key = hashlib.md5(
         (mongo_uri + ''.join(collections)).encode('utf-8')).hexdigest()

     # Same-process reuse: hand back the tracker that is already running.
     if tracker_key in cls.trackers:
         return cls.trackers[tracker_key]

     lock_path = Path(join('temp', tracker_key + '.lock'))
     # Only the parent directory is created. Calling mkdir() on the lock
     # path itself turned the lock into a directory, so is_file() was never
     # true and the "already started" branch could not be reached.
     lock_path.parent.mkdir(parents=True, exist_ok=True)

     if lock_path.is_file():
         log.warning(
             f"Change tracker '{tracker_key}'' seams to be already started for {cls.__hide_pw(mongo_uri)}, collections=[{', '.join(collections)}] fields=[{', '.join(fields)}]"
         )
         # NOTE(review): the placeholder gets two arguments while the real
         # tracker below gets three - confirm __ChangeTracker accepts this.
         return cls.__ChangeTracker(None, None)

     lock_path.touch()
     log.info(
         f"Start change tracker '{tracker_key}' for {cls.__hide_pw(mongo_uri)}, collections=[{', '.join(collections)}] fields=[{', '.join(fields)}]"
     )
     cls.trackers[tracker_key] = cls.__ChangeTracker(
         mongo_uri, collections, fields)
     return cls.trackers[tracker_key]
Ejemplo n.º 12
0
 def log_all():
     """Log "message 1" to "message 5", one per level from debug to critical."""
     level_names = ("debug", "info", "warning", "error", "critical")
     for number, level_name in enumerate(level_names, start=1):
         getattr(logger, level_name)(f"message {number}")
Ejemplo n.º 13
0
 def test_terminal_logging(self):
     """An info line sent to a terminal carries the level and color codes."""
     logger.info("message to terminal device")
     self.assertIn("INFO", self.last_line())
     # The line is 118 characters without colors; coloring adds 4 more.
     plain_length = 118
     color_characters = 4
     self.assertGreaterEqual(
         len(self.last_line()), plain_length + color_characters)
Ejemplo n.º 14
0
def search():
    """Run the requested query against the configured sources.

    Request arguments read: ``q`` (query string), ``expand`` (``'true'``
    by default), ``max`` (10 by default) and ``source`` (optional
    comma-separated list of source names to restrict the search to).

    Every source in ``pwsp.yaml`` that is not disabled, and that matches
    the ``source`` filter if one is given, is queried once per query
    produced by its query builder. Each response is parsed with every entry
    of the source's ``parse`` list, and each resulting item is tagged with
    the query that produced it.

    Returns:
        A JSON response with per-source statistics, the consolidated items,
        the plain and expanded terms, the ``expand`` flag and the elapsed
        time in seconds.
    """
    # TODO: to be removed
    with open(r'./pwsp.yaml') as config_file:
        pwsp_config = yaml.load(config_file, Loader=yaml.FullLoader)
    ###
    args = request.args
    must_terms, optional_terms = TermParser.parse(args.get('q'))

    must_terms, expanded_must_terms = Expander.expand(must_terms)
    optional_terms, expanded_optional_terms = Expander.expand(optional_terms)

    expand = args.get('expand', 'true') == 'true'
    limit = args.get('max', 10)
    requested_sources = args.get('source')
    sources = requested_sources.split(',') if requested_sources else None
    start = timer()

    # The term lists actually used depend only on the ``expand`` flag.
    active_must = expanded_must_terms if expand else must_terms
    active_optional = expanded_optional_terms if expand else optional_terms

    items = []
    for source in pwsp_config['sources']:
        if 'disabled' in source and source['disabled']:
            continue
        if sources is not None and source['source'] not in sources:
            continue

        log.info(f"Query source {source['source']}")
        query_builder = QueryBuilderFactory.create(
            source.get('query_builder', 'default'))
        queries = query_builder.build(must_terms=active_must,
                                      optional_terms=active_optional)
        for query in queries:
            if 'urls' in source:
                # multistage
                for url in source['urls']:
                    __fix_parameters(url['params'], query, limit)
                doc = MultistageRequester.get(source['urls'])
            else:
                __fix_parameters(source['params'], query, limit)
                doc = Requester.get(source['url'], **source['params'])

            for parse in source['parse']:
                new_items = Parser.parseHtml(
                    doc,
                    highlight=active_must + active_optional,
                    source=source['source'],
                    target_source=source.get('target_source'),
                    **parse)
                for item in new_items:
                    item.update({'query': query})
                items.extend(new_items)

    log.info("Finalize...")
    # Per-source counts are taken before the items are consolidated.
    source_count_details = __get_statistics(items, 'source')
    items = __consolidate_items(items)

    # Keys stay in this order in the result mapping.
    result = {
        'source_count_details': source_count_details,
        'count': len(items),
        'source_count': __get_statistics(items),
        'terms': must_terms + optional_terms,
        'expanded_terms': expanded_must_terms + expanded_optional_terms,
        'expand': expand,
        'took': td(seconds=timer() - start).total_seconds(),
        'items': items,
    }
    return jsonify(result)
Ejemplo n.º 15
0
# Wrap the application with flask_cors' CORS extension.
CORS(app)

# Renderers: JSON plus the browsable API renderer.
app.config['DEFAULT_RENDERERS'] = [
    'flask_api.renderers.JSONRenderer',
    'flask_api.renderers.BrowsableAPIRenderer',
]

# NOTE(review): the secret key is hard-coded in the source file; consider
# loading it from the environment or a secrets store instead.
app.secret_key = b'Q#G[DK.]uVs9qXW*hWvc32VW!wzL^2A?'

# Add UTF-8 support
app.config['JSON_AS_ASCII'] = False

# disable sorting json keys
app.config['JSON_SORT_KEYS'] = False

log.info("Loading .env")
env.read_envfile()

# Load the source configuration from pwsp.yaml at import time.
with open(r'./pwsp.yaml') as file:
    pwsp_config = yaml.load(file, Loader=yaml.FullLoader)


def __get_statistics(items, by='target_source'):
    """Count how many items share each value of the ``by`` key.

    Returns a dict mapping every distinct ``item[by]`` value to its number
    of occurrences, in order of first appearance.
    """
    tally = {}
    for entry in items:
        bucket = entry[by]
        tally[bucket] = tally.get(bucket, 0) + 1
    return tally

Ejemplo n.º 16
0
from flask_cors import CORS
from flask_script import Manager, Server
from friendlylog import colored_logger as log
from mongoengine.queryset.visitor import Q
from tendo import singleton
from tendo.singleton import SingleInstanceException

from modules.decorators import queryset_respose
from flask_add_ons.logging import colorize_werkzeug

# Logging
logging.basicConfig(level=logging.DEBUG)
colorize_werkzeug()

# Development mode is on when FLASK_ENV is 'development' or DEBUG is 'True'.
# environ.get() is used so that an unset variable counts as "not development"
# instead of raising KeyError while the module is being imported.
__dev__ = (environ.get('FLASK_ENV') == 'development'
           or environ.get('DEBUG') == 'True')
log.info(f"Development mode is '{__dev__}'")

# make sure only one instance is running
if not __dev__:
    try:
        # Keep the reference in a module-level name so it lives as long as
        # the process does.
        me = singleton.SingleInstance()
    except SingleInstanceException:
        log.error("MCT already running!")
        # Raise SystemExit directly: the ``exit`` helper is added by the
        # ``site`` module and is not guaranteed to be available.
        raise SystemExit(1)

app = Flask(__name__)
CORS(app)

app.config['DEFAULT_RENDERERS'] = [
    'flask_api.renderers.JSONRenderer',
    'flask_api.renderers.BrowsableAPIRenderer',