Example #1
0
def test_database_session():
    """
    Open a session on a newly created engine and check that it is truthy.
    :return:
    """
    db_engine = Engine.create(ini=ini)
    with Session(engine=db_engine) as db_session:
        assert db_session
Example #2
0
    def save(self, id, obj):
        """Save crawled data into database.

        Looks up the ``Domain`` row whose ``uuid`` equals ``id``, runs every
        pipeline listed in ``pipelines.__all__`` against the crawled object,
        then saves a ``Webpage`` and a ``Port`` document inside an
        ``Elastic`` context.

        :param id: value used for the ``Domain.uuid`` filter, stored as the
            documents' ``meta`` id and passed to ``upload_screenshot``.
        :param obj: crawl result; ``obj.webpage`` and ``obj.port`` are read.
        :return: None
        """
        Log.i("Saving crawled data")

        meta = {
            'id': id,
        }

        engine = Engine.create(ini=self.ini)

        # Dispose of the engine even when the lookup raises, so a failed
        # query does not leave it undisposed.
        try:
            with Session(engine=engine) as session:
                domain = session.query(Domain).filter_by(uuid=id).first()
        finally:
            engine.dispose()

        # pass the pipeline before saving data (for preprocessing)
        for pipeline in pipelines.__all__:
            _class = pipeline(domain, data=obj, ini=self.ini)

            if _class.active:
                Log.d(f"handling the {_class.name} pipeline")
                try:
                    _class.handle()
                except Exception:
                    # Best effort: one failing pipeline must not block the
                    # others, but KeyboardInterrupt/SystemExit still
                    # propagate (a bare ``except:`` would swallow them).
                    Log.e(f"Error while handling {_class.name} pipeline")
            else:
                Log.d(f"{_class.name} pipeline isn't active")

            del _class

        with Elastic(ini=self.ini):
            # upload screenshot at Amazon S3
            screenshot = self.upload_screenshot(obj.webpage.screenshot, id)

            Webpage(
                meta=meta,
                url=obj.webpage.url,
                domain=obj.webpage.domain,
                title=obj.webpage.title,
                time=datetime.now(),
                source=obj.webpage.source,
                screenshot=screenshot,
                language=obj.webpage.language,
                headers=obj.webpage.headers,
                tree=obj.webpage.tree,
            ).save()

            # one Service entry per scanned port record
            Port(meta=meta,
                 services=[
                     Service(number=port['number'], status=port['status'])
                     for port in obj.port
                 ]).save()
Example #3
0
def test_manage_model():
    """
    Insert one Domain row, then verify lookups by uuid: the inserted uuid
    is counted once and an unknown uuid is counted zero times.
    :return:
    """
    db_engine = Engine.create(ini=ini)

    # write phase: persist a single Domain
    with Session(engine=db_engine) as writer:
        record = Domain('test', 'https://formed_url.onion')
        writer.add(record)
        writer.commit()

    # read phase: use a separate session for the checks
    with Session(engine=db_engine) as reader:
        present = reader.query(Domain).filter(Domain.uuid == 'test').count()
        assert present == 1
        missing = reader.query(Domain).filter(
            Domain.uuid == 'is_not_exist').count()
        assert missing == 0
Example #4
0
    def handle(self):
        """Extract candidate addresses from the page source and store them.

        Runs the parent ``handle()``, scans ``self.data.webpage.source`` with
        a regular expression, and for every match accepted by
        ``self.validate_address`` fetches or creates an ``Address`` row,
        appends ``self.domain`` to its ``domains`` and commits.

        :return: None
        """
        super(BitcoinPipeline, self).handle()
        addresses = re.findall(r'([13][a-km-zA-HJ-NP-Z0-9]{26,33})',
                               self.data.webpage.source)

        engine = Engine.create(ini=self.ini)

        # Dispose of the engine even if a lookup or commit raises; otherwise
        # the error path would skip engine.dispose() entirely.
        try:
            with Session(engine=engine) as session:
                for address in addresses:
                    if not self.validate_address(address):
                        continue

                    Log.d("{} address is valid address".format(address))
                    instance = get_or_create(session, Address, address=address)
                    instance.domains.append(self.domain)
                    session.add(instance)
                    # committed per address, as before
                    session.commit()
        finally:
            engine.dispose()
Example #5
0
    def save(self):
        """
        Save domain on database and request crawling.

        Every URL in ``self.urls`` gets a fresh task id, is added as a
        ``Domain`` row and committed, and is then submitted through
        ``run_crawler.apply_async``. Each URL is removed from ``self.urls``
        once it has been handled, whether or not it succeeded.
        :return: None
        """
        engine = Engine.create(self.ini)
        with Session(engine=engine) as session:
            # Iterate over a snapshot: removing items from the list that is
            # being iterated makes the loop skip every other URL.
            for url in list(self.urls):
                task_id = uuid4().hex

                try:
                    # add url into database
                    session.add(Domain(uuid=task_id, url=url))
                    session.commit()

                    task = run_crawler.apply_async(args=(url, ),
                                                   task_id=task_id)
                    Log.i("Crawler issued a new task id {} at {}".format(
                        task.task_id, url))
                except Exception:
                    # NOTE(review): assumes a SQLAlchemy-style session. After
                    # a failed commit it must be rolled back, or every
                    # following URL fails too. Confirm against Session.
                    session.rollback()
                    Log.d(
                        "This {} url already saved into database.".format(url))
                finally:
                    self.urls.remove(url)
Example #6
0
def test_create_engine():
    """
    Check that Engine.create returns a truthy engine for the test ini.
    :return:
    """
    created_engine = Engine.create(ini=ini)
    assert created_engine