Example #1
    def scan(self, url):
        """Scan and crawl url which user requested."""
        Log.i("Trying to crawl {} url".format(url))

        domain = urlparse(url).netloc
        obj = DynamicObject()

        # Step 1. Visit website using headless tor browser
        Log.d("Step 1. Visiting {} website using headless browser".format(url))

        browser = HeadlessBrowser(ini=self.ini, tor_network=True)

        report = browser.run(url)

        del browser

        # if the browser raised an exception, return the empty object here
        if not report:
            return obj

        obj.webpage = report

        # Step 2. Scan common service port
        Log.d(
            "Step 2. Scanning {} domain's common service port".format(domain))
        obj.port = self._portscan(domain)

        # Step 3. TO-DO

        return obj
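The DynamicObject container returned by scan() is not part of the snippet. A minimal sketch, assuming it is just a dict that also allows attribute-style access (obj.webpage, obj.port):

class DynamicObject(dict):
    """Dict that also exposes its keys as attributes (a guess at the helper's shape)."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value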
Example #2
 def create(cls, ini):
     Log.d("Creating database engine...")
     engine = create_engine(
         ini.read('DATABASE', 'URL'),
         # `is` compares identity, not equality; use == for the string check
         echo=ini.read('DATABASE', 'DEBUG') == 'true')
     Base.metadata.create_all(bind=engine)
     return engine
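The Session context manager that consumes this engine in Examples #3, #4 and #6 is not shown. A minimal sketch, assuming it simply wraps a SQLAlchemy sessionmaker and closes the session on exit:

from sqlalchemy.orm import sessionmaker


class Session:
    """Context manager yielding a SQLAlchemy session bound to the given engine."""

    def __init__(self, engine):
        self.session = sessionmaker(bind=engine)()

    def __enter__(self):
        return self.session

    def __exit__(self, exc_type, exc_value, traceback):
        self.session.close()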
Example #3
    def save(self, id, obj):
        """Save crawled data into database."""
        Log.i("Saving crawled data")

        meta = {
            'id': id,
        }

        engine = Engine.create(ini=self.ini)

        with Session(engine=engine) as session:
            domain = session.query(Domain).filter_by(uuid=id).first()

        engine.dispose()

        # pass the pipeline before saving data (for preprocessing)
        for pipeline in pipelines.__all__:
            _class = pipeline(domain, data=obj, ini=self.ini)

            if _class.active:
                Log.d(f"handling the {_class.name} pipeline")
                try:
                    _class.handle()
                except Exception:
                    Log.e(f"Error while handling {_class.name} pipeline")
            else:
                Log.d(f"{_class.name} pipeline isn't active")

            del _class

        with Elastic(ini=self.ini):
            # upload screenshot at Amazon S3
            screenshot = self.upload_screenshot(obj.webpage.screenshot, id)

            Webpage(
                meta=meta,
                url=obj.webpage.url,
                domain=obj.webpage.domain,
                title=obj.webpage.title,
                time=datetime.now(),
                source=obj.webpage.source,
                screenshot=screenshot,
                language=obj.webpage.language,
                headers=obj.webpage.headers,
                tree=obj.webpage.tree,
            ).save()

            Port(meta=meta,
                 services=[
                     Service(number=port['number'], status=port['status'])
                     for port in obj.port
                 ]).save()
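The upload_screenshot helper is referenced but not included. A hedged sketch using boto3, written as a free function for brevity; the bucket/ini key names and the key layout are assumptions:

import boto3


def upload_screenshot(screenshot, id, ini):
    """Upload the screenshot bytes to S3 and return the object key."""
    key = "screenshots/{}.png".format(id)  # key layout is an assumption
    boto3.client('s3').put_object(
        Bucket=ini.read('AWS', 'BUCKET'),  # section/option names are assumptions
        Key=key,
        Body=screenshot,
    )
    return key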
Example #4
    def handle(self):
        super(BitcoinPipeline, self).handle()
        addresses = re.findall(r'([13][a-km-zA-HJ-NP-Z0-9]{26,33})',
                               self.data.webpage.source)

        engine = Engine.create(ini=self.ini)

        with Session(engine=engine) as session:
            for address in addresses:
                if self.validate_address(address):
                    Log.d("{} address is valid address".format(address))
                    instance = get_or_create(session, Address, address=address)
                    instance.domains.append(self.domain)
                    session.add(instance)
                    session.commit()

        engine.dispose()
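validate_address is not shown in this example. One plausible implementation, sketched as a free function and assuming legacy Base58Check addresses whose last four bytes are a double-SHA256 checksum of the payload:

import hashlib

B58_ALPHABET = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"


def validate_address(address):
    """Return True if the address passes the Base58Check checksum."""
    try:
        num = 0
        for char in address:
            num = num * 58 + B58_ALPHABET.index(char)
        decoded = num.to_bytes(25, byteorder='big')  # version + hash160 + 4-byte checksum
    except (ValueError, OverflowError):
        return False
    payload, checksum = decoded[:-4], decoded[-4:]
    return hashlib.sha256(hashlib.sha256(payload).digest()).digest()[:4] == checksum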
Example #5
    def collect(self):
        Log.d("Start collecting from freshonion API")
        response = HTTP.request(
            url='http://zlal32teyptf4tvi.onion/json/all',
            tor_network=True,
            ini=self.ini
        )

        if not response:
            Log.e("Exception accrued while loading website.")
            return

        if response.status_code == 200:
            rows = response.json()
            Log.i("{} url detected from freshonion".format(len(rows)))

            for row in rows:
                url = self._get_formed_url(row)
                if url not in self.urls:
                    self.urls.append(url)
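The HTTP.request wrapper used above is not part of the snippet. A minimal sketch, assuming it is a thin layer over the requests library that routes traffic through a local Tor SOCKS proxy (the proxy address and the unused ini parameter are assumptions):

import requests


class HTTP:
    @staticmethod
    def request(url, tor_network=False, ini=None, timeout=30):
        """GET the url, optionally through Tor; return None on failure."""
        proxies = {}
        if tor_network:
            # requires requests[socks]; proxy address is an assumption
            proxies = {
                'http': 'socks5h://127.0.0.1:9050',
                'https': 'socks5h://127.0.0.1:9050',
            }
        try:
            return requests.get(url, proxies=proxies, timeout=timeout)
        except requests.RequestException:
            return None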
Example #6
    def save(self):
        """
        Save domain on database and request crawling.
        :return: None
        """
        engine = Engine.create(self.ini)
        with Session(engine=engine) as session:
            # iterate over a copy, since urls are removed from the list below
            for url in list(self.urls):
                task_id = uuid4().hex

                try:
                    # add url into database
                    session.add(Domain(uuid=task_id, url=url))
                    session.commit()

                    task = run_crawler.apply_async(args=(url, ),
                                                   task_id=task_id)
                    Log.i("Crawler issued a new task id {} at {}".format(
                        task.task_id, url))
                except Exception:
                    Log.d(
                        "The url {} is already saved in the database.".format(url))
                finally:
                    self.urls.remove(url)
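The run_crawler Celery task issued here is defined elsewhere. A sketch of how it could tie Examples #1 and #3 together; the app name, broker URL, and the Crawler/Ini import are placeholders for the project's real objects:

from celery import Celery

app = Celery('crawler', broker='redis://localhost:6379/0')  # app name and broker URL are assumptions


@app.task(bind=True, name='run_crawler')
def run_crawler(self, url):
    """Scan the url and persist the result under the issued task id."""
    from crawler import Crawler, Ini  # hypothetical module hosting scan()/save() from Examples #1 and #3
    crawler = Crawler(ini=Ini())
    obj = crawler.scan(url)
    crawler.save(self.request.id, obj)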
Example #7
    def _portscan(self, domain):
        """Scan and check opened port."""
        socket = Socket(
            tor_network=True,
            ini=self.ini,
        )

        # common service port list
        services = [
            {'number': 20, 'status': False},    # FTP (data)
            {'number': 21, 'status': False},    # FTP
            {'number': 22, 'status': False},    # SSH
            {'number': 23, 'status': False},    # Telnet
            {'number': 25, 'status': False},    # SMTP
            {'number': 80, 'status': False},    # HTTP
            {'number': 110, 'status': False},   # POP3
            {'number': 123, 'status': False},   # NTP
            {'number': 143, 'status': False},   # IMAP
            {'number': 194, 'status': False},   # IRC
            {'number': 389, 'status': False},   # LDAP
            {'number': 443, 'status': False},   # HTTPS
            {'number': 993, 'status': False},   # IMAPS
            {'number': 3306, 'status': False},  # MySQL
            {'number': 3389, 'status': False},  # RDP
            {'number': 5222, 'status': False},  # XMPP
            {'number': 6667, 'status': False},  # Public IRC
            {'number': 8060, 'status': False},  # OnionCat
            {'number': 8333, 'status': False},  # Bitcoin
        ]

        for service in services:
            opened = socket.ping_check(domain, service['number'])
            service['status'] = opened
            Log.d("{} port is {}".format(service['number'],
                                         'opened' if opened else 'closed'))

        del socket

        return services
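Socket.ping_check is not included in the snippet. One way it might work, sketched as a free function with PySocks: attempt a TCP connect to domain:port through the local Tor SOCKS proxy and report whether it succeeds (proxy address and timeout are assumptions):

import socks  # PySocks


def ping_check(domain, port, timeout=10):
    """Return True if a TCP connect to domain:port succeeds over Tor."""
    sock = socks.socksocket()
    sock.set_proxy(socks.SOCKS5, '127.0.0.1', 9050)  # proxy address is an assumption
    sock.settimeout(timeout)
    try:
        sock.connect((domain, port))
        return True
    except (socks.ProxyError, OSError):
        return False
    finally:
        sock.close()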
def test_write_debug():
    Log.d("Test Debugging Message")