Example #1
0
 def _get_web_page_from_db(self, current_session, page_id=None, url=None, page=None):
     if page is None:
         if page_id is not None:
             page = self.pages.find_one({"session": current_session, "web_page_id": page_id })
         elif url is not None:
             page = self.pages.find_one({"session": current_session, "url": url})
         else:
             raise AttributeError("You must specifies either page_id or url")
         if page is None:
             return None
     clickables = self.get_all_clickables_to_page_id_from_db(current_session, page['web_page_id'])
     forms = self.get_all_forms_to_page_id_from_db(current_session, page['web_page_id'])
     result = WebPage(page['web_page_id'], page['url'], page['html'], None, page['current_depth'], page['base_url'])
     result.clickables = clickables
     result.forms = forms
     links = []
     for link in page['links']:
         links.append(self._parse_link_from_db(link))
     result.links = links
     timemimg_requests = []
     for request in page['timing_requests']:
         timemimg_requests.append(self.get_asyncrequest_to_id(current_session, request))
     result.timing_requests = timemimg_requests
     ajax = []
     for request in page['ajax_requests']:
         ajax.append(self.get_asyncrequest_to_id(current_session, request))
     result.ajax_requests = ajax
     return result
Example #2
0
 def _get_web_page_from_db(self,
                           current_session,
                           page_id=None,
                           url=None,
                           page=None):
     if page is None:
         if page_id is not None:
             page = self.pages.find_one({
                 "session": current_session,
                 "web_page_id": page_id
             })
         elif url is not None:
             page = self.pages.find_one({
                 "session": current_session,
                 "url": url
             })
         else:
             raise AttributeError(
                 "You must specifies either page_id or url")
         if page is None:
             return None
     clickables = self.get_all_clickables_to_page_id_from_db(
         current_session, page['web_page_id'])
     forms = self.get_all_forms_to_page_id_from_db(current_session,
                                                   page['web_page_id'])
     result = WebPage(page['web_page_id'], page['url'], page['html'], None,
                      page['current_depth'], page['base_url'])
     result.clickables = clickables
     result.forms = forms
     links = []
     for link in page['links']:
         links.append(self._parse_link_from_db(link))
     result.links = links
     timemimg_requests = []
     for request in page['timing_requests']:
         timemimg_requests.append(
             self.get_asyncrequest_to_id(current_session, request))
     result.timing_requests = timemimg_requests
     ajax = []
     for request in page['ajax_requests']:
         ajax.append(self.get_asyncrequest_to_id(current_session, request))
     result.ajax_requests = ajax
     return result
Example #3
0
 def _login_and_return_webpage(self,
                               login_form,
                               page_with_login_form=None,
                               login_data=None,
                               login_clickable=None):
     if page_with_login_form is None:
         page_with_login_form = self._page_with_loginform_logged_out
     try:
         if login_clickable is not None:
             tmp_page = deepcopy(page_with_login_form)
             event_state, page_with_login_form = self._event_executor.execute(
                 tmp_page, element_to_click=login_clickable)
             if event_state == EventResult.ErrorWhileInitialLoading:
                 sleep(2000)
                 event_state, page_with_login_form = self._event_executor.execute(
                     tmp_page, element_to_click=login_clickable)
                 if event_state == EventResult.ErrorWhileInitialLoading:
                     logging.debug(
                         "Two time executing fails.. stop crawling")
                     return None
             self.domain_handler.complete_urls_in_page(page_with_login_form)
             self.domain_handler.analyze_urls(page_with_login_form)
             self.async_request_handler.handle_requests(
                 page_with_login_form)
         logging.debug("Start submitting login form...")
         response_code, html_after_timeouts, new_clickables, forms, links, timemimg_requests = self._form_handler.submit_form(
             login_form, page_with_login_form, login_data)
     except ValueError:
         return None
     #TODO: Put building of Webpage inside submit function
     page_after_login = WebPage(-1, page_with_login_form.url,
                                html_after_timeouts)
     page_after_login.clickables = new_clickables
     page_after_login.links = links
     page_after_login.timing_requests = timemimg_requests
     page_after_login.forms = forms
     self.domain_handler.complete_urls_in_page(page_after_login)
     self.domain_handler.analyze_urls(page_after_login)
     self.async_request_handler.handle_requests(page_after_login)
     return page_after_login
Example #4
0
    def analyze(self, url_to_request, timeout=10, current_depth=None, method="GET", data=None):
        """Load ``url_to_request`` in the main frame and describe the resulting page.

        The method resets the per-run state, loads the URL (GET or
        form-encoded POST), polls until ``_loading_complete`` is set or
        ``timeout`` is reached, waits for the queued timing events and then
        extracts links, clickables and forms into a ``WebPage``.

        Args:
            url_to_request: Target URL; objects exposing ``toString()`` are
                converted with it, anything else is used unchanged.
            timeout: Upper bound for the accumulated waiting time; it is
                compared against sums of ``self.wait_for_processing``.
            current_depth: Not used by this method.
            method: ``"GET"`` loads the URL directly; any other value sends
                a POST request.
            data: Form data handed to ``self.post_data_to_array`` for
                non-GET requests. ``None`` (the default) means an empty dict.

        Returns:
            A ``(response_code, current_page)`` tuple. ``response_code``
            falls back to 200 when no code was recorded for
            ``url_to_request``.
        """
        # Accept URL objects as well as plain strings.
        try:
            url_to_request = url_to_request.toString()
        except AttributeError:
            pass

        logging.debug("Start analyzing the url {}...".format(url_to_request))
        # Reset everything that is collected during a single analysis run.
        self._timing_requests = []
        self._new_clickables = []
        self._timeming_events = []
        self._current_timeming_event = None
        self._loading_complete = False
        self._analyzing_finished = False
        self.response_code = {}
        if method == "GET":
            self.mainFrame().load(QUrl(url_to_request))
        else:
            request = self.make_request(url_to_request)
            # A fresh dict per call avoids sharing one mutable default object.
            post_body = self.post_data_to_array({} if data is None else data)
            request.setRawHeader("Content-Type", QByteArray("application/x-www-form-urlencoded"))
            self.mainFrame().load(request, QNetworkAccessManager.PostOperation, post_body)

        # Poll until loading has finished or the timeout budget is used up.
        elapsed = 0
        while not self._loading_complete and elapsed < timeout:
            self._wait(self.wait_for_processing)
            elapsed += self.wait_for_processing

        videos = self.mainFrame().findAllElements("video")
        if len(videos) > 0:
            # The original message never filled its placeholder; log the count.
            logging.debug("{} videos found... removing them".format(len(videos)))
            for video in videos:
                video.removeFromDocument()

        overall_waiting_time = elapsed
        buffer_ms = 250
        while len(self._timeming_events) > 0 and overall_waiting_time < timeout:
            # Take the first event (the list is expected to be ordered by needed time).
            self._current_timeming_event = self._timeming_events.pop(0)
            self._waiting_for = self._current_timeming_event["event_type"]  # Kind of event
            # NOTE(review): the event "time" is described as milliseconds while
            # overall_waiting_time is accumulated from _wait() arguments
            # (presumably seconds) - this subtraction appears to mix units;
            # confirm before changing. Behaviour is kept as is.
            waiting_time = self._current_timeming_event["time"] - overall_waiting_time
            # Add the buffer and convert from milliseconds to seconds.
            waiting_time = (waiting_time + buffer_ms) / 1000.0
            if waiting_time < 0.0:
                waiting_time = 0
            self._wait(waiting_time)
            overall_waiting_time += waiting_time
        # Always give the page at least 0.5 of total waiting time.
        if overall_waiting_time < 0.5:
            self._wait((0.5 - overall_waiting_time))

        base_url = self.mainFrame().findFirstElement("base")
        if base_url is not None:
            base_url = base_url.attribute("href")

        links, clickables = extract_links(self.mainFrame(), url_to_request)
        forms = extract_forms(self.mainFrame())
        elements_with_event_properties = property_helper(self.mainFrame())
        self.mainFrame().evaluateJavaScript(self._property_obs_js)
        self._wait(0.1)

        self._analyzing_finished = True
        html_after_timeouts = self.mainFrame().toHtml()
        response_url = self.mainFrame().url().toString()

        # Clear the frame after the HTML and URL have been captured.
        self.mainFrame().setHtml(None)
        self._new_clickables.extend(clickables)
        self._new_clickables.extend(elements_with_event_properties)
        self._new_clickables = purge_dublicates(self._new_clickables)

        # A missing entry and an explicit None both default to 200.
        response_code = self.response_code.get(url_to_request)
        if response_code is None:
            response_code = 200
        try:
            current_page = WebPage(self.parent().get_next_page_id(), response_url, html_after_timeouts)
        except AttributeError:  # Attacker don't need this function...
            current_page = WebPage(42, response_url, html_after_timeouts)
        current_page.timing_requests = self._timing_requests
        current_page.clickables = self._new_clickables
        current_page.links = links
        current_page.forms = forms
        if base_url is not None and base_url != "":
            current_page.base_url = base_url
        return response_code, current_page
    def analyze(self,
                url_to_request,
                timeout=10,
                current_depth=None,
                method="GET",
                data=None):
        """Load a URL in the main frame, wait for it to settle and return the page.

        Per-run state is reset first. The URL is then loaded with GET or as a
        form-encoded POST, the method polls until ``_loading_complete`` is set
        or ``timeout`` is reached, works through the queued timing events and
        finally collects links, clickables and forms into a ``WebPage``.

        Args:
            url_to_request: Target URL; an object with ``toString()`` is
                converted through it, any other value is used as given.
            timeout: Limit for the accumulated waiting time; compared against
                sums of ``self.wait_for_processing``.
            current_depth: Not used by this method.
            method: ``"GET"`` loads the URL directly; every other value
                issues a POST request.
            data: Form data passed to ``self.post_data_to_array`` for non-GET
                requests. ``None`` (the default) is treated as an empty dict.

        Returns:
            A ``(response_code, current_page)`` tuple; ``response_code`` is
            200 when nothing was recorded for ``url_to_request``.
        """
        # URL objects and plain strings are both accepted.
        try:
            url_to_request = url_to_request.toString()
        except AttributeError:
            pass

        logging.debug("Start analyzing the url {}...".format(url_to_request))
        # State collected during one analysis run starts out empty.
        self._timing_requests = []
        self._new_clickables = []
        self._timeming_events = []
        self._current_timeming_event = None
        self._loading_complete = False
        self._analyzing_finished = False
        self.response_code = {}
        if method == "GET":
            self.mainFrame().load(QUrl(url_to_request))
        else:
            request = self.make_request(url_to_request)
            # Use a new dict per call instead of a shared mutable default.
            post_body = self.post_data_to_array({} if data is None else data)
            request.setRawHeader(
                'Content-Type',
                QByteArray('application/x-www-form-urlencoded'))
            self.mainFrame().load(request, QNetworkAccessManager.PostOperation,
                                  post_body)

        # Wait for the load to finish, bounded by the timeout.
        elapsed = 0
        while not self._loading_complete and elapsed < timeout:
            self._wait(self.wait_for_processing)
            elapsed += self.wait_for_processing

        videos = self.mainFrame().findAllElements("video")
        if len(videos) > 0:
            # Fill the placeholder, which the original message left unformatted.
            logging.debug(
                "{} videos found... removing them".format(len(videos)))
            for video in videos:
                video.removeFromDocument()

        overall_waiting_time = elapsed
        buffer_ms = 250
        while (len(self._timeming_events) > 0
               and overall_waiting_time < timeout):
            # First event in the list (expected to be ordered by needed time).
            self._current_timeming_event = self._timeming_events.pop(0)
            self._waiting_for = self._current_timeming_event['event_type']
            # NOTE(review): "time" is described as milliseconds, whereas
            # overall_waiting_time sums _wait() arguments (presumably
            # seconds); the subtraction seems to mix units - confirm before
            # changing. Behaviour is left untouched here.
            waiting_time = (self._current_timeming_event["time"]
                            - overall_waiting_time)
            # Add the buffer, then convert milliseconds to seconds.
            waiting_time = (waiting_time + buffer_ms) / 1000.0
            if waiting_time < 0.0:
                waiting_time = 0
            self._wait(waiting_time)
            overall_waiting_time += waiting_time
        # Guarantee a minimum total waiting time of 0.5.
        if overall_waiting_time < 0.5:
            self._wait((0.5 - overall_waiting_time))

        base_url = self.mainFrame().findFirstElement("base")
        if base_url is not None:
            base_url = base_url.attribute("href")

        links, clickables = extract_links(self.mainFrame(), url_to_request)
        forms = extract_forms(self.mainFrame())
        elements_with_event_properties = property_helper(self.mainFrame())
        self.mainFrame().evaluateJavaScript(self._property_obs_js)
        self._wait(0.1)

        self._analyzing_finished = True
        html_after_timeouts = self.mainFrame().toHtml()
        response_url = self.mainFrame().url().toString()

        # The frame is cleared once its HTML and URL are captured.
        self.mainFrame().setHtml(None)
        self._new_clickables.extend(clickables)
        self._new_clickables.extend(elements_with_event_properties)
        self._new_clickables = purge_dublicates(self._new_clickables)

        # Missing key and explicit None both fall back to 200.
        response_code = self.response_code.get(url_to_request)
        if response_code is None:
            response_code = 200
        try:
            current_page = WebPage(self.parent().get_next_page_id(),
                                   response_url, html_after_timeouts)
        except AttributeError:  #Attacker don't need this function...
            current_page = WebPage(42, response_url, html_after_timeouts)
        current_page.timing_requests = self._timing_requests
        current_page.clickables = self._new_clickables
        current_page.links = links
        current_page.forms = forms
        if base_url is not None and base_url != "":
            current_page.base_url = base_url
        return response_code, current_page