예제 #1
 def whitelist_sieve(self, raw_master_list: List) -> List:
     """Filter whitelisted domains from the list of urls to be processed.

     Each entry of ``raw_master_list`` is converted with ``str()``. A url
     that contains any whitelist entry as a substring is handed to the
     writer with the tag ``"whitelisted"`` and left out of the result.
     Every other url is kept once, in first-seen order.

     Args:
         raw_master_list: Urls (or objects convertible with ``str``) to sieve.

     Returns:
         The urls (as strings) that matched no whitelist entry, without
         duplicates. ``None`` if anything raised; the error is printed
         rather than propagated.
     """
     try:
         import skim_conf
         import skim_writer_io
         # Stringify the whitelist once instead of once per url.
         whitelist = [
             str(white) for white in skim_conf.Skim_conf().whitelist_domains()
         ]
         writer = skim_writer_io.Skim_writer_io().writer
         clean_master_list = []
         # Set mirror of clean_master_list so the duplicate check is O(1).
         seen = set()
         for url in raw_master_list:
             url = str(url)
             # Substring match: the first whitelist hit is enough.
             if any(white in url for white in whitelist):
                 writer(url, "whitelisted")
                 continue
             if url not in seen:
                 seen.add(url)
                 clean_master_list.append(url)
         return clean_master_list
     except Exception as e:
         print("Error! in SkimController.whitelist_sieve: " + str(e))
예제 #2
 def is_it_sharepoint(self, url: str, headers: str) -> bool:
     """Parse http response data for Sharepoint.

     Iterates ``headers`` and looks for the substring ``"SharePoint"`` in
     each item. Iterating a mapping yields its keys, so for a headers
     mapping it is the header names that are inspected. On a match the
     url is passed to the writer with the tag ``"sharepoint"``.

     The elapsed time between two ``pc()`` readings, cut to the first
     eight characters of its string form, is stored in
     ``self.perf_is_it_sharepoint`` on both outcomes.
     (``pc`` is presumably ``time.perf_counter`` - TODO confirm.)

     Args:
         url: Url the response belongs to.
         headers: Response headers to inspect.

     Returns:
         ``True`` if SharePoint was detected, ``False`` otherwise or when
         an error occurred (the error is printed, not raised).
     """
     try:
         p1 = pc()
         writer = skim_writer_io.Skim_writer_io().writer
         found = any("SharePoint" in x for x in headers)
         if found:
             writer(url, "sharepoint")
         # Timing is recorded identically for hit and miss.
         self.perf_is_it_sharepoint = str(pc() - p1)[:8]
         return found
     except Exception as e:
         print("Error! in is_it_sharepoint: " + str(e))
         # Keep the bool contract so callers using `not ...` behave the same.
         return False
예제 #3
 def is_it_joomla(self, url: str, headers: str, content: str) -> bool:
     """Parse http response data for Joomla CMS.

     Joins the items of ``headers`` into one string (for a mapping these
     are its keys), appends ``str(content)`` and looks for the lowercase
     substring ``"joomla"``. On a match the url is passed to the writer
     with the tag ``"joomla"``.

     The elapsed time between two ``pc()`` readings, cut to the first
     eight characters of its string form, is stored in
     ``self.perf_is_it_joomla`` on both outcomes.
     (``pc`` is presumably ``time.perf_counter`` - TODO confirm.)

     Args:
         url: Url the response belongs to.
         headers: Response headers; must iterate as strings.
         content: Response body.

     Returns:
         ``True`` if Joomla was detected, ``False`` otherwise or when an
         error occurred (the error is printed, not raised).
     """
     try:
         p1 = pc()
         writer = skim_writer_io.Skim_writer_io().writer
         search_space = "".join(headers) + str(content)
         # NOTE(review): the match is case-sensitive, so "Joomla" alone
         # does not count - confirm this is intended.
         found = "joomla" in search_space
         if found:
             writer(url, "joomla")
         # Timing is recorded identically for hit and miss.
         self.perf_is_it_joomla = str(pc() - p1)[:8]
         return found
     except Exception as e:
         print("Error! in is_it_joomla: " + str(e))
         # Keep the bool contract so callers using `not ...` behave the same.
         return False
예제 #4
 def is_it_wordpress(self, url: str, headers: str) -> bool:
     """Parse http response data for Wordpress CMS.

     Converts ``headers`` with ``str()`` and looks for the substrings
     ``"wp"`` or ``"xmlrpc.php"``. On a match the url is passed to the
     writer with the tag ``"wordpress"``.

     The elapsed time between two ``pc()`` readings, cut to the first
     eight characters of its string form, is stored in
     ``self.perf_is_it_wordpress`` on both outcomes.
     (``pc`` is presumably ``time.perf_counter`` - TODO confirm.)

     Args:
         url: Url the response belongs to.
         headers: Response headers to inspect.

     Returns:
         ``True`` if Wordpress was detected, ``False`` otherwise or when
         an error occurred (the error is printed, not raised).
     """
     try:
         p1 = pc()
         writer = skim_writer_io.Skim_writer_io().writer
         headers = str(headers)
         # NOTE(review): "wp" is a very short needle and will match any
         # header text that happens to contain those two letters.
         found = ("wp" in headers) or ("xmlrpc.php" in headers)
         if found:
             writer(url, "wordpress")
         # Timing is recorded identically for hit and miss.
         self.perf_is_it_wordpress = str(pc() - p1)[:8]
         return found
     except Exception as e:
         print("Error! in is_it_wordpress " + str(e))
         # Keep the bool contract so callers using `not ...` behave the same.
         return False
예제 #5
 def is_it_drupal(self, url, headers: str, content: str) -> bool:
     """Parse http response data for Drupal CMS.

     Concatenates ``str(headers)`` and ``str(content)`` and looks for the
     substrings ``"drupal"`` or ``"Drupal"``. On a match the url is passed
     to the writer with the tag ``"drupal"``.

     The elapsed time between two ``pc()`` readings, cut to the first
     eight characters of its string form, is stored in
     ``self.perf_is_it_drupal`` on both outcomes.
     (``pc`` is presumably ``time.perf_counter`` - TODO confirm.)

     Args:
         url: Url the response belongs to.
         headers: Response headers to inspect.
         content: Response body.

     Returns:
         ``True`` if Drupal was detected, ``False`` otherwise or when an
         error occurred (the error is printed, not raised).
     """
     try:
         p1 = pc()
         writer = skim_writer_io.Skim_writer_io().writer
         check = str(headers) + str(content)
         found = ("drupal" in check) or ("Drupal" in check)
         if found:
             writer(url, "drupal")
         # Timing is recorded identically for hit and miss.
         self.perf_is_it_drupal = str(pc() - p1)[:8]
         return found
     except Exception as e:
         print("Error! in drupal: " + str(e))
         # Keep the bool contract so callers using `not ...` behave the same.
         return False
예제 #6
 def manage_content(self, url: str, content: str) -> bool:
     """Take content from HTTP response, remove dynamic elements,
     store content and hash for comparison on next pass.

     The content is passed through ``Hasher.strip_digest`` and the result
     is hashed with ``Hasher.hashit``. Two records are then written:

     * the stripped content, framed by a ``$``-delimiter line and a
       ``%``-delimiter line that both embed the url;
     * ``"<url>~<hash>"`` under the ``"hashes"`` tag.

     Args:
         url: Url the content was fetched from.
         content: Raw response body.

     Returns:
         ``True`` when both records were written, ``False`` when an error
         occurred (the error is printed and passed to ``self.lint``).
     """
     try:
         import skim_writer_io
         import skim_hasher
         writer = skim_writer_io.Skim_writer_io().writer
         h = skim_hasher.Hasher()
         url = str(url)
         modded = h.strip_digest(str(content))
         hashed = h.hashit(modded)
         # NOTE(review): the head of this call and its second argument
         # were missing from the source under review. The "content" tag
         # is an assumption - confirm against the writer's known tags.
         writer(
             "$$$$$$$$$$~~~~~~~~~~$$$$$$$$$$" + url +
             "$$$$$$$$$$~~~~~~~~~~$$$$$$$$$$" +
             str(modded) + "%%%%%%%%%%~~~~~~~~~~~%%%%%%%%%%" + url +
             "%%%%%%%%%%~~~~~~~~~~~%%%%%%%%%%",
             "content",
         )
         writer(url + "~" + str(hashed), "hashes")
         return True
     except Exception as e:
         print("Error!! in SkimUitls.manage_content " + str(e))
         self.lint("Error! in SkimUitls.manage_content " + str(e))
         # Keep the bool contract instead of falling through to None.
         return False
예제 #7
    def send_request(self, url):
        """Take url, send HTTP request then process through logic according to response.

        A 200 response is written to the ``"up"`` bucket, its body is handed
        to ``manage_content`` and the CMS detectors are tried in order
        (SharePoint, Wordpress, Drupal, Joomla), stopping at the first hit.
        Any other status goes through ``raise_for_status()`` so that 4xx/5xx
        responses reach the ``HTTPError`` handler. Every failure is logged
        through ``lint`` and written to one of the buckets ``"ssl"``,
        ``"not_responding"``, ``"up"``, ``"http_errors"`` or
        ``"investigate"``.

        Args:
            url: Url to request; converted with ``str()``.

        Returns:
            ``True`` on every handled path.
        """
        # Convert before the try block so every handler can concatenate it.
        url = str(url)
        writer = skim_writer_io.Skim_writer_io().writer
        utils = skim_utils.SkimUitls()
        lint = utils.lint
        try:
            import toolbag
            import skim_cms_filter
            import skim_controller
            headers = toolbag.Toolbag().get_headers("ie")
            http_timeout = skim_controller.SkimController().http_timeout
            cms = skim_cms_filter.SkimCmsFilter()
            res = requests.get(url, headers=headers, timeout=http_timeout)
            code = res.status_code
            if code == 200:
                writer(url, "up")
                utils.manage_content(url, res.text)

                #short circuit, if one CMS then not another, in order of probability
                if not cms.is_it_sharepoint(url, res.headers):
                    if not cms.is_it_wordpress(url, res.headers):
                        if not cms.is_it_drupal(url, res.headers, res.text):
                            cms.is_it_joomla(url, res.headers, res.text)
                #Uncomment to show performance stats
                #utils.lint(str(url) + " Perf Manage Content: " + str(utils.perf_manage_content))
                #utils.lint(str(url) + ": Perf Drupal: " + str(cms.perf_is_it_drupal))
                #utils.lint(str(url) + ": Perf Wordpress: " + str(cms.perf_is_it_wordpress))
                #utils.lint(str(url) + ": Perf_sharepoint: " + str(cms.perf_is_it_sharepoint))
                #utils.lint(str(url) + ": Perf_Joomla: " + str(cms.perf_is_it_joomla))
                return True

            # requests.get() never raises HTTPError on its own; this call is
            # what makes the HTTPError handler below reachable for 4xx/5xx.
            res.raise_for_status()
            # NOTE(review): non-200 statuses that are not errors (e.g. 204)
            # had no visible handling; they are flagged for manual review.
            lint(url + " !!! Unhandled status code " + str(code) + " !!!\n")
            writer(url, "investigate")
            return True

        except requests.ConnectTimeout as ct:
            if "SSL" in str(ct):
                lint(url + " - SSL Error!!! " + str(ct) + "\n")
                writer(url, "ssl")
            else:
                lint(url + " - Connect Timeout!!! " + str(ct) + "\n")
                writer(url, "not_responding")
            return True

        except requests.ConnectionError as c:
            # Was "SSSLError", which can never appear in the message.
            if "SSLError" in str(c):
                lint(url + " - SSL Error !!! \n")
                writer(url, "ssl")
            elif "503" in str(c):
                lint(url + " - 503 Transaction error!! \n")
                writer(url, "up")
            elif "Max retries exceeded with url" in str(c):
                lint(url + " - site not responding\n")
                writer(url, "not_responding")
            else:
                lint(url + " - Connection Error !!! " + "\n")
                writer(url, "not_responding")
            return True

        except requests.ReadTimeout as rt:
            lint(url + " !!! Read Timeout!!! " + str(rt) + "\n")
            writer(url, "investigate")
            return True

        except requests.URLRequired as ur:
            lint(url + " !!! URLRequired Error!!! " + str(ur) + "\n")
            writer(url, "investigate")
            return True

        except requests.TooManyRedirects as tmr:
            lint(url + " !!! TooManyRedirects Error!!! " + str(tmr) + "\n")
            writer(url, "investigate")
            return True

        except requests.RequestsDependencyWarning as d:
            lint(url + " !!! RequestsDependencyWarning Error!!! " + str(d) + "\n")
            writer(url, "investigate")
            return True

        except requests.HTTPError as h:
            if "Authorization Required" in str(h):
                lint(url + " !!! 401 Auth required !!! " + str(h) + "\n")
                writer(url, "up")
            elif "Client Error: Forbidden for url" in str(h):
                lint(url + " !!! 403 Forbidden !!! " + str(h) + "\n")
                writer(url, "up")
            # The message reads "... for url: <url>", so the former needle
            # ending in "url " (trailing space) could never match.
            elif "404 Client Error: Not Found for url" in str(h):
                lint(url + " !!! 404 Not Found !!! " + str(h) + "\n")
                writer(url, "not_responding")
            elif "500 Server Error: INTERNAL SERVER ERROR" in str(h):
                lint(url + " !!! 500 INTERNAL SERVER ERROR !!! " + str(h) + "\n")
                writer(url, "up")
            elif "503 Server Error: Your transaction has failed." in str(h):
                lint(url + " !!! 503 Transaction Failed !!! " + str(h) + "\n")
                writer(url, "up")
            else:
                lint(url + " !!! HTTPError !!! " + str(h) + "\n")
                writer(url, "http_errors")
            return True

        except requests.Timeout as t:
            lint(url + " !!!Timeout!!! " + str(t) + "\n")
            writer(url, "investigate")
            return True

        except requests.RequestException as r:
            if "URL has an invalid label" in str(r):
                lint(url + " - URL has an invalid label\n")
                writer(url, "up")
            else:
                lint(url + " !!! request exception!!!! " + str(r) + "\n")
                writer(url, "investigate")
            return True

        except Exception as e:
            lint(url + " !!! Investigate !!!" + str(e) + "\n")
            writer(url, "investigate")
            return True