Esempio n. 1
0
 def does_everything_exist(self) -> bool:
     '''
     Verify that the base directory, the URL list file and the log file
     are all available.

     The checks are delegated to SkimUitls.check_dir_exists and
     SkimUitls.check_file_exists (presumably these create missing
     entries - confirm in skim_utils).

     Returns:
         True when all three checks succeed, False when any check fails
         or an unexpected error occurs (the error is printed, not raised).
     '''
     try:
         util = skim_utils.SkimUitls()
         print(str(self.basepath))
         print(self.path_to_urls)
         print(str(self.logfile))
         base = util.check_dir_exists(self.basepath)
         print(str(base))
         urls = util.check_file_exists(self.path_to_urls)
         print(str(urls))
         log = util.check_file_exists(self.logfile)
         print(str(log))
         # Name the failing checks so the log says what is missing.
         checks = (("basepath", base), ("path_to_urls", urls), ("logfile", log))
         missing = [name for name, ok in checks if not ok]
         if missing:
             raise IOError("missing: " + ", ".join(missing))
         return True
     except IOError as i:
         print(
             "IO Error! - controller.does_everything_exist.Checking_files_creating_if_not_exists: "
             + str(i))
     except Exception as e:
         print("Error! - controller.does_everything_exist: " + str(e))
     # Every failure path reports an explicit False rather than None.
     return False
Esempio n. 2
0
 def is_internet_available(self) -> bool:
     '''
     Check that internet connectivity is available.

     Returns:
         The truthiness of SkimUitls.test_internet(); False when the
         probe itself raises (the error is printed, not raised).
     '''
     try:
         return bool(skim_utils.SkimUitls().test_internet())
     except Exception as e:
         print("Error! in SkimController.is_internet_available: " + str(e))
         # A failed probe is reported as "no connectivity", not None.
         return False
Esempio n. 3
0
 def checker(self, line: str) -> bool:
     '''
     Report whether any entry of self.dynamic_content occurs in "line".

     Args:
         line: text to inspect; it is converted with str() first.

     Returns:
         True when at least one entry is a substring of the line,
         False otherwise, including when an error occurs (the error is
         printed, not raised).
     '''
     try:
         text = str(line)
         return any(el in text for el in self.dynamic_content)
     except Exception as e:
         print("Error! in Hasher.checker: " + str(e))
         return False
Esempio n. 4
0
 def clean_and_print_banner(self) -> bool:
     '''
     Clean up and print the banner.

     Calls remove_orphan_files on self.basepath, clears the screen and
     then shows the banner.

     Returns:
         True when every step completes, False when any step raises
         (the error is printed, not raised).
     '''
     try:
         import toolbag
         import skim_conf
         utils = skim_utils.SkimUitls()
         conf = skim_conf.Skim_conf()
         tb = toolbag.Toolbag()
         utils.remove_orphan_files(self.basepath)
         tb.clear_screen()
         conf.show_banner()
         return True
     except Exception as e:
         print("Error! in SkimController.clean_and_print_banner: " + str(e))
         # Honour the declared bool return type on the failure path.
         return False
Esempio n. 5
0
def main():
    '''
    Execution flow is controlled from here.

    Exits with status 1 when the connectivity check or the file and
    directory check fails; otherwise prints the banner, reads the domain
    list from controller.path_to_urls and hands it to
    controller.parallelize.
    '''
    import sys

    controller = SkimController()
    lint = skim_utils.SkimUitls().lint
    # The method must be called: the bare bound method is always truthy,
    # which silently disabled this check.
    if not controller.is_internet_available():
        lint("\nError! - Check internet connectivity\n")
        sys.exit(1)
    #check dirs and files exists, if not create them.
    if not controller.does_everything_exist():
        lint("\nIO Error! - File does not exist.")
        sys.exit(1)

    import skim_reader_io
    reader = skim_reader_io.SkimReader().fetch_domain_list
    controller.clean_and_print_banner()
    list_of_domains = reader(controller.path_to_urls)
    controller.parallelize(list_of_domains)
Esempio n. 6
0
 def print_counts(self):
     '''
     Report how many sites were recorded in each result category.

     Each line goes through SkimUitls.lint; any error is printed
     instead of being raised.
     '''
     try:
         import skim_controller
         import skim_utils
         import skim_reader_io
         emit = skim_utils.SkimUitls().lint
         rule = "***************************************"
         # (label, result file) pairs, in the order they are reported.
         categories = (
             ("Sites up: ", "up.txt"),
             ("Sites not responding: ", "not_responding.txt"),
             ("Sharepoint Sites: ", "sharepoint.txt"),
             ("Wordpress Sites: ", "wordpress.txt"),
             ("Drupal Sites: ", "drupal.txt"),
             ("Joomla Sites: ", "joomla.txt"),
         )
         emit(rule)
         emit("Total Sites: " + str(self.get_number_of_domains()))
         for label, result_file in categories:
             emit(label + str(self.count_sites(result_file)))
         emit(rule)
     except Exception as e:
         print("Error! in Cleaner.print_counts(): " + str(e))
Esempio n. 7
0
    def fetch_domain_list(self, dir: str) -> List:
        '''
        Get the list of domains to scan, from file.

        Each non-blank line of the file is stripped and prefixed with
        "http://", the result is passed through self.whitelist_sieve and
        then through Toolbag.shuffler. The list sizes before and after
        whitelisting are reported through SkimUitls.lint.

        Args:
            dir: path of the file holding one domain per line (the name
                shadows the builtin but is kept for existing callers).

        Returns:
            The value returned by the shuffler, or an empty list when
            anything fails (the error is printed, not raised).
        '''
        try:
            import toolbag
            import skim_utils
            import time
            tools = toolbag.Toolbag()
            lint = skim_utils.SkimUitls().lint
            with open(str(dir), "r") as url_file:
                hosts = (raw.strip() for raw in url_file.read().splitlines())
                # Skip blank lines: they would become the bogus URL "http://".
                url_list = ["http://" + host for host in hosts if host]
            lint(
                tools.color(
                    "Number of domains before whitelisting: " +
                    str(len(url_list)), "yellow"))
            # Short pause between the two progress messages.
            time.sleep(.7)
            clean_list = self.whitelist_sieve(url_list)
            clean_list = tools.shuffler(clean_list)

            lint(
                tools.color(
                    "Number of domains after  whitelisting: " +
                    str(len(clean_list)), "yellow") + "\n\n")
            time.sleep(.5)
            return clean_list
        except Exception as e:
            print("Error! in SkimController.fetch_domain_list: " + str(e))
            # Callers feed the result straight into further processing,
            # so hand back an empty list rather than None.
            return []
Esempio n. 8
0
    def send_request(self, url):
        '''
        Take url, send an HTTP GET request, then classify the site
        according to the response or to the error that was raised.

        On a 200 response the URL is written to the "up" bucket, the body
        is passed to SkimUitls.manage_content and the CMS detectors run in
        order (SharePoint, WordPress, Drupal, Joomla), stopping at the
        first positive one. Every failure is logged through lint and the
        URL is written to a bucket chosen from the exception type and
        message.

        Args:
            url: address to request; converted with str() first.

        Returns:
            True when an error was handled or SharePoint was detected;
            None on every other path.
            NOTE(review): the None results look unintentional but are
            kept so existing callers see the same values - confirm.
        '''
        url = str(url)
        utils = skim_utils.SkimUitls()
        lint = utils.lint
        writer = skim_writer_io.Skim_writer_io().writer

        def _record(message, bucket):
            # Log the outcome, file the URL under `bucket`, report "handled".
            lint(url + message)
            writer(url, bucket)
            return True

        try:
            import toolbag
            import skim_cms_filter
            import skim_controller
            headers = toolbag.Toolbag().get_headers("ie")
            http_timeout = skim_controller.SkimController().http_timeout
            cms = skim_cms_filter.SkimCmsFilter()
            res = requests.get(url, headers=headers, timeout=http_timeout)
            if res.status_code != 200:
                # Raises HTTPError for error statuses; any other non-200
                # status falls through without being recorded.
                res.raise_for_status()
                return None

            writer(url, "up")
            utils.manage_content(url, res.text)

            #short circuit, if one CMS then not another, in order of probability
            if cms.is_it_sharepoint(url, res.headers):
                return True
            if cms.is_it_wordpress(url, res.headers):
                return None
            if not cms.is_it_drupal(url, res.headers, res.text):
                cms.is_it_joomla(url, res.headers, res.text)
            return None

        except requests.ConnectTimeout as ct:
            if "SSL" in str(ct):
                return _record(" - SSL Error!!! " + str(ct) + "\n", "ssl")
            return _record(" - Connect Timeout!!! " + str(ct) + "\n",
                           "not_responding")

        except requests.ConnectionError as c:
            detail = str(c)
            # requests raises SSLError as a ConnectionError subclass; the
            # previous "SSSLError" typo meant this branch never matched.
            if "SSLError" in detail:
                return _record(" - SSL Error !!! \n", "ssl")
            if "503" in detail:
                return _record(" - 503 Transaction error!! \n", "up")
            if "Max retries exceeded with url" in detail:
                return _record(" - site not responding\n", "not_responding")
            return _record(" - Connection Error !!! " + "\n", "not_responding")

        except requests.ReadTimeout as rt:
            return _record(" !!! Read Timeout!!! " + str(rt) + "\n",
                           "investigate")

        except requests.URLRequired as ur:
            return _record(" !!! URLRequired Error!!! " + str(ur) + "\n",
                           "investigate")

        except requests.TooManyRedirects as tmr:
            return _record(" !!! TooManyRedirects Error!!! " + str(tmr) + "\n",
                           "investigate")

        except requests.RequestsDependencyWarning as d:
            return _record(
                " !!! RequestsDependencyWarning Error!!! " + str(d) + "\n",
                "investigate")

        except requests.HTTPError as h:
            detail = str(h)
            if "Authorization Required" in detail:
                return _record(" !!! 401 Auth required !!! " + detail + "\n",
                               "up")
            if "Client Error: Forbidden for url" in detail:
                return _record(" !!! 403 Forbidden !!! " + detail + "\n", "up")
            if "404 Client Error: Not Found for url " in detail:
                return _record(" !!! 404 Not Found !!! " + detail + "\n",
                               "not_responding")
            if "500 Server Error: INTERNAL SERVER ERROR" in detail:
                return _record(
                    " !!! 500 INTERNAL SERVER ERROR !!! " + detail + "\n", "up")
            if "503 Server Error: Your transaction has failed." in detail:
                return _record(
                    " !!! 503 Transaction Failed !!! " + detail + "\n", "up")
            return _record(" !!! HTTPError !!! " + detail + "\n", "http_errors")

        except requests.Timeout as t:
            return _record(" !!!Timeout!!! " + str(t) + "\n", "investigate")

        except requests.RequestException as r:
            if "URL has an invalid label" in str(r):
                return _record(" - URL has an invalid label\n", "up")
            return _record(" !!! request exception!!!! " + str(r) + "\n",
                           "investigate")

        except Exception as e:
            return _record(" !!! Investigate !!!" + str(e) + "\n",
                           "investigate")