Example #1
def SiteURLSQL(item, LOG, SQL, TABLEname, PROXY, UAFILE, UAG):
    # strip UID-style strings from the URL
    siteURL = quote(
        re.split("(?:[0-9a-fA-F]:?){32}", item['page']['url'])[0], ':/')
    dn = dirname(siteURL)

    # Test if entry already exists in DB
    if SQL.SQLiteVerifyEntry(TABLEname, dn) == 0:
        now = str(TimestampNow().Timestamp())
        siteDomain = urlparse(item['page']['url']).netloc
        source_url = item['result'].replace("/api/v1", "")
        try:
            IPaddress = socket.gethostbyname(siteDomain)
            rASN = NetInfo()
            ASN = rASN.GetASN(IPaddress).strip('\"')
        # can't resolve
        except Exception:
            IPaddress = ""
            ASN = ""

        # HTTP connection
        lastHTTPcode = "---"  # default if the request never completes
        try:
            proxies = {'http': PROXY, 'https': PROXY}
            UA = UAG.ChooseUA(UAFILE)
            user_agent = {'User-agent': UA}
            try:
                r = requests.get(siteURL,
                                 headers=user_agent,
                                 proxies=proxies,
                                 allow_redirects=True)
                lastHTTPcode = str(r.status_code)
            except ValueError:
                # No user-agent configured
                r = requests.get(siteURL,
                                 proxies=proxies,
                                 allow_redirects=True)
                lastHTTPcode = str(r.status_code)
            except requests.exceptions.Timeout:
                lastHTTPcode = "timeout"
            except requests.exceptions.ConnectionError:
                lastHTTPcode = "aborted"
            except Exception:
                lastHTTPcode = "---"
        except Exception as e:
            # Unknown status code
            LOG.error("Connection error: {}".format(e))

        LOG.info(siteURL + " " + siteDomain + " " + IPaddress + " " +
                 source_url + " " + now + " " + lastHTTPcode)
        SQL.SQLiteInsertPK(TABLEname, siteURL, siteDomain, IPaddress,
                           source_url, now, lastHTTPcode, ASN)

    else:
        LOG.debug("Entry already known: " + siteURL)
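
The re.split("(?:[0-9a-fA-F]:?){32}", ...)[0] idiom that opens this and most of the following examples truncates a URL at the first 32-hex-character, UID-style token. A minimal sketch of that behaviour, using a made-up URL:

import re

# UID-style token: 32 hex characters, optionally colon-separated
UID_RE = "(?:[0-9a-fA-F]:?){32}"

# hypothetical phishing URL embedding such a token
url = "http://example.com/login/0123456789abcdef0123456789abcdef/verify.php"

# keep only the part before the token, as the extractors do
print(re.split(UID_RE, url)[0])  # http://example.com/login/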
Example #2
def OpenphishExtractor(openphish_file, SearchString, LOG, SQL, TABLEname, PROXY, UAFILE):
	UAG = UAgent()
	with open(openphish_file, "rt") as txt:
		for entry in txt:
			## Search
			if SearchString in entry:
				# strip UID-style strings from the URL
				siteURL = re.split("(?:[0-9a-fA-F]:?){32}", entry.rstrip())[0]
				dn = dirname(siteURL)

				## Test if entry already exists in DB
				if SQL.SQLiteVerifyEntry(TABLEname, dn) == 0:
					now = str(TimestampNow().Timestamp())
					siteDomain = urlparse(entry).netloc
					source_url = openphish_file
					try:
						IPaddress = socket.gethostbyname(siteDomain)
					# can't resolve
					except Exception:
						IPaddress = ""

					# HTTP connection
					lastHTTPcode = "---"  # default if the request never completes
					try:
						proxies = {'http': PROXY, 'https': PROXY}
						UA = UAG.ChooseUA(UAFILE)
						user_agent = {'User-agent': UA}
						try:
							r = requests.get(siteURL, headers=user_agent, proxies=proxies, allow_redirects=True, timeout=(5,12))
							lastHTTPcode = str(r.status_code)
						except ValueError:
							# No user-agent configured
							r = requests.get(siteURL, proxies=proxies, allow_redirects=True, timeout=(5,12))
							lastHTTPcode = str(r.status_code)
						except requests.exceptions.Timeout:
							lastHTTPcode = "timeout"
						except requests.exceptions.ConnectionError:
							lastHTTPcode = "aborted"
						except Exception:
							lastHTTPcode = "---"
							err = sys.exc_info()
							LOG.error("HTTP error: " + str(err))
					except Exception:
						# Unknown status code
						err = sys.exc_info()
						LOG.error("Connection error: " + str(err))

					# Add data into database
					LOG.info(siteURL)
					SQL.SQLiteInsertPK(TABLEname, siteURL, siteDomain, IPaddress, source_url, now, lastHTTPcode)

				else:
					LOG.debug("Entry already known: " + siteURL)
Example #3
def PKDownloadOpenDir(siteURL, siteDomain, IPaddress, TABLEname, InvTABLEname, DLDir, SQL, PROXY, LOG, UAFILE):
    global Ziplst
    proxies = {'http': PROXY, 'https': PROXY}
    UAG = UAgent()
    UA = UAG.ChooseUA(UAFILE)
    user_agent = {'User-agent': UA}
    now = str(TimestampNow().Timestamp())
    SHA = SHA256()
    Ziplst = []

    rhtml = requests.get(siteURL, headers=user_agent, proxies=proxies, allow_redirects=True, timeout=(5, 12), verify=False)
    thtml = BeautifulSoup(rhtml.text, 'html.parser')
    PageTitle = thtml.title.text if thtml.title else None
    thtmlatag = thtml.select('a')
    Ziplst += [siteURL + "/" + tag['href'] for tag in thtmlatag if '.zip' in tag.text]
    for file in Ziplst:
        try:
            r = requests.get(file, headers=user_agent, proxies=proxies, allow_redirects=True, timeout=(5, 12), verify=False)
            lastHTTPcode = str(r.status_code)
            zzip = file.replace('/', '_').replace(':', '')
            if zipfile.is_zipfile(io.BytesIO(r.content)):
                savefile = DLDir + zzip
                # Already-collected file
                if os.path.exists(savefile):
                    LOG.info("[DL ] Found already-collected archive: " + savefile)
                    return
                # New file to download
                else:
                    LOG.info("[DL ] Found archive in an open dir, downloaded it as: " + savefile)
                    with open(savefile, "wb") as code:
                        code.write(r.content)
                    ZipFileName = str(zzip)
                    ZipFileHash = SHA.hashFile(savefile)
                    SQL.SQLiteInvestigUpdatePK(InvTABLEname, siteURL, ZipFileName, ZipFileHash, now, lastHTTPcode)
                    return
        except requests.exceptions.ContentDecodingError:
            LOG.error("[DL ] content-type error")
        except Exception:
            pass
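
Example #3 validates a download before writing it by handing the raw bytes to zipfile.is_zipfile through an in-memory buffer. A self-contained sketch of that check; the archive built here is purely illustrative:

import io
import zipfile

def looks_like_zip(content: bytes) -> bool:
    # is_zipfile accepts any seekable file-like object, so an HTTP
    # response body can be checked without touching disk
    return zipfile.is_zipfile(io.BytesIO(content))

# build a real in-memory archive for the positive case
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as zf:
    zf.writestr("kit/index.php", "<?php ?>")

print(looks_like_zip(buf.getvalue()))                 # True
print(looks_like_zip(b"<html>404 not found</html>"))  # False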
Example #4
def AddUniqueURL(URLadd, LOG, SQL, TABLEname, PROXY, UAFILE):
    UAG = UAgent()
    # add scheme if missing
    if not (URLadd.startswith("http://") or URLadd.startswith("https://")):
        URLadd = "http://{}".format(URLadd)

    # strip UID-style strings from the URL
    siteURL = re.split("(?:[0-9a-fA-F]:?){32}", URLadd.rstrip())[0]
    # Test if entry already exists in DB
    if SQL.SQLiteVerifyEntry(TABLEname, siteURL) == 0:
        now = str(TimestampNow().Timestamp())
        siteDomain = urlparse(URLadd).netloc
        source_url = "Manual"
        try:
            IPaddress = socket.gethostbyname(siteDomain)
            rASN = NetInfo()
            ASN = rASN.GetASN(IPaddress).strip('\"')
        # can't resolve
        except Exception:
            IPaddress = ""
            ASN = ""

        # HTTP connection
        lastHTTPcode = "---"  # default if the request never completes
        try:
            proxies = {'http': PROXY, 'https': PROXY}
            UA = UAG.ChooseUA(UAFILE)
            user_agent = {'User-agent': UA}
            try:
                r = requests.get(siteURL,
                                 headers=user_agent,
                                 proxies=proxies,
                                 allow_redirects=True,
                                 timeout=(5, 12))
                lastHTTPcode = str(r.status_code)
            except ValueError:
                # No user-agent configured
                r = requests.get(siteURL,
                                 proxies=proxies,
                                 allow_redirects=True,
                                 timeout=(5, 12))
                lastHTTPcode = str(r.status_code)
            except requests.exceptions.Timeout:
                lastHTTPcode = "timeout"
            except requests.exceptions.ConnectionError:
                lastHTTPcode = "aborted"
            except Exception:
                lastHTTPcode = "---"
                err = sys.exc_info()
                LOG.error("HTTP error: " + str(err))
        except Exception:
            # Unknown status code
            err = sys.exc_info()
            LOG.error("Connection error: " + str(err))

        # Add data into database
        LOG.info(siteURL)
        SQL.SQLiteInsertPK(TABLEname, siteURL, siteDomain, IPaddress,
                           source_url, now, lastHTTPcode, ASN)

    else:
        LOG.info("Entry already known: " + siteURL)
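
Most of these examples pass timeout=(5, 12) to requests. The tuple form sets separate connect and read timeouts; a short sketch against a placeholder URL:

import requests

url = "http://example.com/"  # placeholder target

try:
    # (connect timeout, read timeout) in seconds: up to 5 s to open the
    # TCP connection, then up to 12 s between bytes of the response
    r = requests.get(url, timeout=(5, 12))
    print(r.status_code)
except requests.exceptions.ConnectTimeout:
    print("could not connect within 5 s")
except requests.exceptions.ReadTimeout:
    print("connected, but the server was too slow to answer")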
Example #5
def UrlqueryExtractor(LOG, SQL, TABLEname, PROXY, UAFILE):
    UAG = UAgent()
    ## Search in Urlquery HTML file
    try:
        m = re.findall(r"<td><a title='(.*?)' href='(.*?)'>", HTMLText)
        for line in m:
            # strip UID-style strings from the URL
            siteURL = re.split("(?:[0-9a-fA-F]:?){32}", line[0])[0]
            if siteURL.startswith('https:'):
                siteDomain = siteURL.split('/')[2]
            else:
                siteDomain = siteURL.split('/')[0]
                siteURL = "http://" + siteURL

            ## Test if entry already exists in DB
            if SQL.SQLiteVerifyEntry(TABLEname, siteURL) == 0:

                ## Proceed to information retrieval
                source_url = "https://urlquery.net/" + line[1]

                try:
                    IPaddress = socket.gethostbyname(siteDomain)
                # can't resolve
                except Exception:
                    IPaddress = ""

                now = str(TimestampNow().Timestamp())

                # HTTP connection
                lastHTTPcode = "---"  # default if the request never completes
                try:
                    proxies = {'http': PROXY, 'https': PROXY}
                    UA = UAG.ChooseUA(UAFILE)
                    user_agent = {'User-agent': UA}
                    try:
                        r = requests.get(siteURL,
                                         headers=user_agent,
                                         proxies=proxies,
                                         allow_redirects=True,
                                         timeout=(5, 12))
                        lastHTTPcode = str(r.status_code)
                    except ValueError:
                        # No user-agent configured
                        r = requests.get(siteURL,
                                         proxies=proxies,
                                         allow_redirects=True,
                                         timeout=(5, 12))
                        lastHTTPcode = str(r.status_code)
                    except requests.exceptions.Timeout:
                        lastHTTPcode = "timeout"
                    except requests.exceptions.ConnectionError:
                        lastHTTPcode = "aborted"
                    except Exception:
                        lastHTTPcode = "---"
                except Exception:
                    # Unknown status code
                    err = sys.exc_info()
                    LOG.error("Connection error: " + str(err))

                LOG.info(siteURL + " " + siteDomain + " " + IPaddress + " " +
                         source_url + " " + now + " " + lastHTTPcode)
                SQL.SQLiteInsertPK(TABLEname, siteURL, siteDomain, IPaddress,
                                   source_url, now, lastHTTPcode)

            else:
                LOG.debug("Entry already known: " + siteURL)

    except Exception:
        err = sys.exc_info()
        LOG.error("HTML parser Error! " + str(err))
Example #6
def PhishtankExtractor(phishtank_file, SearchString, LOG, SQL, TABLEname,
                       PROXY, UAFILE):
    UAG = UAgent()
    # Search in Phishtank JSON file
    with open(phishtank_file) as f:
        data = json.load(f)
    for entry in data:
        # Search
        if SearchString in entry['url']:
            # strip UID-style strings from the URL
            siteURL = re.split("(?:[0-9a-fA-F]:?){32}", entry['url'])[0]
            dn = dirname(siteURL)

            # Test if entry already exists in DB
            if SQL.SQLiteVerifyEntry(TABLEname, dn) == 0:

                IPaddress = entry['details'][0]['ip_address']
                source_url = entry['phish_detail_url']
                siteDomain = urlparse(entry['url']).netloc
                now = str(TimestampNow().Timestamp())
                try:
                    IPaddress = socket.gethostbyname(siteDomain)
                # can't resolve
                except Exception:
                    IPaddress = ""

                # HTTP connection
                lastHTTPcode = "---"  # default if the request never completes
                try:
                    proxies = {'http': PROXY, 'https': PROXY}
                    UA = UAG.ChooseUA(UAFILE)
                    user_agent = {'User-agent': UA}
                    try:
                        r = requests.get(siteURL,
                                         headers=user_agent,
                                         proxies=proxies,
                                         allow_redirects=True,
                                         timeout=(5, 12))
                        lastHTTPcode = str(r.status_code)
                    except ValueError:
                        # No user-agent configured
                        r = requests.get(siteURL,
                                         proxies=proxies,
                                         allow_redirects=True,
                                         timeout=(5, 12))
                        lastHTTPcode = str(r.status_code)
                    except requests.exceptions.Timeout:
                        lastHTTPcode = "timeout"
                    except requests.exceptions.ConnectionError:
                        lastHTTPcode = "aborted"
                    except Exception:
                        lastHTTPcode = "---"
                        err = sys.exc_info()
                        LOG.error("HTTP error: " + str(err))
                except Exception:
                    # Unknown status code
                    err = sys.exc_info()
                    LOG.error("Connection error: " + str(err))
                LOG.info(siteURL)
                SQL.SQLiteInsertPK(TABLEname, siteURL, siteDomain, IPaddress,
                                   source_url, now, lastHTTPcode)

            else:
                LOG.debug("Entry already known: " + siteURL)
Example #7
def TryPKDownload(siteURL, siteDomain, IPaddress, TABLEname, InvTABLEname, DLDir, SQL, PROXY, LOG, UAFILE):
    global ziplist
    proxies = {'http': PROXY, 'https': PROXY}
    UAG = UAgent()
    UA = UAG.ChooseUA(UAFILE)
    user_agent = {'User-agent': UA}
    now = str(TimestampNow().Timestamp())
    SHA = SHA256()

    ResiteURL = siteURL
    PsiteURL = urlparse(ResiteURL)
    if len(PsiteURL.path.split("/")[1:]) >= 2:
        siteURL = ResiteURL.rsplit('/', 1)[0]
    else:
        siteURL = ResiteURL

    # Let's try to find a phishing kit source archive
    try:
        r = requests.get(siteURL, headers=user_agent, proxies=proxies, allow_redirects=True, timeout=(5, 12), verify=False)

        if (str(r.status_code) != "404"):
            LOG.info("[" + str(r.status_code) + "] " + r.url)
            SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
            if SQL.SQLiteInvestigVerifyEntry(InvTABLEname, siteDomain, IPaddress) == 0:
                SQL.SQLiteInvestigInsert(InvTABLEname, siteURL, siteDomain, IPaddress, now, str(r.status_code))
            ziplist = []
            path = siteURL
            pathl = '/'.join(path.split("/")[:3])
            pathlist = path.split("/")[3:]

            # Make list
            current = 0
            newpath = ""
            while current < len(pathlist):
                if current == 0:
                    newpath = pathlist[current]
                else:
                    newpath = newpath + "/" + pathlist[current]
                current = current + 1
                pathD = pathl + "/" + newpath
                ziplist.append(pathD)

            # Get page title
            try:
                if len(ziplist) >= 1:
                    rhtml = requests.get(siteURL, headers=user_agent, proxies=proxies, allow_redirects=True, timeout=(5, 12), verify=False)
                    thtml = rhtml.text
                    tit = re.search('<title>(.*?)</title>', thtml, re.IGNORECASE)
                    if tit is not None:
                        PageTitle = tit.group(1)
                        LOG.info(PageTitle)
                        SQL.SQLiteInvestigUpdateTitle(InvTABLEname, siteURL, PageTitle)
                    else:
                        pass
            except AttributeError:
                pass
            except requests.exceptions.ReadTimeout:
                pass
            except Exception:
                err = sys.exc_info()
                LOG.error("Get PageTitle Error: " + siteURL + str(err))

            try:
                # Try to find and download the phishing kit archive (.zip)
                if len(ziplist) >= 1:
                    for zip in ziplist:
                        # skip basenames containing URL-ish special characters
                        if not any(c in os.path.basename(os.path.normpath(zip)) for c in (' = ', '%', '?', '-', '@')):
                            try:
                                LOG.info("trying " + zip + ".zip")
                                rz = requests.get(zip + ".zip", headers=user_agent, proxies=proxies, allow_redirects=True, timeout=(5, 12), verify=False)
                                if str(rz.status_code) != "404":
                                    lastHTTPcode = str(rz.status_code)
                                    zzip = zip.replace('/', '_').replace(':', '')
                                    try:
                                        if "application/zip" in rz.headers['content-type'] or "application/octet-stream" in rz.headers['content-type']:
                                            savefile = DLDir + zzip + '.zip'
                                            # Still collected file
                                            if os.path.exists(savefile):
                                                LOG.info("[DL ] Found still collected archive: " + savefile)
                                                return
                                            # New file to download
                                            else:
                                                LOG.info("[DL ] Found archive, downloaded it as: " + savefile)
                                                with open(savefile, "wb") as code:
                                                    code.write(rz.content)
                                                    pass
                                                ZipFileName = str(zzip + '.zip')
                                                ZipFileHash = SHA.hashFile(savefile)
                                                SQL.SQLiteInvestigUpdatePK(InvTABLEname, siteURL, ZipFileName, ZipFileHash, now, lastHTTPcode)
                                                return
                                        else:
                                            pass
                                    except requests.exceptions.ContentDecodingError:
                                        LOG.error("[DL ] content-type error")
                                    except Exception:
                                        pass
                                # 404
                                else:
                                    pass
                            except requests.exceptions.ReadTimeout:
                                LOG.debug("Connection Timeout: " + siteURL)
                            except requests.exceptions.ConnectTimeout:
                                LOG.debug("Connection Timeout")
                            except Exception:
                                err = sys.exc_info()
                                LOG.error("Error: " + str(err))
                                print("Error: " + str(err))
            except Exception:
                err = sys.exc_info()
                LOG.error("DL Error: " + str(err))

        else:
            LOG.debug("[" + str(r.status_code) + "] " + r.url)
            lastHTTPcode = str(r.status_code)
            SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now, lastHTTPcode)
            SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)

    except requests.exceptions.ConnectionError as e:
        err = str(e)
        if '0x05: Connection refused' in err:
            SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now, 'Conn. refused')
        elif '0x04: Host unreachable' in err:
            SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now, 'Unreachable')
        else:
            SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now, 'Conn. error')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Connection error: " + siteURL)

    except requests.exceptions.ConnectTimeout:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now, 'Conn. timeout')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Connection Timeout: " + siteURL)

    except requests.exceptions.ReadTimeout:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now, 'Conn. readtimeout')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Connection Read Timeout: " + siteURL)

    except requests.exceptions.MissingSchema:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now, 'Malformed URL')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Malformed URL, skipping: " + siteURL + "\n")

    except Exception:
        err = sys.exc_info()
        LOG.error("Error: " + str(err))
Example #8
def PKDownloadOpenDir(siteURL, siteDomain, IPaddress, TABLEname, InvTABLEname,
                      DLDir, SQL, PROXY, LOG, UAFILE, ASN):
    global Ziplst
    proxies = {'http': PROXY, 'https': PROXY}
    UAG = UAgent()
    UA = UAG.ChooseUA(UAFILE)
    user_agent = {'User-agent': UA}
    now = str(TimestampNow().Timestamp())
    SHA = SHA256()
    Ziplst = []

    rhtml = requests.get(siteURL,
                         headers=user_agent,
                         proxies=proxies,
                         allow_redirects=True,
                         timeout=(5, 12),
                         verify=False)
    thtml = BeautifulSoup(rhtml.text, 'html.parser')
    try:
        PageTitle = thtml.title.text.strip()
    except AttributeError:
        PageTitle = None
    if PageTitle is not None:
        PageTitle = re.sub(r'\s+', ' ', PageTitle)
        SQL.SQLiteInvestigUpdateTitle(InvTABLEname, siteURL, PageTitle)

    thtmlatag = thtml.select('a')
    Ziplst += [
        siteURL + "/" + tag['href'] for tag in thtmlatag if '.zip' in tag.text
    ]

    for f in Ziplst:
        try:
            r = requests.get(f,
                             headers=user_agent,
                             proxies=proxies,
                             allow_redirects=True,
                             timeout=(5, 12),
                             verify=False)
            lastHTTPcode = str(r.status_code)
            # Reduce filename length
            zzip = f.replace('/', '_').replace(':', '')[:250]
            try:
                savefile = DLDir + zzip
                # Already-collected file
                if os.path.exists(savefile):
                    LOG.info("[DL ] Found already-collected archive: " +
                             savefile)
                # New file to download
                else:
                    if zipfile.is_zipfile(io.BytesIO(r.content)):
                        LOG.info(
                            "[DL ] Found archive in an open dir, downloaded it as: "
                            + savefile)
                        with open(savefile, "wb") as code:
                            code.write(r.content)
                        ZipFileName = str(zzip + '.zip')
                        ZipFileHash = SHA.hashFile(savefile)
                        # Extract e-mails from downloaded file
                        try:
                            ZS = ZipSearch()
                            extracted_emails = str(
                                ZS.PKzipSearch(InvTABLEname, SQL, LOG, DLDir,
                                               savefile)).strip("[]").replace(
                                                   "'", "")
                            LOG.info(
                                "[Emails] found: {}".format(extracted_emails))
                            SQL.SQLiteInvestigInsertEmail(
                                InvTABLEname, extracted_emails, ZipFileName)
                        except Exception as e:
                            LOG.info(
                                "Extracted emails exception: {}".format(e))

                        SQL.SQLiteInvestigUpdatePK(InvTABLEname, siteURL,
                                                   ZipFileName, ZipFileHash,
                                                   now, lastHTTPcode)
            except Exception as e:
                LOG.error("Error downloading file: {}".format(e))
        except requests.exceptions.ContentDecodingError:
            LOG.error("[DL ] content-type error")
Example #9
def TryPKDownload(siteURL, siteDomain, IPaddress, TABLEname, InvTABLEname,
                  DLDir, SQL, PROXY, LOG, UAFILE, ASN):
    global ziplist
    global PageTitle
    proxies = {'http': PROXY, 'https': PROXY}
    UAG = UAgent()
    UA = UAG.ChooseUA(UAFILE)
    user_agent = {'User-agent': UA}
    now = str(TimestampNow().Timestamp())
    SHA = SHA256()

    # Let's try to find a phishing kit source archive
    try:
        r = requests.get(siteURL,
                         headers=user_agent,
                         proxies=proxies,
                         allow_redirects=True,
                         timeout=(5, 12),
                         verify=False)
        # Generate page hash
        try:
            soup = BeautifulSoup(r.content, 'lxml')
            page_body = soup
            sha1 = hashlib.sha1()
            sha1.update(repr(page_body).encode("utf-8"))
            PageHash = sha1.hexdigest()
            SQL.SQLiteInsertPageHash(TABLEname, siteURL, PageHash)
        except Exception as e:
            print(e)

        LOG.info("[" + str(r.status_code) + "] " + r.url)
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        if SQL.SQLiteInvestigVerifyEntry(InvTABLEname, siteDomain,
                                         IPaddress) == 0:
            SQL.SQLiteInvestigInsert(InvTABLEname, siteURL, siteDomain,
                                     IPaddress, now, str(r.status_code))
        ziplist = []
        path = siteURL
        pathl = '/'.join(path.split("/")[:3])
        pathlist = path.split("/")[3:]

        # Make list
        current = 0
        newpath = ""
        while current < len(pathlist):
            if current == 0:
                newpath = pathlist[current]
            else:
                newpath = newpath + "/" + pathlist[current]
            current = current + 1
            pathD = pathl + "/" + newpath
            ziplist.append(pathD)
            rootpath = pathl + "/"
            if rootpath != pathD:
                ziplist.append(rootpath)

        # Get page title
        try:
            if len(ziplist) >= 1:
                rhtml = requests.get(siteURL,
                                     headers=user_agent,
                                     proxies=proxies,
                                     allow_redirects=True,
                                     timeout=(5, 12),
                                     verify=False)
                thtml = BeautifulSoup(rhtml.text, 'html.parser')
                try:
                    PageTitle = thtml.title.text.strip()
                except AttributeError:
                    PageTitle = None
                if PageTitle is not None:
                    PageTitle = re.sub(r'\s+', ' ', PageTitle)
                    LOG.info(PageTitle)
                    SQL.SQLiteInvestigUpdateTitle(InvTABLEname, siteURL,
                                                  PageTitle)
        except AttributeError:
            pass
        except requests.exceptions.ReadTimeout:
            pass
        except requests.exceptions.ConnectTimeout:
            pass
        except Exception:
            err = sys.exc_info()
            LOG.error("Get PageTitle Error: " + siteURL + str(err))

        # Try to find and download phishing kit archive (.zip)
        try:
            if len(ziplist) >= 1:
                for zip in ziplist:
                    # skip basenames containing URL-ish special characters
                    if not any(
                            c in os.path.basename(os.path.normpath(zip))
                            for c in (' = ', '%', '?', '-', '@')):
                        try:
                            # if URL is not rootpath siteURL
                            if len(zip.split("/")[3:][0]) > 0:
                                LOG.info("trying " + zip + ".zip")
                                # Use cfscrape if behind a Cloudflare check
                                if PageTitle and "Cloudflare" in PageTitle:
                                    scraper = cfscrape.create_scraper()
                                    rz = scraper.get(zip + ".zip",
                                                     headers=user_agent,
                                                     proxies=proxies,
                                                     allow_redirects=True,
                                                     timeout=(5, 12),
                                                     verify=False)
                                else:
                                    rz = requests.get(zip + ".zip",
                                                      headers=user_agent,
                                                      proxies=proxies,
                                                      allow_redirects=True,
                                                      timeout=(5, 12),
                                                      verify=False)
                                lastHTTPcode = str(rz.status_code)
                                # Reduce filename length
                                zzip = zip.replace('/', '_').replace(':', '')[:250]
                                try:
                                    savefile = DLDir + zzip + '.zip'
                                    # Already-collected file
                                    if os.path.exists(savefile):
                                        LOG.info("[DL ] Found already-collected archive: "
                                                 + savefile)
                                        return
                                    # New file to download
                                    elif zipfile.is_zipfile(io.BytesIO(rz.content)):
                                        LOG.info("[DL ] Found archive, downloaded it as: "
                                                 + savefile)
                                        with open(savefile, "wb") as code:
                                            code.write(rz.content)
                                        ZipFileName = str(zzip + '.zip')
                                        ZipFileHash = SHA.hashFile(savefile)
                                        SQL.SQLiteInvestigUpdatePK(
                                            InvTABLEname, siteURL,
                                            ZipFileName, ZipFileHash,
                                            now, lastHTTPcode)
                                        # Extract e-mails from downloaded file
                                        try:
                                            ZS = ZipSearch()
                                            extracted_emails = str(
                                                ZS.PKzipSearch(
                                                    InvTABLEname, SQL,
                                                    LOG, DLDir,
                                                    savefile)).strip(
                                                        "[]").replace("'", "")
                                            LOG.info("[Email] Found: {}".format(
                                                extracted_emails))
                                            SQL.SQLiteInvestigInsertEmail(
                                                InvTABLEname,
                                                extracted_emails,
                                                ZipFileName)
                                        except Exception as e:
                                            LOG.info(
                                                "Extracted emails exception: {}"
                                                .format(e))
                                        return
                                except requests.exceptions.ContentDecodingError:
                                    LOG.error("[DL ] content-type error")
                                except Exception:
                                    pass

                            # rootpath of siteURL
                            else:
                                rr = requests.get(zip,
                                                  headers=user_agent,
                                                  proxies=proxies,
                                                  allow_redirects=True,
                                                  timeout=(5, 12),
                                                  verify=False)
                                thtml = BeautifulSoup(rr.text, 'html.parser')
                                try:
                                    PageTitle = thtml.title.text.strip()
                                except AttributeError:
                                    PageTitle = None
                                if PageTitle is not None:
                                    PageTitle = re.sub(r'\s+', ' ', PageTitle)

                        except requests.exceptions.ReadTimeout:
                            LOG.debug("Connection Timeout: " + siteURL)
                        except requests.exceptions.ConnectTimeout:
                            LOG.debug("Connection Timeout")
                        except Exception as e:
                            LOG.error("Error Downloading zip: {}".format(e))

                        # Search for OpenDir
                        try:
                            if PageTitle is not None:
                                # OpenDir's zip search (incl. 000webhostapp
                                # OpenDir-like pages)
                                if ('Index of' in PageTitle or
                                        '.000webhostapp.com Free Website' in PageTitle):
                                    PKDownloadOpenDir(zip, siteDomain,
                                                      IPaddress, TABLEname,
                                                      InvTABLEname, DLDir, SQL,
                                                      PROXY, LOG, UAFILE, ASN)
                        except Exception as e:
                            LOG.error("Potential OpenDir connection error: " +
                                      str(e))

        except Exception as e:
            LOG.error("DL Error: " + str(e))

    except requests.exceptions.ConnectionError as e:
        err = str(e)
        if '0x05: Connection refused' in err:
            SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                         'Conn. refused')
        elif '0x04: Host unreachable' in err:
            SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                         'Unreachable')
        else:
            SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                         'Conn. error')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Connection error: " + siteURL)

    except requests.exceptions.ConnectTimeout:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                     'Conn. timeout')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Connection Timeout: " + siteURL)

    except requests.exceptions.ReadTimeout:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                     'Conn. readtimeout')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Connection Read Timeout: " + siteURL)

    except requests.exceptions.MissingSchema:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                     'Malformed URL')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Malformed URL, skipping: " + siteURL)

    except requests.exceptions.InvalidURL:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                     'Malformed URL')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Malformed URL, skipping: " + siteURL)

    except requests.exceptions.ChunkedEncodingError:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                     'Can\'t read data')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Can't read data, skipping: " + siteURL)

    except requests.exceptions.TooManyRedirects:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                     'Too many redirects')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Too many redirects, skipping: " + siteURL)

    except KeyboardInterrupt:
        LOG.info("Shutdown requested...exiting")
        os._exit(0)

    except Exception as e:
        LOG.error("Error trying to find kit: " + str(e))
Example #10
def SiteURLSQL(phishtank_file, entry, LOG, SQL, TABLEname, PROXY, UAFILE, UAG):
    # strip UID-style strings from the URL
    siteURL = quote(re.split("(?:[0-9a-fA-F]:?){32}", entry['url'])[0], ':/')
    dn = dirname(siteURL)

    # Test if entry already exists in DB
    if SQL.SQLiteVerifyEntry(TABLEname, dn) == 0:

        IPaddress = entry['details'][0]['ip_address']
        source_url = entry['phish_detail_url']
        siteDomain = urlparse(entry['url']).netloc
        now = str(TimestampNow().Timestamp())
        try:
            IPaddress = socket.gethostbyname(siteDomain)
            rASN = NetInfo()
            ASN = rASN.GetASN(IPaddress).strip('\"')
        # can't resolve
        except Exception:
            IPaddress = ""
            ASN = ""

        # HTTP connection
        lastHTTPcode = "---"  # default if the request never completes
        try:
            proxies = {'http': PROXY, 'https': PROXY}
            UA = UAG.ChooseUA(UAFILE)
            user_agent = {'User-agent': UA}
            try:
                r = requests.get(siteURL,
                                 headers=user_agent,
                                 proxies=proxies,
                                 allow_redirects=True,
                                 timeout=(5, 12))
                # Follow redirect and add new URI to database
                if (len(r.history) > 1) and ("301" in str(
                        r.history[-1])) and (siteURL != r.url) and (
                            siteURL.split('/')[:-1] != r.url.split('/')[:-2]
                        ) and (siteURL + '/' != r.url):
                    lastHTTPcode = str(r.status_code)
                    SQL.SQLiteInsertPK(TABLEname, r.url, siteDomain, IPaddress,
                                       source_url, now, lastHTTPcode, ASN)
                lastHTTPcode = str(r.status_code)
            except ValueError:
                # No user-agent configured
                r = requests.get(siteURL,
                                 proxies=proxies,
                                 allow_redirects=True,
                                 timeout=(5, 12))
                lastHTTPcode = str(r.status_code)
            except requests.exceptions.Timeout:
                lastHTTPcode = "timeout"
            except requests.exceptions.ConnectionError:
                lastHTTPcode = "aborted"
            except Exception:
                lastHTTPcode = "---"
                err = sys.exc_info()
                LOG.error("HTTP error: " + str(err))
        except Exception:
            # Unknown status code
            err = sys.exc_info()
            LOG.error("Connection error: " + str(err))

        # Add data into database
        LOG.info(siteURL)
        SQL.SQLiteInsertPK(TABLEname, siteURL, siteDomain, IPaddress,
                           source_url, now, lastHTTPcode, ASN)

    else:
        LOG.debug("Entry already known: " + siteURL)
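
Examples #10 and #12 store a redirect target as a new entry when the response history ends in a 301 and the final URL differs meaningfully from the one requested. A minimal sketch of how requests exposes that history, with a placeholder URL:

import requests

r = requests.get("http://example.com/old-path", allow_redirects=True)

# r.history holds the intermediate responses, oldest first
for hop in r.history:
    print(hop.status_code, hop.url)

# the extractors check the last hop's status and compare the final
# r.url against the URL they started from before inserting it
if r.history and r.history[-1].status_code == 301:
    print("301-redirected to:", r.url)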
Example #11
def UrlscanExtractor(LOG, SQL, TABLEname, PROXY, UAFILE):
    UAG = UAgent()
    ## Search in Urlscan results
    try:
        for item in HTMLText['results']:
            # strip UID-style strings from the URL
            siteURL = re.split("(?:[0-9a-fA-F]:?){32}", item['page']['url'])[0]
            dn = dirname(siteURL)

            ## Test if entry already exists in DB
            if SQL.SQLiteVerifyEntry(TABLEname, dn) == 0:
                source_url = item['result'].replace("/api/v1", "")
                siteDomain = urlparse(item['page']['url']).netloc

                try:
                    IPaddress = socket.gethostbyname(siteDomain)
                # can't resolve
                except Exception:
                    IPaddress = ""

                now = str(TimestampNow().Timestamp())

                # HTTP connection
                lastHTTPcode = "---"  # default if the request never completes
                try:
                    proxies = {'http': PROXY, 'https': PROXY}
                    UA = UAG.ChooseUA(UAFILE)
                    user_agent = {'User-agent': UA}
                    try:
                        r = requests.get(siteURL,
                                         headers=user_agent,
                                         proxies=proxies,
                                         allow_redirects=True,
                                         timeout=(5, 12))
                        lastHTTPcode = str(r.status_code)
                    except ValueError:
                        # No user-agent configured
                        r = requests.get(siteURL,
                                         proxies=proxies,
                                         allow_redirects=True,
                                         timeout=(5, 12))
                        lastHTTPcode = str(r.status_code)
                    except requests.exceptions.Timeout:
                        lastHTTPcode = "timeout"
                    except requests.exceptions.ConnectionError:
                        lastHTTPcode = "aborted"
                    except Exception:
                        lastHTTPcode = "---"
                except Exception:
                    # Unknown status code
                    err = sys.exc_info()
                    LOG.error("Connection error: " + str(err))

                LOG.info(siteURL + " " + siteDomain + " " + IPaddress + " " +
                         source_url + " " + now + " " + lastHTTPcode)
                SQL.SQLiteInsertPK(TABLEname, siteURL, siteDomain, IPaddress,
                                   source_url, now, lastHTTPcode)

            else:
                LOG.debug("Entry already known: " + siteURL)

    except Exception:
        err = sys.exc_info()
        LOG.error("HTML parser Error! " + str(err))
Example #12
def SiteURLSQL(SearchString, line, LOG, SQL, TABLEname, PROXY, UAFILE, UAG):
    # strip UID-style strings from the URL
    siteURL = quote(re.split("(?:[0-9a-fA-F]:?){32}", line[0])[0], ':/')
    if siteURL.startswith('https:'):
        siteDomain = siteURL.split('/')[2]
    else:
        siteDomain = siteURL.split('/')[0]
        siteURL = "http://" + siteURL
    dn = dirname(siteURL)

    # Test if entry already exists in DB
    if SQL.SQLiteVerifyEntry(TABLEname, dn) == 0:
        # Proceed to information retrieval
        now = str(TimestampNow().Timestamp())
        source_url = "https://urlquery.net/" + line[1]
        try:
            IPaddress = socket.gethostbyname(siteDomain)
            rASN = NetInfo()
            ASN = rASN.GetASN(IPaddress).strip('\"')
        # can't resolve
        except Exception:
            IPaddress = ""
            ASN = ""

        # HTTP connection
        lastHTTPcode = "---"  # default if the request never completes
        try:
            proxies = {'http': PROXY, 'https': PROXY}
            UA = UAG.ChooseUA(UAFILE)
            user_agent = {'User-agent': UA}
            try:
                r = requests.get(siteURL,
                                 headers=user_agent,
                                 proxies=proxies,
                                 allow_redirects=True)
                # Follow redirect and add new URI to database
                if (len(r.history) > 1) and ("301" in str(
                        r.history[-1])) and (siteURL != r.url) and (
                            siteURL.split('/')[:-1] != r.url.split('/')[:-2]
                        ) and (siteURL + '/' != r.url):
                    lastHTTPcode = str(r.status_code)
                    SQL.SQLiteInsertPK(TABLEname, r.url, siteDomain, IPaddress,
                                       source_url, now, lastHTTPcode, ASN)
                lastHTTPcode = str(r.status_code)
            except ValueError:
                # No user-agent configured
                r = requests.get(siteURL,
                                 proxies=proxies,
                                 allow_redirects=True)
                lastHTTPcode = str(r.status_code)
            except requests.exceptions.Timeout:
                lastHTTPcode = "timeout"
            except requests.exceptions.ConnectionError:
                lastHTTPcode = "aborted"
            except Exception:
                lastHTTPcode = "---"
        except Exception as e:
            # Unknown status code
            LOG.error("Connection error: {}".format(e))

        # Add data into database
        LOG.info(siteURL + " " + siteDomain + " " + IPaddress + " " +
                 source_url + " " + now + " " + lastHTTPcode)
        SQL.SQLiteInsertPK(TABLEname, siteURL, siteDomain, IPaddress,
                           source_url, now, lastHTTPcode, ASN)

    else:
        LOG.debug("Entry already known: " + siteURL)
Example #13
def TryPKDownload(siteURL, siteDomain, IPaddress, TABLEname, InvTABLEname,
                  DLDir, SQL, PROXY, LOG, UAFILE):
    proxies = {'http': PROXY, 'https': PROXY}
    UAG = UAgent()
    UA = UAG.ChooseUA(UAFILE)
    user_agent = {'User-agent': UA}
    now = str(TimestampNow().Timestamp())
    SHA = SHA256()

    try:
        r = requests.get(siteURL,
                         headers=user_agent,
                         proxies=proxies,
                         allow_redirects=True,
                         timeout=(5, 12))
        LOG.info("[" + str(r.status_code) + "] " + r.url)

        if str(r.status_code) == "200":
            SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
            if SQL.SQLiteInvestigVerifyEntry(InvTABLEname, siteDomain,
                                             IPaddress) == 0:
                SQL.SQLiteInvestigInsert(InvTABLEname, siteURL, siteDomain,
                                         IPaddress, now, '200')

            ziplist = []
            path = siteURL
            pathl = '/'.join(path.split("/")[:3])
            pathlist = path.split("/")[3:]

            # Make list
            current = 0
            newpath = ""
            while current < len(pathlist):
                if current == 0:
                    newpath = pathlist[current]
                else:
                    newpath = newpath + "/" + pathlist[current]
                current = current + 1
                pathD = pathl + "/" + newpath
                ziplist.append(pathD)

            try:
                # Try to find and download the phishing kit archive (.zip)
                if len(ziplist) > 1:
                    for zip in ziplist:
                        # skip basenames containing URL-ish special characters
                        if not any(
                                c in os.path.basename(os.path.normpath(zip))
                                for c in ('=', '%', '?', '-', '@', '.')):
                            if '/' not in zip[-1:] and '.' not in zip[-3:]:
                                try:
                                    LOG.info("trying " + zip + ".zip")
                                    rz = requests.get(zip + ".zip",
                                                      headers=user_agent,
                                                      proxies=proxies,
                                                      allow_redirects=True,
                                                      timeout=(5, 12))
                                    if str(rz.status_code) != "404":
                                        lastHTTPcode = str(rz.status_code)
                                        zzip = zip.replace('/', '_').replace(
                                            ':', '')
                                        if "application/zip" in rz.headers[
                                                'content-type'] or "application/octet-stream" in rz.headers[
                                                    'content-type']:
                                            savefile = DLDir + zzip + '.zip'
                                            # Already-collected file
                                            if os.path.exists(savefile):
                                                LOG.info(
                                                    "[DL ] Found already-collected archive: "
                                                    + savefile)
                                            # New file to download
                                            else:
                                                LOG.info(
                                                    "[DL ] Found archive, downloaded it as: "
                                                    + savefile)
                                                with open(savefile,
                                                          "wb") as code:
                                                    code.write(rz.content)
                                                ZipFileName = str(zzip +
                                                                  '.zip')
                                                ZipFileHash = SHA.hashFile(
                                                    savefile)
                                                SQL.SQLiteInvestigUpdatePK(
                                                    InvTABLEname, siteURL,
                                                    ZipFileName, ZipFileHash,
                                                    now, lastHTTPcode)
                                except Exception:
                                    err = sys.exc_info()
                                    LOG.error("Error: " + str(err))
                                    print("Error: " + str(err))
            except Exception:
                pass

        elif str(r.status_code) == "404":
            lastHTTPcode = str(r.status_code)
            SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                         lastHTTPcode)
            SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        else:
            lastHTTPcode = str(r.status_code)
            SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                         lastHTTPcode)

    except requests.exceptions.ConnectionError:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now, 'Err')
        LOG.debug("Connection error: " + siteURL)

    except requests.exceptions.ConnectTimeout:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now, 'To')
        LOG.debug("Connection Timeout: " + siteURL)

    except requests.exceptions.ReadTimeout:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now, 'RTo')
        LOG.debug("Connection Read Timeout: " + siteURL)

    except requests.exceptions.MissingSchema:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now, 'Err')
        LOG.debug("Malformed URL, skipping: " + siteURL + "\n")

    except Exception:
        err = sys.exc_info()
        LOG.error("Error: " + str(err))
Example #14
def TryPKDownload(siteURL, siteDomain, IPaddress, TABLEname, InvTABLEname,
                  DLDir, SQL, PROXY, LOG, UAFILE):
    global ziplist
    proxies = {'http': PROXY, 'https': PROXY}
    UAG = UAgent()
    UA = UAG.ChooseUA(UAFILE)
    user_agent = {'User-agent': UA}
    now = str(TimestampNow().Timestamp())
    SHA = SHA256()

    # If the URL path is at least two segments deep, drop the last
    # segment so we probe the parent directory of the phishing page
    PsiteURL = urlparse(siteURL)
    if len(PsiteURL.path.split("/")[1:]) >= 2:
        siteURL = siteURL.rsplit('/', 1)[0]

    # Let's try to find a phishing kit source archive
    try:
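        # verify=False skips TLS certificate validation; self-signed
        # certificates are common on phishing hosts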
        r = requests.get(siteURL,
                         headers=user_agent,
                         proxies=proxies,
                         allow_redirects=True,
                         timeout=(5, 12),
                         verify=False)

        if str(r.status_code) != "404":
            LOG.info("[" + str(r.status_code) + "] " + r.url)
            SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
            if SQL.SQLiteInvestigVerifyEntry(InvTABLEname, siteDomain,
                                             IPaddress) == 0:
                SQL.SQLiteInvestigInsert(InvTABLEname, siteURL, siteDomain,
                                         IPaddress, now, str(r.status_code))
            ziplist = []
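            # pathl keeps scheme://host, pathlist the path segments after it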
            path = siteURL
            pathl = '/'.join(path.split("/")[:3])
            pathlist = path.split("/")[3:]

            # Build cumulative directory URLs, one per path segment
            newpath = ""
            for i, segment in enumerate(pathlist):
                newpath = segment if i == 0 else newpath + "/" + segment
                ziplist.append(pathl + "/" + newpath)
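            # e.g. http://host/a/b -> ["http://host/a", "http://host/a/b"]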

            # Get page title from the already-fetched response
            try:
                if len(ziplist) >= 1:
                    thtml = r.text
                    tit = re.search('<title>(.*?)</title>', thtml,
                                    re.IGNORECASE)
                    if tit is not None:
                        PageTitle = tit.group(1)
                        LOG.info(PageTitle)
                        SQL.SQLiteInvestigUpdateTitle(InvTABLEname, siteURL,
                                                      PageTitle)
            except AttributeError:
                pass
            except Exception:
                err = sys.exc_info()
                LOG.error("Get PageTitle Error: " + siteURL + str(err))

            # Track already-processed URLs in a Redis set (db=1),
            # seeded with a dummy member so the key exists
            redis_set = redis.Redis(db=1)
            redis_set.sadd('StalkPhishURLs', 0)

            # Walk every parent path of the URL and check its directory index for .zip files
            try:
                if len(ziplist) >= 1:
                    for url in [pathl] + ziplist:
                        if redis_set.sismember('StalkPhishURLs', url):
                            continue
                        LOG.info("Retrieving Path " + url)
                        urllist = RetriveIndexPath(url, proxies, user_agent,
                                                   [])
                        for urlzip in urllist:
                            if redis_set.sismember('StalkPhishURLs', urlzip):
                                continue
                            LOG.info("trying " + urlzip)
                            rz = requests.get(urlzip,
                                              headers=user_agent,
                                              proxies=proxies,
                                              allow_redirects=True,
                                              timeout=(5, 12),
                                              verify=False)
                            if str(rz.status_code) != "404":
                                lastHTTPcode = str(rz.status_code)
                                zzip = urlzip.replace('/',
                                                      '_').replace(':', '')
                                try:
                                    if "application/zip" in rz.headers[
                                            'content-type'] or "application/octet-stream" in rz.headers[
                                                'content-type']:
                                        savefile = DLDir + zzip
                                        # Already-collected file
                                        if os.path.exists(savefile):
                                            LOG.info(
                                                "[DL ] Found already-collected archive: "
                                                + savefile)
                                        # New file to download
                                        else:
                                            LOG.info(
                                                "[DL ] Found archive, downloading it as: "
                                                + savefile)
                                            with open(savefile, "wb") as code:
                                                code.write(rz.content)
                                            ZipFileName = str(zzip)
                                            ZipFileHash = SHA.hashFile(
                                                savefile)
                                            SQL.SQLiteInvestigUpdatePK(
                                                InvTABLEname, siteURL,
                                                ZipFileName, ZipFileHash, now,
                                                lastHTTPcode)
                                    # else: content-type is not an archive
                                except requests.exceptions.ContentDecodingError:
                                    LOG.error("[DL ] content-type error")
                                except Exception:
                                    # Defensive catch: keep the crawl loop alive
                                    pass
                            # else: 404, nothing to fetch at this URL
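                        # Mark this path and everything it listed as processed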
                        redis_set.sadd('StalkPhishURLs', *urllist, url)
            except:
                err = sys.exc_info()
                LOG.error("DL Error: " + str(err))

            # Append popular kit names to the candidate list; the download
            # loop below adds the ".zip" suffix itself
            try:
                import pandas as pd
                perct = 0.1
                topNames = pd.read_csv(
                    '/home/chaoxu18/StalkPhish/stalkphish/ExtracKitNameCount.csv'
                )
                topNamesNum = round(perct * len(topNames))
                if topNamesNum > 0:
                    for topname in topNames['kitname'].values[:topNamesNum]:
                        ziplist.append(siteURL + '/' + topname)
            except Exception:
                err = sys.exc_info()
                LOG.error("Top Kit Name Error: " + str(err))

            try:
                # Try to find and download a phishing kit archive (.zip)
                if len(ziplist) >= 1:
                    for zip_url in ziplist:
                        if redis_set.sismember('StalkPhishURLs',
                                               zip_url + ".zip"):
                            continue
                        else:
                            redis_set.sadd('StalkPhishURLs', zip_url + ".zip")
                        # Skip candidates whose basename contains URL
                        # metacharacters
                        basename = os.path.basename(os.path.normpath(zip_url))
                        if not any(c in basename
                                   for c in ('=', '%', '?', '-', '@')):
                            try:
                                LOG.info("trying " + zip_url + ".zip")
                                rz = requests.get(zip_url + ".zip",
                                                  headers=user_agent,
                                                  proxies=proxies,
                                                  allow_redirects=True,
                                                  timeout=(5, 12),
                                                  verify=False)
                                if str(rz.status_code) != "404":
                                    lastHTTPcode = str(rz.status_code)
                                    zzip = zip_url.replace('/', '_').replace(
                                        ':', '')
                                    try:
                                        if "application/zip" in rz.headers[
                                                'content-type'] or "application/octet-stream" in rz.headers[
                                                    'content-type']:
                                            savefile = DLDir + zzip + '.zip'
                                            # Already-collected file
                                            if os.path.exists(savefile):
                                                LOG.info(
                                                    "[DL ] Found already-collected archive: "
                                                    + savefile)
                                            # New file to download
                                            else:
                                                LOG.info(
                                                    "[DL ] Found archive, downloading it as: "
                                                    + savefile)
                                                with open(savefile,
                                                          "wb") as code:
                                                    code.write(rz.content)
                                                ZipFileName = str(zzip +
                                                                  '.zip')
                                                ZipFileHash = SHA.hashFile(
                                                    savefile)
                                                SQL.SQLiteInvestigUpdatePK(
                                                    InvTABLEname, siteURL,
                                                    ZipFileName, ZipFileHash,
                                                    now, lastHTTPcode)
                                        # else: content-type is not an archive
                                    except requests.exceptions.ContentDecodingError:
                                        LOG.error("[DL ] content-type error")
                                    except Exception:
                                        # Defensive catch: keep the loop alive
                                        pass
                                # else: 404, nothing to fetch at this URL
                            except requests.exceptions.ReadTimeout:
                                LOG.debug("Connection Timeout: " + siteURL)
                            except requests.exceptions.ConnectTimeout:
                                LOG.debug("Connection Timeout")
                            except:
                                err = sys.exc_info()
                                LOG.error("Error: " + str(err))
                                print("Error: " + str(err))
                                pass
                        # else: basename contains URL metacharacters, skip
                # else: ziplist empty, nothing to probe
            except:
                err = sys.exc_info()
                LOG.error("DL Error: " + str(err))

        else:
            LOG.debug("[" + str(r.status_code) + "] " + r.url)
            lastHTTPcode = str(r.status_code)
            SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                         lastHTTPcode)
            SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)

    except requests.exceptions.ConnectionError:
        # Inspect the exception text to distinguish proxy/SOCKS error codes
        err = str(sys.exc_info())
        if '0x05: Connection refused' in err:
            SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                         'Conn. refused')
        elif '0x04: Host unreachable' in err:
            SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                         'Unreachable')
        else:
            SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                         'Conn. error')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Connection error: " + siteURL)

    except requests.exceptions.ConnectTimeout:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                     'Conn. timeout')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Connection Timeout: " + siteURL)

    except requests.exceptions.ReadTimeout:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                     'Conn. readtimeout')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Connection Read Timeout: " + siteURL)

    except requests.exceptions.MissingSchema:
        SQL.SQLiteInvestigUpdateCode(InvTABLEname, siteURL, now,
                                     'Malformed URL')
        SQL.SQLiteInsertStillTryDownload(TABLEname, siteURL)
        LOG.debug("Malformed URL, skipping: " + siteURL + "\n")

    except Exception:
        err = sys.exc_info()
        LOG.error("Error: " + str(err))