Code Example #1
File: GoogleSearch.py Project: smaff/simply
 def search(self):
     dl = Download.Download(self.verbose)
     while self.Counter <= self.Limit and self.Counter <= 1000:
         time.sleep(1)
         if self.verbose:
             p = ' [*] Google Search on page: ' + str(self.Counter)
             print helpers.color(p, firewall=True)
         try:
             url = "http://www.google.com/search?num=" + str(self.Quanity) + "&start=" + \
                 str(self.Counter) + "&hl=en&meta=&q=%40\"" + \
                 self.Domain + "\""
         except Exception as e:
             error = " [!] Major issue with Google Search:" + str(e)
             print helpers.color(error, warning=True)
         try:
             results = dl.requesturl(url, useragent=self.UserAgent)
         except Exception as e:
             error = " [!] Fail during Request to Google (Check Connection):" + \
                 str(e)
             print helpers.color(error, warning=True)
         try:
             # Url = r.url
             dl.GoogleCaptchaDetection(results)
         except Exception as e:
             print e
         self.Html += results
         self.Counter += 100
         helpers.modsleep(self.Sleep, jitter=self.Jitter)
Code Example #2
 def search(self):
     dl = Download.Download(self.verbose)
     convert = Converter.Converter(verbose=self.verbose)
     while self.Counter <= self.Limit and self.Counter <= 100:
         time.sleep(1)
         if self.verbose:
             p = ' [*] Google DOCX Search on page: ' + str(self.Counter)
             print helpers.color(p, firewall=True)
         try:
             urly = "https://www.google.com/search?q=site:" + \
                 self.Domain + "+filetype:docx&start=" + str(self.Counter)
         except Exception as e:
             error = "[!] Major issue with Google Search:" + str(e)
             print helpers.color(error, warning=True)
         try:
             r = requests.get(urly)
         except Exception as e:
             error = " [!] Fail during Request to Google (Check Connection):" + \
                 str(e)
             print helpers.color(error, warning=True)
         RawHtml = r.content
         soup = BeautifulSoup(RawHtml)
         # I use this to parse my results for URLs to follow
         for a in soup.findAll('a'):
             try:
                 # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
                 # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
                 # newreg=01f0ed80771f4dfaa269b15268b3f9a9
                 l = urlparse.parse_qs(
                     urlparse.urlparse(a['href']).query)['q'][0]
                 if l.startswith('http') or l.startswith('www'):
                     if "webcache.googleusercontent.com" not in l:
                         self.urlList.append(l)
             except:
                 pass
         self.Counter += 10
     # now download the required files
     try:
         for url in self.urlList:
             if self.verbose:
                 p = ' [*] Google DOCX search downloading: ' + str(url)
                 print helpers.color(p, firewall=True)
             try:
                 filetype = ".docx"
                 FileName, FileDownload = dl.download_file(url, filetype)
                 if FileDownload:
                     if self.verbose:
                         p = ' [*] Google DOCX file was downloaded: ' + \
                             str(url)
                         print helpers.color(p, firewall=True)
                     self.Text += convert.convert_docx_to_txt(FileName)
                 # print self.Text
             except Exception as e:
                 print helpers.color(" [!] Issue with Converting Docx Files\n", firewall=True)
             try:
                 dl.delete_file(FileName)
             except Exception as e:
                 print e
     except:
         print helpers.color(" [*] No DOCX's to download from Google!\n", firewall=True)
Code Example #3
File: EmailHunter.py Project: vishalkrtr/SimplyEmail
 def process(self):
     dl = Download.Download(self.verbose)
     try:
         # This returns a JSON object
         url = "https://emailhunter.co/trial/v1/search?offset=0&domain=" + \
             self.domain + "&format=json"
         r = dl.requesturl(url, useragent=self.UserAgent, raw=True)
     except Exception as e:
         error = "[!] Major issue with EmailHunter Search:" + str(e)
         print helpers.color(error, warning=True)
     try:
         results = r.json()
         # pprint(results)
         # Check to make sure we got data back from the API
         if results['status'] == "success":
             # The API starts at 0 for the first value
             x = 0
             EmailCount = int(results['results'])
             # We will iterate over the JSON object for the index objects
             while x < EmailCount:
                 self.results.append(results['emails'][int(x)]['value'])
                 x += 1
         if results['status'] == "error":
             # The API starts at 0 for the first value
             error = ' [!] EmailHunter Trial API failed: ' + \
                 str(results['message'])
             self.logger.error('EmailHunter Trial API failed: ' +
                               str(results['message']))
             print helpers.color(error, firewall=True)
     except Exception as e:
         pass
     if self.verbose:
         p = ' [*] EmailHunter completed JSON request'
         print helpers.color(p, firewall=True)
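The index-based while loop above simply walks the 'emails' array of the JSON response; a hedged equivalent using direct iteration, assuming the same response shape:

    # equivalent to the counter loop, assuming results['emails'] holds EmailCount entries
    for email in results['emails'][:EmailCount]:
        self.results.append(email['value'])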
Code Example #4
 def search(self):
     dl = Download.Download(verbose=self.verbose)
     while self.Counter <= self.Depth and self.Counter <= 100:
         helpers.modsleep(5)
         if self.verbose:
             p = ' [*] GitHubUser Search on page: ' + str(self.Counter)
             print helpers.color(p, firewall=True)
         try:
             url = 'https://github.com/search?p=' + str(self.Counter) + '&q=' + \
                 str(self.domain) + 'ref=searchresults&type=Users&utf8='
         except Exception as e:
             error = " [!] Major issue with GitHubUser Search:" + str(e)
             print helpers.color(error, warning=True)
         try:
             r = dl.requesturl(url,
                               useragent=self.UserAgent,
                               raw=True,
                               timeout=10)
         except Exception as e:
             error = " [!] Fail during Request to GitHubUser (Check Connection):" + \
                 str(e)
             print helpers.color(error, warning=True)
         results = r.content
         self.Html += results
         self.Counter += 1
Code Example #5
File: AskSearch.py Project: smaff/simply
 def process(self):
     dl = Download.Download(self.verbose)
     while self.Counter <= self.PageLimit:
         if self.verbose:
             p = ' [*] AskSearch on page: ' + str(self.Counter)
             print helpers.color(p, firewall=True)
             self.logger.info('AskSearch on page: ' + str(self.Counter))
         try:
             url = 'http://www.ask.com/web?q=@' + str(self.Domain) + \
                 '&pu=10&page=' + str(self.Counter)
         except Exception as e:
             error = " [!] Major issue with Ask Search:" + str(e)
             self.logger.error('Major issue with Ask Search: ' + str(e))
             print helpers.color(error, warning=True)
         try:
             rawhtml = dl.requesturl(url, useragent=self.UserAgent)
         except Exception as e:
             error = " [!] Fail during Request to Ask (Check Connection):" + \
                 str(e)
             self.logger.error(
                 'Fail during Request to Ask (Check Connection): ' + str(e))
             print helpers.color(error, warning=True)
         self.Html += rawhtml
         self.Counter += 1
         helpers.modsleep(self.Sleep, jitter=self.Jitter)
Code Example #6
 def search(self):
     dl = Download.Download(self.verbose)
     while self.Counter <= self.Limit and self.Counter <= 1000:
         time.sleep(1)
         if self.verbose:
             p = ' [*] RedditPost Search on result: ' + str(self.Counter)
             self.logger.debug(
                 "RedditPost Search on result: " + str(self.Counter))
             print helpers.color(p, firewall=True)
         try:
             url = "https://www.reddit.com/search?q=%40" + str(self.Domain) + \
                 "&restrict_sr=&sort=relevance&t=all&count=" + str(self.Counter) + \
                 '&after=t3_3mkrqg'
         except Exception as e:
             error = " [!] Major issue with RedditPost search:" + str(e)
             self.logger.error(
                 "Major issue with RedditPostSearch: " + str(e))
             print helpers.color(error, warning=True)
         try:
             RawHtml = dl.requesturl(url, useragent=self.UserAgent)
         except Exception as e:
             error = " [!] Fail during Request to Reddit (Check Connection):" + \
                 str(e)
             self.logger.error(
                 "Fail during Request to Reddit (Check Connection): " + str(e))
             print helpers.color(error, warning=True)
         self.Html += RawHtml
         # Reddit seems to increment by 25 in some cases
         self.Counter += 25
Code Example #7
def test_downloads():
    # perform Download testing
    ua = helpers.getua()
    dl = Download.Download(True)
    html = dl.requesturl(
        'http://google.com', ua, timeout=2, retrytime=3, statuscode=False)
    dl.GoogleCaptchaDetection(html)
    f, download = dl.download_file(
        'http://www.sample-videos.com/doc/Sample-doc-file-100kb.doc', '.pdf')
    dl.delete_file(f)
Code Example #8
 def process(self):
     # Get all the Pastebin raw items
     # https://canary.pw/search/?q=earthlink.net&page=3
     UrlList = []
     dl = Download.Download(verbose=self.verbose)
     while self.Counter <= self.Depth:
         if self.verbose:
             p = ' [*] Canary Search on page: ' + str(self.Counter)
             self.logger.info("CanaryBinSearch on page: " +
                              str(self.Counter))
             print helpers.color(p, firewall=True)
         try:
             url = "https://canary.pw/search/?q=" + str(self.domain) + "&page=" + \
                 str(self.Counter)
             rawhtml, statuscode = dl.requesturl(url,
                                                 useragent=self.UserAgent,
                                                 statuscode=True,
                                                 verify=False)
             if statuscode != 200:
                 break
         except Exception as e:
             error = " [!] Major issue with Canary Pastebin Search:" + \
                 str(e)
             self.logger.error(
                 'Fail during Request to CanaryBinSearch (Check Connection): '
                 + str(e))
             print helpers.color(error, warning=True)
         # Parse the results for our URLs
         soup = BeautifulSoup(rawhtml)
         for a in soup.findAll('a', href=True):
             a = a['href']
             if a.startswith('/view'):
                 UrlList.append(a)
         self.Counter += 1
     # Now take all gathered URLs and gather the HTML content needed
     Status = " [*] Canary found " + \
         str(len(UrlList)) + " CanaryBin(s) to Search!"
     self.logger.info("CanaryBin found " + str(len(UrlList)) +
                      " CanaryBin(s) to Search!")
     print helpers.color(Status, status=True)
     for item in UrlList:
         try:
             item = "https://canary.pw" + str(item)
             # They can be massive!
             rawhtml = dl.requesturl(item,
                                     useragent=self.UserAgent,
                                     timeout=20)
             self.Html += rawhtml
         except Exception as e:
             error = " [!] Connection Timed out on Canary Pastebin Search:" + \
                 str(e)
             self.logger.error(
                 'Fail during Request to CanaryBinSearch bin (Check Connection): '
                 + str(e))
             print helpers.color(error, warning=True)
Code Example #9
 def process(self):
     dl = Download.Download(verbose=self.verbose)
     try:
         url = "https://www.flickr.com/search/?text=%40" + self.domain
         rawhtml = dl.requesturl(url, useragent=self.UserAgent)
     except Exception as e:
         error = " [!] Major issue with Flickr Search:" + str(e)
         print helpers.color(error, warning=True)
     self.results += rawhtml
     if self.verbose:
         p = ' [*] FlickrSearch has completed'
         print helpers.color(p, firewall=True)
Code Example #10
 def process(self):
     dl = Download.Download(verbose=self.verbose)
     # Get all the USER code Repos
     # https://github.com/search?p=2&q=enron.com+&ref=searchresults&type=Code&utf8=✓
     UrlList = []
     while self.Counter <= self.Depth:
         if self.verbose:
             p = " [*] GitHub Gist Search Search on page: " + str(
                 self.Counter)
             print helpers.color(p, firewall=True)
         try:
             # search?p=2&q=%40enron.com&ref=searchresults&utf8=✓
             url = ("https://gist.github.com/search?p=" +
                    str(self.Counter) + "&q=%40" + str(self.domain) +
                    "+&ref=searchresults&utf8=✓")
             r = dl.requesturl(url,
                               useragent=self.UserAgent,
                               raw=True,
                               timeout=10)
             if r.status_code != 200:
                 break
         except Exception as e:
             error = " [!] Major issue with GitHubGist Search:" + str(e)
             print helpers.color(error, warning=True)
         RawHtml = r.content
         # Parse the results for our URLs
         soup = BeautifulSoup(RawHtml)
         for a in soup.findAll("a", href=True):
             a = a["href"]
             if a.startswith("/"):
                 UrlList.append(a)
         self.Counter += 1
     # Now take all gathered URLs and gather the HTML content needed
     for url in UrlList:
         try:
             url = "https://gist.github.com" + url
             html = dl.requesturl(url, useragent=self.UserAgent, timeout=10)
             self.Html += html
         except Exception as e:
             error = " [!] Connection Timed out on GithubGist Search:" + str(
                 e)
             print helpers.color(error, warning=True)
Code Example #11
File: Hunter.py Project: smaff/simply
    def process(self):
        dl = Download.Download(self.verbose)
        try:
            # We will check to see that we have enough requests left to make a search
            url = "https://api.hunter.io/v2/account?api_key=" + self.apikeyv
            r = dl.requesturl(url, useragent=self.UserAgent, raw=True)
            accountInfo = r.json()
            quota = int(accountInfo['data']['calls']['available'])
            quotaUsed = int(accountInfo['data']['calls']['used'])
            if quotaUsed >= self.QuotaLimit:
                overQuotaLimit = True
            else:
                overQuotaLimit = False
        except Exception as e:
            error = " [!] Hunter API error: " + str(
                accountInfo['errors'][0]['details'])
            print helpers.color(error, warning=True)
        try:
            # Hunter's API only allows 100 emails per request, so we check the number of emails Hunter has
            # on our specified domain, and if it's over 100 we need to make multiple requests to get all of the emails
            url = "https://api.hunter.io/v2/email-count?domain=" + self.domain
            r = dl.requesturl(url, useragent=self.UserAgent, raw=True)
            response = r.json()
            totalEmails = int(response['data'][self.etype])
            emailsLeft = totalEmails
            offset = 0
        except Exception as e:
            error = "[!] Major issue with Hunter Search: " + str(e)
            print helpers.color(error, warning=True)
        requestsMade = 0
        # Main loop to keep requesting the Hunter API until we get all of the emails they have
        while emailsLeft > 0:
            try:
                if overQuotaLimit or requestsMade + quotaUsed >= self.QuotaLimit:
                    if self.verbose:
                        print helpers.color(" [*] You are over your set Quota Limit: " + \
                            str(quotaUsed) + "/" + str(self.QuotaLimit) + " stopping search", firewall=True)
                    break
                elif self.RequestLimit != 0 and requestsMade >= self.RequestLimit:
                    if self.verbose:
                        print helpers.color(
                            " [*] Stopping search due to user set Request Limit",
                            firewall=True)
                    break

                # This returns a JSON object
                url = "https://api.hunter.io/v2/domain-search?domain=" + \
                    self.domain + self.type + "&limit=100&offset=" + str(offset) + "&api_key=" + self.apikeyv
                r = dl.requesturl(url, useragent=self.UserAgent, raw=True)
                results = r.json()
                emailCount = int(results['meta']['results'])
            except Exception as e:
                error = " [!] Hunter API error: " + str(
                    results['errors'][0]['details']) + " QUITTING!"
                print helpers.color(error, warning=True)
                break
            try:
                # Make sure we don't exceed the index for the 'emails' array in the 'results' Json object
                if emailsLeft < 100:
                    emailCount = emailsLeft
                if emailCount > 100:
                    emailCount = 100
                # 1 request is every 10 emails delivered
                requestsMade += emailCount // 10
                if emailCount % 10 != 0:
                    requestsMade += 1
                # The API starts at 0 for the first value
                x = 0
                # We will iterate over the JSON object for the index objects
                while x < emailCount:
                    self.results.append(
                        results['data']['emails'][int(x)]['value'])
                    x += 1
                emailsLeft -= emailCount
                if emailsLeft > 100:
                    offset += 100
                else:
                    offset += emailsLeft
            except Exception as e:
                error = " [!] Major issue with search parsing: " + str(e)
                print helpers.color(error, warning=True)
                break
        if self.verbose:
            # Print the available requests the user has if verbose
            print helpers.color(' [*] Hunter has completed JSON request',
                                firewall=True)
            requestsUsed = requestsMade + quotaUsed
            if quota - requestsUsed <= 0:
                print helpers.color(" [*] You have no Hunter requests left." \
                    + "They will refill in about a month", firewall=True)
            else:
                print helpers.color(" [*] You have " + str(requestsUsed) \
                    + "/" + str(quota) + " Hunter requests left", firewall=True)
Code Example #12
File: GoogleCsvSearch.py Project: smaff/simply
 def search(self):
     dl = Download.Download(self.verbose)
     while self.Counter <= self.Limit and self.Counter <= 100:
         time.sleep(1)
         if self.verbose:
             p = ' [*] Google CSV Search on page: ' + str(self.Counter)
             print helpers.color(p, firewall=True)
         try:
             url = "https://www.google.com/search?q=site:" + \
                 self.Domain + "+filetype:csv&start=" + str(self.Counter)
         except Exception as e:
             error = " [!] Major issue with Google Search:" + str(e)
             print helpers.color(error, warning=True)
         try:
             RawHtml = dl.requesturl(url, useragent=self.UserAgent)
         except Exception as e:
             error = " [!] Fail during Request to Google (Check Connection):" + \
                 str(e)
             print helpers.color(error, warning=True)
         # check for captcha
         try:
             # Url = r.url
             dl.GoogleCaptchaDetection(RawHtml)
         except Exception as e:
             print e
         soup = BeautifulSoup(RawHtml)
         # I use this to parse my results for URLs to follow
         for a in soup.findAll('a'):
             try:
                 # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
                 # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
                 # newreg=01f0ed80771f4dfaa269b15268b3f9a9
                 l = urlparse.parse_qs(
                     urlparse.urlparse(a['href']).query)['q'][0]
                 if l.startswith('http') or l.startswith('www'):
                     if "webcache.googleusercontent.com" not in l:
                         self.urlList.append(l)
             except:
                 pass
         self.Counter += 10
         helpers.modsleep(self.Sleep, jitter=self.Jitter)
     # now download the required files
     try:
         for url in self.urlList:
             if self.verbose:
                 p = ' [*] Google CSV search downloading: ' + str(url)
                 print helpers.color(p, firewall=True)
             try:
                 filetype = ".csv"
                 FileName, FileDownload = dl.download_file2(url, filetype)
                 if FileDownload:
                     if self.verbose:
                         p = '[*] Google CSV file was downloaded: ' + \
                             str(url)
                         print helpers.color(p, firewall=True)
                     with open(FileName) as f:
                         self.Text += f.read()
                 # print self.Text
             except Exception as e:
                 print helpers.color(" [!] Issue with opening CSV Files\n", firewall=True)
             try:
                 dl.delete_file(FileName)
             except Exception as e:
                 print e
     except:
         print helpers.color(" [*] No CSV to download from Google!\n", firewall=True)
Code Example #13
 def search(self):
     convert = Converter.Converter(self.verbose)
     dl = Download.Download(self.verbose)
     while self.Counter <= self.Limit and self.Counter <= 100:
         time.sleep(1)
         if self.verbose:
             p = ' [*] Google PPTX Search on page: ' + str(self.Counter)
             print helpers.color(p, firewall=True)
         try:
             url = "https://www.google.com/search?q=" + \
                 self.Domain + "+filetype:pptx&start=" + str(self.Counter)
         except Exception as e:
             error = " [!] Major issue with Google Search:" + str(e)
             print helpers.color(error, warning=True)
         try:
             RawHtml = dl.requesturl(url, useragent=self.UserAgent)
         except Exception as e:
             error = " [!] Fail during Request to Google (Check Connection):" + \
                 str(e)
             print helpers.color(error, warning=True)
         # check for captcha
         try:
             # Url = r.url
             dl.GoogleCaptchaDetection(RawHtml)
         except Exception as e:
             print e
         soup = BeautifulSoup(RawHtml)
         # I use this to parse my results for URLs to follow
         for a in soup.findAll('a'):
             try:
                 # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
                 # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
                 # newreg=01f0ed80771f4dfaa269b15268b3f9a9
                 l = urlparse.parse_qs(urlparse.urlparse(
                     a['href']).query)['q'][0]
                 if l.startswith('http') or l.startswith(
                         'www') or l.startswith('https'):
                     if "webcache.googleusercontent.com" not in l:
                         self.urlList.append(l)
                 # for some reason PPTX seems to be cached data:
                 l = urlparse.parse_qs(urlparse.urlparse(
                     a['href']).query)['q'][0]
                 l = l.split(':', 2)
                 if "webcache.googleusercontent.com" not in l[2]:
                     self.urlList.append(l[2])
             except:
                 pass
         self.Counter += 10
     # now download the required files
     try:
         for url in self.urlList:
             if self.verbose:
                 p = ' [*] Google PPTX search downloading: ' + str(url)
                 print helpers.color(p, firewall=True)
             try:
                 filetype = ".pptx"
                 FileName, FileDownload = dl.download_file2(url, filetype)
                 if FileDownload:
                     if self.verbose:
                         p = ' [*] Google PPTX file was downloaded: ' + \
                             str(url)
                         print helpers.color(p, firewall=True)
                     ft = helpers.filetype(FileName).lower()
                     if 'powerpoint' in ft:
                         self.Text += convert.convert_pptx_to_txt(FileName)
                     else:
                         self.logger.warning(
                             'Downloaded file is not a PPTX: ' + ft)
                 # print self.Text
             except Exception as e:
                 print helpers.color(" [!] Issue with opening PPTX Files\n",
                                     firewall=True)
             try:
                 dl.delete_file(FileName)
             except Exception as e:
                 print e
     except:
         print helpers.color(" [*] No PPTX to download from Google!\n",
                             firewall=True)
Code Example #14
    def search(self):
        dl = Download.Download(self.verbose)
        convert = Converter.Converter(verbose=self.verbose)
        while self.Counter <= self.Limit and self.Counter <= 10:
            helpers.modsleep(1)
            if self.verbose:
                p = ' [*] Exalead PPTX Search on page: ' + str(self.Counter)
                self.logger.info('ExaleadPPTXSearch on page: ' +
                                 str(self.Counter))
                print helpers.color(p, firewall=True)
            try:
                url = 'http://www.exalead.com/search/web/results/?q="%40' + self.Domain + \
                      '"+filetype:pptx&elements_per_page=' + \
                    str(self.Quanity) + '&start_index=' + str(self.Counter)
            except Exception as e:
                self.logger.error('ExaleadPPTXSearch could not build URL')
                error = " [!] Major issue with Exalead PPTX Search: " + str(e)
                print helpers.color(error, warning=True)
            try:
                RawHtml = dl.requesturl(url, useragent=self.UserAgent)
                # sometimes the URL is broken, but the Exalead search results
                # still contain the e-mail addresses
                self.Text += RawHtml
                soup = BeautifulSoup(RawHtml, "lxml")
                self.urlList = [
                    h2.a["href"]
                    for h2 in soup.findAll('h4', class_='media-heading')
                ]
            except Exception as e:
                self.logger.error(
                    'ExaleadPPTXSearch could not request / parse HTML')
                error = " [!] Fail during parsing result: " + str(e)
                print helpers.color(error, warning=True)
            self.Counter += 30

        # now download the required files
        try:
            for url in self.urlList:
                if self.verbose:
                    p = ' [*] Exalead PPTX search downloading: ' + str(url)
                    self.logger.info('ExaleadPPTXSearch downloading: ' +
                                     str(url))
                    print helpers.color(p, firewall=True)
                try:
                    filetype = ".pptx"
                    dl = Download.Download(self.verbose)
                    FileName, FileDownload = dl.download_file(url, filetype)
                    if FileDownload:
                        if self.verbose:
                            p = ' [*] Exalead PPTX file was downloaded: ' + \
                                str(url)
                            self.logger.info('ExaleadPPTXSearch downloaded: ' +
                                             str(p))
                            print helpers.color(p, firewall=True)
                        ft = helpers.filetype(FileName).lower()
                        if 'powerpoint' in ft:
                            self.Text += convert.convert_zip_to_text(FileName)
                        else:
                            self.logger.warning(
                                'Downloaded file is not a PPTX: ' + ft)
                except Exception as e:
                    error = " [!] Issue with opening PPTX Files:%s" % (str(e))
                    print helpers.color(error, warning=True)
                try:
                    dl.delete_file(FileName)
                except Exception as e:
                    print e
        except Exception as e:
            self.logger.error("ExaleadPPTXSearch no doc's to download")
            print helpers.color(" [*] No PPTX's to download from Exalead!\n",
                                firewall=True)

        if self.verbose:
            p = ' [*] Searching PPTX from Exalead Complete'
            print helpers.color(p, status=True)
Code Example #15
    def search(self):
        convert = Converter.Converter(verbose=self.verbose)
        while self.Counter <= self.Limit and self.Counter <= 10:
            time.sleep(1)
            if self.verbose:
                p = ' [*] Exalead Search on page: ' + str(self.Counter)
                print helpers.color(p, firewall=True)
            try:
                url = 'http://www.exalead.com/search/web/results/?q="%40' + self.Domain + \
                      '"+filetype:pdf&elements_per_page=' + \
                    str(self.Quanity) + '&start_index=' + str(self.Counter)
            except Exception as e:
                error = " [!] Major issue with Exalead PDF Search: " + str(e)
                print helpers.color(error, warning=True)
            try:
                r = requests.get(url, headers=self.UserAgent)
            except Exception as e:
                error = " [!] Fail during Request to Exalead (Check Connection):" + str(
                    e)
                print helpers.color(error, warning=True)
            try:
                RawHtml = r.content
                # sometimes the URL is broken, but the Exalead search results
                # still contain the e-mail addresses
                self.Text += RawHtml
                soup = BeautifulSoup(RawHtml, "lxml")
                self.urlList = [h2.a["href"]
                                for h2 in soup.findAll('h4', class_='media-heading')]
            except Exception as e:
                error = " [!] Fail during parsing result: " + str(e)
                print helpers.color(error, warning=True)
            self.Counter += 30

        # now download the required files
        try:
            for url in self.urlList:
                if self.verbose:
                    p = ' [*] Exalead PDF search downloading: ' + str(url)
                    print helpers.color(p, firewall=True)
                try:
                    filetype = ".pdf"
                    dl = Download.Download(self.verbose)
                    FileName, FileDownload = dl.download_file(url, filetype)
                    if FileDownload:
                        if self.verbose:
                            p = ' [*] Exalead PDF file was downloaded: ' + \
                                str(url)
                            print helpers.color(p, firewall=True)
                        self.Text += convert.convert_pdf_to_txt(FileName)
                except Exception as e:
                    pass
                try:
                    dl.delete_file(FileName)
                except Exception as e:
                    print e
        except:
            print helpers.color(" [*] No PDF's to download from Exalead!\n", firewall=True)

        if self.verbose:
            p = ' [*] Searching PDF from Exalead Complete'
            print helpers.color(p, status=True)
Code Example #16
File: GoogleXLSXSearch.py Project: bjb28/SimplyEmail
 def search(self):
     convert = Converter.Converter(verbose=self.verbose)
     while self.Counter <= self.Limit and self.Counter <= 100:
         time.sleep(1)
         if self.verbose:
             p = " [*] Google XLSX Search on page: " + str(self.Counter)
             self.logger.info("Google XLSX Search on page: " +
                              str(self.Counter))
             print helpers.color(p, firewall=True)
         try:
             urly = ("https://www.google.com/search?q=site:" + self.Domain +
                     "+filetype:xlsx&start=" + str(self.Counter))
         except Exception as e:
             error = " [!] Major issue with Google XLSX Search:" + str(e)
             self.logger.error("GoogleXlsxSearch failed to build url: " +
                               str(e))
             print helpers.color(error, warning=True)
         try:
             r = requests.get(urly)
         except Exception as e:
             error = " [!] Fail during Request to Google (Check Connection):" + str(
                 e)
             self.logger.error(
                 "GoogleXlsxSearch failed to request url (Check Connection): "
                 + str(e))
             print helpers.color(error, warning=True)
         RawHtml = r.content
         soup = BeautifulSoup(RawHtml)
         # I use this to parse my results for URLs to follow
         for a in soup.findAll("a"):
             try:
                 # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
                 # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
                 # newreg=01f0ed80771f4dfaa269b15268b3f9a9
                 l = urlparse.parse_qs(urlparse.urlparse(
                     a["href"]).query)["q"][0]
                 if l.startswith("http") or l.startswith("www"):
                     if "webcache.googleusercontent.com" not in l:
                         self.urlList.append(l)
             except:
                 pass
         self.Counter += 10
         helpers.modsleep(self.Sleep, jitter=self.Jitter)
     # now download the required files
     self.logger.debug(
         "GoogleXlsxSearch completed HTML result query, starting downloads")
     try:
         for url in self.urlList:
             if self.verbose:
                 p = " [*] Google XLSX search downloading: " + str(url)
                 self.logger.info("Google XLSX search downloading: " +
                                  str(url))
                 print helpers.color(p, firewall=True)
             try:
                 filetype = ".xlsx"
                 dl = Download.Download(self.verbose)
                 FileName, FileDownload = dl.download_file(url, filetype)
                 if FileDownload:
                     if self.verbose:
                         p = " [*] Google XLSX file was downloaded: " + str(
                             url)
                         self.logger.info(
                             "Google XLSX file was downloaded: " + str(url))
                         print helpers.color(p, firewall=True)
                     self.Text += convert.convert_Xlsx_to_Csv(FileName)
                 # print self.Text
             except Exception as e:
                 print helpers.color(" [!] Issue with opening Xlsx Files\n",
                                     firewall=True)
                 self.logger.error("Google XLSX had issue opening file")
             try:
                 dl.delete_file(FileName)
             except Exception as e:
                 self.logger.error("Google XLSX failed to delete file: " +
                                   str(e))
     except Exception as e:
         print helpers.color(" [*] No XLSX's to download from google!\n",
                             firewall=True)
         self.logger.error("No XLSX's to download from google! " + str(e))
Code Example #17
    def search(self):
        convert = Converter.Converter(verbose=self.verbose)
        while self.Counter <= self.Limit:
            time.sleep(1)
            if self.verbose:
                p = ' [*] Exalead Search on page: ' + str(self.Counter)
                self.logger.info("ExaleadDOCXSearch on page: " +
                                 str(self.Counter))
                print helpers.color(p, firewall=True)
            try:
                url = 'http://www.exalead.com/search/web/results/?q="%40' + self.Domain + \
                      '"+filetype:docx&elements_per_page=' + \
                    str(self.Quanity) + '&start_index=' + str(self.Counter)
            except Exception as e:
                self.logger.error("Issue building URL to search")
                error = " [!] Major issue with Exalead DOCX Search: " + str(e)
                print helpers.color(error, warning=True)
            try:
                r = requests.get(url, headers=self.UserAgent)
            except Exception as e:
                error = " [!] Fail during Request to Exalead (Check Connection):" + str(
                    e)
                print helpers.color(error, warning=True)
            try:
                RawHtml = r.content
                # sometimes the URL is broken, but the Exalead search results
                # still contain the e-mail addresses
                self.Text += RawHtml
                soup = BeautifulSoup(RawHtml, "lxml")
                self.urlList = [
                    h2.a["href"]
                    for h2 in soup.findAll('h4', class_='media-heading')
                ]
            except Exception as e:
                self.logger.error("Fail during parsing result: " + str(e))
                error = " [!] Fail during parsing result: " + str(e)
                print helpers.color(error, warning=True)
            self.Counter += 30

        # now download the required files
        try:
            for url in self.urlList:
                if self.verbose:
                    p = ' [*] Exalead DOCX search downloading: ' + str(url)
                    self.logger.info("Starting download of DOCX: " + str(url))
                    print helpers.color(p, firewall=True)
                try:
                    filetype = ".docx"
                    dl = Download.Download(self.verbose)
                    FileName, FileDownload = dl.download_file(url, filetype)
                    if FileDownload:
                        if self.verbose:
                            self.logger.info("File was downloaded: " +
                                             str(url))
                            p = ' [*] Exalead DOCX file was downloaded: ' + \
                                str(url)
                            print helpers.color(p, firewall=True)
                        self.Text += convert.convert_docx_to_txt(FileName)
                except Exception as e:
                    self.logger.error("Issue with opening DOCX Files: " +
                                      str(e))
                    error = " [!] Issue with opening DOCX Files:%s\n" % (
                        str(e))
                    print helpers.color(error, warning=True)
                try:
                    dl.delete_file(FileName)
                except Exception as e:
                    print e
        except Exception as e:
            p = " [*] No DOCX's to download from Exalead: " + e
            self.logger.info("No DOCX's to download from Exalead: " + str(e))
            print helpers.color(p, firewall=True)

        if self.verbose:

            p = ' [*] Searching DOCX from Exalead Complete'
            self.logger.info("Searching DOCX from Exalead Complete")
            print helpers.color(p, status=True)
Code Example #18
 def search(self):
     # setup for helpers in the download class
     convert = Converter.Converter(verbose=self.verbose)
     dl = Download.Download(self.verbose)
     while self.Counter <= self.Limit and self.Counter <= 100:
         time.sleep(1)
         if self.verbose:
             p = ' [*] Google PDF Search on page: ' + str(self.Counter)
             print helpers.color(p, firewall=True)
         try:
             urly = "https://www.google.com/search?q=site:" + \
                 self.Domain + "+filetype:pdf&start=" + str(self.Counter)
         except Exception as e:
             error = " [!] Major issue with Google Search:" + str(e)
             print helpers.color(error, warning=True)
         try:
             r = requests.get(urly)
         except Exception as e:
             error = " [!] Fail during Request to Google (Check Connection):" + \
                 str(e)
             print helpers.color(error, warning=True)
         RawHtml = r.content
         # get redirect URL
         # Url = r.url
         dl.GoogleCaptchaDetection(RawHtml)
         soup = BeautifulSoup(RawHtml)
         for a in soup.findAll('a'):
             try:
                 # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
                 # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
                 # newreg=01f0ed80771f4dfaa269b15268b3f9a9
                 l = urlparse.parse_qs(
                     urlparse.urlparse(a['href']).query)['q'][0]
                 if l.startswith('http') or l.startswith('www'):
                     if "webcache.googleusercontent.com" not in l:
                         self.urlList.append(l)
             except:
                 pass
         self.Counter += 10
     # now download the required files
     try:
         for url in self.urlList:
             if self.verbose:
                 p = ' [*] Google PDF search downloading: ' + str(url)
                 print helpers.color(p, firewall=True)
             try:
                 filetype = ".pdf"
                 # use new helper class to download file
                 FileName, FileDownload = dl.download_file(url, filetype)
                 # check if the file was downloaded
                 if FileDownload:
                     if self.verbose:
                         p = ' [*] Google PDF file was downloaded: ' + \
                             str(url)
                         print helpers.color(p, firewall=True)
                     self.Text += convert.convert_pdf_to_txt(FileName)
             except Exception as e:
                 print e
             try:
                 # now remove any files left behind
                 dl.delete_file(FileName)
             except Exception as e:
                 print e
     except:
         print helpers.color(" [*] No PDF's to download from Google!\n", firewall=True)
Code Example #19
    def search(self):
        dl = Download.Download(self.verbose)
        while self.Counter <= self.Limit and self.Counter <= 100:
            time.sleep(1)
            if self.verbose:
                p = ' [*] Google Search for PasteBin on page: ' + \
                    str(self.Counter)
                self.logger.info("GooglePasteBinSearch on page: " +
                                 str(self.Counter))
                print helpers.color(p, firewall=True)
            try:
                url = "http://www.google.com/search?num=" + str(self.Quanity) + "&start=" + str(self.Counter) + \
                      '&hl=en&meta=&q=site:pastebin.com+"%40' + \
                    self.Domain + '"'
            except Exception as e:
                error = " [!] Major issue with Google Search for PasteBin:" + \
                    str(e)
                self.logger.error(
                    "GooglePasteBinSearch could not create URL: " + str(e))
                print helpers.color(error, warning=True)

            try:
                r = requests.get(url, headers=self.UserAgent)
            except Exception as e:
                error = " [!] Fail during Request to PasteBin (Check Connection):" + str(
                    e)
                self.logger.error(
                    "Fail during Request to PasteBin (Check Connection): " +
                    str(e))
                print helpers.color(error, warning=True)
            try:
                RawHtml = r.content
                try:
                    # check for captcha in the source
                    dl.GoogleCaptchaDetection(RawHtml)
                except Exception as e:
                    self.logger.error("Issue checking for captcha: " + str(e))
                soup = BeautifulSoup(RawHtml, "lxml")
                for a in soup.select('.r a'):
                    # remove urls like pastebin.com/u/Anonymous
                    if "/u/" not in str(a['href']):
                        self.urlList.append(a['href'])
            except Exception as e:
                error = " [!] Fail during parsing result: " + str(e)
                self.logger.error(
                    "PasteBinSearch Fail during parsing result: " + str(e))
                print helpers.color(error, warning=True)
            self.Counter += 100
        # Now take all gathered URLs and gather the raw content needed
        for Url in self.urlList:
            try:
                Url = "http://pastebin.com/raw/" + str(Url).split('/')[3]
                data = requests.get(Url, timeout=2)
                self.Text += data.content
            except Exception as e:
                error = "[!] Connection Timed out on PasteBin Search:" + str(e)
                self.logger.error(
                    "Connection Timed out on PasteBin raw download: " + str(e))
                print helpers.color(error, warning=True)

        if self.verbose:
            p = ' [*] Searching PasteBin Complete'
            self.logger.info("Searching PasteBin Complete")
            print helpers.color(p, firewall=True)
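The raw-download step above rewrites a scraped result link such as http://pastebin.com/<id> into Pastebin's raw endpoint by taking the fourth slash-separated segment of the URL. A minimal sketch with a made-up paste id:

    url = "http://pastebin.com/Ab1Cd2Ef"      # hypothetical scraped result link
    paste_id = str(url).split('/')[3]         # -> 'Ab1Cd2Ef'
    raw_url = "http://pastebin.com/raw/" + paste_id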