Ejemplo n.º 1
0
 def is404(self, source):
     """Report whether ``source`` contains bundle string number 31.

     A match is logged at level 4, with bundle string 22 as the title and
     bundle string 30 as the message, before the result is returned.

     Args:
         source: Text (or container) searched with the ``in`` operator.

     Returns:
         True when bundle string 31 occurs in ``source``, otherwise False.
     """
     marker_found = Bundle().getString(31) in source
     if marker_found:
         Logging(4, Bundle().getString(22), Bundle().getString(30))
     return marker_found
Ejemplo n.º 2
0
 def Read(self, path):
     """Return the whole content of the file at ``path``, decoded as UTF-8.

     Args:
         path: Location of the file to read.

     Returns:
         The file's text. When ``path`` is not an existing regular file an
         error is logged (message: bundle string 13) and "" is returned.
     """
     # Guard clause: handle the missing-file case first.
     if not os.path.isfile(path):
         Logging("Error", 'read', Bundle().getString(13))
         return ""
     with open(path, "r", encoding="utf-8") as handle:
         content = handle.read()
     return content
Ejemplo n.º 3
0
 def __init__(self, address, data, PROXY_ROTATION, AGENT_ROTATION):
     """Store the request target, payload and rotation switches.

     Args:
         address: Target of the request. Logged at level 1 (title: bundle
             string 22) when it is not the empty string.
         data: Stored unchanged on ``self.data``.
         PROXY_ROTATION: Stored unchanged on ``self.PROXY_ROTATION``.
         AGENT_ROTATION: Stored unchanged on ``self.AGENT_ROTATION``.
     """
     if address != "":
         Logging(1, Bundle().getString(22), address)
     # Assigned unconditionally: an empty address used to leave the
     # attribute undefined, so any later read of self.address raised
     # AttributeError instead of seeing "".
     self.address = address
     self.data = data
     self.PROXY_ROTATION = PROXY_ROTATION
     self.AGENT_ROTATION = AGENT_ROTATION
     self.OUTPUT = ''
Ejemplo n.º 4
0
 def __execute__(self, command):
     """Run ``command`` as a script through ``self.chrome.execute_script``.

     The script is attempted at most twice: a JavascriptException on the
     first attempt triggers exactly one retry. Each attempt is logged at
     level 1 with bundle string 28 as the title. A TypeError is ignored and
     a NoSuchWindowException only prints a notice; both end the call
     without a retry.

     Args:
         command: The script to run; converted with ``str``.

     Raises:
         RuntimeError: When both attempts fail with JavascriptException. The
             message is bundle string 48 and the last failure is chained as
             the cause. (The previous code tried to ``raise`` the bare
             string, which Python rejects with a TypeError, and retried
             through unbounded recursion.)
     """
     script = str(command)
     last_failure = None
     for _attempt in range(2):
         Logging(1, Bundle().getString(28), script)
         try:
             self.chrome.execute_script(script)
             return
         except JavascriptException as error:
             # Remember the failure and fall through to the next attempt.
             last_failure = error
         except TypeError:
             # Deliberately ignored, as before.
             return
         except NoSuchWindowException:
             print("CLOSED JS")
             return
     raise RuntimeError(Bundle().getString(48)) from last_failure
Ejemplo n.º 5
0
 def CountAllPages(self):
     """Queue the URL of every result page of ``self.category``.

     Points the browser at ``self.category``, navigates and scrolls, then
     reads the page count from the ``pagnDisabled`` span. For each page
     number a link is built by replacing "page=1" in the browser address;
     the link is appended to ``self.SUBSTACK`` and to
     "DB/AWS_OFFERS_SUBPAGES.sql". Finishes by calling
     ``self.OctopusquickLook()``.

     Raises:
         Whatever ``int()`` raises when the extracted page count is not
         numeric.
     """
     xpath = '//span[@class="pagnDisabled"]/text()'
     self.chrome.address = self.category
     self.chrome.Navigate()
     self.chrome.Scroll_V_V()
     total = int(self.lxmlExtract(xpath))
     Logging(1, "AMAZON", "TOTAL FOUND " + str(total) + " PAGES")
     first_page = self.chrome.address
     # The address being rewritten carries "page=1" for the first page, so
     # numbering starts at 1. range(total) produced "page=0" and never
     # reached the last page.
     for page in range(1, total + 1):
         link = first_page.replace("page=1", "page=" + str(page))
         self.SUBSTACK.append(link)
         self.FS.Append("DB/AWS_OFFERS_SUBPAGES.sql", link, False)
     self.OctopusquickLook()
Ejemplo n.º 6
0
 def getString(self, num):
     """Return the text after the first "#" on line ``num`` of ``self.path``.

     Args:
         num: 1-based line number; any value ``int()`` accepts.

     Returns:
         The segment between the first and second "#" of the stripped line
         (everything after the first "#" when there is only one). None when
         the file does not exist (a level-2 message is logged), when the
         line does not exist, or when the line contains no "#".

     Raises:
         ValueError: When ``num`` cannot be converted with ``int()``.
     """
     if not os.path.isfile(self.path):
         Logging(2, "XPATH BUNDLE", "XPATH FILE NOT FOUND")
         return None
     # Converted once instead of once per line read.
     wanted = int(num) - 1
     with open(self.path, "r") as reader:
         for index, line in enumerate(reader):
             if index != wanted:
                 continue
             # Stop at the requested line instead of scanning the rest of
             # the file; a stripped line is always a str, so no None check
             # is needed.
             fields = line.strip().split("#")
             return fields[1] if len(fields) > 1 else None
     return None
Ejemplo n.º 7
0
 def __new_ip__(self):
     """Send the NEWNYM signal through the controller on port 9051.

     Authenticates against the controller, sets the default SOCKS5 proxy to
     127.0.0.1:9050, then sends ``Signal.NEWNYM``.

     Returns:
         True when every step completed. False when any exception was
         raised; the failure is logged at level 3 with bundle string 57 as
         the title and bundle string 58 plus the exception text as message.
     """
     try:
         with Controller.from_port(port=9051) as tor_control:
             tor_control.authenticate(password='******')
             socks.setdefaultproxy(
                 proxy_type=socks.PROXY_TYPE_SOCKS5,
                 addr="127.0.0.1",
                 port=9050,
             )
             tor_control.signal(Signal.NEWNYM)
     except Exception as error:
         title = Bundle().getString(57)
         details = Bundle().getString(58) + '\n' + str(error)
         Logging(3, title, details)
         return False
     return True
Ejemplo n.º 8
0
    def __init__(self, selector):
        """Load the selector file named after ``selector``.

        The file is looked up at
        ``<cwd>/Spider/Extractor/Selectors/<selector>.cort``. When it is not
        an existing file, a level-2 message (bundle strings 60 and 61) is
        logged and the interpreter is asked to terminate via ``exit()``.

        Args:
            selector: Base name of the ``.cort`` file; converted with ``str``.
        """
        self.delimiter, self.delimiter2, self.newline = ":", '~', "\n"
        self.path = "".join(
            (os.getcwd(), "/Spider/Extractor/Selectors/", str(selector), ".cort")
        )
        selector_missing = not os.path.isfile(self.path)
        if selector_missing:
            Logging(2, Bundle().getString(60), Bundle().getString(61))
            exit()

        print(self.path)
        # Raw file content as returned by the file-system helper.
        self.stack = fs().Read(self.path)
        self.cluster = dict(Keys=[], Values=[])
        self.__xpath__ = None
        self.end = False
Ejemplo n.º 9
0
 def __aws_random__(self):
     """Build a header set for requests to www.amazon.com.

     Every value is fixed except ``user-agent``, which comes from
     ``AgencyFactory().__gen__()``. A level-1 message ("HEADERS",
     "AMAZON.COM") is logged once the headers are assembled.

     Returns:
         dict mapping header names to their values, in insertion order.
     """
     user_agent = AgencyFactory().__gen__()
     header_pairs = [
         ('authority', 'www.amazon.com'),
         ('pragma', 'no-cache'),
         ('cache-control', 'no-cache'),
         ('dnt', '1'),
         ('upgrade-insecure-requests', '1'),
         ('user-agent', user_agent),
         ('accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'),
         ('sec-fetch-site', 'none'),
         ('sec-fetch-mode', 'navigate'),
         ('sec-fetch-dest', 'document'),
         ('accept-language', 'en-GB,en-US;q=0.9,en;q=0.8'),
     ]
     header = dict(header_pairs)
     Logging(1, "HEADERS", "AMAZON.COM")
     return header
Ejemplo n.º 10
0
 def getImage(self):
     """Request ``self.address`` as a stream and return the raw response.

     When ``self.PROXY_ROTATION`` is truthy, ``Tor().__new_ip__()`` is called
     and the session proxies come from ``ProxyFactory().__tor_gen__()``.
     When ``self.AGENT_ROTATION`` is truthy, the session headers come from
     ``HeadersFactory().__aws_random__()``.

     Ordinary exceptions raised while fetching do not reach the caller; they
     are logged instead. This keeps the contract the previous code had in
     practice, where a ``return`` inside ``finally`` discarded every
     in-flight exception. KeyboardInterrupt and SystemExit are no longer
     swallowed.

     Returns:
         ``self.OUTPUT`` when it holds something other than None or "";
         otherwise "" after logging bundle strings 26 and 25 at level 1.
     """
     # Bundle message id per failure type. Order matters: subclasses such as
     # ConnectTimeout, SSLError and ProxyError must be matched before the
     # broader Timeout / ConnectionError entries. The old duplicate
     # URLRequired handler (message "2") was unreachable and is dropped.
     failure_messages = (
         (exceptions.URLRequired, "1"),
         (exceptions.MissingSchema, "3"),
         (exceptions.InvalidSchema, "4"),
         (exceptions.TooManyRedirects, "5"),
         (exceptions.ConnectTimeout, "6"),
         (exceptions.Timeout, "7"),
         (exceptions.HTTPError, "8"),
         (exceptions.SSLError, "9"),
         (exceptions.ProxyError, "10"),
         (exceptions.ConnectionError, "11"),
     )
     try:
         with req.Session() as session:
             if self.PROXY_ROTATION:
                 Tor().__new_ip__()
                 session.proxies = ProxyFactory().__tor_gen__()
             if self.AGENT_ROTATION:
                 session.headers = HeadersFactory().__aws_random__()
             self.OUTPUT = session.get(self.address, stream=True).raw
     except Exception as error:
         # The old handlers did `raise <str>`, which is itself a TypeError
         # and was then discarded by the `finally: return`. Log instead.
         message_id = next(
             (number for kind, number in failure_messages
              if isinstance(error, kind)),
             None,
         )
         message = None
         if message_id is not None:
             message = Bundle().getString(message_id)
         # NOTE(review): level 3 is assumed to be the error level; confirm
         # against the Logging helper.
         Logging(3, Bundle().getString(26), message or str(error))

     if self.OUTPUT is not None and self.OUTPUT != "":
         return self.OUTPUT
     Logging(1, Bundle().getString(26), Bundle().getString(25))
     return ""
Ejemplo n.º 11
0
 def noCapcha(self, url):
     """Fetch the image at ``url`` and return the text OCR extracts from it.

     The image is downloaded with proxy and agent rotation both enabled,
     opened with ``Image.open`` and passed to
     ``pytesseract.image_to_string`` using the '--psm 7' config. The result
     is stripped and all spaces are removed, then logged at level 2 with
     bundle string 58 as the title.

     Args:
         url: Address of the captcha image.

     Returns:
         The recognised text without surrounding whitespace or spaces.
     """
     image_stream = __http__(url, "", True, True).getImage()
     picture = Image.open(image_stream)
     recognised = pytesseract.image_to_string(picture, config='--psm 7')
     captcha = recognised.strip().replace(" ", "")
     Logging(2, Bundle().getString(58), captcha)
     return captcha
Ejemplo n.º 12
0
 def __get_ip__(self):
     """Log the response of a GET to the address stored as bundle string 18.

     The request is made with proxy and agent rotation both enabled; the
     response is logged at level 1 with bundle string 23 as the title.
     Nothing is returned.
     """
     lookup_address = __bundle__().getString(18)
     reported_ip = __http__(lookup_address, "", True, True).GET()
     log_title = __bundle__().getString(23)
     Logging(1, log_title, reported_ip)
Ejemplo n.º 13
0
 def __gen__(self):
     """Pick a user-agent string via ``UserAgent(verify_ssl=False).random``.

     The chosen value is logged at level 1 with bundle string 24 as title.

     Returns:
         The selected user-agent value.
     """
     agent_source = UserAgent(verify_ssl=False)
     chosen_agent = agent_source.random
     Logging(1, __bundle__().getString(24), chosen_agent)
     return chosen_agent