Example #1
 def get_page(self, link, form=None):
     #return source code.
     if self.is_running():
         try:
             with URLClose(request.url_open(link, self.cookie, form)) as s:
                 return s.read(BUFF_SZ)
         except (urllib2.URLError, httplib.HTTPException,
                 socket.error) as err:
             logger.debug(link)
             raise ParsingError(err)
Example #2
    def check(self, link):
        """"""
        name = "Unknown"
        size = 0
        status_msg = None
        link_status = cons.LINK_ERROR
        #for retry_count in range(RETRIES):
        api = "http://turbobit.net/linkchecker/csv"
        form = urllib.urlencode({"links_to_check": link, })
        try:
            with URLClose(URLOpen().open(api, form)) as s:
                line = s.read()
                active = int(line.split(", ")[-1].strip())
                if active:
                    link_status = cons.LINK_ALIVE
                    with URLClose(URLOpen().open(link)) as s:
                        for line in s:
                            if 'class="download-file' in line:
                                s.next()
                                line_ = s.next()
                                name = line_.split("'>")[-1].split("<")[0]
                                tmp = line_.split('>')[-1].strip()
                                size = float(tmp.split("(")[-1].split(",")[0])
                                unit = tmp.split(" ")[-1].split(")")[0]
                                if unit.lower() == "kb":
                                    size = size * 1024
                                elif unit.lower() == "mb":
                                    size = size * 1024 * 1024
                                elif unit.lower() == "gb":
                                    size = size * 1024 * 1024 * 1024
                                break
                if link_status != cons.LINK_ALIVE:
                    link_status = cons.LINK_DEAD
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
        except Exception as err:
            status_msg = "Error: {0}".format(err)
            logger.exception(err)

        return link_status, name, size, status_msg
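
Note: several checkers in this listing repeat the same KB/MB/GB-to-bytes conversion inline. A small helper along the following lines could factor it out; this is a sketch for illustration, not part of the original plugins.

def to_bytes(size, unit):
    #Convert a (size, unit) pair such as (15.3, "MB") into bytes.
    #Unrecognized units fall back to a factor of 1 (plain bytes).
    factors = {"kb": 1024, "mb": 1024 ** 2, "gb": 1024 ** 3}
    return size * factors.get(unit.lower(), 1)
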
Example #3
 def add(self): #get_login etc. are methods from premium_accounts.py
     """
     Wupload API. (the link only stays active for a short time, so it is not used here)
     http://api.wupload.com/link?method=getDownloadLink&u=USER-EMAIL&p=PASSWORD&ids=1234
     
     try:
         tmp = link.split("/file/")[1].split("/")[0]
         link = "%s/file/%s" % (BASE_URL, tmp)
         file_id = link.split("/")[-1].strip("/")
         
         url = "http://api.wupload.com/link?method=getDownloadLink"
         dict_form = {"u": username, "p": password, "ids": file_id}
         
         with URLClose(URLOpen().open(url, urllib.urlencode(dict_form), range=(content_range, None)), always_close=False) as s:
             rsp = s.read()
             rsp_dict = json.loads(rsp)
             link_list = rsp_dict["FSApi_Link"]["getDownloadLink"]["response"]["links"]
             link_dict = link_list[0]
             link_file = link_dict["url"] #http:\/\/s74.wupload.com\/apidownload\/
             link_file = link_file.replace("\\", "")
             #link_file = url + "&u=" + username + "&p=" + password + "&ids=" + file_id
     except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
         err_msg = err
     except Exception as err:
         logger.exception(err)
         err_msg = err
     """
     link_file = None
     err_msg = None
     source = None
     cookie = None
     
     cookie = self.get_cookie()
     try:
         status = cookie._cookies['.wupload.com']['/']['role'].value
     except Exception as err:
         logger.exception(err)
         cookie = None
         status = None
     
     if cookie and status == "premium": #login success
         try:
             with URLClose(URLOpen(cookie).open(self.link, range=(self.content_range, None)), always_close=False) as s:
                 source = s
                 #link_file = s.url
                 link_file = self.link
         except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
             err_msg = err
     
     return cookie, link_file, source, err_msg #may be the file object or None.
Example #4
def get_solved_captcha(url, cookie, filter=None):
    """
    @params: filter = a function wrapping one or more clean_image functions.
    """
    try:
        with URLClose(request.get(url, cookie=cookie)) as s:
            image_data = s.read()
        t = Tesseract(image_data, filter)
        result = t.get_captcha()
    except Exception as err:
        logger.exception(err)
        return None
    else:
        return result
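
Note: the `filter` argument above is documented as a wrapper around one or more clean_image functions applied before OCR. A minimal sketch of such a wrapper, using a hypothetical no-op cleaner as a stand-in for the project's real image-cleaning helpers (whether the real Tesseract wrapper hands the filter raw bytes or a decoded image depends on the project):

def noop_clean(image_data):
    #Hypothetical placeholder for a real clean_image step; returns its input unchanged.
    return image_data

def example_filter(image_data):
    #Compose one or more cleaning steps; here only the no-op placeholder.
    return noop_clean(image_data)

#Usage, assuming get_solved_captcha from the example above:
#result = get_solved_captcha(url, cookie, filter=example_filter)
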
Example #5
 def get_cookie(self):  #cookie_handler
     """
     Usage:
     if cookie is not None:
         #connection success
         if cookie:
             #login success
         else:
             #login fail
     else:
         #server down
     """
     #for retry in range(COOKIE_CONNECTION_RETRY):
     url = "http://www.filesonic.com/user/login"
     dict_form = {
         "email": self.username,
         "redirect": "/",
         "password": self.password
     }
     headers = {
         "Content-type": "application/x-www-form-urlencoded",
         "X-Requested-With": "XMLHttpRequest",
         "Accept": "application/json"
     }
     try:
         cookie = cookielib.CookieJar()
         with URLClose(
                 URLOpen(cookie).open(url,
                                      urllib.urlencode(dict_form),
                                      headers=headers)
         ) as s:  #eg: url= login-url, data = {"login": "******", "redir": "1", "username": user, "password": password}
             rsp_json = s.read()
             #print rsp_json
             #try:
             #dict_json = cjson.decode(rsp_json)
             #print dict_json["status"]
             #except:
             #pass
         #status = cookie._cookies['.filesonic.com']['/']['role'].value #anonymous, free or premium
         #if status == "anonymous": #login fail like.
         #return []
     except Exception as err:  #this only happens on http errors, not on bad logins, etc.
         logger.warning(err)
         #host_down = True
     else:
         return cookie
     return None  #server down, can't connect.
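
Note: the docstring above defines a three-state contract for the returned jar: None means the server could not be reached, an empty jar means the login failed, and a populated jar means the login succeeded (Example #13 relies on the same convention). A minimal caller sketch under that assumption:

def describe_login(handler):
    #`handler` is assumed to expose get_cookie() as in the example above.
    cookie = handler.get_cookie()
    if cookie is None:
        return "server down"    #could not connect
    elif cookie:
        return "login success"  #the jar holds session cookies
    else:
        return "login fail"     #connected, but no cookies were set
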
Example #6
 def get_page(self, link, form=None, close=True):
     #return source code.
     if self.is_running():
         link = utils.url_unescape(link)
         range = (None, None) if close else (self.content_range, None)
         try:
             with URLClose(request.url_open(link, self.cookie, form, range),
                           close) as s:
                 if close:
                     return s.read(BUFF_SZ)
                 else:
                     self.dl_link = link
                     return s
         except (urllib2.URLError, httplib.HTTPException,
                 socket.error) as err:
             logger.debug(link)
             raise ParsingError(err)
Example #7
    def check(self, link):
        """
        Rapidshare api: http://images.rapidshare.com/apidoc.txt
        
        Status integer, which can have the following numeric values:
            0=File not found
            1=File OK
            3=Server down
            4=File marked as illegal
            5=Direct download
        """
        name = "Unknown"
        size = 0
        status_msg = None
        link_status = cons.LINK_ERROR

        FILE_ID, FILE_NAME, SIZE, SERVER_ID, STATUS, SHORT_HOST, MD5 = range(7)

        try:
            id = link.split("/")[-2]
            file_name = link.split("/")[-1]
            #http://api.rapidshare.com/cgi-bin/rsapi.cgi?sub=subroutine&files=value1&filenames=value2
            with URLClose(
                    request.post("http://api.rapidshare.com/cgi-bin/rsapi.cgi",
                                 data={
                                     "sub": "checkfiles",
                                     "files": id,
                                     "filenames": file_name
                                 },
                                 timeout=10)) as s:
                tmp = s.read().split(",")
                #print tmp
                name = tmp[FILE_NAME]
                size = int(tmp[SIZE])
                if int(tmp[STATUS]) in (1, 5):  #alive or direct download
                    link_status = cons.LINK_ALIVE
                elif int(tmp[STATUS]) == 3:  #server down
                    link_status = cons.LINK_UNAVAILABLE
                else:
                    link_status = cons.LINK_DEAD
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
        except Exception as err:
            status_msg = "Error: {0}".format(err)
            logger.exception(err)
        return link_status, name, size, status_msg
Example #8
    def check(self, link):
        """"""
        video_id = link.split("&")[0].split("=")[-1]

        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
            video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                              % (video_id, el_type))
            with URLClose(request.get(video_info_url)) as s:
                video_info = parse_qs(s.read())
                if 'token' in video_info:
                    #print video_info
                    #print video_info_url
                    break

        video_title = urllib.unquote_plus(video_info['title'][0])

        return cons.LINK_ALIVE, video_title, 0, None
Example #9
    def check(self, link):
        """"""
        name = "Unknown"
        size = 0
        status_msg = None
        link_status = cons.LINK_ERROR
        #for retry_count in range(RETRIES):
        try:
            with URLClose(request.get(link)) as s:
                alive = False
                for line in s:
                    if '<title>' in line:
                        tmp = line.split("-")
                        if len(tmp) > 2:
                            tmp_name = link.split("/files/")[-1].split("/")
                            if len(tmp_name) == 2:
                                name = tmp_name[-1].rstrip(
                                    ".html")  #complete name
                            else:
                                name = tmp[0].strip().split(" ")[-1]  #shortened name, ie: filenam...part1.rar
                            link_status = cons.LINK_ALIVE
                            alive = True
                        else:
                            link_status = cons.LINK_DEAD
                    elif alive and "<h1>" in line and name in line:
                        tmp = line.split("-")[-1].strip()
                        unit = tmp.split(" ")[-1]  #
                        size = float(tmp.split(" ")[0])
                        #convert size to bytes.
                        if "kb" in unit.lower():
                            size = size * 1024
                        elif "mb" in unit.lower():
                            size = size * 1024 * 1024
                        elif "gb" in unit.lower():
                            size = size * 1024 * 1024 * 1024
                        break
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
        except Exception as err:
            status_msg = "Error: {0}".format(err)
            logger.exception(err)

        return link_status, name, size, status_msg
Example #10
    def check(self, link):
        """"""
        name = "Unknown"
        size = 0
        status_msg = None
        link_status = cons.LINK_ERROR

        try:
            #strip file name
            tmp = link.split("/file/")[1].split("/")[0]
            link = "%s/file/%s" % (BASE_URL, tmp)
            link_quoted = urllib.quote_plus(link)
            with URLClose(
                    request.get(
                        "http://www.filefactory.com/tool/links.php?func=links&links="
                        + link_quoted,
                        timeout=10)) as s:
                alive = False
                for line in s:
                    if 'Available' in line:
                        alive = True
                    elif alive:
                        if 'class="metadata"' in line:
                            name = line.split('class="metadata">')[-1].split(
                                '</div>')[0].split('/')[-1].strip()
                            name = html_entities_parser(name)
                            s.next()
                            size_list = s.next().split("<td>")[-1].split(
                                "</td>")[0].split(" ")
                            #size = "".join(size_list)
                            size = int(float(size_list[0]))
                            link_status = cons.LINK_ALIVE
                            break
            if link_status != cons.LINK_ALIVE:
                link_status = cons.LINK_DEAD
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
            logger.warning(err)
        except Exception as err:
            status_msg = "Error: {0}".format(err)
            logger.exception(err)

        return link_status, name, size, status_msg
Example #11
    def check(self, link):
        """"""
        name = cons.UNKNOWN
        size = 0
        status_msg = None
        link_status = cons.LINK_ERROR
        #for retry_count in range(RETRIES):
        try:
            if "/video/" in link:
                link = link.replace("/video/", "/download/")
            elif "/audio/" in link:
                link = link.replace("/audio/", "/download/")
            elif "/image/" in link:
                link = link.replace("/image/", "/download/")
            with URLClose(URLOpen().open(link)) as s:
                for line in s:
                    if 'File Name:' in line:
                        name = s.next().split("</font>")[0].split(
                            '>')[-1].strip()
                        name = misc.html_entities_parser(name)
                    elif 'File Size:' in line:
                        tmp = line.split("</font>")[0].split('>')[-1].strip()
                        unit = tmp[-2:].strip()
                        size = float(tmp[:-2])
                        #convert size to bytes.
                        if unit.lower() == "kb":
                            size = size * 1024
                        elif unit.lower() == "mb":
                            size = size * 1024 * 1024
                        elif unit.lower() == "gb":
                            size = size * 1024 * 1024 * 1024
                        break
            if size:
                link_status = cons.LINK_ALIVE
            else:
                link_status, name, size = cons.LINK_DEAD, cons.UNKNOWN, 0
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
        except Exception as err:
            name, size = cons.UNKNOWN, 0
            logger.exception(err)

        return link_status, name, size, status_msg
Example #12
 def get_account_status(self, cookie):
     """
     Wupload API.
     """
     try:
         url = "http://api.wupload.com/user?method=getInfo"
         dict_form = {"u": self.username, "p": self.password}
         with URLClose(URLOpen().open(url, urllib.urlencode(dict_form))) as s:
             rsp = s.read()
         rsp_dict = json.loads(rsp)
         #if rsp_dict["FSApi_User"]["getInfo"]["status"] == "success":
         is_premium = rsp_dict["FSApi_User"]["getInfo"]["response"]["users"]["user"]["is_premium"]
         if is_premium:
             return cons.ACCOUNT_PREMIUM
         else:
             return cons.ACCOUNT_FREE
     except KeyError as err:
         return cons.ACCOUNT_FAIL
     except Exception as err:
         #ValueError: json exception.
         logger.exception(err)
     return cons.ACCOUNT_ERROR
Example #13
 def get_account_status(self, cookie):
     """"""
     try:
         if cookie is not None:  #None: could not connect to the server
             if cookie:  #empty jar: login failed
                 with URLClose(
                         URLOpen(cookie).open("http://www.megaupload.com")
                 ) as s:  #Close the connection on exception or once reading is done.
                     premium = False
                     for line in s:
                         if 'class="stars_' in line:
                             premium = True
                             break
                     if premium:
                         return cons.ACCOUNT_PREMIUM
                     else:
                         return cons.ACCOUNT_FREE
             else:
                 return cons.ACCOUNT_FAIL
         else:
             return cons.ACCOUNT_ERROR
     except (urllib2.URLError, httplib.HTTPException, socket.error) as e:
         return cons.ACCOUNT_ERROR
Example #14
    def check(self, link):
        """"""
        name = "Unknown"
        size = 0
        status_msg = None
        link_status = cons.LINK_ERROR
        #for retry_count in range(RETRIES):
        try:
            with URLClose(request.get(link, timeout=10)) as s:
                for line in s:
                    if 'class="f_arial f_14px"' in line:
                        name = line.split('"f_arial f_14px">')[-1].split(
                            '<')[0].strip()
                        name = misc.html_entities_parser(name)
                        tmp = s.next().split(":")[-1].split("<")[0].strip()
                        unit = tmp.split(" ")[-1].strip()
                        size = float(tmp.split(" ")[0].strip())
                        #convert size to bytes.
                        if unit.lower() == "kb":
                            size = size * 1024
                        elif unit.lower() == "mb":
                            size = size * 1024 * 1024
                        elif unit.lower() == "gb":
                            size = size * 1024 * 1024 * 1024
                        break
            if size:
                link_status = cons.LINK_ALIVE
            else:
                link_status, name, size = cons.LINK_DEAD, cons.UNKNOWN, 0
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
        except Exception as err:
            status_msg = "Error: {0}".format(err)
            name, size = cons.UNKNOWN, 0
            logger.exception(err)

        return link_status, name, size, status_msg
Example #15
 def check(self, link):
     """"""
     name = cons.UNKNOWN
     size = 0
     status_msg = None
     link_status = cons.LINK_ERROR
     #for retry_count in range(RETRIES):
     try:
         id = link.split("d=")[-1].strip()
         if "&" in id:
             id = id.split("&")[0]
         #TODO: send the cookie.
         with URLClose(
                 URLOpen().open(
                     "http://www.megaupload.com/mgr_linkcheck.php",
                     form=urllib.urlencode({"id0": id}))
         ) as s:  #urllib.urlencode encodes the dict for the POST: http://www.megaupload.com/mgr_linkcheck.php&id0=id
             tmp = s.read().split("&")  #returns a list.
             if len(tmp) > 4:
                 name = "&".join(tmp[5:]).split("n=")[1]
                 size = int(tmp[3].split("s=")[1])
                 link_status = cons.LINK_ALIVE
             elif tmp[2] == 'id0=3':  #id0=1 dead, id0=3 unavailable, id0=0 alive
                 #name = "Unknown"
                 #size = None
                 link_status = cons.LINK_UNAVAILABLE
                 status_msg = "Temporarily Unavailable. You can add the file anyway (it will be downloaded later)"  #not used, yet.
             else:
                 link_status = cons.LINK_DEAD
     except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
         status_msg = "Error: {0}".format(err)
         #if isinstance(err.reason, socket.timeout):
         #break
     except Exception as err:
         logger.exception(err)

     return link_status, name, size, status_msg
Example #16
    def check(self, link):
        """"""
        name = cons.UNKNOWN
        size = 0
        status_msg = None
        link_status = cons.LINK_ERROR
        #for retry_count in range(RETRIES):
        try:
            with URLClose(URLOpen().open(link)) as s:
                for line in s:
                    if 'name="description"' in line:
                        name = line.split('content="')[-1].split(
                            " | Free file hosting")[0]
                        name = misc.html_entities_parser(name)
                    elif "File Size:</b>" in line:
                        tmp = line.split("</b>")[-1].split("</div>")[0].strip()
                        unit = tmp[-2:]
                        size = float(tmp[:-2])
                        #convert size to bytes.
                        if unit == "KB":
                            size = size * 1024
                        elif unit == "MB":
                            size = size * 1024 * 1024
                        elif unit == "GB":
                            size = size * 1024 * 1024 * 1024
                        break
            if size:
                link_status = cons.LINK_ALIVE
            else:
                link_status, name, size = cons.LINK_DEAD, cons.UNKNOWN, 0
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
        except Exception as err:
            name, size = cons.UNKNOWN, 0
            logger.exception(err)

        return link_status, name, size, status_msg
Example #17
    def check(self, link):
        """"""
        name = cons.UNKNOWN
        size = 0
        status_msg = None
        link_status = cons.LINK_ERROR
        #for retry_count in range(RETRIES):
        try:
            with URLClose(request.get(link)) as s:
                found = False
                for line in s:
                    if 'download_file_title">' in line:
                        found = True
                        link_status = cons.LINK_ALIVE
                        name = line.split('download_file_title">')[-1].split(
                            '<')[0].strip()
                        tmp = line.split('class="download_link')[1].split(
                            '<span>(')[-1].split(')')[0].strip()
                        unit = tmp.split(" ")[-1].strip()
                        size = float(tmp.split(" ")[0].strip())

                        #convert size to bytes.
                        if unit.lower() == "kb":
                            size = size * 1024
                        elif unit.lower() == "mb":
                            size = size * 1024 * 1024
                        elif unit.lower() == "gb":
                            size = size * 1024 * 1024 * 1024
                        break
                if not found:
                    link_status = cons.LINK_DEAD
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
        except Exception as err:
            logger.exception(err)
        return link_status, name, size, status_msg
Example #18
    def add(self):  #wait_func: wait method from thread_managed
        """
        TODO: Refactor.
        """
        link_file = None
        err_msg = None
        source = None
        wait = WAITING

        try:
            file_id = self.link.split("turbobit.net/")[-1].split(
                "/")[0].rstrip(".html")
            self.link = BASE_URL + "/download/free/" + file_id
            cookie = cookielib.CookieJar()
            opener = URLOpen(cookie)  #cookielib

            with URLClose(opener.open(self.link)) as s1:
                key = None
                for line in s1:
                    if "challenge?k=" in line:
                        key = line.split('challenge?k=')[-1].split('"')[0]
                        recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % key
                        c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                        break
                if key is None:
                    raise LimitExceededException("Limit Exceeded")
                for retry in range(3):
                    challenge, response = c.solve_captcha()
                    if response is not None:
                        form = urllib.urlencode([
                            ("recaptcha_challenge_field", challenge),
                            ("recaptcha_response_field", response),
                            ("captcha_type", "recaptcha"),
                            ("captcha_subtype", "")
                        ])
                        with URLClose(opener.open(self.link, form)) as s2:
                            found = False
                            for line in s2:
                                if "limit :" in line:
                                    found = True
                                    try:
                                        wait = int(
                                            line.split(":")[-1].split(",")
                                            [0]) / 100  #ms
                                    except Exception as err:
                                        logger.exception(err)
                                        wait = WAITING
                                elif "captcha-error" in line:
                                    err_msg = "Wrong captcha"
                            if found:
                                if self.wait_func(wait + 1):
                                    return self.link, None, err_msg
                                url = BASE_URL + "/download/getLinkAfterTimeout/" + file_id
                                print url
                                with URLClose(opener.open(url)) as s3:
                                    for line in s3:
                                        print line
                                        if "href='/download/redirect" in line:
                                            tmp = line.split(
                                                "href='")[-1].split("'")[0]
                                            redir_url = BASE_URL + tmp
                                            print redir_url
                                            with URLClose(opener.open(redir_url)) as s4:
                                                for line in s4:
                                                    if 'href="' in line:
                                                        link_file = line.split('href="')[-1].split('"')[0]
                                                        #print link_file
                                                        with URLClose(
                                                                opener.open(link_file, range=(self.content_range, None)),
                                                                always_close=False
                                                        ) as s5:
                                                            source = s5
                                                        raise FileLinkFoundException()
                    else:
                        raise CaptchaException("No response from the user")

        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            err_msg = err
        except (FileLinkFoundException, LimitExceededException,
                LinkErrorException, CaptchaException) as err:
            if isinstance(err, LimitExceededException):
                self.set_limit_exceeded(True)
            err_msg = err
            logger.info(err)
        except Exception as err:
            logger.exception(err)
            err_msg = err

        return link_file, source, err_msg
Example #19
    def add(self):
        """"""
        link_file = None
        err_msg = None
        source = None
        wait = WAITING
        max_retries = 3

        try:
            #Remove the filename from the url
            tmp = self.link.split("/file/")[1].split("/")[0]
            self.link = "%s/file/%s" % (BASE_URL, tmp)

            cookie = cookielib.CookieJar()
            opener = URLOpen(cookie)  #cookielib

            with URLClose(opener.open(self.link)) as s:
                if self.wait_func():
                    return self.link, None, err_msg
                for line in s:
                    if 'check:' in line:
                        check = line.split("check:'")[1].replace("'",
                                                                 "").strip()
                    elif "Recaptcha.create" in line:
                        tmp = line.split('("')[1].split('"')[0]
                        recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                        c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                        for retry in range(3):
                            if self.wait_func():
                                return self.link, None, err_msg
                            if retry < (max_retries + 1):
                                challenge, response = c.solve_captcha()
                                if response is not None:

                                    #Filefactory performs a check on its server by doing an
                                    #Ajax request sending the following data
                                    form = urllib.urlencode([
                                        ("recaptcha_challenge_field",
                                         challenge),
                                        ("recaptcha_response_field", response),
                                        ("recaptcha_shortencode_field",
                                         "undefined"), ("check", check)
                                    ])
                                    recaptcha_link = "%s/file/checkCaptcha.php" % BASE_URL

                                    #Getting the result back, status:{"ok"|"fail"}
                                    with URLClose(
                                            opener.open(recaptcha_link,
                                                        form)) as sa:
                                        if self.wait_func():
                                            return self.link, None, err_msg
                                        for line in sa:
                                            if 'status:"ok"' in line:
                                                tmp = line.split(
                                                    'path:"')[-1].split('"')[0]
                                                tmp_link = "%s%s" % (BASE_URL,
                                                                     tmp)
                                                with URLClose(
                                                        opener.open(
                                                            tmp_link)) as sb:
                                                    if self.wait_func():
                                                        return self.link, None, err_msg
                                                    for line in sb:
                                                        if 'countdown">' in line:
                                                            #Try to get WAIT from the page
                                                            try:
                                                                tmp = line.split(
                                                                    'countdown">'
                                                                )[-1].split(
                                                                    "<")[0]
                                                                tmp = int(tmp)
                                                                if tmp > 320:
                                                                    raise LimitExceededException(
                                                                        "Limit Exceeded"
                                                                    )
                                                            except ValueError:
                                                                pass
                                                            else:
                                                                if tmp > 0:
                                                                    wait = tmp
                                                        if 'id="downloadLinkTarget' in line:
                                                            link_file = line.split(
                                                                '<a href="'
                                                            )[1].split('"')[0]
                                                            if self.wait_func(
                                                                    wait):
                                                                return self.link, None, err_msg
                                                            with URLClose(
                                                                    opener.open(link_file, range=(self.content_range, None)),
                                                                    always_close=False
                                                            ) as sc:
                                                                try:
                                                                    if sc.status == 302:  #redirect error 302.
                                                                        raise RedirectException("Redirection error")
                                                                except AttributeError as err:  #not redirected.
                                                                    source = sc
                                                            raise FileLinkFoundException()
                                else:
                                    raise CaptchaException(
                                        "No response from the user")
                            else:
                                raise CaptchaException(
                                    "Captcha, max retries reached")
                        raise LinkNotFoundException()
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            err_msg = err
        except (RedirectException, FileLinkFoundException,
                LinkNotFoundException, CaptchaException,
                LimitExceededException) as err:
            if isinstance(err, LimitExceededException):
                self.set_limit_exceeded(True)
            elif not isinstance(err, FileLinkFoundException):
                logger.info(err)
                err_msg = err
        except Exception as err:
            logger.exception(err)

        return self.link, source, err_msg  #may be the file object or None.
Example #20
    def add(self):  #wait_func: wait method from thread_managed
        """
        TODO: Refactor.
        """
        link_file = None
        err_msg = None
        source = None
        wait = None
        found = False

        try:
            #Remove the filename from the url
            tmp = self.link.split("/file/")[1].split("/")[0]
            self.link = "%s/file/%s" % (BASE_URL, tmp)

            file_id = self.link.split("/")[-1].strip("/")
            cookie = cookielib.CookieJar()
            opener = URLOpen(cookie)  #cookielib

            #form = urllib.urlencode([("checkTimeLimit", "check")]) #deprecated by fileserve
            form = urllib.urlencode([("checkDownload", "check")])
            post_result = opener.open(self.link, form).read()
            if "success" not in post_result:
                if "timeLimit" in post_result:
                    raise LimitExceededException("Limit Exceeded")
                else:
                    raise LinkErrorException("Link Error")

            with URLClose(opener.open(self.link)) as s:
                for line in s:
                    if 'reCAPTCHA_publickey=' in line:
                        tmp = line.split("'")[1].split("'")[0]
                        recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                        if self.wait_func(
                        ):  #wait... if true: download was stopped
                            return self.link, None, err_msg
                        c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                        for retry in range(3):
                            if self.wait_func(
                            ):  #wait... if true: download was stopped
                                return self.link, None, err_msg
                            challenge, response = c.solve_captcha()
                            if response is not None:
                                #Submit the input to the recaptcha system
                                form = urllib.urlencode([
                                    ("recaptcha_challenge_field", challenge),
                                    ("recaptcha_response_field", response),
                                    ("recaptcha_shortencode_field", file_id)
                                ])
                                recaptcha_url = "%s/checkReCaptcha.php" % BASE_URL

                                #Captcha is good
                                #on error: {"success":0,"error":"incorrect-captcha-sol"}
                                #on success: {"success":1}
                                with URLClose(opener.open(recaptcha_url,
                                                          form)) as sa:
                                    if "error" not in sa.read():
                                        form = urllib.urlencode([
                                            ("downloadLink", "wait")
                                        ])
                                        with URLClose(
                                                opener.open(self.link,
                                                            form)) as sb:
                                            wait = int(sb.read()[-2:])  #sometimes gives fail404
                                            if self.wait_func(
                                                    wait
                                            ):  #wait... if true: download was stopped
                                                return self.link, None, err_msg
                                            form = urllib.urlencode([
                                                ("downloadLink", "show")
                                            ])
                                            with URLClose(
                                                    opener.open(
                                                        self.link,
                                                        form)) as sc:
                                                if self.wait_func(
                                                ):  #wait... if true: download was stopped
                                                    return self.link, None, err_msg
                                                sc.read()
                                                form = urllib.urlencode([
                                                    ("download", "normal")
                                                ])
                                                with URLClose(
                                                        opener.open(self.link, form, range=(self.content_range, None)),
                                                        always_close=False
                                                ) as sd:
                                                    if sd.url == self.link:  #link not found or weird countdown issue
                                                        #logger.debug(sd.read())
                                                        raise LinkErrorException(
                                                            "Link Error, redirected"
                                                        )
                                                    else:
                                                        source = sd  #,content_range)
                                            break
                                    else:
                                        err_msg = "Wrong captcha"
                            else:
                                raise CaptchaException(
                                    "No response from the user")
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            err_msg = err
        except (LimitExceededException, LinkErrorException,
                CaptchaException) as err:
            if isinstance(err, LimitExceededException):
                self.set_limit_exceeded(True)
            err_msg = err
            logger.info(err)
        except Exception as err:
            logger.exception(err)
            err_msg = err

        return self.link, source, err_msg  #may be the file object or None.
Example #21
    def thread_download(self, fh, i, chunk, is_first):
        # The first thread won't retry.
        # A thread already downloading its chunk won't retry.
        # A thread that is neither first nor downloading should retry.
        is_downloading = False
        buf = StringIO()
        len_buf = 0

        def flush():
            self.flush_buffer(fh, i, chunk, buf, len_buf)

        try:
            with URLClose(self.get_source(chunk, is_first)) as s:
                if not is_first and not self.is_valid_range(s, chunk[START]):
                    raise BadSource(
                        'Link expired, or cant download the requested range.')

                with self.lock2:
                    if self.chunks_control[i]:
                        self.chunks_control[i] = False
                        self.conn_count += 1
                        is_downloading = True
                    else:
                        raise CanNotRun(
                            'Another thread has taken over this chunk.')

                while True:
                    data = s.read(NT_BUFSIZ)
                    len_data = len(data)

                    buf.write(data)
                    len_buf += len_data
                    chunk = (chunk[START] + len_data, chunk[END])

                    if len_buf >= DATA_BUFSIZ:
                        flush()
                        buf = StringIO()
                        len_buf = 0

                    with self.lock2:
                        self.size_complete += len_data

                    if self.bucket.rate:
                        nap = self.bucket.consume(len_data)
                        if nap:  # avoid thread switching if nap == 0
                            time.sleep(nap)

                    if self.stop_flag or self.error_flag:
                        return

                    if not len_data or (chunk[END] and chunk[START] >=
                                        chunk[END]):  # end may be 0
                        flush()
                        buf = StringIO()
                        len_buf = 0
                        logger.debug("complete {0} {1}".format(
                            chunk[START], chunk[END]))
                        if not self.is_chunk_complete(chunk):
                            raise IncompleteChunk('Incomplete chunk')
                        chunk = self.dl_next_chunk(chunk, i + 1)
                        logger.debug("keep dl {0} {1}".format(
                            chunk[START], chunk[END]))
                        i += 1

        except IncompleteChunk as err:
            # propagate
            self.set_err(err)
        except (BadSource, CanNotRun) as err:
            # do not propagate
            logger.debug(err)
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            if is_first or is_downloading:
                # propagate
                self.set_err(err)
            else:
                logger.debug(err)
                # retry?
        except EnvironmentError as err:
            # propagate
            self.set_err(err)
        finally:
            if is_downloading:
                flush()
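
Note: thread_download throttles itself through `self.bucket.consume(len_data)`, which returns how long the calling thread should sleep to stay under the configured rate (0 means no wait). A minimal token-bucket sketch with that interface; this is an assumption about the shape of the real bucket object, not its actual implementation:

import time

class RateBucket(object):
    #Token bucket: consume(nbytes) returns the seconds to sleep so that the
    #average throughput stays at or below `rate` bytes per second.
    def __init__(self, rate):
        self.rate = rate    #bytes per second; 0 or None disables throttling
        self.tokens = 0.0
        self.last = time.time()

    def consume(self, nbytes):
        if not self.rate:
            return 0
        now = time.time()
        self.tokens = min(self.rate, self.tokens + (now - self.last) * self.rate)
        self.last = now
        self.tokens -= nbytes
        if self.tokens >= 0:
            return 0
        return -self.tokens / self.rate    #seconds until the deficit refills
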
Example #22
    def add(self):  #wait_func: wait method from thread_managed
        """
        http://api.wupload.com/
        """
        link_file = None
        err_msg = None
        source = None
        wait = WAITING
        found = False

        try:
            #Remove the filename from the url
            tmp = self.link.split("/file/")[1].split("/")[0]
            self.link = "%s/file/%s" % (BASE_URL, tmp)

            file_id = self.link.split("/")[-1].strip("/")
            cookie = cookielib.CookieJar()
            opener = URLOpen(cookie)  #cookielib

            #form = urllib.urlencode([("checkTimeLimit", "check")]) #deprecated by fileserve
            if self.wait_func():  #wait... if true: download was stopped
                return self.link, None, err_msg
            """
            form_action = "{0}?start=1".format(link)
            it = opener.open(form_action)
            form_action = "{0}?start=1".format(it.geturl()) #get redirect url
            #end = form_action.split(".")[2].split("/")[0] #get .com replacement
            form_action2 = "{0}/{1}?start=1".format(link, file_id)
            #form_action2 = form_action2.replace(".com", end)
            form = urllib.urlencode([("foo", "foo")]) #force urllib2 to do a post
            headers = {"X-Requested-With": "XMLHttpRequest", }
            """

            it = opener.open(self.link)
            form_action = "{0}?start=1".format(it.geturl())
            form = urllib.urlencode({})
            headers = {
                "X-Requested-With": "XMLHttpRequest",
            }

            with URLClose(opener.open(form_action, form,
                                      headers=headers)) as s:
                if self.wait_func():  #wait... if true: download was stopped
                    return self.link, None, err_msg
                #when there is a countdown, the page needs to be reloaded and searched for the captcha again.
                for countdown in range(3):
                    for line in s:
                        if 'Recaptcha.create("' in line:
                            tmp = line.split('"')[1].split('"')[0]
                            recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                            if self.wait_func(
                            ):  #wait... if true: download was stopped
                                return self.link, None, err_msg
                            c = Recaptcha(BASE_URL, recaptcha_link,
                                          self.wait_func)
                            for retry in range(3):
                                challenge, response = c.solve_captcha()
                                if self.wait_func(
                                ):  #wait... if true: download was stopped
                                    return self.link, None, err_msg
                                if response is not None:
                                    #Submit the input to the recaptcha system
                                    form = urllib.urlencode([
                                        ("recaptcha_challenge_field",
                                         challenge),
                                        ("recaptcha_response_field", response)
                                    ])
                                    #recaptcha_url = "%s/checkReCaptcha.php" % BASE_URL

                                    with URLClose(
                                            opener.open(form_action,
                                                        form)) as sa:
                                        for line in sa:
                                            if 'downloadLink' in line:
                                                sa.next()
                                                link_file = sa.next().split(
                                                    'href="')[-1].split('"')[0]
                                                with URLClose(
                                                        opener.open(link_file, range=(self.content_range, None)),
                                                        always_close=False
                                                ) as sd:
                                                    source = sd  #,content_range)
                                                raise FileLinkFoundException()
                                else:
                                    raise CaptchaException(
                                        "No response from the user")
                                if retry == 2:
                                    raise CaptchaException(
                                        "Captcha, max retries reached")
                        #Link already there O.o
                        elif 'downloadLink' in line:
                            s.next()
                            link_file = s.next().split('href="')[-1].split(
                                '"')[0]
                            with URLClose(opener.open(
                                    link_file,
                                    range=(self.content_range, None)),
                                          always_close=False) as sd:
                                source = sd  #,content_range)
                            raise FileLinkFoundException()
                        #waiting... ?
                        elif "name='tm'" in line:
                            tm = line.split("value='")[-1].split("'")[0]
                            tm_hash = s.next().split("value='")[-1].split(
                                "'")[0]
                            form = urllib.urlencode([("tm", tm),
                                                     ("tm_hash", tm_hash)])
                        #waiting...
                        elif "var countDownDelay" in line:
                            wait = int(line.split("=")[1].split(";")[0])
                            if wait < 60:
                                if self.wait_func(wait):
                                    return self.link, None, err_msg
                                s = opener.open(form_action, form)  #fetch the page again, but posting tm and tm_hash
                                break
                            else:
                                raise LimitExceededException("Limit Exceeded")
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            err_msg = err
        except (FileLinkFoundException, CaptchaException,
                LimitExceededException, LinkNotFoundException) as err:
            if isinstance(err, LimitExceededException):
                self.set_limit_exceeded(True)
            elif not isinstance(err, FileLinkFoundException):
                logger.info(err)
                err_msg = err
        except Exception as err:
            logger.exception(err)
            err_msg = err

        return self.link, source, err_msg  #may be the file object or None.
Example #23
    def add(self):  #wait_func: wait method from thread_managed
        """
        TODO: Refactor.
        """
        link_file = None
        err_msg = None
        source = None
        wait = WAITING

        try:
            if "/ul.to/" in self.link:
                file_id = self.link.split("/ul.to/")[-1].split("/")[0]
            else:
                file_id = self.link.split("/file/")[-1].split("/")[0]
            self.link = BASE_URL + "/file/" + file_id

            cookie = cookielib.CookieJar()
            opener = URLOpen(cookie)  #cookielib

            with URLClose(opener.open(self.link)) as s1:
                if self.wait_func():
                    return self.link, None, err_msg
                for line in s1:
                    if 'class="free' in line:
                        try:
                            s1.next()
                            wait = int(s1.next().split("<span>")[-1].split(
                                "</span>")[0])
                        except Exception as err:
                            logger.exception(err)
                            wait = WAITING
                        break
                form = urllib.urlencode({})
                form_url = BASE_URL + "/io/ticket/slot/" + file_id
                with URLClose(opener.open(form_url, form)) as s2:
                    s = s2.read()
                    if "succ:true" in s:
                        if self.wait_func(wait):
                            return self.link, None, err_msg
                        js_url = BASE_URL + "/js/download.js"
                        with URLClose(opener.open(js_url)) as s3:
                            if self.wait_func():
                                return self.link, None, err_msg
                            for line in s3:
                                if 'Recaptcha.create("' in line:
                                    key = line.split('Recaptcha.create("')[
                                        -1].split('"')[0].strip()
                                    break
                        print key
                        recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % key
                        c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                        for retry in range(3):
                            challenge, response = c.solve_captcha()
                            if response is not None:
                                form_url = BASE_URL + "/io/ticket/captcha/" + file_id
                                form = urllib.urlencode([
                                    ("recaptcha_challenge_field", challenge),
                                    ("recaptcha_response_field", response)
                                ])
                                with URLClose(opener.open(form_url,
                                                          form)) as s4:
                                    if self.wait_func():
                                        return self.link, None, err_msg
                                    s = s4.read()
                                    if "download" in s:
                                        link_file = s.split("url:'")[-1].split(
                                            "'")[0]
                                        print link_file
                                        with URLClose(
                                                opener.open(
                                                    link_file,
                                                    range=(self.content_range,
                                                           None)),
                                                always_close=False) as s5:
                                            source = s5
                                        raise FileLinkFoundException()
                                    elif "limit-dl" in s:
                                        raise LimitExceededException(
                                            "Limit Exceeded")
                                    else:  #{err:"captcha"}
                                        print s
                                        err_msg = "Wrong captcha"
                            else:
                                raise CaptchaException(
                                    "No response from the user")
                    else:
                        raise LinkErrorException("Link not found")

        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            err_msg = err
        except (FileLinkFoundException, LimitExceededException,
                LinkErrorException, CaptchaException) as err:
            if isinstance(err, LimitExceededException):
                self.set_limit_exceeded(True)
            err_msg = err
            logger.info(err)
        except Exception as err:
            logger.exception(err)
            err_msg = err

        return link_file, source, err_msg
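
A step shared by most plugins in this file is to URL-encode the solved reCAPTCHA pair and POST it back to the host, then inspect the reply body. A minimal sketch of just that step, assuming the same URLOpen/URLClose helpers used above; submit_recaptcha is a hypothetical name, not part of the original plugin API:

import urllib

def submit_recaptcha(opener, form_url, challenge, response):
    #Hypothetical helper: POST a solved reCAPTCHA pair and return the raw reply.
    form = urllib.urlencode([("recaptcha_challenge_field", challenge),
                             ("recaptcha_response_field", response)])
    with URLClose(opener.open(form_url, form)) as s:
        return s.read()  #the caller inspects the body ("download", "limit-dl", ...)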
Example #24
0
    def add(self):
        """"""
        try:
            link_file = None
            err_msg = None
            source = None
            cookie = cookielib.CookieJar()
            form = None
            max_retries = 3

            for retry in range(max_retries + 1):
                try:
                    file_id = self.link.split(".com/?")[-1].split("/")[0]

                    with URLClose(URLOpen(cookie).open(self.link, form)) as s:
                        if self.wait_func():
                            return self.link, None, err_msg
                        s_lines = s.readlines()
                        for line in s_lines:
                            if 'class="download_link' in line:
                                div_list = line.split('<div')
                                tmp_list = [
                                    div for div in div_list
                                    if 'class="download_link' in div
                                ]
                                tmp_list = [
                                    ref_tag for ref_tag in tmp_list
                                    if file_id in ref_tag
                                ]
                                link_file = tmp_list[0].split(
                                    'href="')[1].split('"')[0]
                            #Recaptcha
                            if "challenge?k=" in line:
                                if retry < max_retries:  #last pass falls through to "max retries reached"
                                    recaptcha_key = line.split(
                                        "challenge?k=")[-1].split('"')[0]
                                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % recaptcha_key
                                    c = Recaptcha(BASE_URL, recaptcha_link,
                                                  self.wait_func)
                                    challenge, response = c.solve_captcha()
                                    if response is not None:
                                        #Submit the input to the recaptcha system
                                        form = urllib.urlencode([
                                            ("recaptcha_challenge_field",
                                             challenge),
                                            ("recaptcha_response_field",
                                             response), ("downloadp", "")
                                        ])
                                        raise PostCaptchaException(
                                            "Post captcha solution")
                                    else:
                                        raise CaptchaException(
                                            "No response from the user")
                                else:
                                    raise CaptchaException(
                                        "Captcha, max retries reached")
                except PostCaptchaException:
                    pass  #loop again, this time POSTing the solved captcha form
                else:
                    break

            if link_file is not None:
                with URLClose(URLOpen(cookie).open(link_file,
                                                   range=(self.content_range,
                                                          None)),
                              always_close=False) as s:
                    source = s
                print link_file
            else:
                raise LinkNotFoundException("Link not found")
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            print err
            err_msg = err
        except (CaptchaException, LinkNotFoundException) as err:
            print err
            err_msg = err
            logging.exception(err)
        except Exception as err:
            err_msg = err
            print err
            logging.exception(err)

        return link_file, source, err_msg  #source may be the file object or None.
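
The download-link extraction in Example #24 filters the '<div' fragments in two passes; the same lookup can be expressed in a single comprehension. A minimal sketch of that alternative, assuming the same line/file_id values; pick_download_link is an illustrative name:

def pick_download_link(line, file_id):
    #Hypothetical helper: return the href of the download_link div matching file_id,
    #or None when the page does not contain one.
    candidates = [div for div in line.split('<div')
                  if 'class="download_link' in div and file_id in div]
    if not candidates:
        return None
    return candidates[0].split('href="')[1].split('"')[0]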
Example #25
0
    def add(self):
        """
        TODO: Refactor.
        """
        link_file = None
        err_msg = None
        source = None
        wait = WAITING

        try:
            cookie = cookielib.CookieJar()
            opener = URLOpen(cookie)  #cookielib

            #url parse
            if "file_id" in self.link:  #most likely not.
                file_id = self.link.split("file_id=")[-1].split("&")[0]
            else:
                file_id = self.link.split("netload.in/datei")[-1].split(
                    "/")[0].split(".")[0]
            self.link = BASE_URL + "/" + "index.php?id=10&file_id=" + file_id

            with URLClose(opener.open(self.link)) as s1:
                if self.wait_func():
                    return self.link, None, err_msg
                for line in s1:
                    if 'class="Free_dl' in line:
                        id = line.split("?id=")[-1].split("&")[0]
                        url = BASE_URL + "/" + line.split('href="')[-1].split(
                            '"')[0].replace("&amp;", "&")
                        break
                with URLClose(opener.open(url)) as s2:
                    for line in s2:
                        if "captcha.php" in line:
                            captcha_url = BASE_URL + "/" + line.split(
                                'src="')[-1].split('"')[0]
                        elif ">countdown(" in line:
                            try:
                                wait = int(
                                    line.split(">countdown(")[-1].split(",")
                                    [0]) / 100  #countdown value is in 1/100 s
                            except Exception as err:
                                logger.exception(err)
                                wait = WAITING
                    if self.wait_func(wait + 1):
                        return self.link, None, err_msg
                    captcha_result = tesseract.get_solved_captcha(
                        captcha_url, cookie, self.filter)
                    form = urllib.urlencode([("file_id", file_id),
                                             ("captcha_check", captcha_result),
                                             ("start", "")])
                    captcha_form_url = BASE_URL + "/" + "index.php?id=" + id
                    with URLClose(opener.open(captcha_form_url, form)) as s3:
                        for line in s3:
                            if ">countdown(" in line:
                                try:
                                    wait = int(
                                        line.split(">countdown(")[-1].split(
                                            ",")[0]) / 100  #ms
                                except Exception as err:
                                    logger.exception(err)
                                    wait = WAITING
                            elif 'class="Orange_Link' in line:
                                link_file = line.split('href="')[-1].split(
                                    '"')[0]
                        if wait > 600:  # 10 minutes
                            raise LimitExceededException("Limit exceeded")
                        if self.wait_func(wait + 1):
                            return self.link, None, err_msg
                        with URLClose(opener.open(link_file,
                                                  range=(self.content_range,
                                                         None)),
                                      always_close=False) as s4:
                            source = s4
                        raise FileLinkFoundException()

        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            err_msg = err
        except (FileLinkFoundException, LimitExceededException,
                LinkErrorException, CaptchaException) as err:
            if isinstance(err, LimitExceededException):
                self.set_limit_exceeded(True)
            err_msg = err
            logger.info(err)
        except Exception as err:
            logger.exception(err)
            err_msg = err

        return link_file, source, err_msg
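
Both countdown() parses in the netload example divide the extracted value by 100 before treating it as seconds, which suggests the page reports the wait in hundredths of a second rather than milliseconds. A minimal sketch of that parse as one helper; the function name and the WAITING fallback are assumptions:

def parse_countdown_seconds(line, default=WAITING):
    #Hypothetical helper: extract the wait from a ">countdown(NNNN," snippet.
    #The page value appears to be in 1/100 s, hence the division by 100.
    try:
        return int(line.split(">countdown(")[-1].split(",")[0]) / 100
    except ValueError:
        return default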
Example #26
0
    def add(self):  #wait_func: wait method from thread_managed
        """"""
        link_file = None
        err_msg = None
        source = None
        wait = WAITING

        try:
            file_id = self.link.split("/files/")[1].split("/")[0]
            self.link = BASE_URL + "/files/" + file_id

            cookie = cookielib.CookieJar()
            opener = URLOpen(cookie)  #cookielib

            form = urllib.urlencode([
                ("gateway_result", "1"),
            ])

            with URLClose(opener.open(self.link)) as s1:
                if self.wait_func():
                    return self.link, None, err_msg
                s1.read()
                with URLClose(opener.open(self.link, form)) as s2:
                    key = None  #recaptcha key; stays None if not present on the page
                    fid = None
                    for line in s2:
                        if 'download_waiter_remain">' in line:
                            wait = int(
                                line.split('download_waiter_remain">')
                                [-1].split('<')[0])
                        elif "Recaptcha.create('" in line:
                            key = line.split("Recaptcha.create('")[-1].split(
                                "'")[0]
                        elif "var fid" in line:
                            fid = line.split("'")[1]
                        elif 'limit_interval">' in line:
                            wait = int(
                                line.split('limit_interval">')[-1].split("<")
                                [0])
                            if wait > 320:
                                raise LimitExceededException("Limit Exceeded")

                    if self.wait_func(wait):
                        return self.link, None, err_msg
                    if key is not None:
                        recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % key
                        c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                        for retry in range(3):
                            if self.wait_func(
                            ):  #wait... if true: download was stopped
                                return self.link, None, err_msg
                            challenge, response = c.solve_captcha()
                            if response is not None:
                                recaptcha_url = BASE_URL + "/get_file.php?fid=" + fid + "&challenge=" + challenge + "&response=" + response
                                with URLClose(
                                        opener.open(recaptcha_url)) as s3:
                                    if self.wait_func():
                                        return self.link, None, err_msg
                                    for line in s3:
                                        if 'form action="' in line and not "recaptcha" in line:
                                            link_file = line.split(
                                                'form action="')[-1].split(
                                                    '"')[0]
                                            #print link_file
                                            with URLClose(
                                                    opener.open(
                                                        link_file,
                                                        range=(
                                                            self.content_range,
                                                            None)),
                                                    always_close=False) as s4:
                                                source = s4
                                            raise FileLinkFoundException()
                                    err_msg = "Wrong captcha"
                            else:
                                raise CaptchaException(
                                    "No response from the user")

        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            err_msg = err
        except (FileLinkFoundException, LimitExceededException,
                LinkErrorException, CaptchaException) as err:
            if isinstance(err, LimitExceededException):
                self.set_limit_exceeded(True)
            err_msg = err
            logger.info(err)
        except Exception as err:
            logger.exception(err)
            err_msg = err

        return link_file, source, err_msg
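
The get_file.php request in Example #26 concatenates fid, challenge and response straight into the query string. As an alternative sketch (not the plugin's original code), the same URL can be built with urllib.urlencode so the values are escaped:

import urllib

def build_getfile_url(base_url, fid, challenge, response):
    #Hypothetical helper: same request as above, with the query values URL-escaped.
    params = urllib.urlencode([("fid", fid),
                               ("challenge", challenge),
                               ("response", response)])
    return base_url + "/get_file.php?" + params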
Example #27
0
    def add(self):
        """"""
        link_file = None
        err_msg = None
        source = None
        wait = WAITING

        try:
            #Remove the filename from the url
            file_id = self.link.split("/oron.com/")[1].split("/")[0]
            self.link = "%s/%s" % (BASE_URL, file_id)

            cookie = cookielib.CookieJar()

            with URLClose(URLOpen(cookie).open(self.link)) as s:
                if self.wait_func():
                    return self.link, None, err_msg
                fname = None
                for line in s:
                    if 'name="fname"' in line:
                        fname = line.split('value="')[-1].split('"')[0]
                if fname is not None:
                    dict_form = {
                        "op": "download1",
                        "usr_login": "",
                        "id": file_id,
                        "fname": fname,
                        "referer": "",
                        "method_free": " Regular Download "
                    }
                    headers = {
                        "Content-type": "application/x-www-form-urlencoded",
                    }
                    with URLClose(
                            URLOpen(cookie).open(self.link,
                                                 urllib.urlencode(dict_form),
                                                 headers=headers)) as sa:
                        if self.wait_func():
                            return self.link, None, err_msg
                        rand = None
                        referer = None
                        recaptcha_key = None
                        for line in sa:
                            if 'id="countdown"' in line:
                                wait = int(
                                    line.split('id="countdown">')[-1].split(
                                        '<')[0].strip())
                            elif 'name="rand"' in line:
                                rand = line.split('value="')[-1].split('"')[0]
                            elif 'name="referer"' in line:
                                referer = line.split('value="')[-1].split(
                                    '"')[0]
                            elif "challenge?k=" in line:
                                recaptcha_key = line.split(
                                    "challenge?k=")[-1].split('"')[0]
                        if None not in (rand, referer, recaptcha_key):
                            if self.wait_func(
                                    wait
                            ):  #wait... if true: download was stopped
                                return self.link, None, err_msg

                            recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % recaptcha_key
                            c = Recaptcha(BASE_URL, recaptcha_link,
                                          self.wait_func)
                            challenge, response = c.solve_captcha()
                            if response is not None:
                                dict_form = {
                                    "op": "download2",
                                    "id": file_id,
                                    "rand": rand,
                                    "referer": referer,
                                    "method_free": " Regular Download ",
                                    "method_premium": "",
                                    "recaptcha_challenge_field": challenge,
                                    "recaptcha_response_field": response,
                                    "down_direct": "1"
                                }
                                with URLClose(
                                        URLOpen(cookie).open(
                                            self.link,
                                            urllib.urlencode(dict_form),
                                            headers=headers)) as sb:
                                    if self.wait_func():
                                        return self.link, None, err_msg
                                    for line in sb:
                                        if 'class="atitle"' in line:
                                            link_file = line.split(
                                                'href="')[-1].split('"')[0]
                                        elif "Wrong captcha" in line:
                                            raise CaptchaException(
                                                "Wrong captcha")
                                    if link_file is not None:
                                        with URLClose(
                                                URLOpen(cookie).open(
                                                    link_file,
                                                    range=(self.content_range,
                                                           None)),
                                                always_close=False) as sc:
                                            source = sc
                                    else:  #link not found
                                        raise LinkErrorException("Link Error")
                            else:
                                raise CaptchaException(
                                    "No response from the user")
                        else:  #limit exceeded
                            #TODO: Fix for big files (+1gb), since regular users cant download them
                            raise LimitExceededException("Limit Exceeded")
                else:  #link not found
                    raise LinkErrorException("Link Error")
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            err_msg = err
        except (LimitExceededException, CaptchaException,
                LinkErrorException) as err:
            if isinstance(err, LimitExceededException):
                self.set_limit_exceeded(True)
            err_msg = err
            logger.info(err)
        except Exception as err:
            err_msg = err
            logger.exception(err)

        return link_file, source, err_msg  #source may be the file object or None.
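
The oron example scans the first response for three values (rand, referer and the reCAPTCHA key) and only proceeds when all of them were found. A minimal sketch of that scan written around a marker-to-field dict; the helper name and the dict contents are illustrative, not part of the original plugin:

def scan_hidden_fields(lines):
    #Hypothetical helper: collect value="..." fields keyed by the markers below.
    markers = {'name="rand"': "rand", 'name="referer"': "referer"}
    found = {}
    for line in lines:
        for marker, field in markers.items():
            if marker in line:
                found[field] = line.split('value="')[-1].split('"')[0]
    return found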
Example #28
0
    def add(self):
        """"""
        try:
            link_file = None
            err_msg = None
            source = None
            res = ""
            pkr = ""
            cookie = cookielib.CookieJar()
            form = None
            max_retries = 3
            
            for retry in range(max_retries + 1):
                try:
                    #First encrypted page.
                    with URLClose(URLOpen(cookie).open(self.link, form)) as s:
                        if self.wait_func():
                            return self.link, None, err_msg
                        s_lines = s.readlines()
                        for line in s_lines:
                            #Get pKr
                            if "pKr='" in line:
                                pkr = line.split("'")[1].split("'")[0]
                            #Get the last block to unescape
                            if "unescape" in line:
                                tmp = line.split("break;}")[-1]
                                tmp = self.split_eval(tmp)
                                
                                #Eval the block until it's plain text
                                res = self.decrypt(tmp)
                            #Recaptcha
                            if "challenge?k=" in line:
                                if retry < max_retries:
                                    recaptcha_key = line.split("challenge?k=")[-1].split('"')[0]
                                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % recaptcha_key
                                    c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                                    challenge, response = c.solve_captcha()
                                    if response is not None:
                                        #Submit the input to the recaptcha system
                                        form = urllib.urlencode([("recaptcha_challenge_field", challenge), ("recaptcha_response_field", response), ("downloadp", "")])
                                        raise PostCaptchaException("Post captcha solution")
                                    else:
                                        raise CaptchaException("No response from the user")
                                else:
                                    raise CaptchaException("Captcha, max retries reached")
                    
                    id_func = res.split("(")[0] #Name of the function containing the id referring to the div that contains the real link
                    
                    pk1 = res.split("'")[3].split("'")[0]
                    qk = res.split("'")[1].split("'")[0] #Public ID of the file
                    
                    for line in s_lines:
                        #Line containing the function to parse
                        if id_func in line:
                            #Try to get the crypted block
                            try:
                                tmp = line.split(id_func)[1].split("setTimeout")[0].split('"none";')[1]
                                tmp = self.split_eval(tmp)
                            except Exception as err:
                                print line
                                raise

                            #Decrypt until it's plain text
                            res = self.decrypt(tmp)

                    div_id = res.split('getElementById("')[1].split('"')[0]

                    data = urllib.urlencode([("qk",qk), ("pk1", pk1), ("r", pkr),])

                    form_action = "http://www.mediafire.com/dynamic/download.php?%s" % data
                except PostCaptchaException:
                    pass  #loop again, this time POSTing the solved captcha form
                else:
                    break
            
            try:
                #Second encrypted page.
                with URLClose(URLOpen(cookie).open(form_action)) as s:
                    if self.wait_func():
                        return self.link, None, err_msg
                    s_lines = s.readlines()
                    for line in s_lines: #s_lines[1:] would skip the first line, which we don't care about
                        #print "NEW " + line
                        #Table with the real and fakes dl var.
                        if "function dz()" in line:
                            #Decrypt the table containing the final dl var
                            tmp = line.split("break;")[0].split("eval(")
                            for t in tmp:
                                if "unescape" in t:
                                    t = t.replace("\\","")
                                    table = self.decrypt(t)
                        #Result is plain text (small files); this branch is currently not working.
                        if "http://download" in line:
                            #Get all the dl links (even the fake ones)
                            var = line.split('mediafire.com/" +')
                            #Get the number of the server
                            serv = line.split("http://download")[1].split(".")[0] #error: sometimes this picks up something else
                            #Get the name of the file
                            name = var[1].split('+')[1].split("/")[2].split('"')[0].strip("\\")
                            #Find the real link among the fake ones
                            it = iter(var)
                            for tmp in it:
                                #Real link
                                if div_id in tmp:
                                    tmp = it.next()
                                    tmp = tmp.split('+')[0]
                                    #Get the final dl var in the table
                                    dl = table.split(tmp+"=")[1].split(";")[0].strip("'")
                            raise FileLinkFoundException()
                        #Result is encrypted
                        else:
                            tmp = line.split("=''")[-1]
                            tmp = tmp.split("eval(")
                            #Decrypt until the real link is found
                            for t in tmp:
                                if "unescape" in t:
                                    t = t.replace("\\","")
                                    t = t.split("=''")[-1]
                                    res = self.decrypt(t, div_id)
                                    if len(res) == 3:
                                        serv = res[0]
                                        var = res[1]
                                        name = res[2]
                                        raise FileLinkFoundException()
                #if we get here, the link was not found.
                raise LinkNotFoundException("Link not found")
            except FileLinkFoundException as err:
                pass
            
            dl = table.split(var+"=")[1].split(";")[0].strip("'")
            link_file = "http://%s/%sg/%s/%s" % (serv, dl, qk, name)
            
            with URLClose(URLOpen(cookie).open(link_file, range=(self.content_range, None)), always_close=False) as s:
                source = s
            print link_file
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            print err
            err_msg = err
        except (CaptchaException, LinkNotFoundException) as err:
            print err
            err_msg = err
            logging.exception(err)
        except Exception as err:
            err_msg = err
            print err
            logging.exception(err)
        
        return self.link, source, err_msg #source may be the file object or None.
Example #29
0
    def add(self):  #wait_func: wait method from thread_managed
        """
        TODO: Refactor.
        """
        link_file = None
        err_msg = None
        source = None
        wait = WAITING
        ajax_id = None
        recaptcha_key = None

        try:
            file_id = self.link.split("/files/")[1].split("/")[0]

            cookie = cookielib.CookieJar()
            opener = URLOpen(cookie)  #cookielib
            ajax_id_url = BASE_URL + "/files-ajax/" + file_id + "/request.html"

            with URLClose(opener.open(self.link)) as s1:
                for line in s1:
                    if "var ajaxdl" in line:
                        ajax_id = line.split('"')[1]
                    elif "challenge?k=" in line:
                        recaptcha_key = line.split("challenge?k=")[-1].split(
                            '"')[0].strip()
                if not ajax_id:  #not recaptcha_key or not ajax_id:
                    raise LinkErrorException("Link not found.")

                if self.wait_func():
                    return self.link, None, err_msg

                #wait time.
                #note: bitshare does not enforce this; it can be skipped.
                #headers = {"Accept:": "application/json", }
                form = urllib.urlencode([("request", "generateID"),
                                         ("ajaxid", ajax_id)])
                with URLClose(opener.open(ajax_id_url, form)) as s2:
                    response = s2.read()  #may return ERROR: explanation
                    wait = int(response.split(":")[1])  #file:60:1
                    if wait > 120:
                        raise LimitExceededException("Limit Exceeded")
                    if self.wait_func(wait):
                        return self.link, None, err_msg

                #recaptcha.
                #note: bitshare does not enforce this; it can be skipped.
                if recaptcha_key:
                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % recaptcha_key
                    for retry in range(3):
                        c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                        challenge, response = c.solve_captcha()
                        if response is not None:
                            form = urllib.urlencode([
                                ("request", "validateCaptcha"),
                                ("ajaxid", ajax_id),
                                ("recaptcha_challenge_field", challenge),
                                ("recaptcha_response_field", response)
                            ])
                            response_ = opener.open(ajax_id_url, form).read(
                            )  #may return ERROR: explanation or SUCCESS
                            if not "ERROR" in response_:
                                break
                        else:
                            raise CaptchaException("No response from the user")
                    if "ERROR" in response_:
                        raise CaptchaException("Wrong captcha")

                if self.wait_func():
                    return self.link, None, err_msg

                #get download link
                form = urllib.urlencode([("request", "getDownloadURL"),
                                         ("ajaxid", ajax_id)])
                with URLClose(opener.open(ajax_id_url, form)) as s3:
                    response = s3.read()
                    link_file = response.split("http")[-1]
                    link_file = "http" + link_file

                with URLClose(URLOpen(cookie).open(link_file,
                                                   range=(self.content_range,
                                                          None)),
                              always_close=False) as sc:
                    source = sc

        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            err_msg = err
        except (LimitExceededException, CaptchaException,
                LinkErrorException) as err:
            if isinstance(err, LimitExceededException):
                self.set_limit_exceeded(True)
            err_msg = err
            logger.info(err)
        except Exception as err:
            err_msg = err
            logger.exception(err)

        return link_file, source, err_msg
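
The bitshare ajax endpoints in Example #29 answer either with "ERROR: explanation" or with a colon-separated record such as "file:60:1", where the middle field is the wait in seconds (per the inline comments above). A minimal sketch of a parse that surfaces the error text instead of failing inside int(); parse_generate_id is a hypothetical name:

def parse_generate_id(response):
    #Hypothetical helper: return the wait in seconds, or raise on an ERROR reply.
    if response.startswith("ERROR"):
        raise LinkErrorException(response)
    return int(response.split(":")[1])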