예제 #1
0
 def request(self):
     """Fetch a reCAPTCHA challenge token and the image that goes with it.

     Resets ``self.captcha_challenge`` and ``self.image_data`` to None, then
     reads the page at ``self.captcha_link`` line by line. On the first line
     containing ``"challenge : "`` the text between the first pair of single
     quotes is stored as the challenge, and the data read from the image URL
     built from that challenge is stored in ``self.image_data``.

     Any exception is logged together with the captcha link and swallowed;
     the method returns nothing.
     """
     # Function-scope import so the block does not rely on file-level imports.
     from contextlib import closing

     self.captcha_challenge = None
     self.image_data = None
     try:
         # closing() calls close() on each handle, even when parsing fails,
         # so neither connection is left open.
         with closing(request.get(self.captcha_link)) as page:
             for line in page.readlines():
                 if "challenge : " in line:
                     self.captcha_challenge = line.split("'")[1]
                     image_url = ("http://www.google.com/recaptcha/api/image?c=%s"
                                  % self.captcha_challenge)
                     with closing(request.get(image_url)) as handle:
                         self.image_data = handle.read()
                     break
     except Exception as err:
         logger.exception("%s :%s" % (self.captcha_link, err))
예제 #2
0
 def request(self):
     """Download the reCAPTCHA challenge token and its image data.

     ``self.captcha_challenge`` and ``self.image_data`` are first reset to
     None. The page at ``self.captcha_link`` is then scanned; the first line
     containing ``"challenge : "`` supplies the token (the text between the
     first two single quotes), which is used to build the image URL whose
     content is read into ``self.image_data``.

     Exceptions are logged with the captcha link and not re-raised.
     """
     # Imported locally so this method stays self-contained.
     from contextlib import closing

     self.captcha_challenge = None
     self.image_data = None
     try:
         # Both handles are wrapped in closing() so they are always closed;
         # previously they were left open after use.
         with closing(request.get(self.captcha_link)) as page:
             for line in page.readlines():
                 if "challenge : " not in line:
                     continue
                 self.captcha_challenge = line.split("'")[1]
                 with closing(request.get(
                         "http://www.google.com/recaptcha/api/image?c=%s" %
                         self.captcha_challenge)) as handle:
                     self.image_data = handle.read()
                 break
     except Exception as err:
         logger.exception("%s :%s" % (self.captcha_link, err))
예제 #3
0
    def check(self, link):
        """Inspect the page behind *link* and report what was found.

        Returns a ``(link_status, name, size, status_msg)`` tuple. The name
        comes from the first line containing ``<b title="``; the size is
        parsed from the line that follows it and multiplied by 1024,
        1024**2 or 1024**3 when its last two characters are kb, mb or gb
        (case-insensitive). If no such line is found the status is
        ``cons.LINK_DEAD``. On any exception ``status_msg`` carries the
        error text; non-network exceptions are additionally logged.
        """
        multipliers = {
            "kb": 1024,
            "mb": 1024 * 1024,
            "gb": 1024 * 1024 * 1024,
        }
        name, size, status_msg = "Unknown", 0, None
        link_status = cons.LINK_ERROR
        try:
            with URLClose(request.get(link)) as page:
                for row in page:
                    if '<b title="' not in row:
                        continue
                    name = row.split('<b title="')[-1].split('"')[0]
                    link_status = cons.LINK_ALIVE
                    # The size text sits on the line right after the title.
                    size_text = page.next().split("<b>")[-1].split("<")[0]
                    size = float(size_text.split("&")[0])
                    suffix = size_text[-2:].lower()
                    if suffix in multipliers:
                        size = size * multipliers[suffix]
                    break
                if link_status != cons.LINK_ALIVE:
                    link_status = cons.LINK_DEAD
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
        except Exception as err:
            status_msg = "Error: {0}".format(err)
            logger.exception(err)

        return link_status, name, size, status_msg
예제 #4
0
 def get_source(self, chunk, is_first):
     """Return the source object to read *chunk* from.

     When *is_first* is truthy ``self.source`` is returned unchanged.
     Otherwise a new request is issued for ``self.link_file`` with
     ``self.cookie`` and ``range=(chunk[START], None)``.
     """
     if not is_first:
         return request.get(self.link_file,
                            cookie=self.cookie,
                            range=(chunk[START], None))
     return self.source
예제 #5
0
 def check(self, link):
     """Inspect the page behind *link* and report what was found.

     Returns ``(link_status, name, size, status_msg)``. After the first line
     containing ``class="dl_first_filename`` the following line is parsed:
     the text before ``<span`` is the name, and the number after the last
     comma of the trailing text is the size, multiplied by 1024, 1024**2 or
     1024**3 for a kb, mb or gb unit (case-insensitive). When no marker
     line exists the status is ``cons.LINK_DEAD``. Exceptions put their
     text in ``status_msg``; non-network ones are also logged.
     """
     multipliers = {
         "kb": 1024,
         "mb": 1024 * 1024,
         "gb": 1024 * 1024 * 1024,
     }
     name, size, status_msg = "Unknown", 0, None
     link_status = cons.LINK_ERROR
     try:
         with URLClose(request.get(link)) as page:
             for row in page:
                 if 'class="dl_first_filename' not in row:
                     continue
                 # File details are on the line after the marker line.
                 details = page.next()
                 name = details.split('<span')[0].strip()
                 link_status = cons.LINK_ALIVE
                 size_text = details.split('">')[-1].split("<")[0]
                 size = float(size_text.split(",")[-1].strip().split(" ")[0])
                 unit = size_text.split(" ")[-1].lower()
                 if unit in multipliers:
                     size = size * multipliers[unit]
                 break
             if link_status != cons.LINK_ALIVE:
                 link_status = cons.LINK_DEAD
     except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
         status_msg = "Error: {0}".format(err)
     except Exception as err:
         status_msg = "Error: {0}".format(err)
         logger.exception(err)

     return link_status, name, size, status_msg
예제 #6
0
 def check(self, link):
     """Inspect the page behind *link* and report what was found.

     Returns ``(link_status, name, size, status_msg)``. The first line
     containing ``download_file_title">`` supplies both the name and the
     size text (inside ``<span>(...)`` after ``class="download_link``); the
     size is multiplied by 1024, 1024**2 or 1024**3 for a kb, mb or gb unit
     (case-insensitive). When no such line exists the status is
     ``cons.LINK_DEAD``.

     On any exception ``status_msg`` carries the error text. Unexpected
     (non-network) exceptions are additionally logged; previously they left
     ``status_msg`` as None, so the caller could not tell that parsing had
     failed.
     """
     name = cons.UNKNOWN
     size = 0
     status_msg = None
     link_status = cons.LINK_ERROR
     try:
         with URLClose(request.get(link)) as s:
             found = False
             for line in s:
                 if 'download_file_title">' in line:
                     found = True
                     link_status = cons.LINK_ALIVE
                     name = line.split('download_file_title">')[-1].split('<')[0].strip()
                     tmp = line.split('class="download_link')[1].split('<span>(')[-1].split(')')[0].strip()
                     unit = tmp.split(" ")[-1].strip().lower()
                     size = float(tmp.split(" ")[0].strip())

                     # Scale the number by its unit.
                     if unit == "kb":
                         size = size * 1024
                     elif unit == "mb":
                         size = size * 1024 * 1024
                     elif unit == "gb":
                         size = size * 1024 * 1024 * 1024
                     break
             if not found:
                 link_status = cons.LINK_DEAD
     except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
         status_msg = "Error: {0}".format(err)
     except Exception as err:
         # Surface the failure to the caller as well as to the log.
         status_msg = "Error: {0}".format(err)
         logger.exception(err)
     return link_status, name, size, status_msg
예제 #7
0
def get_solved_captcha(url, cookie, filter=None):
    """
    Download the image at *url* (sending *cookie*) and run it through
    Tesseract.

    @params: filter = a function wrapping one or more clean_image functions.
    @return: whatever Tesseract.get_captcha() returns, or None when any
             exception occurred (the exception is logged).
    """
    try:
        with URLClose(request.get(url, cookie=cookie)) as response:
            raw_image = response.read()
        return Tesseract(raw_image, filter).get_captcha()
    except Exception as err:
        logger.exception(err)
    return None
예제 #8
0
def get_solved_captcha(url, cookie, filter=None):
    """
    Fetch the captcha image from *url* using *cookie* and return the
    result of Tesseract.get_captcha() for it.

    @params: filter = a function wrapping one or more clean_image functions.
    @return: the Tesseract result, or None if anything raised (the
             exception is logged, not propagated).
    """
    solved = None
    try:
        with URLClose(request.get(url, cookie=cookie)) as stream:
            picture = stream.read()
        solver = Tesseract(picture, filter)
        solved = solver.get_captcha()
    except Exception as err:
        logger.exception(err)
        return None
    return solved
예제 #9
0
    def check(self, link):
        """Look up the title of the video referenced by *link*.

        The video id is the text after the last ``=`` in the part of *link*
        before its first ``&``. The get_video_info URL is requested with
        each ``el`` variant in turn until a response contains a ``token``
        key; the title is taken from the last response fetched.

        Always returns ``(cons.LINK_ALIVE, title, 0, None)``. There is no
        exception handling here, so request or lookup errors propagate.
        """
        info_url_template = 'http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
        video_id = link.split("&")[0].split("=")[-1]

        for el_type in ('&el=embedded', '&el=detailpage', '&el=vevo', ''):
            info_url = info_url_template % (video_id, el_type)
            with URLClose(request.get(info_url)) as response:
                video_info = parse_qs(response.read())
            if 'token' in video_info:
                break

        title = urllib.unquote_plus(video_info['title'][0])
        return cons.LINK_ALIVE, title, 0, None
예제 #10
0
    def check(self, link):
        """Inspect the page behind *link* and report what was found.

        Returns ``(link_status, name, size, status_msg)``.

        A ``<title>`` line that splits into more than two ``-`` separated
        parts marks the link alive; otherwise it is marked dead. The name is
        the last path component of *link* after ``/files/`` with a trailing
        ``.html`` removed when the path has exactly two components,
        otherwise the last word of the first title part (a shortened name).
        A later ``<h1>`` line containing the name supplies the size, which
        is multiplied by 1024, 1024**2 or 1024**3 when the unit text
        contains kb, mb or gb (case-insensitive).

        On any exception ``status_msg`` carries the error text; non-network
        exceptions are additionally logged.
        """
        html_suffix = ".html"
        name = "Unknown"
        size = 0
        status_msg = None
        link_status = cons.LINK_ERROR
        try:
            with URLClose(request.get(link)) as s:
                alive = False
                for line in s:
                    if '<title>' in line:
                        tmp = line.split("-")
                        if len(tmp) > 2:
                            tmp_name = link.split("/files/")[-1].split("/")
                            if len(tmp_name) == 2:
                                # Complete name. Remove the suffix explicitly:
                                # rstrip(".html") strips any trailing run of
                                # the characters ".", "h", "t", "m", "l" and
                                # so mangled names such as "report_final.html".
                                name = tmp_name[-1]
                                if name.endswith(html_suffix):
                                    name = name[:-len(html_suffix)]
                            else:
                                # Shortened name, ie: filenam...part1.rar
                                name = tmp[0].strip().split(" ")[-1]
                            link_status = cons.LINK_ALIVE
                            alive = True
                        else:
                            link_status = cons.LINK_DEAD
                    elif alive and "<h1>" in line and name in line:
                        tmp = line.split("-")[-1].strip()
                        unit = tmp.split(" ")[-1].lower()
                        size = float(tmp.split(" ")[0])
                        # Scale the number by its unit.
                        if "kb" in unit:
                            size = size * 1024
                        elif "mb" in unit:
                            size = size * 1024 * 1024
                        elif "gb" in unit:
                            size = size * 1024 * 1024 * 1024
                        break
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
        except Exception as err:
            status_msg = "Error: {0}".format(err)
            logger.exception(err)

        return link_status, name, size, status_msg
예제 #11
0
    def check(self, link):
        """Check *link* through the filefactory link-checker tool page.

        Returns ``(link_status, name, size, status_msg)``.

        The link is first normalised to ``BASE_URL/file/<id>`` and sent,
        URL-quoted, to the links.php tool with a 10 second timeout. After a
        line containing ``Available``, the next ``class="metadata"`` line
        supplies the name, and the second line after it supplies the size
        cell. If no such entry is found the status is ``cons.LINK_DEAD``.

        The size cell is split on spaces. The first token is the number;
        when a second token is kb, mb or gb (case-insensitive) the number is
        multiplied by 1024, 1024**2 or 1024**3 before being truncated to an
        int. Previously that token was dropped, so the size was not scaled
        the way the sibling checkers scale theirs.
        NOTE(review): assumes the second token is the unit - confirm against
        the tool's actual output.

        On any exception ``status_msg`` carries the error text; network
        errors are logged as warnings, everything else with a traceback.
        """
        unit_factors = {
            "kb": 1024,
            "mb": 1024 * 1024,
            "gb": 1024 * 1024 * 1024,
        }
        name = "Unknown"
        size = 0
        status_msg = None
        link_status = cons.LINK_ERROR

        try:
            # Strip the file name, keeping only the file id.
            tmp = link.split("/file/")[1].split("/")[0]
            link = "%s/file/%s" % (BASE_URL, tmp)
            link_quoted = urllib.quote_plus(link)
            with URLClose(
                    request.get(
                        "http://www.filefactory.com/tool/links.php?func=links&links="
                        + link_quoted,
                        timeout=10)) as s:
                alive = False
                for line in s:
                    if 'Available' in line:
                        alive = True
                    elif alive:
                        if 'class="metadata"' in line:
                            name = line.split('class="metadata">')[-1].split(
                                '</div>')[0].split('/')[-1].strip()
                            name = html_entities_parser(name)
                            s.next()  # skip the line between name and size
                            size_list = s.next().split("<td>")[-1].split(
                                "</td>")[0].split(" ")
                            amount = float(size_list[0])
                            unit = ""
                            if len(size_list) > 1:
                                unit = size_list[1].strip().lower()
                            # Unknown or missing units leave the number as is.
                            size = int(amount * unit_factors.get(unit, 1))
                            link_status = cons.LINK_ALIVE
                            break
            if link_status != cons.LINK_ALIVE:
                link_status = cons.LINK_DEAD
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
            logger.warning(err)
        except Exception as err:
            status_msg = "Error: {0}".format(err)
            logger.exception(err)

        return link_status, name, size, status_msg
예제 #12
0
    def check(self, link):
        """Inspect the page behind *link* and report what was found.

        Returns ``(link_status, name, size, status_msg)``.

        The ``<title>`` line decides the status: more than two ``-``
        separated parts means alive, anything else means dead. For an alive
        link the name is the last component of the path after ``/files/``
        (minus a trailing ``.html``) when that path has exactly two
        components, else the last word of the first title part. The size is
        read from a later ``<h1>`` line that contains the name and is
        multiplied by 1024, 1024**2 or 1024**3 when its unit text contains
        kb, mb or gb (case-insensitive).

        Exceptions put their text in ``status_msg``; non-network ones are
        also logged.
        """
        name = "Unknown"
        size = 0
        status_msg = None
        link_status = cons.LINK_ERROR
        try:
            with URLClose(request.get(link)) as s:
                alive = False
                for line in s:
                    if '<title>' in line:
                        tmp = line.split("-")
                        if len(tmp) > 2:
                            tmp_name = link.split("/files/")[-1].split("/")
                            if len(tmp_name) == 2:
                                # Complete name. rstrip(".html") treats its
                                # argument as a set of characters and would
                                # also eat trailing letters of the real name
                                # ("report_final.html" -> "report_fina"), so
                                # cut the suffix off explicitly instead.
                                name = tmp_name[-1]
                                if name.endswith(".html"):
                                    name = name[:-5]
                            else:
                                name = tmp[0].strip().split(" ")[-1] #shorted name, ie: filenam...part1.rar
                            link_status = cons.LINK_ALIVE
                            alive = True
                        else:
                            link_status = cons.LINK_DEAD
                    elif alive and "<h1>" in line and name in line:
                        tmp = line.split("-")[-1].strip()
                        unit = tmp.split(" ")[-1].lower()
                        size = float(tmp.split(" ")[0])
                        # Scale the number by its unit.
                        if "kb" in unit:
                            size = size * 1024
                        elif "mb" in unit:
                            size = size * 1024 * 1024
                        elif "gb" in unit:
                            size = size * 1024 * 1024 * 1024
                        break
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
        except Exception as err:
            status_msg = "Error: {0}".format(err)
            logger.exception(err)

        return link_status, name, size, status_msg
예제 #13
0
    def parse(self, link):
        """Search the page at *link* for a video URL and record it.

        Requests *link*, calls ``read(1024 * 1024)`` on the response and
        tries three regular expressions in order, from most to least
        specific. The first match's captured URL is unquoted and appended
        to ``self.video_list``. Returns None in every case; nothing is
        appended when no pattern matches.

        The response is now closed once it has been read; previously the
        handle was left open.
        """
        # Function-scope import so the block does not rely on file-level imports.
        from contextlib import closing

        #TODO: use findall.
        with closing(request.get(link)) as response:
            source = response.read(1024 * 1024)

        patterns = (
            # Start with something easy: JW Player in SWFObject
            r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)',
            # Broaden the search a little bit
            r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)',
            r'(?:file|source)[\s]+src=["|\'](http[^\'"&]*)',
        )
        mobj = None
        for pattern in patterns:
            mobj = re.search(pattern, source)
            if mobj is not None:
                break

        # Either nothing matched, or a pattern matched but returned an
        # empty group.
        if mobj is None or mobj.group(1) is None:
            return

        video_url = urllib.unquote(mobj.group(1))
        self.video_list.append(video_url)
예제 #14
0
 def check(self, link):
     """Check *link* through the filefactory link-checker tool page.

     Returns ``(link_status, name, size, status_msg)``.

     *link* is reduced to ``BASE_URL/file/<id>``, URL-quoted and passed to
     the links.php tool (10 second timeout). Once a line containing
     ``Available`` has been seen, the next ``class="metadata"`` line gives
     the name and the second line after it gives the size cell. Without
     such an entry the status is ``cons.LINK_DEAD``.

     The size cell is split on spaces: token one is the number, and a
     second token of kb, mb or gb (case-insensitive) multiplies it by 1024,
     1024**2 or 1024**3 before truncation to int. The original discarded
     that token, so sizes were not scaled like those of sibling checkers.
     NOTE(review): assumes the second token is the unit - confirm against
     the tool's actual output.

     Exceptions put their text in ``status_msg``; network errors are logged
     as warnings, all others with a traceback.
     """
     unit_factors = {
         "kb": 1024,
         "mb": 1024 * 1024,
         "gb": 1024 * 1024 * 1024,
     }
     name = "Unknown"
     size = 0
     status_msg = None
     link_status = cons.LINK_ERROR

     try:
         # Strip the file name, keeping only the file id.
         tmp = link.split("/file/")[1].split("/")[0]
         link = "%s/file/%s" % (BASE_URL, tmp)
         link_quoted = urllib.quote_plus(link)
         with URLClose(request.get("http://www.filefactory.com/tool/links.php?func=links&links=" + link_quoted, timeout=10)) as s:
             alive = False
             for line in s:
                 if 'Available' in line:
                     alive = True
                 elif alive:
                     if 'class="metadata"' in line:
                         name = line.split('class="metadata">')[-1].split('</div>')[0].split('/')[-1].strip()
                         name = html_entities_parser(name)
                         s.next()  # skip the line between name and size
                         size_list = s.next().split("<td>")[-1].split("</td>")[0].split(" ")
                         amount = float(size_list[0])
                         unit = ""
                         if len(size_list) > 1:
                             unit = size_list[1].strip().lower()
                         # Unknown or missing units leave the number as is.
                         size = int(amount * unit_factors.get(unit, 1))
                         link_status = cons.LINK_ALIVE
                         break
         if link_status != cons.LINK_ALIVE:
             link_status = cons.LINK_DEAD
     except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
         status_msg = "Error: {0}".format(err)
         logger.warning(err)
     except Exception as err:
         status_msg = "Error: {0}".format(err)
         logger.exception(err)

     return link_status, name, size, status_msg
예제 #15
0
    def check(self, link):
        """Inspect the page behind *link* (10 second timeout).

        Returns ``(link_status, name, size, status_msg)``. The first line
        containing ``class="f_arial f_14px"`` gives the name (passed
        through ``misc.html_entities_parser``); the following line gives
        the size, multiplied by 1024, 1024**2 or 1024**3 for a kb, mb or gb
        unit (case-insensitive). A non-zero size means
        ``cons.LINK_ALIVE``; otherwise the result is reset to
        ``cons.LINK_DEAD`` with ``cons.UNKNOWN`` and 0.

        Exceptions put their text in ``status_msg``. Non-network ones also
        reset name and size and are logged.
        """
        multipliers = {
            "kb": 1024,
            "mb": 1024 * 1024,
            "gb": 1024 * 1024 * 1024,
        }
        name, size, status_msg = "Unknown", 0, None
        link_status = cons.LINK_ERROR
        try:
            with URLClose(request.get(link, timeout=10)) as page:
                for row in page:
                    if 'class="f_arial f_14px"' not in row:
                        continue
                    raw_name = row.split('"f_arial f_14px">')[-1].split(
                        '<')[0].strip()
                    name = raw_name
                    name = misc.html_entities_parser(raw_name)
                    # The size text sits on the line after the name.
                    size_text = page.next().split(":")[-1].split("<")[0].strip()
                    pieces = size_text.split(" ")
                    unit = pieces[-1].strip().lower()
                    size = float(pieces[0].strip())
                    if unit in multipliers:
                        size = size * multipliers[unit]
                    break
            if not size:
                link_status, name, size = cons.LINK_DEAD, cons.UNKNOWN, 0
            else:
                link_status = cons.LINK_ALIVE
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
        except Exception as err:
            status_msg = "Error: {0}".format(err)
            name, size = cons.UNKNOWN, 0
            logger.exception(err)

        return link_status, name, size, status_msg
예제 #16
0
 def check(self, link):
     """Inspect the page behind *link* and report what was found.

     Returns ``(link_status, name, size, status_msg)``. The
     ``name="description"`` line gives the name (the ``content="`` value up
     to ``" | Free file hosting"``, passed through
     ``utils.html_entities_parser``). The ``File Size:</b>`` line gives the
     size, whose last two characters are the unit: KB, MB or GB multiply
     the number by 1024, 1024**2 or 1024**3. A non-zero size means
     ``cons.LINK_ALIVE``; otherwise the result is reset to
     ``cons.LINK_DEAD`` with ``cons.UNKNOWN`` and 0.

     On any exception ``status_msg`` carries the error text. Unexpected
     (non-network) exceptions also reset name and size and are logged;
     previously they left ``status_msg`` as None, giving the caller an
     error status with no explanation.
     """
     name = cons.UNKNOWN
     size = 0
     status_msg = None
     link_status = cons.LINK_ERROR
     try:
         with URLClose(request.get(link)) as s:
             for line in s:
                 if 'name="description"' in line:
                     name = line.split('content="')[-1].split(" | Free file hosting")[0]
                     name = utils.html_entities_parser(name)
                 elif "File Size:</b>" in line:
                     tmp = line.split("</b>")[-1].split("</div>")[0].strip()
                     unit = tmp[-2:]
                     size = float(tmp[:-2])
                     # Scale the number by its unit.
                     if unit == "KB":
                         size = size * 1024
                     elif unit == "MB":
                         size = size * 1024 * 1024
                     elif unit == "GB":
                         size = size * 1024 * 1024 * 1024
                     break
         if size:
             link_status = cons.LINK_ALIVE
         else:
             link_status, name, size = cons.LINK_DEAD, cons.UNKNOWN, 0
     except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
         status_msg = "Error: {0}".format(err)
     except Exception as err:
         # Surface the failure to the caller as well as to the log.
         status_msg = "Error: {0}".format(err)
         name, size = cons.UNKNOWN, 0
         logger.exception(err)

     return link_status, name, size, status_msg
예제 #17
0
    def check(self, link):
        """Inspect the page behind *link* (10 second timeout).

        Returns ``(link_status, name, size, status_msg)``. The name is taken
        from the first line containing ``class="f_arial f_14px"`` and run
        through ``misc.html_entities_parser``; the size comes from the next
        line and is multiplied by 1024, 1024**2 or 1024**3 for a kb, mb or
        gb unit (case-insensitive). The link is ``cons.LINK_ALIVE`` only
        when a non-zero size was parsed; otherwise the result is
        ``cons.LINK_DEAD`` with ``cons.UNKNOWN`` and 0.

        Exceptions put their text in ``status_msg``; non-network ones also
        reset name and size and are logged.
        """
        name, size = "Unknown", 0
        status_msg = None
        link_status = cons.LINK_ERROR
        try:
            with URLClose(request.get(link, timeout=10)) as page:
                for row in page:
                    if 'class="f_arial f_14px"' not in row:
                        continue
                    name = row.split('"f_arial f_14px">')[-1].split('<')[0].strip()
                    name = misc.html_entities_parser(name)
                    # The size text sits on the line after the name.
                    size_text = page.next().split(":")[-1].split("<")[0].strip()
                    unit = size_text.split(" ")[-1].strip().lower()
                    size = float(size_text.split(" ")[0].strip())
                    for suffix, factor in (("kb", 1024),
                                           ("mb", 1024 * 1024),
                                           ("gb", 1024 * 1024 * 1024)):
                        if unit == suffix:
                            size = size * factor
                            break
                    break
            if not size:
                link_status, name, size = cons.LINK_DEAD, cons.UNKNOWN, 0
            else:
                link_status = cons.LINK_ALIVE
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
        except Exception as err:
            status_msg = "Error: {0}".format(err)
            name, size = cons.UNKNOWN, 0
            logger.exception(err)

        return link_status, name, size, status_msg
예제 #18
0
    def check(self, link):
        """Inspect the page behind *link* and report what was found.

        Returns ``(link_status, name, size, status_msg)``. The first line
        containing ``download_file_title">`` supplies the name and, inside
        ``<span>(...)`` after ``class="download_link``, the size text. The
        size is multiplied by 1024, 1024**2 or 1024**3 for a kb, mb or gb
        unit (case-insensitive). When no such line exists the status is
        ``cons.LINK_DEAD``.

        On any exception ``status_msg`` carries the error text. Unexpected
        (non-network) exceptions are also logged; previously they left
        ``status_msg`` as None, unlike the network-error branch.
        """
        name = cons.UNKNOWN
        size = 0
        status_msg = None
        link_status = cons.LINK_ERROR
        try:
            with URLClose(request.get(link)) as s:
                found = False
                for line in s:
                    if 'download_file_title">' in line:
                        found = True
                        link_status = cons.LINK_ALIVE
                        name = line.split('download_file_title">')[-1].split(
                            '<')[0].strip()
                        tmp = line.split('class="download_link')[1].split(
                            '<span>(')[-1].split(')')[0].strip()
                        unit = tmp.split(" ")[-1].strip().lower()
                        size = float(tmp.split(" ")[0].strip())

                        # Scale the number by its unit.
                        if unit == "kb":
                            size = size * 1024
                        elif unit == "mb":
                            size = size * 1024 * 1024
                        elif unit == "gb":
                            size = size * 1024 * 1024 * 1024
                        break
                if not found:
                    link_status = cons.LINK_DEAD
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            status_msg = "Error: {0}".format(err)
        except Exception as err:
            # Surface the failure to the caller as well as to the log.
            status_msg = "Error: {0}".format(err)
            logger.exception(err)
        return link_status, name, size, status_msg
예제 #19
0
 def get_source(self, chunk, is_first):
     """Pick the source object for *chunk*.

     Returns ``self.source`` when *is_first* is truthy; otherwise returns
     the result of requesting ``self.link_file`` with ``self.cookie`` and
     ``range=(chunk[START], None)``.
     """
     return (self.source if is_first
             else request.get(self.link_file, cookie=self.cookie, range=(chunk[START], None)))