def request(self):
    """Fetch the reCAPTCHA challenge token and the captcha image bytes.

    Scrapes ``self.captcha_link`` for the embedded ``challenge : '...'``
    value, then downloads the matching image from Google's recaptcha API.
    Results are stored on the instance (``captcha_challenge`` and
    ``image_data``); both stay ``None`` on any failure, which is logged
    rather than raised.
    """
    self.captcha_challenge = None
    self.image_data = None
    try:
        for line in request.get(self.captcha_link).readlines():
            if "challenge : " in line:
                # the token is the first single-quoted value on the line
                self.captcha_challenge = line.split("'")[1]
                handle = request.get("http://www.google.com/recaptcha/api/image?c=%s" % self.captcha_challenge)
                self.image_data = handle.read()
                #self.image_type = handle.info()["Content-Type"].split("/")[1]
                break
    except Exception as err:
        logger.exception("%s :%s" % (self.captcha_link, err))
def request(self):
    """Fetch the reCAPTCHA challenge id and its image data.

    Stores the results on the instance (``captcha_challenge`` and
    ``image_data``); both remain ``None`` if anything fails, and the
    failure is only logged.
    """
    self.captcha_challenge = None
    self.image_data = None
    try:
        page_lines = request.get(self.captcha_link).readlines()
        for html_line in page_lines:
            if "challenge : " not in html_line:
                continue
            # challenge token is the first single-quoted value on the line
            self.captcha_challenge = html_line.split("'")[1]
            image_handle = request.get("http://www.google.com/recaptcha/api/image?c=%s" % self.captcha_challenge)
            self.image_data = image_handle.read()
            break
    except Exception as err:
        logger.exception("%s :%s" % (self.captcha_link, err))
def check(self, link):
    """Probe *link* and return ``(link_status, name, size, status_msg)``.

    *size* is reported in bytes; *status_msg* is None unless an error
    occurred while fetching or parsing the page.
    """
    name = "Unknown"
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    try:
        with URLClose(request.get(link)) as page:
            for html_line in page:
                if '<b title="' not in html_line:
                    continue
                name = html_line.split('<b title="')[-1].split('"')[0]
                link_status = cons.LINK_ALIVE
                # the size is on the next line, e.g. "<b>123.4&nbsp;MB<"
                size_text = page.next().split("<b>")[-1].split("<")[0]
                size = float(size_text.split("&")[0])
                unit = size_text[-2:].lower()
                scale = {"kb": 1024, "mb": 1024 ** 2, "gb": 1024 ** 3}
                size = size * scale.get(unit, 1)
                break
            if link_status != cons.LINK_ALIVE:
                link_status = cons.LINK_DEAD
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        status_msg = "Error: {0}".format(err)
        logger.exception(err)
    return link_status, name, size, status_msg
def get_source(self, chunk, is_first):
    """Return the data source for *chunk*.

    The first chunk reuses the already-open ``self.source``; any other
    chunk opens a fresh ranged request starting at the chunk offset.
    """
    if not is_first:
        return request.get(self.link_file, cookie=self.cookie, range=(chunk[START], None))
    return self.source
def check(self, link):
    """Probe *link* and return ``(link_status, name, size, status_msg)``.

    *size* is reported in bytes; *status_msg* is None unless an error
    occurred while fetching or parsing the page.
    """
    name = "Unknown"
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    try:
        with URLClose(request.get(link)) as page:
            for html_line in page:
                if 'class="dl_first_filename' not in html_line:
                    continue
                # name and size are on the line following the marker
                detail = page.next()
                name = detail.split('<span')[0].strip()
                link_status = cons.LINK_ALIVE
                size_text = detail.split('">')[-1].split("<")[0]
                size = float(size_text.split(",")[-1].strip().split(" ")[0])
                unit = size_text.split(" ")[-1].lower()
                scale = {"kb": 1024, "mb": 1024 ** 2, "gb": 1024 ** 3}
                size = size * scale.get(unit, 1)
                break
            if link_status != cons.LINK_ALIVE:
                link_status = cons.LINK_DEAD
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        status_msg = "Error: {0}".format(err)
        logger.exception(err)
    return link_status, name, size, status_msg
def check(self, link):
    """Probe *link* and return ``(link_status, name, size, status_msg)``.

    *size* is reported in bytes; *status_msg* is None unless an error
    occurred while fetching or parsing the page.
    """
    name = cons.UNKNOWN
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    try:
        with URLClose(request.get(link)) as s:
            found = False
            for line in s:
                if 'download_file_title">' in line:
                    found = True
                    link_status = cons.LINK_ALIVE
                    name = line.split('download_file_title">')[-1].split('<')[0].strip()
                    # size text looks like "<span>(123.4 MB)" inside the link tag
                    tmp = line.split('class="download_link')[1].split('<span>(')[-1].split(')')[0].strip()
                    unit = tmp.split(" ")[-1].strip()
                    size = float(tmp.split(" ")[0].strip())
                    #convert size to bytes.
                    if unit.lower() == "kb":
                        size = size * 1024
                    elif unit.lower() == "mb":
                        size = size * 1024 * 1024
                    elif unit.lower() == "gb":
                        size = size * 1024 * 1024 * 1024
                    break
            if not found:
                link_status = cons.LINK_DEAD
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        # Fix: report the failure to the caller; status_msg was previously
        # left as None here, unlike the other check() implementations.
        status_msg = "Error: {0}".format(err)
        logger.exception(err)
    return link_status, name, size, status_msg
def get_solved_captcha(url, cookie, filter=None):
    """Download the captcha image at *url* and OCR it with Tesseract.

    @params: filter = a function wrapping one or more clean_image
    functions, applied to the image before recognition.

    Returns the recognized captcha text, or None if anything failed
    (the error is logged).
    """
    try:
        with URLClose(request.get(url, cookie=cookie)) as handle:
            solved = Tesseract(handle.read(), filter).get_captcha()
    except Exception as err:
        logger.exception(err)
        return None
    return solved
def check(self, link):
    """Probe a youtube *link* and return ``(link_status, name, size, status_msg)``.

    Tries the get_video_info endpoint with several ``el`` contexts until
    one answers with a ``token``; *size* is always 0 (not reported by
    this endpoint).
    """
    video_id = link.split("&")[0].split("=")[-1]
    video_info = {}
    for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
        video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                          % (video_id, el_type))
        with URLClose(request.get(video_info_url)) as s:
            video_info = parse_qs(s.read())
        if 'token' in video_info:
            break
    # Fix: a removed/blocked video never yields a token, and the old code
    # then raised KeyError on video_info['title']; report it as dead instead.
    if 'token' not in video_info or 'title' not in video_info:
        return cons.LINK_DEAD, "Unknown", 0, None
    video_title = urllib.unquote_plus(video_info['title'][0])
    return cons.LINK_ALIVE, video_title, 0, None
def check(self, link):
    """Probe *link* and return ``(link_status, name, size, status_msg)``.

    *size* is reported in bytes; *status_msg* is None unless an error
    occurred while fetching or parsing the page.
    """
    name = "Unknown"
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    try:
        with URLClose(request.get(link)) as s:
            alive = False
            for line in s:
                if '<title>' in line:
                    tmp = line.split("-")
                    if len(tmp) > 2:
                        tmp_name = link.split("/files/")[-1].split("/")
                        if len(tmp_name) == 2:
                            #complete name. Fix: rstrip(".html") strips any
                            #trailing {., h, t, m, l} characters and can eat
                            #the end of the real file name; remove the
                            #suffix explicitly instead.
                            name = tmp_name[-1]
                            if name.endswith(".html"):
                                name = name[:-len(".html")]
                        else:
                            name = tmp[0].strip().split(" ")[-1]  #shorted name, ie: filenam...part1.rar
                        link_status = cons.LINK_ALIVE
                        alive = True
                    else:
                        link_status = cons.LINK_DEAD
                elif alive and "<h1>" in line and name in line:
                    tmp = line.split("-")[-1].strip()
                    unit = tmp.split(" ")[-1]
                    #Fix: this parse was commented out, so size always
                    #stayed 0 no matter what the unit conversion did.
                    size = float(tmp.split(" ")[0])
                    #convert size to bytes.
                    if "kb" in unit.lower():
                        size = size * 1024
                    elif "mb" in unit.lower():
                        size = size * 1024 * 1024
                    elif "gb" in unit.lower():
                        size = size * 1024 * 1024 * 1024
                    break
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        status_msg = "Error: {0}".format(err)
        logger.exception(err)
    return link_status, name, size, status_msg
def check(self, link):
    """Check a filefactory link via the site's bulk link-checker tool.

    Returns ``(link_status, name, size, status_msg)``.
    NOTE(review): *size* is taken straight from the checker's table cell
    and is NOT converted to bytes here — confirm the unit against the
    other plugins.
    """
    name = "Unknown"
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    try:
        #strip file name: reduce the url to its canonical /file/<id> form
        tmp = link.split("/file/")[1].split("/")[0]
        link = "%s/file/%s" % (BASE_URL, tmp)
        link_quoted = urllib.quote_plus(link)
        with URLClose(
                request.get(
                    "http://www.filefactory.com/tool/links.php?func=links&links="
                    + link_quoted,
                    timeout=10)) as s:
            alive = False
            for line in s:
                if 'Available' in line:
                    alive = True
                elif alive:
                    if 'class="metadata"' in line:
                        # file name is the last path component of the metadata div
                        name = line.split('class="metadata">')[-1].split(
                            '</div>')[0].split('/')[-1].strip()
                        name = html_entities_parser(name)
                        # skip one table row, then read the size cell
                        s.next()
                        size_list = s.next().split("<td>")[-1].split(
                            "</td>")[0].split(" ")
                        #size = "".join(size_list)
                        size = int(float(size_list[0]))
                        link_status = cons.LINK_ALIVE
                        break
            if link_status != cons.LINK_ALIVE:
                link_status = cons.LINK_DEAD
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
        logger.warning(err)
    except Exception as err:
        status_msg = "Error: {0}".format(err)
        logger.exception(err)
    return link_status, name, size, status_msg
def check(self, link):
    """Probe *link* and return ``(link_status, name, size, status_msg)``.

    *size* is reported in bytes; *status_msg* is None unless an error
    occurred while fetching or parsing the page.
    """
    name = "Unknown"
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    try:
        with URLClose(request.get(link)) as s:
            alive = False
            for line in s:
                if '<title>' in line:
                    tmp = line.split("-")
                    if len(tmp) > 2:
                        tmp_name = link.split("/files/")[-1].split("/")
                        if len(tmp_name) == 2:
                            #complete name. Fix: rstrip(".html") strips any
                            #trailing {., h, t, m, l} characters and can eat
                            #the end of the real file name; remove the
                            #suffix explicitly instead.
                            name = tmp_name[-1]
                            if name.endswith(".html"):
                                name = name[:-len(".html")]
                        else:
                            name = tmp[0].strip().split(" ")[-1]  #shorted name, ie: filenam...part1.rar
                        link_status = cons.LINK_ALIVE
                        alive = True
                    else:
                        link_status = cons.LINK_DEAD
                elif alive and "<h1>" in line and name in line:
                    tmp = line.split("-")[-1].strip()
                    unit = tmp.split(" ")[-1]
                    #Fix: this parse was commented out, so size always
                    #stayed 0 no matter what the unit conversion did.
                    size = float(tmp.split(" ")[0])
                    #convert size to bytes.
                    if "kb" in unit.lower():
                        size = size * 1024
                    elif "mb" in unit.lower():
                        size = size * 1024 * 1024
                    elif "gb" in unit.lower():
                        size = size * 1024 * 1024 * 1024
                    break
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        status_msg = "Error: {0}".format(err)
        logger.exception(err)
    return link_status, name, size, status_msg
def parse(self, link):
    """Scan the page behind *link* for a direct video URL and append it
    to ``self.video_list``.  Reads at most 1 MiB of the page."""
    #TODO: use findall.
    source = request.get(link).read(1024 * 1024)
    # Try the most specific pattern first, then progressively broader ones.
    patterns = (
        r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)',  # JW Player in SWFObject
        r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)',
        r'(?:file|source)[\s]+src=["|\'](http[^\'"&]*)',
    )
    mobj = None
    for pattern in patterns:
        mobj = re.search(pattern, source)
        if mobj is not None:
            break
    # No match at all, or a match whose capture group is empty: nothing found.
    if mobj is None or mobj.group(1) is None:
        return
    self.video_list.append(urllib.unquote(mobj.group(1)))
def check(self, link):
    """Check a filefactory link through the site's bulk link checker and
    return ``(link_status, name, size, status_msg)``."""
    name = "Unknown"
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    try:
        #reduce the url to its canonical /file/<id> form
        file_id = link.split("/file/")[1].split("/")[0]
        link = "%s/file/%s" % (BASE_URL, file_id)
        link_quoted = urllib.quote_plus(link)
        checker_url = "http://www.filefactory.com/tool/links.php?func=links&links=" + link_quoted
        with URLClose(request.get(checker_url, timeout=10)) as page:
            alive = False
            for html_line in page:
                if 'Available' in html_line:
                    alive = True
                elif alive and 'class="metadata"' in html_line:
                    # file name is the last path component of the metadata div
                    name = html_line.split('class="metadata">')[-1].split('</div>')[0].split('/')[-1].strip()
                    name = html_entities_parser(name)
                    # skip one table row, then read the size cell
                    page.next()
                    size_cell = page.next().split("<td>")[-1].split("</td>")[0].split(" ")
                    size = int(float(size_cell[0]))
                    link_status = cons.LINK_ALIVE
                    break
            if link_status != cons.LINK_ALIVE:
                link_status = cons.LINK_DEAD
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
        logger.warning(err)
    except Exception as err:
        status_msg = "Error: {0}".format(err)
        logger.exception(err)
    return link_status, name, size, status_msg
def check(self, link):
    """Probe *link* and return ``(link_status, name, size, status_msg)``.

    *size* is reported in bytes; *status_msg* is None unless an error
    occurred while fetching or parsing the page.
    """
    name = "Unknown"
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    try:
        with URLClose(request.get(link, timeout=10)) as page:
            for html_line in page:
                if 'class="f_arial f_14px"' not in html_line:
                    continue
                name = html_line.split('"f_arial f_14px">')[-1].split('<')[0].strip()
                name = misc.html_entities_parser(name)
                # size is on the following line, after the colon
                size_text = page.next().split(":")[-1].split("<")[0].strip()
                unit = size_text.split(" ")[-1].strip().lower()
                size = float(size_text.split(" ")[0].strip())
                scale = {"kb": 1024, "mb": 1024 ** 2, "gb": 1024 ** 3}
                size = size * scale.get(unit, 1)
                break
            if size:
                link_status = cons.LINK_ALIVE
            else:
                link_status, name, size = cons.LINK_DEAD, cons.UNKNOWN, 0
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        status_msg = "Error: {0}".format(err)
        name, size = cons.UNKNOWN, 0
        logger.exception(err)
    return link_status, name, size, status_msg
def check(self, link):
    """Probe *link* and return ``(link_status, name, size, status_msg)``.

    *size* is reported in bytes; *status_msg* is None unless an error
    occurred while fetching or parsing the page.
    """
    name = cons.UNKNOWN
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    try:
        with URLClose(request.get(link)) as s:
            for line in s:
                if 'name="description"' in line:
                    name = line.split('content="')[-1].split(" | Free file hosting")[0]
                    name = utils.html_entities_parser(name)
                elif "File Size:</b>" in line:
                    tmp = line.split("</b>")[-1].split("</div>")[0].strip()
                    unit = tmp[-2:]
                    size = float(tmp[:-2])
                    #convert size to bytes.
                    if unit == "KB":
                        size = size * 1024
                    elif unit == "MB":
                        size = size * 1024 * 1024
                    elif unit == "GB":
                        size = size * 1024 * 1024 * 1024
                    break
            if size:
                link_status = cons.LINK_ALIVE
            else:
                link_status, name, size = cons.LINK_DEAD, cons.UNKNOWN, 0
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        # Fix: report the failure to the caller; status_msg was previously
        # left as None here, unlike the other check() implementations.
        status_msg = "Error: {0}".format(err)
        name, size = cons.UNKNOWN, 0
        logger.exception(err)
    return link_status, name, size, status_msg
def check(self, link):
    """Probe *link* and return ``(link_status, name, size, status_msg)``.

    *size* is reported in bytes; *status_msg* is None unless an error
    occurred while fetching or parsing the page.
    """
    name = "Unknown"
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    #for retry_count in range(RETRIES):
    try:
        with URLClose(request.get(link, timeout=10)) as s:
            for line in s:
                if 'class="f_arial f_14px"' in line:
                    name = line.split('"f_arial f_14px">')[-1].split('<')[0].strip()
                    name = misc.html_entities_parser(name)
                    # size is on the following line, after the colon
                    tmp = s.next().split(":")[-1].split("<")[0].strip()
                    unit = tmp.split(" ")[-1].strip()
                    size = float(tmp.split(" ")[0].strip())
                    #convert size to bytes.
                    if unit.lower() == "kb":
                        size = size * 1024
                    elif unit.lower() == "mb":
                        size = size * 1024 * 1024
                    elif unit.lower() == "gb":
                        size = size * 1024 * 1024 * 1024
                    break
            # a parsed size is taken as proof the file exists
            if size:
                link_status = cons.LINK_ALIVE
            else:
                link_status, name, size = cons.LINK_DEAD, cons.UNKNOWN, 0
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        status_msg = "Error: {0}".format(err)
        name, size = cons.UNKNOWN, 0
        logger.exception(err)
    return link_status, name, size, status_msg
def check(self, link):
    """Probe *link* and return ``(link_status, name, size, status_msg)``.

    *size* is reported in bytes; *status_msg* is None unless an error
    occurred while fetching or parsing the page.
    """
    name = cons.UNKNOWN
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    try:
        with URLClose(request.get(link)) as s:
            found = False
            for line in s:
                if 'download_file_title">' in line:
                    found = True
                    link_status = cons.LINK_ALIVE
                    name = line.split('download_file_title">')[-1].split(
                        '<')[0].strip()
                    # size text looks like "<span>(123.4 MB)" inside the link tag
                    tmp = line.split('class="download_link')[1].split(
                        '<span>(')[-1].split(')')[0].strip()
                    unit = tmp.split(" ")[-1].strip()
                    size = float(tmp.split(" ")[0].strip())
                    #convert size to bytes.
                    if unit.lower() == "kb":
                        size = size * 1024
                    elif unit.lower() == "mb":
                        size = size * 1024 * 1024
                    elif unit.lower() == "gb":
                        size = size * 1024 * 1024 * 1024
                    break
            if not found:
                link_status = cons.LINK_DEAD
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        # Fix: report the failure to the caller; status_msg was previously
        # left as None here, unlike the other check() implementations.
        status_msg = "Error: {0}".format(err)
        logger.exception(err)
    return link_status, name, size, status_msg