def search(self, what, cat="all"): """ Performs search """ query = "".join( ( self.url, "/index.php?page=torrents&search=", what, "&category=", self.supported_categories.get(cat, "0"), "&active=1", ) ) get_table = re_compile('(?s)<table\sclass="lista".*>(.*)</table>') data = get_table.search(retrieve_url(query)).group(0) # extract first ten pages of next results next_pages = re_compile('(?m)<option value="(.*)">[0-9]+</option>') next_pages = ["".join((self.url, page)) for page in next_pages.findall(data)[:10]] parser = self.MyHtmlParseWithBlackJack(self.url) parser.feed(data) parser.close() for page in next_pages: parser.feed(get_table.search(retrieve_url(page)).group(0)) parser.close()
def search(self, what, cat='all'): """ Performs search """ #prepare query cat = self.supported_categories[cat.lower()] query = "".join((self.url, "/files/?category=", cat, "&subcategory=All&quality=All&seeded=2&external=2&query=", what, "&uid=0&sort=S")) data = retrieve_url(query) add_res_list = re_compile("/files.*page=[0-9]+") torrent_list = re_compile("start torrent list -->(.*)<!-- end torrent", DOTALL) data = torrent_list.search(data).group(0) list_results = add_res_list.findall(data) parser = self.MyHtmlParseWithBlackJack(self.url) parser.feed(data) del data if list_results: for search_query in islice((add_res_list.search(result).group(0) for result in list_results[1].split(" | ")), 0, 5): response = retrieve_url(self.url + search_query) parser.feed(torrent_list.search(response).group(0)) parser.close() return
def search(self, what, cat='all'): # Get token baseURL = "https://torrentapi.org/pubapi_v2.php?%s" params = urlencode({'get_token': 'get_token', 'app_id' : 'qbittorrent'}) response = retrieve_url(baseURL % params) j = json.loads(response) token = j['token'] sleep(2.1) # get JSON what = unquote(what) category = self.supported_categories[cat] params = urlencode({ 'mode': 'search', 'search_string': what, 'ranked': 0, 'category': category, 'limit': 100, 'sort': 'seeders', 'format': 'json_extended', 'token': token, 'app_id' : 'qbittorrent' }) response = retrieve_url(baseURL % params) j = json.loads(response) for i in j['torrent_results']: tbytes = float(i['size']) size = "-1" if tbytes > 1024 * 1024 * 1024: size = "%.1f GB" % (tbytes / (1024 * 1024 * 1024)) elif tbytes > 1024 * 1024: size = "%.1f MB" % (tbytes / (1024 * 1024)) elif tbytes > 1024: size = "%.1f KB" % (tbytes / 1024) else: size = "%.1f B" % (tbytes) res = dict(link=i['download'], name=i['title'], size=size, seeds=i['seeders'], leech=i['leechers'], engine_url=self.url, desc_link=i['info_page']) prettyPrinter(res)
def search(self, what, cat='all'): # Get token baseURL = "https://torrentapi.org/pubapi_v2.php?%s" params = urlencode({'get_token': 'get_token'}) response = retrieve_url(baseURL % params) j = json.loads(response) token = j['token'] # get JSON what = unquote(what) categories = "1;4;14;15;16;17;21;22;42;18;19;41;27;28;29;30;31;32;40;23;24;25;26;33;34;43;44;45;46;47;48" params = urlencode({'mode': 'search', 'search_string': what, 'ranked': 0, 'category': categories, 'limit': 100, 'sort': 'seeders', 'format': 'json_extended', 'token': token} ) response = retrieve_url(baseURL % params) j = json.loads(response) for i in j['torrent_results']: tbytes = float(i['size']) size = "-1" if tbytes > 1024 * 1024 * 1024: size = "%.1f GB" % (tbytes / (1024 * 1024 * 1024)) elif tbytes > 1024 * 1024: size = "%.1f MB" % (tbytes / (1024 * 1024)) elif tbytes > 1024: size = "%.1f KB" % (tbytes / 1024) else: size = "%.1f B" % (tbytes) res = dict(link=i['download'], name=i['title'], size=size, seeds=i['seeders'], leech=i['leechers'], engine_url=self.url, desc_link=i['info_page']) prettyPrinter(res)
def search(self, what, cat='all'): """ Method called by nova2. `what` is the already scaped search string, while `cat` restricts in which category the search should be performed. For each parsed line of the result, we put it in a dictionary and pass it to the prettyPrint function. """ data = retrieve_url('http://psychocydd.co.uk/torrents.php?search=%s' % what) soup = BeautifulSoup(data) res = soup.find_all('table', attrs={'width': '100%', 'class': 'lista'}) rows = res[5].find_all('tr') # by inspection, we want res[5] for row in rows[2:]: # by inspection, we want rows[2:] cells = row.find_all('td') # Columns of interest, all determined by inspection info = { 'name': cells[1].a.text, 'link': self.url + '/' + cells[4].a['href'], 'size': cells[6].text, 'seeds': cells[7].text, 'leech': cells[8].text, 'engine_url': self.url, } prettyPrinter(info)
def search(self, what, cat='all'):
    # Remove {} since isohunt does not seem
    # to handle those very well
    what = what.replace('{', '').replace('}', '')
    i = 1
    while True and i < 11:
        res = 0
        dat = retrieve_url(self.url + '/torrents.php?ihq=%s&iht=%s&ihp=%s&ihs1=2&iho1=d'
                           % (what, self.supported_categories[cat], i))
        # I know it's not very readable, but the SGML parser feels in pain
        section_re = re.compile('(?s)id=link.*?</tr><tr')
        torrent_re = re.compile('(?s)torrent_details/(?P<link>.*?[^/]+).*?'
                                '>(?P<name>.*?)</a>.*?'
                                '>(?P<size>[\d,\.]+\s+MB)</td>.*?'
                                '>(?P<seeds>\d+)</td>.*?'
                                '>(?P<leech>\d+)</td>')
        for match in section_re.finditer(dat):
            txt = match.group(0)
            m = torrent_re.search(txt)
            if m:
                torrent_infos = m.groupdict()
                torrent_infos['name'] = re.sub('<.*?>', '', torrent_infos['name'])
                torrent_infos['engine_url'] = self.url
                torrent_code = torrent_infos['link']
                torrent_infos['link'] = self.url + '/download/' + torrent_code
                torrent_infos['desc_link'] = self.url + '/torrent_details/' + torrent_code + '/dvdrip?tab=summary'
                prettyPrinter(torrent_infos)
                res = res + 1
        if res == 0:
            break
        i = i + 1
def search(self, what, cat='all'):
    req = urllib.unquote(what)
    i = 0
    results = 0
    while i < 3:
        data = retrieve_url('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s'
                            % urllib.urlencode(dict(q=req, p=i)))
        for line in data.splitlines():
            if line.startswith('#'):
                continue

            info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6]
            name = name.replace('|', '')

            res = dict(link='magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.quote(name.encode('utf8'))),
                       name=name,
                       size=size,
                       seeds=int(dl),
                       leech=int(dl),
                       engine_url=self.url,
                       desc_link='%s/search?%s' % (self.url, urllib.urlencode(dict(info_hash=info_hash, q=req))))

            prettyPrinter(res)
            results += 1
        if results == 0:
            break
        i += 1
def search_page(self, what, cat, start):
    results = []
    cat = self.supported_categories[cat]
    url = self.url + '/browse-movie/%s/720p/%s/0/seeds/%s' % (what, cat, start)
    # print url
    html = retrieve_url(url)
    soup = BeautifulSoup(html)
    divs = soup.findAll('div', {'class': 'browse-info'})
    for div in divs:
        d = dict()
        info = div.findNext('span', {'class': 'info'})
        links = div.findNext('span', {'class': 'links'})

        name_span = info.findNext('span', {'class': 'browseTitleLink'})
        d['name'] = name_span.text

        size_span = info.findNext('span', {'class': 'browseInfoList'})
        d['size'] = size_span.text.replace('Size:', '')

        browseSeeds_span = info.findNext('span', {'class': 'browseSeeds'})
        leech_span = browseSeeds_span.findNext('span', {'class': 'peers'})
        d['leech'] = leech_span.text.replace('Peers:', '')

        seeds_span = browseSeeds_span.findNext('span', {'class': 'seeds'})
        d['seeds'] = seeds_span.text.replace('Seeds:', '')

        desc_link = links.findNext('a')
        d['desc_link'] = desc_link['href']
        d['link'] = desc_link.findNext('a')['href']
        d['engine_url'] = self.url

        results.append(d)
    return results
def search(self, what, cat="all"): json_data = retrieve_url( "".join( ( self.url, "api/v2/torrents/search/?phrase=", what, "&category=", self.supported_categories.get(cat, ""), ) ) ) json_dict = json.loads(json_data) if json_dict["results"] < 1: return for r in json_dict["torrents"]: r_dict = { "link": r["magnet_uri"], "name": r["torrent_title"], "size": str(r["size"]) + "B", "seeds": r["seeds"], "leech": r["leeches"], "desc_link": r["page"], "engine_url": self.url, } prettyPrinter(r_dict)
def search(self, what, cat='all'):
    i = 1
    while True and i < 11:
        results = []
        url = self.url + '/api/list.json?sort=seeds&limit=50&keywords=%s&set=%s&genre=%s' \
              % (what, i, self.supported_categories[cat])
        json_data = retrieve_url(url)
        try:
            json_dict = json.loads(json_data)
        except:
            i += 1
            continue
        try:
            results = json_dict['MovieList']
        except KeyError:
            return
        else:
            for r in results:
                res_dict = dict()
                res_dict['name'] = r['MovieTitle']
                res_dict['size'] = r['Size']
                res_dict['seeds'] = r['TorrentSeeds']
                res_dict['leech'] = r['TorrentPeers']
                res_dict['link'] = r['TorrentUrl']
                res_dict['desc_link'] = r['MovieUrl']
                res_dict['engine_url'] = self.url
                prettyPrinter(res_dict)
        i += 1
def search(self, what, cat='all'):
    # Remove {} since btjunkie does not seem
    # to handle those very well
    what = what.replace('{', '').replace('}', '')
    ret = []
    i = 1
    # ru: format changed, no need to loop
    while True and i < 2:
        results = []
        parser = self.SimpleSGMLParser(results, self.url)
        dat = retrieve_url(self.url + '/search/%s/%s/' % (self.supported_categories[cat], what))
        # Remove <font> tags from page
        p = re.compile('<[/]?font.*?>')
        dat = p.sub('', dat)
        #print dat
        #return
        results_re = re.compile('(?s)<span style="float:left">Torrent Name</span>.*</table>')
        for match in results_re.finditer(dat):
            res_tab = match.group(0)
            parser.feed(res_tab)
            parser.close()
            break
        if len(results) <= 0:
            break
        i += 1
def search(self, what, cat='all'):
    ret = []
    i = 1
    while True and i < 11:
        results = []
        json_data = retrieve_url(self.url + '/json.php?q=%s&page=%d' % (what, i))
        try:
            json_dict = json.loads(json_data)
        except:
            i += 1
            continue
        if int(json_dict['total_results']) <= 0:
            return
        results = json_dict['list']
        for r in results:
            try:
                if cat != 'all' and self.supported_categories[cat] != r['category']:
                    continue
                res_dict = dict()
                res_dict['name'] = r['title']
                res_dict['size'] = str(r['size'])
                res_dict['seeds'] = r['seeds']
                res_dict['leech'] = r['leechs']
                res_dict['link'] = r['torrentLink']
                res_dict['desc_link'] = r['link']
                res_dict['engine_url'] = self.url
                prettyPrinter(res_dict)
            except:
                pass
        i += 1
def search(self, what, cat="all"): """ Performs search """ query = "".join((self.url, "/advanced_search/?with=", what, "&s_cat=", self.supported_categories[cat])) response = retrieve_url(query) list_searches = [] parser = self.MyHtmlParseWithBlackJack(list_searches, self.url) parser.feed(response) parser.close() for search_query in list_searches: response = retrieve_url(self.url + search_query) parser.feed(response) parser.close() return
def search(self, query, cat='all'):
    query = query.replace(' ', '+')
    parser = self.MyHtmlParseWithBlackJack(self.url)

    torrent_list = re_compile("(?s)<table class=\"listing\">(.*)</table>")
    additional_links = re_compile("/?lastid=[0-9]+&page=[0-9]+&terms={}".format(query))

    request_url = '{0}/search.php?terms={1}&type={2}&size_min=&size_max=&username='.format(
        self.url, query, self.supported_categories[cat])
    data = retrieve_url(request_url)
    data = torrent_list.search(data).group(0)
    parser.feed(data)
    parser.close()

    for res_link in map(lambda link: "".join((self.url, "/search.php?", link.group(0))),
                        additional_links.finditer(data)):
        data = retrieve_url(res_link)
        data = torrent_list.search(data).group(0)
        parser.feed(data)
        parser.close()
def search(self, what, cat="all"): """ Performs search """ query = "/".join((self.url, "search", what, self.supported_categories[cat], "seeds")) response = retrieve_url(query) list_searches = [] parser = self.MyHtmlParseWithBlackJack(list_searches, self.url) parser.feed(response) parser.close() parser.next_queries = False for search_query in list_searches: response = retrieve_url(self.url + search_query) parser.feed(response) parser.close() return
def search(self, what, cat='all'):
    json_data = retrieve_url(self.url + 'api/v2/torrents/search/?phrase=' + what
                             + '&category=' + self.supported_categories.get(cat, ''))
    json_dict = json.loads(json_data)

    if json_dict['results'] < 1:
        return

    for r in json_dict['torrents']:
        r_dict = {'link': r['magnet_uri'],
                  'name': r['torrent_title'],
                  'size': str(r['size']) + 'B',
                  'seeds': r['seeds'],
                  'leech': r['leeches'],
                  'desc_link': r['page'],
                  'engine_url': self.url}
        prettyPrinter(r_dict)
def search(self, query, cat='all'): """ Performs search """ query = query.replace("%20", "-") parser = self.MyHtmlParser(self.url) page = "".join((self.url, "/", self.supported_categories[cat], "/torrents/", query, ".html?sort=seeds&page=1")) html = retrieve_url(page) parser.feed(html) counter = 1 additional_pages = re_compile("/{0}/torrents/{1}.html\?sort=seeds&page=[0-9]+".format(self.supported_categories[cat], query)) list_searches = additional_pages.findall(html)[:-1] #last link is next(i.e. second) for page in map(lambda link: "".join((self.url, link)), list_searches): html = retrieve_url(page) parser.feed(html) counter += 1 if counter > 3: break parser.close()
def search(self, what, cat='all'):
    i = 1
    while True and i < 11:
        results = []
        parser = self.SimpleSGMLParser(results, self.url)
        dat = retrieve_url(self.url + '/?page=search&term=%s&offset=%d&cats=%s'
                           % (what, i, self.supported_categories[cat]))
        parser.feed(dat)
        parser.close()
        if len(results) <= 0:
            break
        i += 1
def search(self, what, cat="all"): for page in range(1, 35): results = [] parser = self.SimpleHTMLParser(results) data = retrieve_url( self.url + '/search?page={}&search={}'.format(page, what) ) parser.feed(data) parser.close() if len(results) <= 0: break
def search(self, what, cat='all'):
    i = 0
    while True and i < 11:
        results = []
        parser = self.SimpleSGMLParser(results, self.url)
        dat = retrieve_url(self.url + '/ts.php?search=&words=%s&cid=%s&sid=&type=1&orderby=a.seeds&asc=0&skip=%s'
                           % (what, self.supported_categories[cat], (i * 35)))
        parser.feed(dat)
        parser.close()
        if len(results) <= 0:
            break
        i += 1
def search(self, what, cat='all'): """ Performs search """ #prepare query. 7 is filtering by seeders cat = cat.lower() query = "/".join((self.url, "search", what, "0", "7", self.supported_categories[cat])) response = retrieve_url(query) list_searches = [] parser = self.MyHtmlParseWithBlackJack(list_searches, self.url) parser.feed(response) parser.close() parser.add_query = False for search_query in list_searches: response = retrieve_url(self.url + search_query) parser.feed(response) parser.close() return
def search(self, what, cat='all'):
    ret = []
    i = 0
    while True and i < 11:
        results = []
        parser = self.SimpleSGMLParser(results, self.url)
        dat = retrieve_url(self.url + '/searchResult.php?search=%s&lngMainCat=%s&order=seeders&by=down&start=%d'
                           % (what, self.supported_categories[cat], i))
        parser.feed(dat)
        parser.close()
        if len(results) <= 0:
            break
        i += 1
def search(self, what, cat='all'):
    results_list = []
    parser = self.MyHtmlParser(results_list, self.url)
    i = 0
    while i < 6:
        dat = retrieve_url(self.url + '/searchResult.php?search=%s&lngMainCat=%s&order=seeders&by=down&start=%d'
                           % (what, self.supported_categories[cat], i))
        parser.feed(dat)
        if len(results_list) < 1:
            break
        del results_list[:]
        i += 1
    parser.close()
def search(self, what, cat='all'):
    ret = []
    i = 0
    order = 'se'
    while True and i < 11:
        results = []
        parser = self.SimpleSGMLParser(results, self.url)
        dat = retrieve_url(self.url + '/search/%s/%d/7/%s' % (what, i, self.supported_categories[cat]))
        parser.feed(dat)
        parser.close()
        if len(results) <= 0:
            break
        i += 1
def search(self, what, cat='all'):
    i = 1
    results = []
    parser = self.NTParser(results, self.url)
    while i < 51:
        dat = retrieve_url('%s/?page=search&term=%s&offset=%d&cats=%s'
                           % (self.url, what, i, self.supported_categories[cat]))
        parser.feed(dat)
        if len(results) <= 0:
            break
        del results[:]
        i += 1
    parser.close()
def search(self, what, cat='all'):
    for page in range(100):
        results = []
        parser = self.SimpleHTMLParser(results, self.url)
        data = ''
        for t411_cat in self.supported_categories[cat]:
            path = ('/torrents/search/?{}&search={}&order=seeders&type=desc&page={}'
                    .format(t411_cat, what, page))
            data += retrieve_url(self.url + path)
        parser.feed(data)
        parser.close()
        if len(results) <= 0:
            break
def search(self, what, cat="all"): for page in range(35): results = [] parser = self.SimpleHTMLParser(results, self.url) for subcat in self.supported_categories[cat]: data = retrieve_url( '{}/recherche/{}{}/page-{},trie-seeds-d' .format(self.url, subcat, what, page) ) parser.feed(data) parser.close() if len(results) <= 0: break
def search(self, what, cat='all'):
    i = 0
    dat = ''
    results = []
    parser = self.SimpleHTMLParser(results, self.url)
    while i < 9:
        dat = retrieve_url('%s/torrents-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'
                           % (self.url, what, (i * 35), self.supported_categories[cat]))
        parser.feed(dat)
        if len(results) <= 0:
            break
        del results[:]
        i += 1
    parser.close()
def search(self, what, cat='all'): """ Performs search """ results_list = [] parser = self.MyHtmlParser(results_list, self.url) i = 1 while i < 31: # "what" is already urlencoded html = retrieve_url(self.url + '/q/%s/%d?sort=popular' % (what, i)) parser.feed(html) if len(results_list) < 1: break del results_list[:] i += 1 parser.close()
def search(self, what, cat='all'):
    # initialize trackers for magnet links
    trackers = '&' + '&'.join(urlencode({'tr': tracker}) for tracker in self.trackers_list)

    i = 0
    while i < 6:
        results_list = []
        # "what" is already urlencoded
        html = retrieve_url(self.url + '/any?f=%s&p=%d' % (what, i))
        parser = self.MyHtmlParser(results_list, self.url, trackers)
        parser.feed(html)
        parser.close()
        if len(results_list) < 1:
            break
        i += 1
def search(self, what, cat='all', sorttype='-1', page='1'):
    result = {}
    result['state'] = False
    result['list'] = []
    searchurl = self.url + '%s/%s' % (urllib.quote(what), str(int(page)))
    #plugin.notify(searchurl)
    try:
        pageresult = retrieve_url(searchurl)
        #xbmc.log(msg=pageresult)
        rmain = r'target="_blank">(?P<title>.*?)</a></span>.*?收录时间.*?value">(?P<createtime>.*?)</span>.*?大小.*?value">(?P<filesize>.*?)</span>.*?文件数.*?value">(?P<filecount>.*?)</span>.*?href="(?P<magnet>.*?)">磁力链接'
        reobj = re.compile(rmain, re.DOTALL)
        for match in reobj.finditer(pageresult):
            title = match.group('title')
            #plugin.notify(title)
            filesize = match.group('filesize')
            createtime = match.group('createtime')
            title = title.replace("<span class='highlight'>", "").replace('</span>', '')
            filecount = match.group('filecount')
            filesize = filesize.replace(' ', ' ')
            createtime = createtime.replace(' ', ' ')
            magnet = match.group('magnet')

            res_dict = dict()
            res_dict['name'] = title
            res_dict['size'] = filesize
            res_dict['seeds'] = ''
            res_dict['leech'] = ''
            res_dict['link'] = magnet
            res_dict['date'] = createtime
            res_dict['desc_link'] = ''
            res_dict['engine_url'] = self.url
            result['list'].append(res_dict)

        if len(result['list']) > 0:
            result['nextpage'] = True
    except:
        return result
    result['state'] = True
    return result
def search(self, what, cat='all'): query = "http://down.ali213.net/search?kw=" + what + "&submit=" data = retrieve_url(query) found_games = re.findall(self.result_page_match, data) if found_games: if self.games_to_parse > len(found_games): self.games_to_parse = len(found_games) # handling each gamepage in parallel, to not waste time on waiting for requests # for 10 games this speeds up from 37s to 6s run time threads = [] for i in range(self.games_to_parse): t = threading.Thread(target=self.handle_gamepage, args=(found_games[i],)) threads.append(t) t.start() # search method needs to stay alive until all threads are done for t in threads: t.join() return
def search(self, what, cat='all'):
    req = urllib.unquote(what)
    what_list = req.decode('utf8').split()
    i = 0
    results = 0
    while i < 3:
        data = retrieve_url(
            'https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s'
            % urllib.urlencode(dict(q=req, p=i)))
        for line in data.splitlines():
            if line.startswith('#'):
                continue

            info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6]
            name = name.replace('|', '')

            # BTDigg returns unrelated results, we need to filter
            if not all(word in name.lower() for word in what_list):
                continue

            res = dict(
                link='magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.quote(name.encode('utf8'))),
                name=name,
                size=size,
                seeds=int(dl),
                leech=int(dl),
                engine_url=self.url,
                desc_link='%s/search?%s' % (self.url, urllib.urlencode(dict(info_hash=info_hash, q=req))))

            prettyPrinter(res)
            results += 1
        if results == 0:
            break
        i += 1
def search(self, what, cat="all"): """ Performs search """ query = "".join((self.url, "/rss.xml?type=search&search=", what, "&cid=", self.supported_categories[cat])) response = retrieve_url(query) xmldoc = minidom.parseString(response) itemlist = xmldoc.getElementsByTagName('item') for item in itemlist: current_item = current_item = {"engine_url" : self.url} current_item['name'] = item.getElementsByTagName('title')[0].childNodes[0].data current_item["link"] = item.getElementsByTagName('enclosure')[0].attributes['url'].value current_item["desc_link"] = item.getElementsByTagName('link')[0].childNodes[0].data current_item["size"] = item.getElementsByTagName('size')[0].childNodes[0].data current_item["leech"] = item.getElementsByTagName('leechers')[0].childNodes[0].data if not current_item["leech"].isdigit(): current_item["leech"] = '' current_item["seeds"] = item.getElementsByTagName('seeders')[0].childNodes[0].data if not current_item["seeds"].isdigit(): current_item["seeds"] = '' prettyPrinter(current_item) return
def search(self, what, cat='all'):
    query = (self.url + '/api/v1/search/?q=' + what
             + '&sort=seeders&category=' + self.supported_categories[cat])
    response = json.loads(retrieve_url(query))
    all_results = response['hits']['value']

    for item in response['results']:
        self.process_record(item)

    i = 20
    threads = list()
    while all_results > i:
        a = threading.Thread(target=self.launch_request, args=(query, i))
        threads.append(a)
        a.start()
        i += 20

    for thread in threads:
        thread.join()
def search_threaded(url):
    logging.debug('Starting a new thread.')
    # PAGE_QUERY = re.compile(r'<a href="(magnet.*?)">.*?<span style="padding:0 5px 10px 7px;word-wrap: break-word;">(.+?)<\/span>.*?<span>Seed:(.*?)<\/span>.*?<\/i>(.*?)<\/span>', re.DOTALL)
    page_query_regex = re.compile(
        'break-word;">(.*?)</span>.*?Seed:(.*?)</span>.*?Leech:(.*?)</span>.*?Size:(.*?)</span>.*?href="(magnet.*?)"',
        re.DOTALL)
    magnet_query_regex = re.compile('href="(magnet.*?)"')
    empty_page_regex = re.compile(
        r'(<h3 style="color:blue;">No Results Found for \()')

    curr_record = list()
    curr_dict = dict()
    curr_record.clear()
    curr_dict.clear()

    try:
        a_v = retrieve_url(url)
    except:
        print('failed to connect')
        quit()

    match = re.search(empty_page_regex, a_v)
    if match:
        print('another one bites the dust')
        quit()

    c_v = re.findall(magnet_query_regex, a_v)
    for b_v in re.finditer(page_query_regex, a_v):
        for x in range(1, 5, 1):
            curr_record.append(b_v.group(x).strip())
            curr_dict['link'] = c_v[x]
        curr_dict['name'] = curr_record[0]
        curr_dict['seeds'] = curr_record[1]
        curr_dict['size'] = curr_record[3].replace(',', '')
        curr_dict['leech'] = curr_record[2]
        curr_dict['engine_url'] = 'https://www.idope.site'
        curr_dict['desc_link'] = str(-1)
        curr_record.clear()
        prettyPrinter(curr_dict)
        curr_dict.clear()
def search(self, what, cat='all'): query = "http://www.mkvcage.bid/?s=" + what data = retrieve_url(query) found_games = re.findall(self.result_page_match, data) if found_games: if self.games_to_parse > len(found_games): self.games_to_parse = len(found_games) # handling each page in parallel, to not waste time on waiting for requests # for 8 entries this speeds up from 8s to 3s run time threads = [] for i in range(self.games_to_parse): # self.handle_page("http://" + found_games[i]) t = threading.Thread(target=self.handle_page, args=("http://" + found_games[i], )) threads.append(t) t.start() # search method needs to stay alive until all threads are done for t in threads: t.join() return
def fetch_results(self, query):
    results = json.loads(retrieve_url(query))
    return results
def do_search(self, url):
    webpage = retrieve_url(url)
    tgParser = self.TorrentGalaxyParser()
    tgParser.feed(webpage)
def search(self, what, cat='all', sorttype='-1', page='1'):
    result = {}
    result['state'] = False
    result['list'] = []
    if str(sorttype) == '-1':
        # sort-type picker; labels: relevance, popularity, creation time, size, file count
        dialog = xbmcgui.Dialog()
        sorttype = dialog.select('btdigg搜索-选择排序类型',
                                 ['相关度', '下载热度', '创建时间', '文件大小', '文件数'])
        if sorttype == -1:
            return result
        sorttype = str(sorttype)
    result['sorttype'] = sorttype
    searchurl = self.url + '?q=%s&order=%s&p=%s' % (
        urllib.quote(what), sorttype, str(int(page) - 1))
    data = urllib.urlencode({'u': searchurl, 'b': '12', 'f': 'norefer'})
    pageresult = ''
    for i in range(5):
        try:
            wp = webproxy.get('webproxy', '')
            if not wp:
                #plugin.notify('重新获取代理')  # fetching a new proxy
                wp = self.getwebproxy()
            #plugin.notify('当前代理:'+webproxy['webproxy'])  # current proxy
            webproxyurl = wp + '/browse.php?' + data
            pageresult = retrieve_url(webproxyurl, referer=wp)
            if pageresult:
                webproxy['webproxy'] = wp
                webproxy.sync()
                break
            else:
                continue
        except:
            continue
    if pageresult == '':
        webproxy['webproxy'] = ''
        webproxy.sync()
        return result
    try:
        rmain = r'"idx">.*?>(?P<title>[^<]+)</a>.*?href=".*?u=.*?(?P<magnet>magnet.*?)&.*?".*?attr_val">(?P<filesize>.*?)<.*?attr_val">(?P<filecount>.*?)<.*?attr_val">(?P<heatlevel>.*?)<.*?attr_val">(?P<createtime>.*?)<.*?attr_val">.*?attr_val">(?P<fake>.*?)<'
        reobj = re.compile(rmain, re.DOTALL | re.MULTILINE)
        #xbmc.log(msg=pageresult)
        for match in reobj.finditer(pageresult):
            fake = match.group('fake')
            if fake == 'No' or fake == '否':
                title = match.group('title')
                filesize = match.group('filesize').replace(' ', ' ')
                createtime = match.group('createtime').replace(' ', ' ')
                filecount = match.group('filecount')
                magnet = match.group('magnet')
            else:
                magnet = ''
                continue

            res_dict = dict()
            res_dict['name'] = title
            res_dict['size'] = filesize
            res_dict['seeds'] = match.group('heatlevel')
            res_dict['leech'] = match.group('heatlevel')
            res_dict['link'] = magnet
            res_dict['date'] = createtime
            res_dict['desc_link'] = ''
            res_dict['engine_url'] = self.url
            result['list'].append(res_dict)

        if len(result['list']) > 0:
            result['nextpage'] = True
    except:
        return result
    result['state'] = True
    return result
def search(self, what, cat='all', sorttype='-1', page='1'):
    result = {}
    result['state'] = False
    result['list'] = []
    if str(sorttype) == '-1':
        # sort-type picker; labels: creation time, size, popularity, relevance
        dialog = xbmcgui.Dialog()
        sorttype = dialog.select('btbook搜索-选择排序类型',
                                 ['创建时间', '文件大小', '下载热度', '相关度'])
        if sorttype == -1:
            return result
        sorttype = str(sorttype + 1)
    result['sorttype'] = sorttype
    searchurl = self.url + '%s/%s-%s.html' % (
        urllib.quote(what), str(int(page)), str(sorttype))
    #plugin.notify(searchurl)
    try:
        '''
        Disabled: look up the client's province and set it as a cookie before searching.
        json_data = retrieve_url('http://int.dpool.sina.com.cn/iplookup/iplookup.php?format=js')
        json_data = json_data.replace('\n', '').replace('\r', '')
        json_data = json_data[json_data.index('{'):json_data.index('}') + 1]
        #xbmc.log(json_data)
        json_dict = json.loads(json_data)
        prov = json_dict['province']
        # create a CookieJar instance to hold the cookies
        cookie = cookielib.CookieJar()
        # use urllib2's HTTPCookieProcessor to build a cookie handler
        handler = urllib2.HTTPCookieProcessor(cookie)
        # build an opener from the handler
        opener = urllib2.build_opener(handler)
        # opener.open works like urllib2.urlopen and also accepts a Request
        response = opener.open('http://www.btmilk.com/search/')
        cookie_item = cookielib.Cookie(
            version=0,
            name='prov',
            value=prov,
            port=None,
            port_specified=None,
            domain='http://www.btmilk.com/',
            domain_specified=None,
            domain_initial_dot=None,
            path='/',
            path_specified=None,
            secure=None,
            expires=None,
            discard=None,
            comment=None,
            comment_url=None,
            rest=None,
            rfc2109=False,
        )
        cookie.set_cookie(cookie_item)
        response = opener.open(searchurl)
        '''
        pageresult = retrieve_url(searchurl)
        #xbmc.log(pageresult)
        #pageresult = response.read()
        #xbmc.log(pageresult)
        rmain = r'创建时间.*?<b.*?>(?P<createtime>.*?)</b>.*?文件大小.*?<b.*?>(?P<filesize>.*?)</b>.*?下载热度.*?<b.*?>(?P<heatlevel>.*?)</b>.*?最近下载.*?<b.*?>(?P<lastdown>.*?)</b>.*?wiki/(?P<magnet>.*?).html.*?decodeURIComponent\x28"(?P<title>.*?)"\x29'
        reobj = re.compile(rmain, re.DOTALL)
        for match in reobj.finditer(pageresult):
            title = match.group('title').replace('"+"', '')
            title = urllib.unquote(title)
            #title = urllib.urldecode(match.group('title').replace('"+"', ''))
            #xbmc.log(title)
            filesize = match.group('filesize')
            createtime = match.group('createtime')
            title = title.replace('<b>', '').replace('</b>', '')
            magnet = match.group('magnet')
            magnet = 'magnet:?xt=urn:btih:' + magnet

            res_dict = dict()
            res_dict['name'] = title
            res_dict['size'] = filesize
            res_dict['seeds'] = ''
            res_dict['leech'] = ''
            res_dict['link'] = magnet
            res_dict['date'] = createtime
            res_dict['desc_link'] = ''
            res_dict['engine_url'] = self.url
            result['list'].append(res_dict)

        if len(result['list']) > 0:
            result['nextpage'] = True
    except:
        return result
    result['state'] = True
    return result
def download_torrent(self, info):
    html = retrieve_url(info)
    m = re.search('(/tor/.+?\s)', html)
    if m and len(m.groups()) > 0:
        print(download_file(self.url + m.group(1)))
def do_search(self, url):
    webpage = retrieve_url(url)
    adexParser = self.anidexParser()
    adexParser.feed(webpage)
def download_torrent(self, desc_link): """ Downloader """ dl_link = re_compile("https://www\.hypercache\.pw/metadata/.+?/") data = retrieve_url(desc_link) dl_url = dl_link.findall(data)[0] print(download_file(dl_url))
def launch_request(self, query, skip):
    this_query = query + '&fuv=yes&skip=' + str(skip)
    these_items = json.loads(retrieve_url(this_query))
    for item in these_items['results']:
        self.process_record(item)
def download_torrent(self, desc_link): """ Downloader """ dl_link = re_compile("/downloading/[^\"]+") data = retrieve_url(desc_link) dl_url = dl_link.findall(data)[0] print(download_file(self.url + dl_url))
def search(self, what, cat='all', sorttype='relevance', page='1'):
    result = {}
    result['state'] = False
    result['list'] = []
    result['sorttype'] = sorttype
    if sorttype == 'addtime':
        sorttype = '1'
    elif sorttype == 'size':
        sorttype = '2'
    elif sorttype == 'files':
        sorttype = '3'
    elif sorttype == 'popular':
        sorttype = '4'
    else:
        sorttype = '0'
    try:
        whatcode = ''
        for i in range(20):
            try:
                locurl = urlgetlocation(self.url, data='keyword=' + urllib.quote(what))
                if str(locurl.getcode()) == '302':
                    match = re.search("search\x2F(.*?)\x2F", locurl.geturl(),
                                      re.IGNORECASE | re.DOTALL)
                    if match:
                        whatcode = match.group(1)
                    else:
                        whatcode = ""
                    xbmc.log(msg='btdiggs-err:%s %s' % (int(locurl.getcode()), whatcode),
                             level=xbmc.LOGERROR)
                    break
            except Exception, errno:
                xbmc.log(msg='btdiggs-err:%s' % (errno), level=xbmc.LOGERROR)
                time.sleep(1)
                continue
        if whatcode == '':
            return result
        pageresult = ''
        for i in range(10):
            searchurl = self.url + 'search/%s/%s/%s/2.html' % (
                urllib.quote(whatcode), str(int(page)), str(sorttype),)
            pageresult = retrieve_url(searchurl, referer=self.url)
            if pageresult != '':
                break
            time.sleep(1)
        #xbmc.log(msg=pageresult,level=xbmc.LOGERROR)
        rmain = r'<dt><a.*?target=[\x22\x27]_blank[\x22\x27]>(?P<title>.*?)</a>.*?收录时间:<b>(?P<createtime>.*?)</b>.*?<span>文件大小:<b>(?P<filesize>.*?)</b>.*?<span>文件数:<b>(?P<filecount>.*?)</b>.*?<span>人气:<b>(?P<popular>.*?)</b>.*?href=[\x22\x27](?P<magnet>.*?)[\x22\x27]'
        reobj = re.compile(rmain, re.DOTALL)
        for match in reobj.finditer(pageresult):
            title = match.group('title')
            filesize = match.group('filesize')
            createtime = match.group('createtime')
            filecount = match.group('filecount')
            title = re.sub(r'<span\s+class="__cf_email.*?</span>', '', title,
                           re.IGNORECASE | re.DOTALL)
            title = title.replace('<b>', '').replace('</b>', '')
            magnet = match.group('magnet')

            res_dict = dict()
            res_dict['name'] = title
            res_dict['size'] = filesize
            res_dict['filecount'] = filecount
            res_dict['seeds'] = ''
            res_dict['leech'] = ''
            res_dict['link'] = magnet
            res_dict['date'] = createtime
            res_dict['desc_link'] = ''
            res_dict['engine_url'] = self.url
            result['list'].append(res_dict)
            result['nextpage'] = True
    except:
        return result
    result['state'] = True
    return result
def download_torrent(self, info): """ Downloader """ html = retrieve_url(info) m = re.search('<a\s?href=[\'\"](magnet\:\?.+?)[\'\"]', html) if m and len(m.groups()) > 0: print(m.group(1) + ' ' + info)
def download_torrent(self, desc_link): """ Downloader """ dl_link = re_compile("/get_torrents/[a-zA-Z0-9]+") data = retrieve_url(desc_link) dl_url = dl_link.findall(data)[0] print(download_file(self.url + dl_url))
def download_torrent(self, info):
    html = helpers.retrieve_url(info)
    magnet = re.search("magnet:\?[^\"]*(?=\")", html).group()
    print(magnet + ' ' + info)
def search(self, what, cat='all'):
    parser = self.Parser(self.url)
    what = parser.generateQuery(what)
    parser.feed(json.loads(retrieve_url(what)))
def load(self):
    self.html = retrieve_url(self.url)
    return self
def download_torrent(self, info): if "trackerlist" not in self.__dict__: self.trackerlist = json.loads(retrieve_url("https://downloadtorrentfile.com/trackerlist")) magnet = f"magnet:?xt=urn:btih:{info.split('/')[-1]}&tr=" + "&tr=".join(self.trackerlist) print(magnet + ' ' + info)
def handle_page(self, search_query):
    response = retrieve_url(self.url + search_query)
    parser = self.MyHtmlParseWithBlackJack(self.url)
    parser.feed(self.torrent_list.search(response).group(0))
    parser.close()
def download_torrent(self, info):
    info_page = retrieve_url(info)
    link = download_pattern.findall(info_page)[0]
    print(download_file(link))
def search(self, keyword, cat='all'):
    job = score()
    params = job.paramBuilder(unquote(keyword))
    url = job.urlBuilder(self.url, ['api', 'v2', 'list_movies.json'], params)
    data = retrieve_url(url)
    j = json.loads(data)
    # with open("assets/yts.v181109.json", "w") as f:
    #     json.dump(j, f)
    if j['data']['movie_count'] and 'movies' in j['data']:
        page_of = '{}of{}'.format(
            j['data']['page_number'],
            int(math.ceil(int(j['data']['movie_count']) / int(j['data']['limit']))))
        for movies in j['data']['movies']:
            for torrent in movies['torrents']:
                res = {
                    'link': job.magnetBuilder(torrent['hash'], movies['title']),
                    'name': '{n} ({y}) [{q}]-[{p}]-[{i}]'.format(
                        n=movies['title'],
                        y=movies['year'],
                        q=torrent['quality'],
                        p=page_of,
                        i=self.name),
                    'size': torrent['size'],
                    'seeds': torrent['seeds'],
                    'leech': torrent['peers'],
                    'engine_url': 'IMDB:{rating}, [{genres}]'.format(
                        rating=movies['rating'],
                        genres=', '.join(movies['genres'])),
                    'desc_link': movies['url']
                }
                job.done(res)
    elif job.supported_browse_params:
        url_params = job.supported_browse_params
        url_path = list(
            map(lambda i: i in params and params[i] or url_params[i], url_params))
        url = job.urlBuilder(self.url, url_path,
                             'page' in params and {'page': params['page']})
        data = retrieve_url(url)
        data = re.sub("\s\s+", "", data).replace('\n', '').replace('\r', '')
        data_container = re.findall(
            '<div class="browse-content"><div class="container">.*?<section><div class="row">(.*?)</div></section>.*?</div></div>',
            data)
        if data_container and data_container[0]:
            page_of = re.findall(
                '<li class="pagination-bordered">(.*?)</li>', data)[0]  # 1 of 5
            page_of = page_of and re.sub(' +', '', page_of).strip() or '?'
            data_movie = re.findall(
                '<div class=".?browse-movie-wrap.*?">.*?</div></div></div>',
                data_container[0])
            for hM in data_movie:
                movie_link = re.findall(
                    '<a href="(.*?)" class="browse-movie-link">.*?</a>', hM)[0]
                response_detail = retrieve_url(movie_link)
                response_detail = re.sub("\s\s+", "", response_detail).replace('\n', '').replace('\r', '')
                movie_id = re.findall('data-movie-id="(\d+)"', response_detail)[0]
                if movie_id:
                    url = job.urlBuilder(
                        self.url, ['api', 'v2', 'movie_details.json'],
                        {'movie_id': movie_id})
                    data_detail = retrieve_url(url)
                    j = json.loads(data_detail)
                    movies = j['data']['movie']
                    for torrent in movies['torrents']:
                        res = {
                            'link': job.magnetBuilder(torrent['hash'], movies['title']),
                            'name': '{n} ({y}) [{q}]-[{p}]-[{i}]'.format(
                                n=movies['title'],
                                y=movies['year'],
                                q=torrent['quality'],
                                p=page_of,
                                i=self.name[:-1]),
                            'size': torrent['size'],
                            'seeds': torrent['seeds'],
                            'leech': torrent['peers'],
                            'engine_url': 'IMDB:{rating}, [{genres}]'.format(
                                rating=movies['rating'],
                                genres=', '.join(movies['genres'])),
                            'desc_link': movies['url']
                        }
                        job.done(res)
                else:
                    # TODO: ??
                    movie_title = re.findall(
                        '<a.*?class="browse-movie-title".*?>(.*?)</a>', hM)[0]
                    movie_year = re.findall(
                        '<div.?class="browse-movie-year".*?>(.*?)</div>', hM)[0]
                    movie_rate = re.findall(
                        '<h4.?class="rating".*?>(.*?)</h4>', hM)[0]
                    movie_rate = movie_rate.split('/')[0]
                    movie_genre = re.findall(
                        '<figcaption class=".*?">.*?(<h4>.*</h4>).*?</figcaption>', hM)[0]
                    movie_genre = re.findall('<h4>(.*?)</h4>', movie_genre)
                    # print(movie_title, movie_link, movie_year, movie_rate, movie_genre)
                    job.done()
        else:
            # NOTE: No match found
            job.done()
    else:
        # NOTE: not supported browsing
        job.done()