def response(resp):
    """Parse an HTML result page into a list of torrent result dicts.

    Every ``<dl>`` directly below ``<div class="results">`` whose ``<dt>``
    holds exactly one link yields one dict with the keys ``url``, ``title``,
    ``seed``, ``leech`` and ``template``.  ``filesize``, ``magnetlink`` and
    ``publishedDate`` are only added when they can be extracted.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts in document order.
    """
    results = []

    # compiled once instead of once per row
    sha1_re = re.compile('[0-9a-fA-F]{40}')

    dom = html.fromstring(resp.text)

    for result in dom.xpath('//div[@class="results"]/dl'):
        name_cell = result.xpath('./dt')[0]
        title = extract_text(name_cell)

        # skip rows that do not contain a link to a torrent
        links = name_cell.xpath('./a')
        if len(links) != 1:
            continue

        # extract url and remove a slash in the beginning
        link = links[0].attrib.get('href').lstrip('/')

        seed = 0
        leech = 0
        try:
            seed = int(result.xpath('./dd/span[4]/text()')[0].replace(',', ''))
            leech = int(result.xpath('./dd/span[5]/text()')[0].replace(',', ''))
        except (IndexError, ValueError):
            # counter missing or not numeric: keep whatever was parsed so far
            pass

        params = {
            'url': base_url + link,
            'title': title,
            'seed': seed,
            'leech': leech,
            'template': 'torrent.html'
        }

        # let's try to calculate the torrent size
        try:
            filesize_info = result.xpath('./dd/span[3]/text()')[0]
            filesize, filesize_multiplier = filesize_info.split()
            params['filesize'] = get_torrent_size(filesize, filesize_multiplier)
        except Exception:
            # best effort: get_torrent_size's failure modes are not visible
            # here, so this stays broad, but it no longer intercepts
            # KeyboardInterrupt / SystemExit like a bare ``except:`` did
            pass

        # does our link start with a valid SHA1 sum?
        if sha1_re.match(link):
            # add a magnet link to the result
            params['magnetlink'] = 'magnet:?xt=urn:btih:' + link

        # extract and convert creation date
        try:
            date_ts = result.xpath('./dd/span[2]')[0].attrib.get('title')
            params['publishedDate'] = datetime.fromtimestamp(float(date_ts))
        except (IndexError, TypeError, ValueError, OverflowError, OSError):
            # no date cell, no/invalid title attribute or timestamp out of range
            pass

        results.append(params)

    return results
def response(resp):
    """Parse an HTML result table into torrent result dicts sorted by seeders.

    One dict is built per ``<tr>`` of the table inside
    ``<div id="search_res">``.  Each dict carries ``url``, ``title``,
    ``content``, ``seed``, ``leech`` (always 0), ``filesize``, ``files``,
    ``magnetlink`` and ``template``.

    :param resp: response object; only ``resp.content`` is read.
    :returns: list of result dicts, highest ``seed`` first.
    """
    results = []

    dom = html.fromstring(resp.content)

    search_res = dom.xpath('//div[@id="search_res"]/table/tr')

    # return empty array if nothing is found
    if not search_res:
        return []

    # parse results
    for result in search_res:
        link = result.xpath('.//td[@class="torrent_name"]//a')[0]
        href = urljoin(url, link.attrib.get('href'))
        title = extract_text(link)
        content = extract_text(result.xpath('.//pre[@class="snippet"]')[0])
        content = "<br />".join(content.split("\n"))

        # evaluate the attribute XPath once per row; the code below reads
        # index 0 as "<size> <unit>", index 1 as file count, index 2 as seeders
        attr_vals = result.xpath('.//span[@class="attr_val"]/text()')
        size_parts = attr_vals[0].split()
        filesize = size_parts[0]
        filesize_multiplier = size_parts[1]
        files = attr_vals[1]
        seed = attr_vals[2]

        # convert seed to int if possible
        seed = int(seed) if seed.isdigit() else 0

        leech = 0

        # convert filesize to byte if possible
        filesize = get_torrent_size(filesize, filesize_multiplier)

        # convert files to int if possible
        files = int(files) if files.isdigit() else None

        magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href']

        # append result
        results.append({
            'url': href,
            'title': title,
            'content': content,
            'seed': seed,
            'leech': leech,
            'filesize': filesize,
            'files': files,
            'magnetlink': magnetlink,
            'template': 'torrent.html'
        })

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)
def response(resp):
    """Turn the HTML page in ``resp.content`` into torrent result dicts.

    One dict is produced per ``<td class="x-item">`` cell, with the keys
    ``url``, ``title``, ``content``, ``filesize``, ``magnetlink``, ``seed``,
    ``leech`` and ``template``.  Seeder and leecher counts are always the
    literal string ``'N/A'``.

    :param resp: response object; only ``resp.content`` is read.
    :returns: list of result dicts in document order (empty if no cell matches).
    """
    tree = html.fromstring(resp.content)
    parsed = []

    # no matching cell means the loop body never runs and [] is returned
    for cell in tree.xpath('.//td[@class="x-item"]'):
        page_url = urljoin(URL, cell.xpath('.//a[@title]/@href')[0])
        heading = cell.xpath('.//a[@title]/text()')[0]
        file_listing = extract_text(cell.xpath('.//div[@class="files"]'))

        # whitespace-separated tokens of the "tail" block; size value and unit
        # are picked by the FILESIZE / FILESIZE_MULTIPLIER positions
        tail_tokens = extract_text(cell.xpath('.//div[@class="tail"]')).split()
        size = get_torrent_size(tail_tokens[FILESIZE],
                                tail_tokens[FILESIZE_MULTIPLIER])

        magnet = cell.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0]

        parsed.append({
            'url': page_url,
            'title': heading,
            'content': file_listing,
            'filesize': size,
            'magnetlink': magnet,
            'seed': 'N/A',
            'leech': 'N/A',
            'template': 'torrent.html',
        })

    return parsed
def response(resp):
    """Build torrent result dicts from the rows of the "table-list" table.

    Each ``<tr>`` inside the ``<tbody>`` of a table whose class contains
    ``table-list`` becomes one dict with ``url``, ``title``, ``seed``,
    ``leech``, ``filesize`` and ``template``.  ``seed`` and ``leech`` are
    passed on exactly as returned by ``extract_text``.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts in document order.
    """
    name_link = './td[contains(@class, "name")]/a[2]'

    tree = html.fromstring(resp.text)
    rows = eval_xpath_list(
        tree, '//table[contains(@class, "table-list")]/tbody//tr')

    collected = []
    for row in rows:
        # the second link of the name cell points to the detail page
        detail_href = eval_xpath_getindex(row, name_link + '/@href', 0)
        page_url = urljoin(url, detail_href)

        name = extract_text(eval_xpath(row, name_link))
        seeders = extract_text(
            eval_xpath(row, './/td[contains(@class, "seeds")]'))
        leechers = extract_text(
            eval_xpath(row, './/td[contains(@class, "leeches")]'))

        # the size cell text is expected to split into "<number> <unit>"
        size_text = extract_text(
            eval_xpath(row, './/td[contains(@class, "size")]/text()'))
        amount, unit = size_text.split()

        collected.append({
            'url': page_url,
            'title': name,
            'seed': seeders,
            'leech': leechers,
            'filesize': get_torrent_size(amount, unit),
            'template': 'torrent.html',
        })

    return collected
def response(resp):
    """Convert the HTML page in ``resp.text`` into torrent result dicts.

    Every ``<td class="x-item">`` cell yields one dict holding ``url``,
    ``title``, ``content``, ``filesize``, ``magnetlink``, ``seed``, ``leech``
    and ``template``; ``seed`` and ``leech`` are the fixed string ``'N/A'``.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts in document order.
    """
    cells = html.fromstring(resp.text).xpath('.//td[@class="x-item"]')
    entries = []
    if not cells:
        return entries

    for cell in cells:
        target = urljoin(URL, cell.xpath('.//a[@title]/@href')[0])
        caption = extract_text(cell.xpath('.//a[@title]'))
        files_text = extract_text(cell.xpath('.//div[@class="files"]'))

        # tokens of the "tail" block, indexed through the module constants
        tail = extract_text(cell.xpath('.//div[@class="tail"]')).split()
        size_in_bytes = get_torrent_size(tail[FILESIZE],
                                         tail[FILESIZE_MULTIPLIER])

        magnet_hrefs = cell.xpath(
            './/div[@class="tail"]//a[@class="title"]/@href')

        entry = {
            'url': target,
            'title': caption,
            'content': files_text,
            'filesize': size_in_bytes,
            'magnetlink': magnet_hrefs[0],
            'seed': 'N/A',
            'leech': 'N/A',
            'template': 'torrent.html',
        }
        entries.append(entry)

    return entries
def response(resp):
    """Parse an HTML result page into a list of torrent result dicts.

    Every ``<dl>`` directly below ``<div class="results">`` whose ``<dt>``
    holds exactly one link yields one dict with the keys ``url``, ``title``,
    ``seed``, ``leech`` and ``template``.  ``filesize``, ``magnetlink`` and
    ``publishedDate`` are only added when they can be extracted.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts in document order.
    """
    results = []

    # compiled once instead of once per row
    sha1_re = re.compile('[0-9a-fA-F]{40}')

    dom = html.fromstring(resp.text)

    for result in dom.xpath('//div[@class="results"]/dl'):
        name_cell = result.xpath('./dt')[0]
        title = extract_text(name_cell)

        # skip rows that do not contain a link to a torrent
        links = name_cell.xpath('./a')
        if len(links) != 1:
            continue

        # extract url and remove a slash in the beginning
        link = links[0].attrib.get('href').lstrip('/')

        seed = 0
        leech = 0
        try:
            seed = int(result.xpath('./dd/span[4]/text()')[0].replace(',', ''))
            leech = int(result.xpath('./dd/span[5]/text()')[0].replace(',', ''))
        except (IndexError, ValueError):
            # counter missing or not numeric: keep whatever was parsed so far
            pass

        params = {
            'url': base_url + link,
            'title': title,
            'seed': seed,
            'leech': leech,
            'template': 'torrent.html'
        }

        # let's try to calculate the torrent size
        try:
            filesize_info = result.xpath('./dd/span[3]/text()')[0]
            filesize, filesize_multiplier = filesize_info.split()
            params['filesize'] = get_torrent_size(filesize, filesize_multiplier)
        except Exception:
            # best effort: get_torrent_size's failure modes are not visible
            # here, so this stays broad, but it no longer intercepts
            # KeyboardInterrupt / SystemExit like a bare ``except:`` did
            pass

        # does our link start with a valid SHA1 sum?
        if sha1_re.match(link):
            # add a magnet link to the result
            params['magnetlink'] = 'magnet:?xt=urn:btih:' + link

        # extract and convert creation date
        try:
            date_ts = result.xpath('./dd/span[2]')[0].attrib.get('title')
            params['publishedDate'] = datetime.fromtimestamp(float(date_ts))
        except (IndexError, TypeError, ValueError, OverflowError, OSError):
            # no date cell, no/invalid title attribute or timestamp out of range
            pass

        results.append(params)

    return results
def response(resp):
    """Parse ``<div class="one_result">`` blocks into torrent result dicts.

    Each block yields a dict with ``url``, ``title``, ``content``,
    ``filesize``, ``files``, ``magnetlink`` and ``template``.  The excerpt is
    flattened to plain text with line breaks shown as ``|``.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts in document order (no sorting is applied).
    """
    results = []

    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//div[@class="one_result"]')

    # return empty array if nothing is found
    if not search_res:
        return []

    # parse results
    for result in search_res:
        link = result.xpath('.//div[@class="torrent_name"]//a')[0]
        href = urljoin(url, link.attrib.get('href'))
        title = extract_text(link)

        excerpt = result.xpath('.//div[@class="torrent_excerpt"]')[0]
        content = html.tostring(excerpt, encoding='unicode', method='text',
                                with_tail=False)
        # it is better to emit <br/> instead of |, but html tags are verboten
        content = content.strip().replace('\n', ' | ')
        content = ' '.join(content.split())

        # evaluate the size XPath once; the text is read as "<number> <unit>"
        size_parts = result.xpath(
            './/span[@class="torrent_size"]/text()')[0].split()
        filesize = size_parts[0]
        filesize_multiplier = size_parts[1]

        # a missing file count is treated as a single file
        files = (result.xpath('.//span[@class="torrent_files"]/text()')
                 or ['1'])[0]

        # convert filesize to byte if possible
        filesize = get_torrent_size(filesize, filesize_multiplier)

        # convert files to int if possible
        try:
            files = int(files)
        except (TypeError, ValueError):
            files = None

        magnetlink = result.xpath(
            './/div[@class="torrent_magnet"]//a')[0].attrib['href']

        # append result
        results.append({
            'url': href,
            'title': title,
            'content': content,
            'filesize': filesize,
            'files': files,
            'magnetlink': magnetlink,
            'template': 'torrent.html'
        })

    # results are returned in page order
    return results
def response(resp):
    """Parse an HTML result table into torrent result dicts sorted by seeders.

    One dict is built per ``<tr>`` of the table inside
    ``<div id="search_res">``.  Each dict carries ``url``, ``title``,
    ``content``, ``seed``, ``leech`` (always 0), ``filesize``, ``files``,
    ``magnetlink`` and ``template``.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts, highest ``seed`` first.
    """
    results = []

    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//div[@id="search_res"]/table/tr')

    # return empty array if nothing is found
    if not search_res:
        return []

    # parse results
    for result in search_res:
        link = result.xpath('.//td[@class="torrent_name"]//a')[0]
        href = urljoin(url, link.attrib.get('href'))
        title = extract_text(link)
        content = extract_text(result.xpath('.//pre[@class="snippet"]')[0])
        content = "<br />".join(content.split("\n"))

        # evaluate the attribute XPath once per row; the code below reads
        # index 0 as "<size> <unit>", index 1 as file count, index 2 as seeders
        attr_vals = result.xpath('.//span[@class="attr_val"]/text()')
        size_parts = attr_vals[0].split()
        filesize = size_parts[0]
        filesize_multiplier = size_parts[1]
        files = attr_vals[1]
        seed = attr_vals[2]

        # convert seed to int if possible
        seed = int(seed) if seed.isdigit() else 0

        leech = 0

        # convert filesize to byte if possible
        filesize = get_torrent_size(filesize, filesize_multiplier)

        # convert files to int if possible
        files = int(files) if files.isdigit() else None

        magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href']

        # append result
        results.append({
            'url': href,
            'title': title,
            'content': content,
            'seed': seed,
            'leech': leech,
            'filesize': filesize,
            'files': files,
            'magnetlink': magnetlink,
            'template': 'torrent.html'
        })

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)
def response(resp):
    """Parse the rows of ``<table class="data">`` into torrent result dicts.

    The first ``<tr>`` is skipped; every following row yields a dict with
    ``url``, ``title``, ``content``, ``seed``, ``leech``, ``filesize``,
    ``files``, ``magnetlink``, ``torrentfile`` and ``template``.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts ordered by ``seed``, highest first.
    """
    table_rows = html.fromstring(resp.text).xpath('//table[@class="data"]//tr')

    entries = []
    # slicing drops the first row; with no rows at all the loop is a no-op
    for row in table_rows[1:]:
        anchor = row.xpath('.//a[@class="cellMainLink"]')[0]
        page_url = urljoin(url, anchor.attrib['href'])
        name = extract_text(anchor)
        description = extract_text(row.xpath(content_xpath))

        seed_text = extract_text(row.xpath('.//td[contains(@class, "green")]'))
        leech_text = extract_text(row.xpath('.//td[contains(@class, "red")]'))
        size_text = extract_text(row.xpath('.//td[contains(@class, "nobr")]'))
        files_text = extract_text(
            row.xpath('.//td[contains(@class, "center")][2]'))

        seeders = convert_str_to_int(seed_text)
        leechers = convert_str_to_int(leech_text)

        # the size cell is expected to split into "<number> <unit>"
        amount, unit = size_text.split()
        size_in_bytes = get_torrent_size(amount, unit)

        file_count = int(files_text) if files_text.isdigit() else None

        magnet = row.xpath(magnet_xpath)[0].attrib['href']
        raw_torrent_href = row.xpath(torrent_xpath)[0].attrib['href']
        # percent-encode the .torrent link while keeping URL punctuation intact
        torrent_url = quote(raw_torrent_href, safe="%/:=&?~#+!$,;'@()*")

        entries.append({
            'url': page_url,
            'title': name,
            'content': description,
            'seed': seeders,
            'leech': leechers,
            'filesize': size_in_bytes,
            'files': file_count,
            'magnetlink': magnet,
            'torrentfile': torrent_url,
            'template': 'torrent.html',
        })

    # most-seeded torrents first
    return sorted(entries, key=itemgetter('seed'), reverse=True)
def response(resp):
    """Parse the rows of ``<table class="data">`` into torrent result dicts.

    The first ``<tr>`` is skipped; every following row yields a dict with
    ``url``, ``title``, ``content`` (passed through ``escape``), ``seed``,
    ``leech``, ``filesize``, ``files``, ``magnetlink``, ``torrentfile`` and
    ``template``.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts ordered by ``seed``, highest first.
    """
    document = html.fromstring(resp.text)
    all_rows = document.xpath('//table[@class="data"]//tr')
    if not all_rows:
        # nothing found
        return []

    found = []
    for row in all_rows[1:]:
        main_link = row.xpath('.//a[@class="cellMainLink"]')[0]
        target = urljoin(url, main_link.attrib['href'])
        caption = extract_text(main_link)
        summary = escape(extract_text(row.xpath(content_xpath)))

        raw_seed = extract_text(row.xpath('.//td[contains(@class, "green")]'))
        raw_leech = extract_text(row.xpath('.//td[contains(@class, "red")]'))
        raw_size = extract_text(row.xpath('.//td[contains(@class, "nobr")]'))
        raw_files = extract_text(
            row.xpath('.//td[contains(@class, "center")][2]'))

        seed_count = convert_str_to_int(raw_seed)
        leech_count = convert_str_to_int(raw_leech)

        # "<number> <unit>" -> size via get_torrent_size
        number, unit = raw_size.split()
        size = get_torrent_size(number, unit)

        if raw_files.isdigit():
            file_total = int(raw_files)
        else:
            file_total = None

        magnet_href = row.xpath(magnet_xpath)[0].attrib['href']
        torrent_href = row.xpath(torrent_xpath)[0].attrib['href']
        torrent_href_quoted = quote(torrent_href, safe="%/:=&?~#+!$,;'@()*")

        found.append({
            'url': target,
            'title': caption,
            'content': summary,
            'seed': seed_count,
            'leech': leech_count,
            'filesize': size,
            'files': file_total,
            'magnetlink': magnet_href,
            'torrentfile': torrent_href_quoted,
            'template': 'torrent.html',
        })

    # order by seeders, descending
    return sorted(found, key=itemgetter('seed'), reverse=True)
def response(resp):
    """Parse a JSON result list into torrent result dicts sorted by seeders.

    ``resp.text`` is decoded with ``loads`` and is expected to be a list of
    objects with the keys ``id``, ``name``, ``info_hash``, ``seeders`` and
    ``leechers``; ``added`` and ``size`` are optional extras.  A list whose
    first entry is named ``"No results returned"`` means "nothing found".

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts, highest seeder count first.
    """
    # local import so the block does not rely on a file-level import
    from urllib.parse import quote

    def _seed_count(item):
        # NOTE(review): ``id`` is concatenated as a string above, so the
        # numeric fields presumably arrive as strings too; sort numerically
        # instead of lexicographically ("9" would otherwise beat "10")
        try:
            return int(item["seed"])
        except (TypeError, ValueError):
            return 0

    results = []

    search_res = loads(resp.text)

    # return empty array if nothing is found (an empty list included)
    if not search_res or search_res[0]["name"] == "No results returned":
        return []

    # parse results
    for result in search_res:
        link = url + "description.php?id=" + result["id"]
        # percent-encode the display name so characters such as '&', '#' or
        # spaces cannot break or extend the magnet URI
        magnetlink = (
            "magnet:?xt=urn:btih:" + result["info_hash"]
            + "&dn=" + quote(result["name"], safe="")
            + "&tr=" + "&tr=".join(trackers)
        )

        params = {
            "url": link,
            "title": result["name"],
            "seed": result["seeders"],
            "leech": result["leechers"],
            "magnetlink": magnetlink,
            "template": "torrent.html"
        }

        # extract and convert creation date
        try:
            params['publishedDate'] = datetime.fromtimestamp(
                float(result["added"]))
        except (KeyError, TypeError, ValueError, OverflowError, OSError):
            # field missing, not numeric or out of range
            pass

        # let's try to calculate the torrent size
        try:
            params['filesize'] = get_torrent_size(result["size"], "B")
        except Exception:
            # best effort: get_torrent_size's failure modes are not visible
            # here, so this stays broad, but it no longer intercepts
            # KeyboardInterrupt / SystemExit like a bare ``except:`` did
            pass

        # append result
        results.append(params)

    # return results sorted by seeder
    return sorted(results, key=_seed_count, reverse=True)
def response(resp):
    """Parse the rows matched by ``xpath_results`` into torrent result dicts.

    Each row yields a dict with ``url``, ``title``, ``content``, ``filesize``,
    ``magnetlink`` and ``template``.  The magnet link is built from a slice
    of the detail-page href.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts in document order.
    """
    magnet_template = (
        "magnet:?xt=urn:btih:{}&tr=http://tracker.acgsou.com:2710/announce")

    results = []
    dom = html.fromstring(resp.text)

    for result in dom.xpath(xpath_results):
        # defaults, reset for every row so that a row without a category
        # neither raises a NameError nor inherits the previous row's value
        filesize = 0
        category = ""

        try:
            category = extract_text(result.xpath(xpath_category)[0])
        except Exception:
            # best effort: keep the empty default
            pass

        page_a = result.xpath(xpath_title)[0]
        title = extract_text(page_a)
        page_href = page_a.attrib.get('href')
        href = base_url + page_href

        # drop the first and last five characters of the href to obtain the
        # value placed in the magnet link
        magnet_link = magnet_template.format(page_href[5:-5])

        try:
            filesize_info = result.xpath(xpath_filesize)[0]
            # the last two characters are read as the unit, the rest as value
            size_value = filesize_info[:-2]
            size_unit = filesize_info[-2:]
            # assign only on success so a failed conversion leaves the
            # default instead of a raw string slice
            filesize = get_torrent_size(size_value, size_unit)
        except Exception:
            # best effort: get_torrent_size's failure modes are not visible
            # here, so this stays broad, but it no longer intercepts
            # KeyboardInterrupt / SystemExit like a bare ``except:`` did
            pass

        # I didn't add download/seed/leech count since as I figured out they
        # are generated randomly every time
        content = 'Category: "{category}".'
        content = content.format(category=category)

        results.append({
            'url': href,
            'title': title,
            'content': content,
            'filesize': filesize,
            'magnetlink': magnet_link,
            'template': 'torrent.html'
        })

    return results
def response(resp):
    """Build torrent result dicts from the rows of the "table-list" table.

    Each ``<tr>`` inside the ``<tbody>`` of a table whose class contains
    ``table-list`` becomes one dict with ``url``, ``title``, ``seed``,
    ``leech``, ``filesize`` and ``template``.  ``seed`` and ``leech`` are
    passed on exactly as returned by ``extract_text``.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts in document order.
    """
    def cell_text(row, css_class, suffix=''):
        # text of the first-level lookup ".//td[contains(@class, ...)]"
        query = './/td[contains(@class, "' + css_class + '")]' + suffix
        return extract_text(row.xpath(query))

    tree = html.fromstring(resp.text)
    output = []

    for row in tree.xpath('//table[contains(@class, "table-list")]/tbody//tr'):
        # the second link of the name cell points to the detail page
        detail_hrefs = row.xpath('./td[contains(@class, "name")]/a[2]/@href')
        page_url = urljoin(url, detail_hrefs[0])
        name = extract_text(row.xpath('./td[contains(@class, "name")]/a[2]'))

        seeders = cell_text(row, 'seeds')
        leechers = cell_text(row, 'leeches')

        # size cell text is expected to split into "<number> <unit>"
        number, unit = cell_text(row, 'size', '/text()').split()
        size = get_torrent_size(number, unit)

        output.append({
            'url': page_url,
            'title': name,
            'seed': seeders,
            'leech': leechers,
            'filesize': size,
            'template': 'torrent.html',
        })

    return output
def response(resp):
    """Parse the rows matched by ``xpath_results`` into torrent result dicts.

    Each row yields a dict with ``url``, ``title``, ``content``, ``seed``,
    ``leech``, ``filesize``, ``torrentfile``, ``magnetlink`` and ``template``.
    Category and download count are folded into the ``content`` string.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts in document order.
    """
    results = []

    dom = html.fromstring(resp.text)

    for result in dom.xpath(xpath_results):
        # defaults, reset for every row so that a row without a category
        # neither raises a NameError nor inherits the previous row's value
        filesize = 0
        magnet_link = ""
        torrent_link = ""
        category = ""

        # category in which our torrent belongs
        try:
            category = result.xpath(xpath_category)[0].attrib.get('title')
        except (IndexError, AttributeError):
            # no category node, or the match is not an element
            pass

        # torrent title
        page_a = result.xpath(xpath_title)[0]
        title = extract_text(page_a)

        # link to the page
        href = base_url + page_a.attrib.get('href')

        for link in result.xpath(xpath_torrent_links):
            url = link.attrib.get('href')
            if 'magnet' in url:
                # link to the magnet
                magnet_link = url
            else:
                # link to the torrent file
                torrent_link = url

        # seed count
        seed = int_or_zero(result.xpath(xpath_seeds))

        # leech count
        leech = int_or_zero(result.xpath(xpath_leeches))

        # torrent downloads count
        downloads = int_or_zero(result.xpath(xpath_downloads))

        # let's try to calculate the torrent size
        try:
            filesize_info = result.xpath(xpath_filesize)[0]
            size_value, size_unit = filesize_info.split()
            # assign only on success so a failed conversion leaves the
            # default instead of the unconverted string
            filesize = get_torrent_size(size_value, size_unit)
        except Exception:
            # best effort: get_torrent_size's failure modes are not visible
            # here, so this stays broad, but it no longer intercepts
            # KeyboardInterrupt / SystemExit like a bare ``except:`` did
            pass

        # content string contains all information not included into template
        content = 'Category: "{category}". Downloaded {downloads} times.'
        content = content.format(category=category, downloads=downloads)

        results.append({
            'url': href,
            'title': title,
            'content': content,
            'seed': seed,
            'leech': leech,
            'filesize': filesize,
            'torrentfile': torrent_link,
            'magnetlink': magnet_link,
            'template': 'torrent.html'
        })

    return results
def response(resp):
    """Parse the two-rows-per-result "listing" table into torrent dicts.

    Rows whose class contains ``category_0`` are consumed in pairs: the first
    row supplies ``url``, ``title`` and optionally ``magnetlink``; the second
    supplies optional ``filesize``, ``publishedDate``, ``content``, ``seed``
    and ``leech``.  An odd or zero row count returns an empty list.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts in document order.
    """
    results = []

    dom = html.fromstring(resp.text)
    rows = dom.xpath(
        '//table[@class="listing"]//tr[contains(@class, "category_0")]')

    # check if there are no results or page layout was changed so we cannot
    # parse it; currently there are two rows for each result, so total count
    # must be even
    if not rows or len(rows) % 2 != 0:
        return []

    # regular expression for parsing torrent size strings
    size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)

    # processing the results, two rows at a time
    for name_row, info_row in zip(rows[::2], rows[1::2]):
        # parse the first row
        links = name_row.xpath('./td[@class="desc-top"]/a')
        params = {
            'template': 'torrent.html',
            'url': links[-1].attrib.get('href'),
            'title': extract_text(links[-1])
        }
        # I have not yet seen any torrents without magnet links, but
        # it's better to be prepared to stumble upon one some day
        if len(links) == 2:
            magnet = links[0].attrib.get('href')
            if magnet.startswith('magnet'):
                # okay, we have a valid magnet link, let's add it to the result
                params['magnetlink'] = magnet

        # no more info in the first row, start parsing the second one
        desc = extract_text(info_row.xpath('./td[@class="desc-bot"]')[0])
        for item in desc.split('|'):
            item = item.strip()
            if item.startswith('Size:'):
                # groups are e.g. ('1.228', 'GB'); an unrecognised size string
                # is skipped explicitly instead of via a swallowed exception
                size_match = size_re.match(item)
                if size_match is not None:
                    try:
                        params['filesize'] = get_torrent_size(
                            *size_match.groups())
                    except Exception:
                        # best effort: the helper's failure modes are not
                        # visible here; unlike a bare ``except:`` this lets
                        # KeyboardInterrupt / SystemExit through
                        pass
            elif item.startswith('Date:'):
                try:
                    # Date: 2016-02-21 21:44 UTC
                    params['publishedDate'] = datetime.strptime(
                        item, 'Date: %Y-%m-%d %H:%M UTC')
                except ValueError:
                    # date string does not follow the expected format
                    pass
            elif item.startswith('Comment:'):
                params['content'] = item

        stats = info_row.xpath('./td[@class="stats"]/span')
        # has the layout not changed yet?
        if len(stats) == 3:
            params['seed'] = int_or_zero(extract_text(stats[0]))
            params['leech'] = int_or_zero(extract_text(stats[1]))

        results.append(params)

    return results
def response(resp):
    """Parse the two-rows-per-result "listing" table into torrent dicts.

    Rows whose class contains ``category_0`` are consumed in pairs: the first
    row supplies ``url``, ``title`` and optionally ``magnetlink``; the second
    supplies optional ``filesize``, ``publishedDate``, ``content``, ``seed``
    and ``leech``.  An odd or zero row count returns an empty list.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts in document order.
    """
    results = []

    dom = html.fromstring(resp.text)
    rows = dom.xpath(
        '//table[@class="listing"]//tr[contains(@class, "category_0")]')

    # check if there are no results or page layout was changed so we cannot
    # parse it; currently there are two rows for each result, so total count
    # must be even
    if not rows or len(rows) % 2 != 0:
        return []

    # regular expression for parsing torrent size strings
    size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)

    # processing the results, two rows at a time
    for name_row, info_row in zip(rows[::2], rows[1::2]):
        # parse the first row
        links = name_row.xpath('./td[@class="desc-top"]/a')
        params = {
            'template': 'torrent.html',
            'url': links[-1].attrib.get('href'),
            'title': extract_text(links[-1])
        }
        # I have not yet seen any torrents without magnet links, but
        # it's better to be prepared to stumble upon one some day
        if len(links) == 2:
            magnet = links[0].attrib.get('href')
            if magnet.startswith('magnet'):
                # okay, we have a valid magnet link, let's add it to the result
                params['magnetlink'] = magnet

        # no more info in the first row, start parsing the second one
        desc = extract_text(info_row.xpath('./td[@class="desc-bot"]')[0])
        for item in desc.split('|'):
            item = item.strip()
            if item.startswith('Size:'):
                # groups are e.g. ('1.228', 'GB'); an unrecognised size string
                # is skipped explicitly instead of via a swallowed exception
                size_match = size_re.match(item)
                if size_match is not None:
                    try:
                        params['filesize'] = get_torrent_size(
                            *size_match.groups())
                    except Exception:
                        # best effort: the helper's failure modes are not
                        # visible here; unlike a bare ``except:`` this lets
                        # KeyboardInterrupt / SystemExit through
                        pass
            elif item.startswith('Date:'):
                try:
                    # Date: 2016-02-21 21:44 UTC
                    params['publishedDate'] = datetime.strptime(
                        item, 'Date: %Y-%m-%d %H:%M UTC')
                except ValueError:
                    # date string does not follow the expected format
                    pass
            elif item.startswith('Comment:'):
                params['content'] = item

        stats = info_row.xpath('./td[@class="stats"]/span')
        # has the layout not changed yet?
        if len(stats) == 3:
            params['seed'] = int_or_zero(extract_text(stats[0]))
            params['leech'] = int_or_zero(extract_text(stats[1]))

        results.append(params)

    return results
def response(resp):
    """Parse the rows matched by ``xpath_results`` into torrent result dicts.

    Each row yields a dict with ``url``, ``title``, ``content``, ``seed``,
    ``leech``, ``filesize``, ``torrentfile``, ``magnetlink`` and ``template``.
    Category and download count are folded into the ``content`` string.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts in document order.
    """
    results = []

    dom = html.fromstring(resp.text)

    for result in dom.xpath(xpath_results):
        # defaults, reset for every row so that a row without a category
        # neither raises a NameError nor inherits the previous row's value
        filesize = 0
        magnet_link = ""
        torrent_link = ""
        category = ""

        # category in which our torrent belongs
        try:
            category = result.xpath(xpath_category)[0].attrib.get('title')
        except (IndexError, AttributeError):
            # no category node, or the match is not an element
            pass

        # torrent title
        page_a = result.xpath(xpath_title)[0]
        title = extract_text(page_a)

        # link to the page
        href = base_url + page_a.attrib.get('href')

        for link in result.xpath(xpath_torrent_links):
            url = link.attrib.get('href')
            if 'magnet' in url:
                # link to the magnet
                magnet_link = url
            else:
                # link to the torrent file
                torrent_link = url

        # seed count
        seed = int_or_zero(result.xpath(xpath_seeds))

        # leech count
        leech = int_or_zero(result.xpath(xpath_leeches))

        # torrent downloads count
        downloads = int_or_zero(result.xpath(xpath_downloads))

        # let's try to calculate the torrent size
        try:
            filesize_info = result.xpath(xpath_filesize)[0]
            size_value, size_unit = filesize_info.split()
            # assign only on success so a failed conversion leaves the
            # default instead of the unconverted string
            filesize = get_torrent_size(size_value, size_unit)
        except Exception:
            # best effort: get_torrent_size's failure modes are not visible
            # here, so this stays broad, but it no longer intercepts
            # KeyboardInterrupt / SystemExit like a bare ``except:`` did
            pass

        # content string contains all information not included into template
        content = 'Category: "{category}". Downloaded {downloads} times.'
        content = content.format(category=category, downloads=downloads)

        results.append({
            'url': href,
            'title': title,
            'content': content,
            'seed': seed,
            'leech': leech,
            'filesize': filesize,
            'torrentfile': torrent_link,
            'magnetlink': magnet_link,
            'template': 'torrent.html'
        })

    return results
def response(resp):
    """Parse the torrents table into result dicts sorted by seeders.

    Each ``<tr>`` below ``<section id="#torrents">`` yields a dict with
    ``url``, ``title``, ``seed``, ``leech`` and ``template``; ``filesize`` and
    ``publishedDate`` are added only when they can be extracted.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of result dicts, highest ``seed`` first.
    """
    # French size units -> units passed to get_torrent_size; built once
    # instead of once per row
    multiplier_french_to_english = {
        'to': 'TiB',
        'go': 'GiB',
        'mo': 'MiB',
        'ko': 'KiB'
    }

    results = []
    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//section[@id="#torrents"]/div/table/tbody/tr')

    # return empty array if nothing is found
    if not search_res:
        return []

    # parse results
    for result in search_res:
        link = result.xpath('.//a[@id="torrent_name"]')[0]
        href = link.attrib.get('href')
        title = extract_text(link)
        seed = result.xpath('.//td[8]/text()')[0]
        leech = result.xpath('.//td[9]/text()')[0]

        # convert seed to int if possible
        seed = int(seed) if seed.isdigit() else 0

        # convert leech to int if possible
        leech = int(leech) if leech.isdigit() else 0

        params = {
            'url': href,
            'title': title,
            'seed': seed,
            'leech': leech,
            'template': 'torrent.html'
        }

        # let's try to calculate the torrent size
        try:
            filesize_info = result.xpath('.//td[6]/text()')[0]
            # the last two characters are read as the unit, the rest as value
            filesize = filesize_info[:-2]
            filesize_multiplier = filesize_info[-2:].lower()
            params['filesize'] = get_torrent_size(
                filesize, multiplier_french_to_english[filesize_multiplier])
        except Exception:
            # best effort: get_torrent_size's failure modes are not visible
            # here, so this stays broad, but it no longer intercepts
            # KeyboardInterrupt / SystemExit like a bare ``except:`` did
            pass

        # extract and convert creation date
        try:
            date_ts = result.xpath('.//td[5]/div/text()')[0]
            params['publishedDate'] = datetime.fromtimestamp(float(date_ts))
        except (IndexError, TypeError, ValueError, OverflowError, OSError):
            # no date cell, non-numeric text or timestamp out of range
            pass

        # append result
        results.append(params)

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)