Python get_torrent_size Exemples, searx.utils.get_torrent_size Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : torrentz.py Projet : searxng/searxng

def response(resp):
    """Parse a torrentz result page into a list of torrent result dicts.

    Each ``<dl>`` under ``div.results`` whose ``<dt>`` holds exactly one link
    yields one result with the keys ``url``, ``title``, ``seed``, ``leech``
    and ``template``.  ``filesize``, ``magnetlink`` and ``publishedDate`` are
    added only when they can be extracted from the row.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts in page order
    """
    results = []

    # compiled once instead of once per row
    sha1_re = re.compile('[0-9a-fA-F]{40}')

    dom = html.fromstring(resp.text)

    for result in dom.xpath('//div[@class="results"]/dl'):
        name_cell = result.xpath('./dt')[0]

        # skip rows that do not contain a link to a torrent
        links = name_cell.xpath('./a')
        if len(links) != 1:
            continue

        title = extract_text(name_cell)

        # extract url and remove a slash in the beginning
        link = links[0].attrib.get('href').lstrip('/')

        # best effort: a missing span or a non-numeric value keeps the
        # counter(s) not yet parsed at 0
        seed = 0
        leech = 0
        try:
            seed = int(result.xpath('./dd/span[4]/text()')[0].replace(',', ''))
            leech = int(
                result.xpath('./dd/span[5]/text()')[0].replace(',', ''))
        except (IndexError, ValueError):
            pass

        params = {
            'url': base_url + link,
            'title': title,
            'seed': seed,
            'leech': leech,
            'template': 'torrent.html'
        }

        # let's try to calculate the torrent size; the text is expected to
        # be "<value> <unit>", anything else leaves 'filesize' unset
        try:
            filesize_info = result.xpath('./dd/span[3]/text()')[0]
            filesize, filesize_multiplier = filesize_info.split()
            params['filesize'] = get_torrent_size(filesize,
                                                  filesize_multiplier)
        except (IndexError, ValueError):
            pass

        # does our link start with a 40 hex digit (SHA1) hash?
        if sha1_re.match(link):
            # add a magnet link to the result
            params['magnetlink'] = 'magnet:?xt=urn:btih:' + link

        # extract and convert creation date (unix timestamp stored in the
        # "title" attribute); unusable values leave 'publishedDate' unset
        try:
            date_ts = result.xpath('./dd/span[2]')[0].attrib.get('title')
            params['publishedDate'] = datetime.fromtimestamp(float(date_ts))
        except (IndexError, TypeError, ValueError, OverflowError, OSError):
            pass

        results.append(params)

    return results

Exemple #2

0

Afficher le fichier

def response(resp):
    """Build torrent results from a search result page, best-seeded first.

    Every ``<tr>`` of the table inside ``div#search_res`` becomes one result
    dict.  The first three ``span.attr_val`` texts of a row are read as size
    (value and unit), file count and seeder count, in that order.

    :param resp: response object; its ``content`` attribute is parsed as HTML
    :returns: list of result dicts sorted by descending ``seed``
    """
    dom = html.fromstring(resp.content)
    rows = dom.xpath('//div[@id="search_res"]/table/tr')

    # nothing to parse: hand back an empty list
    if not rows:
        return []

    entries = []
    for row in rows:
        anchor = row.xpath('.//td[@class="torrent_name"]//a')[0]
        href = urljoin(url, anchor.attrib.get('href'))
        title = extract_text(anchor)

        # line breaks of the snippet are rendered as HTML line breaks
        snippet = extract_text(row.xpath('.//pre[@class="snippet"]')[0])
        content = "<br />".join(snippet.split("\n"))

        attr_vals = row.xpath('.//span[@class="attr_val"]/text()')
        size_tokens = attr_vals[0].split()
        size_value = size_tokens[0]
        size_unit = size_tokens[1]
        files = attr_vals[1]
        seed = attr_vals[2]

        # non-numeric seeder counts fall back to 0
        seed = int(seed) if seed.isdigit() else 0

        # size converted through get_torrent_size
        filesize = get_torrent_size(size_value, size_unit)

        # non-numeric file counts are reported as None
        files = int(files) if files.isdigit() else None

        magnetlink = row.xpath('.//td[@class="ttth"]//a')[0].attrib['href']

        entries.append({
            'url': href,
            'title': title,
            'content': content,
            'seed': seed,
            'leech': 0,
            'filesize': filesize,
            'files': files,
            'magnetlink': magnetlink,
            'template': 'torrent.html'
        })

    # highest seeder count first
    return sorted(entries, key=itemgetter('seed'), reverse=True)

Exemple #3

0

Afficher le fichier

Fichier : digbt.py Projet : cyrilix/searx

def response(resp):
    """Parse a digbt result page into torrent result dicts.

    :param resp: response object; its ``content`` attribute is parsed as HTML
    :returns: list of result dicts in page order; empty when the page has no
        ``td.x-item`` cell
    """
    dom = html.fromstring(resp.content)
    cells = dom.xpath('.//td[@class="x-item"]')
    if not cells:
        return []

    entries = []
    for cell in cells:
        page_url = urljoin(URL, cell.xpath('.//a[@title]/@href')[0])
        title = cell.xpath('.//a[@title]/text()')[0]
        content = extract_text(cell.xpath('.//div[@class="files"]'))

        # the size value and unit are picked from the whitespace-separated
        # tokens of the "tail" block
        tail_tokens = extract_text(
            cell.xpath('.//div[@class="tail"]')).split()
        filesize = get_torrent_size(tail_tokens[FILESIZE],
                                    tail_tokens[FILESIZE_MULTIPLIER])
        magnetlink = cell.xpath(
            './/div[@class="tail"]//a[@class="title"]/@href')[0]

        # no peer counts are extracted here; fixed placeholders are emitted
        entries.append({
            'url': page_url,
            'title': title,
            'content': content,
            'filesize': filesize,
            'magnetlink': magnetlink,
            'seed': 'N/A',
            'leech': 'N/A',
            'template': 'torrent.html'
        })

    return entries

Exemple #4

0

Afficher le fichier

Fichier : 1337x.py Projet : searxng/searxng

def response(resp):
    """Parse a 1337x result page into torrent result dicts.

    One result is produced per ``<tr>`` found in the body of the
    ``table-list`` table.  ``seed`` and ``leech`` are passed on as the
    extracted cell text.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts in page order
    """
    dom = html.fromstring(resp.text)
    rows = eval_xpath_list(
        dom, '//table[contains(@class, "table-list")]/tbody//tr')

    entries = []
    for row in rows:
        # the second anchor of the "name" cell provides both link and title
        link_path = eval_xpath_getindex(
            row, './td[contains(@class, "name")]/a[2]/@href', 0)
        href = urljoin(url, link_path)

        title_nodes = eval_xpath(row, './td[contains(@class, "name")]/a[2]')
        title = extract_text(title_nodes)

        seed_nodes = eval_xpath(row, './/td[contains(@class, "seeds")]')
        seed = extract_text(seed_nodes)

        leech_nodes = eval_xpath(row, './/td[contains(@class, "leeches")]')
        leech = extract_text(leech_nodes)

        # size cell text is split into exactly "<value> <unit>"
        size_nodes = eval_xpath(
            row, './/td[contains(@class, "size")]/text()')
        size_value, size_unit = extract_text(size_nodes).split()
        filesize = get_torrent_size(size_value, size_unit)

        entry = {
            'url': href,
            'title': title,
            'seed': seed,
            'leech': leech,
            'filesize': filesize,
            'template': 'torrent.html'
        }
        entries.append(entry)

    return entries

Exemple #5

0

Afficher le fichier

Fichier : digbt.py Projet : med15060/oma

def response(resp):
    """Parse a digbt result page into torrent result dicts.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts in page order; empty when the page has no
        ``td.x-item`` cell
    """

    def to_result(cell):
        # turn one "x-item" cell into a result dict
        page_url = urljoin(URL, cell.xpath('.//a[@title]/@href')[0])
        title = extract_text(cell.xpath('.//a[@title]'))
        content = extract_text(cell.xpath('.//div[@class="files"]'))

        # the size value and unit are picked from the whitespace-separated
        # tokens of the "tail" block
        tail_nodes = cell.xpath('.//div[@class="tail"]')
        tail_tokens = extract_text(tail_nodes).split()
        filesize = get_torrent_size(tail_tokens[FILESIZE],
                                    tail_tokens[FILESIZE_MULTIPLIER])

        magnet_hrefs = cell.xpath(
            './/div[@class="tail"]//a[@class="title"]/@href')
        magnetlink = magnet_hrefs[0]

        # no peer counts are extracted here; fixed placeholders are emitted
        return {
            'url': page_url,
            'title': title,
            'content': content,
            'filesize': filesize,
            'magnetlink': magnetlink,
            'seed': 'N/A',
            'leech': 'N/A',
            'template': 'torrent.html'
        }

    dom = html.fromstring(resp.text)
    cells = dom.xpath('.//td[@class="x-item"]')

    # an empty match simply yields an empty list
    return [to_result(cell) for cell in cells]

Exemple #6

0

Afficher le fichier

Fichier : torrentz.py Projet : MrLpk/searx

def response(resp):
    """Parse a torrentz result page into a list of torrent result dicts.

    A result is emitted for every ``<dl>`` under ``div.results`` whose
    ``<dt>`` contains exactly one link.  ``url``, ``title``, ``seed``,
    ``leech`` and ``template`` are always present; ``filesize``,
    ``magnetlink`` and ``publishedDate`` only when they could be extracted.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts in page order
    """
    results = []

    # hoisted out of the loop: the pattern does not change between rows
    hash_re = re.compile('[0-9a-fA-F]{40}')

    dom = html.fromstring(resp.text)

    for result in dom.xpath('//div[@class="results"]/dl'):
        name_cell = result.xpath('./dt')[0]

        # skip rows that do not contain a link to a torrent
        links = name_cell.xpath('./a')
        if len(links) != 1:
            continue

        title = extract_text(name_cell)

        # extract url and remove a slash in the beginning
        link = links[0].attrib.get('href').lstrip('/')

        # best effort: a missing span or a non-numeric value keeps the
        # counter(s) not yet parsed at 0
        seed = 0
        leech = 0
        try:
            seed = int(result.xpath('./dd/span[4]/text()')[0].replace(',', ''))
            leech = int(result.xpath('./dd/span[5]/text()')[0].replace(',', ''))
        except (IndexError, ValueError):
            pass

        params = {
            'url': base_url + link,
            'title': title,
            'seed': seed,
            'leech': leech,
            'template': 'torrent.html'
        }

        # let's try to calculate the torrent size; the text is expected to
        # be "<value> <unit>", anything else leaves 'filesize' unset
        try:
            filesize_info = result.xpath('./dd/span[3]/text()')[0]
            filesize, filesize_multiplier = filesize_info.split()
            params['filesize'] = get_torrent_size(filesize, filesize_multiplier)
        except (IndexError, ValueError):
            pass

        # does our link start with a 40 hex digit (SHA1) hash?
        if hash_re.match(link):
            # add a magnet link to the result
            params['magnetlink'] = 'magnet:?xt=urn:btih:' + link

        # extract and convert creation date (unix timestamp stored in the
        # "title" attribute); unusable values leave 'publishedDate' unset
        try:
            date_ts = result.xpath('./dd/span[2]')[0].attrib.get('title')
            params['publishedDate'] = datetime.fromtimestamp(float(date_ts))
        except (IndexError, TypeError, ValueError, OverflowError, OSError):
            pass

        results.append(params)

    return results

Exemple #7

0

Afficher le fichier

Fichier : btdigg.py Projet : gabriel0miranda/searx-tor

def response(resp):
    """Parse a btdigg result page into torrent result dicts.

    One result is produced per ``div.one_result``.  No seeder information is
    extracted, so the results are returned in page order.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts; empty when no result block is found
    """
    results = []

    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//div[@class="one_result"]')

    # return empty array if nothing is found
    if not search_res:
        return []

    # parse results
    for result in search_res:
        link = result.xpath('.//div[@class="torrent_name"]//a')[0]
        href = urljoin(url, link.attrib.get('href'))
        title = extract_text(link)

        excerpt = result.xpath('.//div[@class="torrent_excerpt"]')[0]
        content = html.tostring(excerpt,
                                encoding='unicode',
                                method='text',
                                with_tail=False)
        # it is better to emit <br/> instead of |, but html tags are verboten
        content = content.strip().replace('\n', ' | ')
        content = ' '.join(content.split())

        # "<value> <unit>": evaluate the XPath once and reuse both tokens
        size_tokens = result.xpath(
            './/span[@class="torrent_size"]/text()')[0].split()

        # convert filesize to byte if possible
        filesize = get_torrent_size(size_tokens[0], size_tokens[1])

        # a missing file-count span is treated as a single file
        files = (result.xpath('.//span[@class="torrent_files"]/text()')
                 or ['1'])[0]

        # convert files to int if possible
        try:
            files = int(files)
        except (TypeError, ValueError):
            files = None

        magnetlink = result.xpath(
            './/div[@class="torrent_magnet"]//a')[0].attrib['href']

        # append result
        results.append({
            'url': href,
            'title': title,
            'content': content,
            'filesize': filesize,
            'files': files,
            'magnetlink': magnetlink,
            'template': 'torrent.html'
        })

    # results keep the order in which they appear on the page
    return results

Exemple #8

0

Afficher le fichier

Fichier : btdigg.py Projet : MrLpk/searx

def response(resp):
    """Parse a btdigg result page into torrent results, best-seeded first.

    Each ``<tr>`` of the table inside ``div#search_res`` yields one result.
    Size, file count and seeder count are taken from the first three
    ``span.attr_val`` texts of the row, in that order.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts sorted by descending ``seed``
    """
    table_rows = html.fromstring(resp.text).xpath(
        '//div[@id="search_res"]/table/tr')

    # an empty page gives an empty result list
    if not table_rows:
        return []

    collected = []
    for row in table_rows:
        name_anchor = row.xpath('.//td[@class="torrent_name"]//a')[0]
        href = urljoin(url, name_anchor.attrib.get('href'))
        title = extract_text(name_anchor)

        # newline-separated snippet lines are joined with HTML line breaks
        snippet_lines = extract_text(
            row.xpath('.//pre[@class="snippet"]')[0]).split("\n")
        content = "<br />".join(snippet_lines)

        values = row.xpath('.//span[@class="attr_val"]/text()')
        size_parts = values[0].split()
        raw_size = size_parts[0]
        raw_unit = size_parts[1]
        raw_files = values[1]
        raw_seed = values[2]

        # seeders: integer when purely numeric, otherwise 0
        if raw_seed.isdigit():
            seed = int(raw_seed)
        else:
            seed = 0

        filesize = get_torrent_size(raw_size, raw_unit)

        # file count: integer when purely numeric, otherwise None
        if raw_files.isdigit():
            files = int(raw_files)
        else:
            files = None

        magnet_anchor = row.xpath('.//td[@class="ttth"]//a')[0]

        collected.append({'url': href,
                          'title': title,
                          'content': content,
                          'seed': seed,
                          'leech': 0,
                          'filesize': filesize,
                          'files': files,
                          'magnetlink': magnet_anchor.attrib['href'],
                          'template': 'torrent.html'})

    # highest seeder count first
    return sorted(collected, key=itemgetter('seed'), reverse=True)

Exemple #9

0

Afficher le fichier

def response(resp):
    """Parse a result table into torrent result dicts, best-seeded first.

    All ``<tr>`` rows of ``table.data`` except the first one are converted.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts sorted by descending ``seed``
    """
    dom = html.fromstring(resp.text)
    rows = dom.xpath('//table[@class="data"]//tr')

    # no table rows at all: nothing to report
    if not rows:
        return []

    entries = []
    # the first row is skipped
    for row in rows[1:]:
        anchor = row.xpath('.//a[@class="cellMainLink"]')[0]
        href = urljoin(url, anchor.attrib['href'])
        title = extract_text(anchor)
        content = extract_text(row.xpath(content_xpath))
        seed_text = extract_text(
            row.xpath('.//td[contains(@class, "green")]'))
        leech_text = extract_text(
            row.xpath('.//td[contains(@class, "red")]'))
        size_text = extract_text(
            row.xpath('.//td[contains(@class, "nobr")]'))
        files_text = extract_text(
            row.xpath('.//td[contains(@class, "center")][2]'))

        seed = convert_str_to_int(seed_text)
        leech = convert_str_to_int(leech_text)

        # the size cell must split into exactly "<value> <unit>"
        size_value, size_unit = size_text.split()
        filesize = get_torrent_size(size_value, size_unit)

        # non-numeric file counts are reported as None
        files = int(files_text) if files_text.isdigit() else None

        magnetlink = row.xpath(magnet_xpath)[0].attrib['href']

        # percent-encode the torrent file link, keeping URL punctuation
        torrent_href = row.xpath(torrent_xpath)[0].attrib['href']
        torrentfileurl = quote(torrent_href, safe="%/:=&?~#+!$,;'@()*")

        entries.append({
            'url': href,
            'title': title,
            'content': content,
            'seed': seed,
            'leech': leech,
            'filesize': filesize,
            'files': files,
            'magnetlink': magnetlink,
            'torrentfile': torrentfileurl,
            'template': 'torrent.html'
        })

    # highest seeder count first
    entries.sort(key=itemgetter('seed'), reverse=True)
    return entries

Exemple #10

0

Afficher le fichier

Fichier : kickass.py Projet : NotoriousDev/searx

def response(resp):
    """Parse a kickass result table into torrent results, best-seeded first.

    Every ``<tr>`` of ``table.data`` after the first one becomes a result;
    the content text is passed through ``escape``.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts sorted by descending ``seed``
    """
    table_rows = html.fromstring(resp.text).xpath('//table[@class="data"]//tr')

    # nothing found
    if not table_rows:
        return []

    collected = []
    for row in table_rows[1:]:
        main_link = row.xpath('.//a[@class="cellMainLink"]')[0]
        href = urljoin(url, main_link.attrib['href'])
        title = extract_text(main_link)
        content = escape(extract_text(row.xpath(content_xpath)))

        raw_seed = extract_text(row.xpath('.//td[contains(@class, "green")]'))
        raw_leech = extract_text(row.xpath('.//td[contains(@class, "red")]'))
        raw_size = extract_text(row.xpath('.//td[contains(@class, "nobr")]'))
        raw_files = extract_text(row.xpath('.//td[contains(@class, "center")][2]'))

        seed = convert_str_to_int(raw_seed)
        leech = convert_str_to_int(raw_leech)

        # exactly two tokens are expected: value and unit
        amount, unit = raw_size.split()
        filesize = get_torrent_size(amount, unit)

        # file count: integer when purely numeric, otherwise None
        if raw_files.isdigit():
            files = int(raw_files)
        else:
            files = None

        magnetlink = row.xpath(magnet_xpath)[0].attrib['href']

        # percent-encode the torrent file link, keeping URL punctuation
        torrentfileurl = quote(row.xpath(torrent_xpath)[0].attrib['href'],
                               safe="%/:=&?~#+!$,;'@()*")

        entry = {'url': href,
                 'title': title,
                 'content': content,
                 'seed': seed,
                 'leech': leech,
                 'filesize': filesize,
                 'files': files,
                 'magnetlink': magnetlink,
                 'torrentfile': torrentfileurl,
                 'template': 'torrent.html'}
        collected.append(entry)

    # highest seeder count first
    return sorted(collected, key=itemgetter('seed'), reverse=True)

Exemple #11

0

Afficher le fichier

def response(resp):
    """Convert a JSON search response into torrent result dicts.

    :param resp: response object; its ``text`` attribute is decoded as JSON
        and is expected to hold a list of result objects
    :returns: list of result dicts sorted by descending ``seed``; empty when
        the decoded list is empty or its first entry is named
        "No results returned"
    """
    search_res = loads(resp.text)

    # return empty array if nothing is found; emptiness is checked first so
    # that indexing the first element cannot raise IndexError
    if not search_res or search_res[0]["name"] == "No results returned":
        return []

    results = []

    # the tracker suffix is identical for every magnet link
    tracker_params = "&tr=" + "&tr=".join(trackers)

    # parse results
    for result in search_res:
        link = url + "description.php?id=" + result["id"]
        magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] \
                     + "&dn=" + result["name"] + tracker_params

        params = {
            "url": link,
            "title": result["name"],
            "seed": result["seeders"],
            "leech": result["leechers"],
            "magnetlink": magnetlink,
            "template": "torrent.html"
        }

        # extract and convert creation date; a missing or unusable
        # timestamp leaves 'publishedDate' unset
        try:
            params['publishedDate'] = datetime.fromtimestamp(
                float(result["added"]))
        except (KeyError, TypeError, ValueError, OverflowError, OSError):
            pass

        # let's try to calculate the torrent size; the "size" field is
        # passed on with the unit "B"
        try:
            params['filesize'] = get_torrent_size(result["size"], "B")
        except (KeyError, TypeError, ValueError):
            pass

        # append result
        results.append(params)

    # return results sorted by seeder
    return sorted(results, key=itemgetter("seed"), reverse=True)

Exemple #12

0

Afficher le fichier

def response(resp):
    """Parse a result page into torrent result dicts.

    One result is produced for every node matched by ``xpath_results``.  The
    magnet link is built from the page link's ``href`` with its first five
    and last five characters removed.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts in page order
    """
    results = []
    magnet_template = "magnet:?xt=urn:btih:{}&tr=http://tracker.acgsou.com:2710/announce"

    dom = html.fromstring(resp.text)
    for result in dom.xpath(xpath_results):
        # defaults, reset for every row so that a failed extraction can
        # neither raise NameError nor reuse the previous row's value
        category = ""
        filesize = 0

        try:
            category = extract_text(result.xpath(xpath_category)[0])
        except (IndexError, ValueError):
            pass

        page_a = result.xpath(xpath_title)[0]
        title = extract_text(page_a)
        page_href = page_a.attrib.get('href')
        href = base_url + page_href

        magnet_link = magnet_template.format(page_href[5:-5])

        # the last two characters of the size text are used as the unit;
        # 'filesize' is only overwritten when the conversion succeeds
        try:
            filesize_info = result.xpath(xpath_filesize)[0]
            filesize = get_torrent_size(filesize_info[:-2],
                                        filesize_info[-2:])
        except (IndexError, TypeError, ValueError):
            pass

        # I didn't add download/seed/leech count since as I figured out they are generated randomly everytime
        content = 'Category: "{category}".'.format(category=category)

        results.append({
            'url': href,
            'title': title,
            'content': content,
            'filesize': filesize,
            'magnetlink': magnet_link,
            'template': 'torrent.html'
        })
    return results

Exemple #13

0

Afficher le fichier

Fichier : 1337x.py Projet : MrLpk/searx

def response(resp):
    """Parse a 1337x result page into torrent result dicts.

    One result is produced per ``<tr>`` found in the body of the
    ``table-list`` table.  ``seed`` and ``leech`` are passed on as the
    extracted cell text.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts in page order
    """
    table_rows = html.fromstring(resp.text).xpath(
        '//table[contains(@class, "table-list")]/tbody//tr')

    collected = []
    for row in table_rows:
        # the second anchor of the "name" cell provides both link and title
        link_path = row.xpath('./td[contains(@class, "name")]/a[2]/@href')[0]
        href = urljoin(url, link_path)
        title = extract_text(row.xpath('./td[contains(@class, "name")]/a[2]'))

        seed = extract_text(row.xpath('.//td[contains(@class, "seeds")]'))
        leech = extract_text(row.xpath('.//td[contains(@class, "leeches")]'))

        # size cell text is split into exactly "<value> <unit>"
        size_text = extract_text(
            row.xpath('.//td[contains(@class, "size")]/text()'))
        amount, unit = size_text.split()
        filesize = get_torrent_size(amount, unit)

        entry = {'url': href,
                 'title': title,
                 'seed': seed,
                 'leech': leech,
                 'filesize': filesize,
                 'template': 'torrent.html'}
        collected.append(entry)

    return collected

Exemple #14

0

Afficher le fichier

Fichier : nyaa.py Projet : MrLpk/searx

def response(resp):
    """Parse a nyaa result page into torrent result dicts.

    One result is produced for every node matched by ``xpath_results``.
    Category and download count are folded into the ``content`` string.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts in page order
    """
    results = []

    dom = html.fromstring(resp.text)

    for result in dom.xpath(xpath_results):
        # defaults, reset for every row so that a failed extraction can
        # neither raise NameError nor reuse the previous row's value
        category = ""
        filesize = 0
        magnet_link = ""
        torrent_link = ""

        # category in which our torrent belongs
        try:
            category = result.xpath(xpath_category)[0].attrib.get('title')
        except (IndexError, AttributeError):
            pass

        # torrent title
        page_a = result.xpath(xpath_title)[0]
        title = extract_text(page_a)

        # link to the page
        href = base_url + page_a.attrib.get('href')

        for link in result.xpath(xpath_torrent_links):
            link_href = link.attrib.get('href')
            if link_href is None:
                # anchor without a target, nothing to classify
                continue
            if 'magnet' in link_href:
                # link to the magnet
                magnet_link = link_href
            else:
                # link to the torrent file
                torrent_link = link_href

        # seed count
        seed = int_or_zero(result.xpath(xpath_seeds))

        # leech count
        leech = int_or_zero(result.xpath(xpath_leeches))

        # torrent downloads count
        downloads = int_or_zero(result.xpath(xpath_downloads))

        # let's try to calculate the torrent size; separate names keep
        # 'filesize' at its default when the conversion fails halfway
        try:
            filesize_info = result.xpath(xpath_filesize)[0]
            size_value, size_unit = filesize_info.split()
            filesize = get_torrent_size(size_value, size_unit)
        except (IndexError, AttributeError, ValueError):
            pass

        # content string contains all information not included into template
        content = 'Category: "{category}". Downloaded {downloads} times.'
        content = content.format(category=category, downloads=downloads)

        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'seed': seed,
                        'leech': leech,
                        'filesize': filesize,
                        'torrentfile': torrent_link,
                        'magnetlink': magnet_link,
                        'template': 'torrent.html'})

    return results

Exemple #15

0

Afficher le fichier

Fichier : tokyotoshokan.py Projet : 0xn3xus/neovo

def response(resp):
    """Parse a tokyotoshokan listing page into torrent result dicts.

    Results occupy two consecutive table rows: the first holds the links,
    the second a ``|``-separated description (size, date, comment) and the
    peer statistics.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts in page order; empty when no rows are
        found or the row count is odd
    """
    results = []

    dom = html.fromstring(resp.text)
    rows = dom.xpath(
        '//table[@class="listing"]//tr[contains(@class, "category_0")]')

    # check if there are no results or page layout was changed so we cannot parse it
    # currently there are two rows for each result, so total count must be even
    if not rows or len(rows) % 2 != 0:
        return []

    # regular expression for parsing torrent size strings; KB is included so
    # that sizes given in that unit are not dropped
    size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|KB|B)', re.IGNORECASE)

    # processing the results, two rows at a time
    for name_row, info_row in zip(rows[0::2], rows[1::2]):
        # parse the first row
        links = name_row.xpath('./td[@class="desc-top"]/a')
        if not links:
            # no link at all: there is nothing to build a result from
            continue

        params = {
            'template': 'torrent.html',
            'url': links[-1].attrib.get('href'),
            'title': extract_text(links[-1])
        }
        # I have not yet seen any torrents without magnet links, but
        # it's better to be prepared to stumble upon one some day
        if len(links) == 2:
            magnet = links[0].attrib.get('href')
            if magnet and magnet.startswith('magnet'):
                # okay, we have a valid magnet link, let's add it to the result
                params['magnetlink'] = magnet

        # no more info in the first row, start parsing the second one
        desc = extract_text(info_row.xpath('./td[@class="desc-bot"]')[0])
        for item in desc.split('|'):
            item = item.strip()
            if item.startswith('Size:'):
                # groups look like ('1.228', 'GB'); an unparsable size is
                # skipped instead of aborting the row
                size_match = size_re.match(item)
                if size_match:
                    try:
                        params['filesize'] = get_torrent_size(
                            *size_match.groups())
                    except (TypeError, ValueError):
                        pass
            elif item.startswith('Date:'):
                try:
                    # Date: 2016-02-21 21:44 UTC
                    params['publishedDate'] = datetime.strptime(
                        item, 'Date: %Y-%m-%d %H:%M UTC')
                except ValueError:
                    pass
            elif item.startswith('Comment:'):
                params['content'] = item

        stats = info_row.xpath('./td[@class="stats"]/span')
        # has the layout not changed yet?
        if len(stats) == 3:
            params['seed'] = int_or_zero(extract_text(stats[0]))
            params['leech'] = int_or_zero(extract_text(stats[1]))

        results.append(params)

    return results

Exemple #16

0

Afficher le fichier

Fichier : tokyotoshokan.py Projet : MrLpk/searx

def response(resp):
    """Parse a tokyotoshokan listing page into torrent result dicts.

    Every result spans two consecutive table rows: a name row with the links
    and an info row with a ``|``-separated description (size, date, comment)
    plus the peer statistics.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts in page order; empty when no rows are
        found or the row count is odd
    """
    results = []

    dom = html.fromstring(resp.text)
    rows = dom.xpath('//table[@class="listing"]//tr[contains(@class, "category_0")]')

    # check if there are no results or page layout was changed so we cannot parse it
    # currently there are two rows for each result, so total count must be even
    if not rows or len(rows) % 2 != 0:
        return []

    # regular expression for parsing torrent size strings; KB is included so
    # that sizes given in that unit are not dropped
    size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|KB|B)', re.IGNORECASE)

    # processing the results, two rows at a time
    for name_row, info_row in zip(rows[0::2], rows[1::2]):
        # parse the first row
        links = name_row.xpath('./td[@class="desc-top"]/a')
        if not links:
            # no link at all: there is nothing to build a result from
            continue

        page_link = links[-1]
        params = {
            'template': 'torrent.html',
            'url': page_link.attrib.get('href'),
            'title': extract_text(page_link)
        }
        # I have not yet seen any torrents without magnet links, but
        # it's better to be prepared to stumble upon one some day
        if len(links) == 2:
            magnet = links[0].attrib.get('href')
            if magnet and magnet.startswith('magnet'):
                # okay, we have a valid magnet link, let's add it to the result
                params['magnetlink'] = magnet

        # no more info in the first row, start parsing the second one
        desc = extract_text(info_row.xpath('./td[@class="desc-bot"]')[0])
        for item in desc.split('|'):
            item = item.strip()
            if item.startswith('Size:'):
                # groups look like ('1.228', 'GB'); an unparsable size is
                # skipped instead of aborting the row
                size_match = size_re.match(item)
                if size_match is None:
                    continue
                try:
                    params['filesize'] = get_torrent_size(*size_match.groups())
                except (TypeError, ValueError):
                    pass
            elif item.startswith('Date:'):
                try:
                    # Date: 2016-02-21 21:44 UTC
                    params['publishedDate'] = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC')
                except ValueError:
                    pass
            elif item.startswith('Comment:'):
                params['content'] = item

        stats = info_row.xpath('./td[@class="stats"]/span')
        # has the layout not changed yet?
        if len(stats) == 3:
            params['seed'] = int_or_zero(extract_text(stats[0]))
            params['leech'] = int_or_zero(extract_text(stats[1]))

        results.append(params)

    return results

Exemple #17

0

Afficher le fichier

Fichier : nyaa.py Projet : Danutu89/NowSearch-Backend

def response(resp):
    """Parse a nyaa result page into torrent result dicts.

    Every node matched by ``xpath_results`` yields one result.  Category and
    download count end up in the ``content`` string.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts in page order
    """
    results = []

    dom = html.fromstring(resp.text)

    for result in dom.xpath(xpath_results):
        # defaults, reset for every row so that a failed extraction can
        # neither raise NameError nor reuse the previous row's value
        category = ""
        filesize = 0
        magnet_link = ""
        torrent_link = ""

        # category in which our torrent belongs
        try:
            category = result.xpath(xpath_category)[0].attrib.get('title')
        except (IndexError, AttributeError):
            pass

        # torrent title
        page_a = result.xpath(xpath_title)[0]
        title = extract_text(page_a)

        # link to the page
        href = base_url + page_a.attrib.get('href')

        for link in result.xpath(xpath_torrent_links):
            link_href = link.attrib.get('href')
            if link_href is None:
                # anchor without a target, nothing to classify
                continue
            if 'magnet' in link_href:
                # link to the magnet
                magnet_link = link_href
            else:
                # link to the torrent file
                torrent_link = link_href

        # seed count
        seed = int_or_zero(result.xpath(xpath_seeds))

        # leech count
        leech = int_or_zero(result.xpath(xpath_leeches))

        # torrent downloads count
        downloads = int_or_zero(result.xpath(xpath_downloads))

        # let's try to calculate the torrent size; separate names keep
        # 'filesize' at its default when the conversion fails halfway
        try:
            filesize_info = result.xpath(xpath_filesize)[0]
            size_value, size_unit = filesize_info.split()
            filesize = get_torrent_size(size_value, size_unit)
        except (IndexError, AttributeError, ValueError):
            pass

        # content string contains all information not included into template
        content = 'Category: "{category}". Downloaded {downloads} times.'
        content = content.format(category=category, downloads=downloads)

        results.append({
            'url': href,
            'title': title,
            'content': content,
            'seed': seed,
            'leech': leech,
            'filesize': filesize,
            'torrentfile': torrent_link,
            'magnetlink': magnet_link,
            'template': 'torrent.html'
        })

    return results

Exemple #18

0

Afficher le fichier

Fichier : yggtorrent.py Projet : zlsdzh001/searx

def response(resp):
    """Parse a yggtorrent result page into torrent results, best-seeded first.

    Each table row yields a result with ``url``, ``title``, ``seed``,
    ``leech`` and ``template``; ``filesize`` and ``publishedDate`` are added
    only when they can be extracted.

    :param resp: response object; its ``text`` attribute is parsed as HTML
    :returns: list of result dicts sorted by descending ``seed``
    """
    results = []
    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//section[@id="#torrents"]/div/table/tbody/tr')

    # return empty array if nothing is found
    if not search_res:
        return []

    # French unit suffixes (lower-cased) mapped to the unit names handed to
    # get_torrent_size; built once instead of once per row
    multiplier_french_to_english = {
        'to': 'TiB',
        'go': 'GiB',
        'mo': 'MiB',
        'ko': 'KiB'
    }

    # parse results
    for result in search_res:
        link = result.xpath('.//a[@id="torrent_name"]')[0]
        href = link.attrib.get('href')
        title = extract_text(link)
        seed = result.xpath('.//td[8]/text()')[0]
        leech = result.xpath('.//td[9]/text()')[0]

        # convert seed and leech to int if possible, otherwise use 0
        seed = int(seed) if seed.isdigit() else 0
        leech = int(leech) if leech.isdigit() else 0

        params = {
            'url': href,
            'title': title,
            'seed': seed,
            'leech': leech,
            'template': 'torrent.html'
        }

        # let's try to calculate the torrent size; the last two characters
        # of the cell text are the unit, an unknown unit leaves 'filesize'
        # unset
        try:
            filesize_info = result.xpath('.//td[6]/text()')[0]
            unit = multiplier_french_to_english[filesize_info[-2:].lower()]
            params['filesize'] = get_torrent_size(filesize_info[:-2], unit)
        except (IndexError, KeyError, ValueError):
            pass

        # extract and convert creation date; an unusable timestamp leaves
        # 'publishedDate' unset
        try:
            date_ts = result.xpath('.//td[5]/div/text()')[0]
            params['publishedDate'] = datetime.fromtimestamp(float(date_ts))
        except (IndexError, ValueError, OverflowError, OSError):
            pass

        # append result
        results.append(params)

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)