Beispiel #1
0
def response(resp):
    """Parse the engine response into a list of image results.

    resp: requests response object
    """
    results = []

    dom = html.fromstring(resp.text)
    for res in dom.xpath('//div[@class="List-item MainListing"]'):
        # BUG FIX: '//a' is evaluated from the *document root* in lxml, so
        # every iteration picked the first <a> of the whole page; './/a'
        # scopes the search to the current result element.
        link = res.xpath('.//a')[0]

        url = urljoin(base_url, link.attrib.get('href'))
        title = extract_text(link)

        thumbnail_src = urljoin(base_url, res.xpath('.//img')[0].attrib['src'])
        # TODO: get image with higher resolution
        img_src = thumbnail_src

        # append result
        results.append({
            'url': url,
            'title': title,
            'img_src': img_src,
            'content': '',
            'thumbnail_src': thumbnail_src,
            'template': 'images.html'
        })

    # return results
    return results
Beispiel #2
0
def response(resp):
    """Parse image results by splitting the raw HTML on anchor tags.

    Instead of parsing the whole document, the response text is split on
    '<a' / '</a>' markers and each reassembled anchor fragment is parsed
    as its own mini-document.

    resp: requests response object
    """
    results = []

    # get links from result-text; the capture group keeps the delimiters
    # ('<a' and '</a>') in the split output
    regex = re.compile('(</a>|<a)')
    results_parts = re.split(regex, resp.text)

    cur_element = ''

    # iterate over link parts
    for result_part in results_parts:
        # processed start and end of link
        if result_part == '<a':
            # opening delimiter: start collecting a new anchor fragment
            cur_element = result_part
            continue
        elif result_part != '</a>':
            # text between the delimiters: keep accumulating
            cur_element += result_part
            continue

        # result_part == '</a>': the fragment is complete
        cur_element += result_part

        # fix xml-error (presumably self-closing a trailing <img> so lxml
        # accepts the fragment — confirm against live markup)
        cur_element = cur_element.replace('"></a>', '"/></a>')

        dom = html.fromstring(cur_element)
        link = dom.xpath('//a')[0]

        url = urljoin(base_url, link.attrib.get('href'))
        title = link.attrib.get('title', '')

        thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
        # TODO: get image with higher resolution
        img_src = thumbnail_src

        # check if url is showing to a photo
        if '/photo/' not in url:
            continue

        # append result
        results.append({'url': url,
                        'title': title,
                        'img_src': img_src,
                        'content': '',
                        'thumbnail_src': thumbnail_src,
                        'template': 'images.html'})

    # return results
    return results
Beispiel #3
0
def response(resp):
    """Parse log entries (h4 date heading + following sibling blocks).

    resp: requests response object
    """
    results = []

    dom = html.fromstring(resp.text)
    try:
        logdiv = dom.xpath('//*[@id="log"]//*[contains(concat(" ",normalize-space(@class)," ")," log-messages ")]')[0]

        logger.debug(len(logdiv.xpath(".//h4"))+1)
        # XPath positions are 1-based, hence range(1, count + 1)
        for i in range(1, len(logdiv.xpath(".//h4")) + 1):
            date = logdiv.xpath(".//h4[%s]" % i)[0].text
            # collect all sibling blocks up to the next h4 heading
            content = []
            for item in logdiv.xpath(".//h4[%s]" % i)[0].itersiblings():
                if item.tag != 'h4':
                    content.append(item.text_content().strip())
                else:
                    break

            results.append({'url': urljoin(base_url, logdiv.xpath(".//h4[%s]/following-sibling::div/a" % i)[0].get('href')),
                            'title': date.strip(),
                            'content': "|".join(content).strip()})
    # narrowed from a bare 'except:' (which also swallowed SystemExit /
    # KeyboardInterrupt); parsing stays best-effort
    except Exception:
        logger.debug("not failing silently")

    logger.debug(results)
    return results
Beispiel #4
0
def response(resp):
    """Build url/title/thumbnail/content results from the response DOM."""
    dom = html.fromstring(resp.text)
    results = []

    for result in dom.xpath(results_xpath):
        link = result.xpath(link_xpath)[0]
        href = urljoin(base_url, link.attrib.get('href'))
        # a span (class="rdf-meta element-hidden" property="dc:title") also
        # carries the title in its content property
        title = escape(extract_text(link))

        thumbnail = None
        thumbnail_tags = result.xpath(thumbnail_xpath)
        if thumbnail_tags:
            thumbnail = extract_text(thumbnail_tags[0])
            # root-relative thumbnail path -> absolute url
            if thumbnail[0] == '/':
                thumbnail = base_url + thumbnail

        results.append({
            'url': href,
            'title': title,
            'img_src': thumbnail,
            'content': escape(extract_text(result.xpath(content_xpath)))
        })

    return results
Beispiel #5
0
def response(resp):
    """Parse torrent results; short pages need the alternative XPath set."""
    dom = html.fromstring(resp.text)

    torrent_links = dom.xpath(torrent_xpath)
    if torrent_links:
        seeds = dom.xpath(seeds_xpath)
        peers = dom.xpath(peers_xpath)
        titles = dom.xpath(title_xpath)
        sizes = dom.xpath(size_xpath)
        ages = dom.xpath(age_xpath)
    else:
        # pages with under ~5 results use a different markup
        torrent_links = dom.xpath(alternative_torrent_xpath)
        seeds = dom.xpath(alternative_seeds_xpath)
        peers = dom.xpath(alternative_peers_xpath)
        titles = dom.xpath(alternative_title_xpath)
        sizes = dom.xpath(alternative_size_xpath)
        ages = dom.xpath(alternative_age_xpath)

    # nothing found with either xpath set
    if not torrent_links:
        return []

    results = []
    for idx, link_el in enumerate(torrent_links):
        results.append({'url': urljoin(url, link_el.attrib.get('href')),
                        'title': titles[idx].text_content(),
                        'content': '{}, {}'.format(sizes[idx], ages[idx]),
                        'seed': seeds[idx],
                        'leech': peers[idx],
                        'template': 'torrent.html'})

    # most-seeded torrents first
    return sorted(results, key=itemgetter('seed'), reverse=True)
Beispiel #6
0
def response(resp):
    """Extract torrent results from the table-list rows."""
    results = []
    dom = html.fromstring(resp.text)

    rows = dom.xpath('//table[contains(@class, "table-list")]/tbody//tr')
    for row in rows:
        href = urljoin(
            url,
            row.xpath('./td[contains(@class, "name")]/a[2]/@href')[0])
        title = extract_text(
            row.xpath('./td[contains(@class, "name")]/a[2]'))
        seed = extract_text(row.xpath('.//td[contains(@class, "seeds")]'))
        leech = extract_text(row.xpath('.//td[contains(@class, "leeches")]'))

        # size cell text is "<amount> <unit>"
        size_text = extract_text(
            row.xpath('.//td[contains(@class, "size")]/text()'))
        amount, unit = size_text.split()
        filesize = get_torrent_size(amount, unit)

        results.append({
            'url': href,
            'title': title,
            'seed': seed,
            'leech': leech,
            'filesize': filesize,
            'template': 'torrent.html'
        })

    return results
Beispiel #7
0
def response(resp):
    '''post-response callback

    Parses the result count (best-effort) and the individual result
    sections out of the response DOM.

    resp: requests response object
    '''
    results = []

    dom = html.fromstring(resp.text)

    try:
        number_of_results_string =\
            re.sub('[^0-9]', '',
                   eval_xpath(dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()')[0])

        results.append({'number_of_results': int(number_of_results_string)})

    # narrowed from a bare 'except:' (which also swallowed SystemExit /
    # KeyboardInterrupt); the result count remains optional
    except Exception:
        logger.debug("Couldn't read number of results.")

    for result in eval_xpath(dom, '//section[not(contains(@class, "essay"))]'):
        try:
            url = eval_xpath(result, './/h2/a')[0].get('href')
            url = urljoin(base_url, url)
            title = eval_xpath(result, 'string(.//h2/a)').strip()
            content = extract_text(eval_xpath(result, './/p'))
            # append result
            results.append({'url': url, 'title': title, 'content': content})
        # narrowed from a bare 'except:'; skip malformed sections but log them
        except Exception:
            logger.debug('result parse error in:\n%s',
                         etree.tostring(result, pretty_print=True))
            continue

    return results
Beispiel #8
0
def response(resp):
    """Parse torrent rows from the search result table.

    resp: requests response object
    """
    results = []

    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//div[@id="search_res"]/table/tr')

    # return empty array if nothing is found
    if not search_res:
        return []

    # parse results
    for result in search_res:
        link = result.xpath('.//td[@class="torrent_name"]//a')[0]
        href = urljoin(url, link.attrib.get('href'))
        title = extract_text(link)
        content = extract_text(result.xpath('.//pre[@class="snippet"]')[0])
        content = "<br />".join(content.split("\n"))

        # evaluate the attr_val xpath once instead of four times
        attr_vals = result.xpath('.//span[@class="attr_val"]/text()')
        size_parts = attr_vals[0].split()
        filesize = size_parts[0]
        filesize_multiplier = size_parts[1]
        files = attr_vals[1]
        seed = attr_vals[2]

        # convert seed to int if possible
        if seed.isdigit():
            seed = int(seed)
        else:
            seed = 0

        # leech count is not published by this engine
        leech = 0

        # convert filesize to byte if possible
        filesize = get_torrent_size(filesize, filesize_multiplier)

        # convert files to int if possible
        if files.isdigit():
            files = int(files)
        else:
            files = None

        magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href']

        # append result
        results.append({
            'url': href,
            'title': title,
            'content': content,
            'seed': seed,
            'leech': leech,
            'filesize': filesize,
            'files': files,
            'magnetlink': magnetlink,
            'template': 'torrent.html'
        })

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)
Beispiel #9
0
def response(resp):
    """Parse x-item torrent cells; seeds/peers are not published ('N/A')."""
    dom = html.fromstring(resp.text)
    cells = dom.xpath('.//td[@class="x-item"]')

    if not cells:
        return list()

    results = list()
    for cell in cells:
        # "tail" holds whitespace-separated metadata incl. size and unit
        tail_words = extract_text(cell.xpath('.//div[@class="tail"]')).split()

        results.append({
            'url': urljoin(URL, cell.xpath('.//a[@title]/@href')[0]),
            'title': extract_text(cell.xpath('.//a[@title]')),
            'content': extract_text(cell.xpath('.//div[@class="files"]')),
            'filesize': get_torrent_size(tail_words[FILESIZE],
                                         tail_words[FILESIZE_MULTIPLIER]),
            'magnetlink': cell.xpath(
                './/div[@class="tail"]//a[@class="title"]/@href')[0],
            'seed': 'N/A',
            'leech': 'N/A',
            'template': 'torrent.html'
        })

    return results
Beispiel #10
0
def response(resp):
    """Build torrent results from x-item cells (no seed/peer info here)."""
    dom = html.fromstring(resp.text)
    items = dom.xpath('.//td[@class="x-item"]')

    if not items:
        return list()

    results = list()
    for item in items:
        target = urljoin(URL, item.xpath('.//a[@title]/@href')[0])
        name = extract_text(item.xpath('.//a[@title]'))
        file_listing = extract_text(item.xpath('.//div[@class="files"]'))
        # the "tail" div contains whitespace-separated size fields
        tail = extract_text(item.xpath('.//div[@class="tail"]')).split()
        size_in_bytes = get_torrent_size(tail[FILESIZE], tail[FILESIZE_MULTIPLIER])
        magnet = item.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0]

        results.append({'url': target,
                        'title': name,
                        'content': file_listing,
                        'filesize': size_in_bytes,
                        'magnetlink': magnet,
                        'seed': 'N/A',
                        'leech': 'N/A',
                        'template': 'torrent.html'})

    return results
Beispiel #11
0
def response(resp):
    """Collect url/title/thumbnail/content entries from the result DOM."""
    results = []
    dom = html.fromstring(resp.text)

    for node in dom.xpath(results_xpath):
        anchor = node.xpath(link_xpath)[0]
        entry = {
            'url': urljoin(base_url, anchor.attrib.get('href')),
            # a hidden span with property="dc:title" also carries the title
            'title': escape(extract_text(anchor)),
            'content': escape(extract_text(node.xpath(content_xpath))),
            'img_src': None
        }

        thumbs = node.xpath(thumbnail_xpath)
        if len(thumbs) > 0:
            thumb = extract_text(thumbs[0])
            # make root-relative thumbnail paths absolute
            if thumb[0] == '/':
                thumb = base_url + thumb
            entry['img_src'] = thumb

        results.append(entry)

    return results
Beispiel #12
0
def preferences():
    """Render the preferences page and save user preferences on POST."""

    # save preferences
    if request.method == 'POST':
        resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index'))))
        try:
            request.preferences.parse_form(request.form)
        except ValidationException:
            request.errors.append(gettext('Invalid settings, please edit your preferences'))
            return resp
        return request.preferences.save(resp)

    # render preferences
    # (removed unused local 'lang'; the selected language is read from
    # request.preferences inside _is_selected_language_supported)
    image_proxy = request.preferences.get_value('image_proxy')
    disabled_engines = request.preferences.engines.get_disabled()
    allowed_plugins = request.preferences.plugins.get_enabled()

    # stats for preferences page
    stats = {}

    for c in categories:
        for e in categories[c]:
            stats[e.name] = {'time': None,
                             'warn_timeout': False,
                             'warn_time': False}
            if e.timeout > settings['outgoing']['request_timeout']:
                stats[e.name]['warn_timeout'] = True
            stats[e.name]['supports_selected_language'] = _is_selected_language_supported(e, request.preferences)

    # get first element [0], the engine time,
    # and then the second element [1] : the time (the first one is the label)
    for engine_stat in get_engines_stats()[0][1]:
        stats[engine_stat.get('name')]['time'] = round(engine_stat.get('avg'), 3)
        if engine_stat.get('avg') > settings['outgoing']['request_timeout']:
            stats[engine_stat.get('name')]['warn_time'] = True
    # end of stats

    return render('preferences.html',
                  locales=settings['locales'],
                  current_locale=get_locale(),
                  image_proxy=image_proxy,
                  engines_by_category=categories,
                  stats=stats,
                  answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
                  disabled_engines=disabled_engines,
                  autocomplete_backends=autocomplete_backends,
                  shortcuts={y: x for x, y in engine_shortcuts.items()},
                  themes=themes,
                  plugins=plugins,
                  doi_resolvers=settings['doi_resolvers'],
                  current_doi_resolver=get_doi_resolver(request.args, request.preferences.get_value('doi_resolver')),
                  allowed_plugins=allowed_plugins,
                  theme=get_current_theme_name(),
                  preferences_url_params=request.preferences.get_as_url_params(),
                  base_url=get_base_url(),
                  preferences=True)
Beispiel #13
0
def response(resp):
    """Parse torrent results (title, excerpt, size, file count, magnet).

    resp: requests response object
    """
    results = []

    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//div[@class="one_result"]')

    # return empty array if nothing is found
    if not search_res:
        return []

    # parse results
    for result in search_res:
        link = result.xpath('.//div[@class="torrent_name"]//a')[0]
        href = urljoin(url, link.attrib.get('href'))
        title = extract_text(link)

        excerpt = result.xpath('.//div[@class="torrent_excerpt"]')[0]
        content = html.tostring(excerpt,
                                encoding='unicode',
                                method='text',
                                with_tail=False)
        # it is better to emit <br/> instead of |, but html tags are verboten
        content = content.strip().replace('\n', ' | ')
        content = ' '.join(content.split())

        # evaluate the torrent_size xpath once instead of twice
        size_parts = result.xpath(
            './/span[@class="torrent_size"]/text()')[0].split()
        filesize = size_parts[0]
        filesize_multiplier = size_parts[1]
        files = (result.xpath('.//span[@class="torrent_files"]/text()')
                 or ['1'])[0]

        # convert filesize to byte if possible
        filesize = get_torrent_size(filesize, filesize_multiplier)

        # convert files to int if possible (narrowed from a bare 'except:')
        try:
            files = int(files)
        except (TypeError, ValueError):
            files = None

        magnetlink = result.xpath(
            './/div[@class="torrent_magnet"]//a')[0].attrib['href']

        # append result
        results.append({
            'url': href,
            'title': title,
            'content': content,
            'filesize': filesize,
            'files': files,
            'magnetlink': magnetlink,
            'template': 'torrent.html'
        })

    return results
Beispiel #14
0
def preferences():
    """Render the preferences page and save user preferences on POST."""

    # save preferences
    if request.method == 'POST':
        resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index'))))
        try:
            request.preferences.parse_form(request.form)
        except ValidationException:
            request.errors.append(gettext('Invalid settings, please edit your preferences'))
            return resp
        return request.preferences.save(resp)

    # render preferences
    # (removed unused local 'lang'; the selected language is read from
    # request.preferences inside _is_selected_language_supported)
    image_proxy = request.preferences.get_value('image_proxy')
    disabled_engines = request.preferences.engines.get_disabled()
    allowed_plugins = request.preferences.plugins.get_enabled()

    # stats for preferences page
    stats = {}

    for c in categories:
        for e in categories[c]:
            stats[e.name] = {'time': None,
                             'warn_timeout': False,
                             'warn_time': False}
            if e.timeout > settings['outgoing']['request_timeout']:
                stats[e.name]['warn_timeout'] = True
            stats[e.name]['supports_selected_language'] = _is_selected_language_supported(e, request.preferences)

    # get first element [0], the engine time,
    # and then the second element [1] : the time (the first one is the label)
    for engine_stat in get_engines_stats()[0][1]:
        stats[engine_stat.get('name')]['time'] = round(engine_stat.get('avg'), 3)
        if engine_stat.get('avg') > settings['outgoing']['request_timeout']:
            stats[engine_stat.get('name')]['warn_time'] = True
    # end of stats

    return render('preferences.html',
                  locales=settings['locales'],
                  current_locale=get_locale(),
                  image_proxy=image_proxy,
                  engines_by_category=categories,
                  stats=stats,
                  answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
                  disabled_engines=disabled_engines,
                  autocomplete_backends=autocomplete_backends,
                  shortcuts={y: x for x, y in engine_shortcuts.items()},
                  themes=themes,
                  plugins=plugins,
                  doi_resolvers=settings['doi_resolvers'],
                  current_doi_resolver=get_doi_resolver(request.args, request.preferences.get_value('doi_resolver')),
                  allowed_plugins=allowed_plugins,
                  theme=get_current_theme_name(),
                  preferences_url_params=request.preferences.get_as_url_params(),
                  base_url=get_base_url(),
                  preferences=True)
Beispiel #15
0
def response(resp):
    """Parse torrent rows from the search result table.

    resp: requests response object
    """
    results = []

    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//div[@id="search_res"]/table/tr')

    # return empty array if nothing is found
    if not search_res:
        return []

    # parse results
    for result in search_res:
        link = result.xpath('.//td[@class="torrent_name"]//a')[0]
        href = urljoin(url, link.attrib.get('href'))
        title = extract_text(link)
        content = extract_text(result.xpath('.//pre[@class="snippet"]')[0])
        content = "<br />".join(content.split("\n"))

        # evaluate the attr_val xpath once instead of four times
        attr_vals = result.xpath('.//span[@class="attr_val"]/text()')
        size_parts = attr_vals[0].split()
        filesize = size_parts[0]
        filesize_multiplier = size_parts[1]
        files = attr_vals[1]
        seed = attr_vals[2]

        # convert seed to int if possible
        if seed.isdigit():
            seed = int(seed)
        else:
            seed = 0

        # leech count is not published by this engine
        leech = 0

        # convert filesize to byte if possible
        filesize = get_torrent_size(filesize, filesize_multiplier)

        # convert files to int if possible
        if files.isdigit():
            files = int(files)
        else:
            files = None

        magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href']

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'seed': seed,
                        'leech': leech,
                        'filesize': filesize,
                        'files': files,
                        'magnetlink': magnetlink,
                        'template': 'torrent.html'})

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)
Beispiel #16
0
def response(resp):
    """Turn the result data-table into torrent result dicts."""
    dom = html.fromstring(resp.text)
    rows = dom.xpath('//table[@class="data"]//tr')

    # nothing found
    if not rows:
        return []

    results = []
    # the first row is the table header
    for row in rows[1:]:
        main_link = row.xpath('.//a[@class="cellMainLink"]')[0]
        href = urljoin(url, main_link.attrib['href'])
        title = extract_text(main_link)
        content = extract_text(row.xpath(content_xpath))

        seed = convert_str_to_int(
            extract_text(row.xpath('.//td[contains(@class, "green")]')))
        leech = convert_str_to_int(
            extract_text(row.xpath('.//td[contains(@class, "red")]')))

        # size cell text is "<value> <unit>"
        size_value, size_unit = extract_text(
            row.xpath('.//td[contains(@class, "nobr")]')).split()
        filesize = get_torrent_size(size_value, size_unit)

        files = extract_text(
            row.xpath('.//td[contains(@class, "center")][2]'))
        files = int(files) if files.isdigit() else None

        magnetlink = row.xpath(magnet_xpath)[0].attrib['href']
        torrentfileurl = quote(row.xpath(torrent_xpath)[0].attrib['href'],
                               safe="%/:=&?~#+!$,;'@()*")

        results.append({
            'url': href,
            'title': title,
            'content': content,
            'seed': seed,
            'leech': leech,
            'filesize': filesize,
            'files': files,
            'magnetlink': magnetlink,
            'torrentfile': torrentfileurl,
            'template': 'torrent.html'
        })

    # most-seeded torrents first
    return sorted(results, key=itemgetter('seed'), reverse=True)
Beispiel #17
0
def extract_url(xpath_results, search_url):
    """Extract a URL from xpath results and resolve it against search_url.

    Raises an Exception when the resultset is empty.
    """
    if xpath_results == []:
        raise Exception('Empty url resultset')

    url = extract_text(xpath_results)

    if url.startswith('//'):
        # protocol-relative url: reuse the search page's scheme, http default
        scheme = urlparse(search_url).scheme or 'http'
        url = u'{0}:{1}'.format(scheme, url)
    elif url.startswith('/'):
        # root-relative url: resolve against the engine host
        url = urljoin(search_url, url)

    # any other relative form that slipped through
    if '://' not in url:
        url = urljoin(search_url, url)

    return normalize_url(url)
Beispiel #18
0
def response(resp):
    """Parse the searchResult table into torrent results."""
    dom = html.fromstring(resp.text)
    rows = dom.xpath('//table[@id="searchResult"]//tr')

    # no hits at all
    if not rows:
        return []

    results = []
    # skip the header row
    for row in rows[1:]:
        detail_link = row.xpath('.//div[@class="detName"]//a')[0]
        href = urljoin(url, detail_link.attrib.get('href'))
        title = extract_text(detail_link)
        content = extract_text(row.xpath(content_xpath))

        # the two right-aligned cells hold the seed and leech counts
        seed, leech = row.xpath('.//td[@align="right"]/text()')[:2]
        seed = int(seed) if seed.isdigit() else 0
        leech = int(leech) if leech.isdigit() else 0

        magnetlink = row.xpath(magnet_xpath)[0]
        torrentfile_links = row.xpath(torrent_xpath)
        if torrentfile_links:
            torrentfile_link = torrentfile_links[0].attrib.get('href')
        else:
            torrentfile_link = None

        results.append({
            'url': href,
            'title': title,
            'content': content,
            'seed': seed,
            'leech': leech,
            'magnetlink': magnetlink.attrib.get('href'),
            'torrentfile': torrentfile_link,
            'template': 'torrent.html'
        })

    # most-seeded torrents first
    return sorted(results, key=itemgetter('seed'), reverse=True)
Beispiel #19
0
def response(resp):
    """Extract torrent results from the data table, sorted by seed count."""
    dom = html.fromstring(resp.text)
    table_rows = dom.xpath('//table[@class="data"]//tr')

    # bail out early when the table is missing/empty
    if not table_rows:
        return []

    results = []
    # skip the header row
    for row in table_rows[1:]:
        anchor = row.xpath('.//a[@class="cellMainLink"]')[0]
        entry = {'template': 'torrent.html'}
        entry['url'] = urljoin(url, anchor.attrib['href'])
        entry['title'] = extract_text(anchor)
        entry['content'] = extract_text(row.xpath(content_xpath))
        entry['seed'] = convert_str_to_int(
            extract_text(row.xpath('.//td[contains(@class, "green")]')))
        entry['leech'] = convert_str_to_int(
            extract_text(row.xpath('.//td[contains(@class, "red")]')))

        # "<value> <unit>" -> bytes
        size_text = extract_text(row.xpath('.//td[contains(@class, "nobr")]'))
        value, unit = size_text.split()
        entry['filesize'] = get_torrent_size(value, unit)

        files_text = extract_text(row.xpath('.//td[contains(@class, "center")][2]'))
        entry['files'] = int(files_text) if files_text.isdigit() else None

        entry['magnetlink'] = row.xpath(magnet_xpath)[0].attrib['href']
        entry['torrentfile'] = quote(row.xpath(torrent_xpath)[0].attrib['href'],
                                     safe="%/:=&?~#+!$,;'@()*")

        results.append(entry)

    # highest seed count first
    return sorted(results, key=itemgetter('seed'), reverse=True)
Beispiel #20
0
def response(resp):
    """Parse the searchResult table rows into torrent results."""
    dom = html.fromstring(resp.text)
    table_rows = dom.xpath('//table[@id="searchResult"]//tr')

    # nothing to parse
    if not table_rows:
        return []

    results = []
    # the first row is the header
    for row in table_rows[1:]:
        name_link = row.xpath('.//div[@class="detName"]//a')[0]

        # the two right-aligned cells are seed and leech counts
        seed_text, leech_text = row.xpath('.//td[@align="right"]/text()')[:2]
        seed = int(seed_text) if seed_text.isdigit() else 0
        leech = int(leech_text) if leech_text.isdigit() else 0

        magnet_el = row.xpath(magnet_xpath)[0]
        torrent_els = row.xpath(torrent_xpath)
        torrentfile_link = torrent_els[0].attrib.get('href') if torrent_els else None

        results.append({'url': urljoin(url, name_link.attrib.get('href')),
                        'title': extract_text(name_link),
                        'content': extract_text(row.xpath(content_xpath)),
                        'seed': seed,
                        'leech': leech,
                        'magnetlink': magnet_el.attrib.get('href'),
                        'torrentfile': torrentfile_link,
                        'template': 'torrent.html'})

    # highest seed count first
    return sorted(results, key=itemgetter('seed'), reverse=True)
Beispiel #21
0
def extract_url(xpath_results, search_url):
    """Extract and normalize a URL from xpath results.

    Raises an Exception when the resultset is empty.
    """
    if xpath_results == []:
        raise Exception('Empty url resultset')
    url = extract_text(xpath_results)

    if url.startswith('//'):
        # add http or https to this kind of url //example.com/
        # BUG FIX: plain 'scheme + url' produced 'http//example.com'
        # (missing colon); join with ':' like the other extract_url
        # variants in this project, with 'http' fallback for schemeless
        # search urls
        parsed_search_url = urlparse(search_url)
        url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
    elif url.startswith('/'):
        # fix relative url to the search engine
        url = urljoin(search_url, url)

    # normalize url
    url = normalize_url(url)

    return url
Beispiel #22
0
def response(resp):
    """Generic xpath-driven extraction of url/title/content results."""
    dom = html.fromstring(resp.text)

    results = []
    for node in dom.xpath(results_xpath):
        anchor = node.xpath(link_xpath)[0]
        results.append({
            'url': urljoin(url, anchor.attrib.get('href')),
            'title': extract_text(anchor),
            'content': extract_text(node.xpath(content_xpath))
        })

    return results
Beispiel #23
0
def extract_url(xpath_results, search_url):
    """Extract and normalize a URL from xpath results.

    Raises an Exception when the resultset is empty.
    """
    if xpath_results == []:
        raise Exception('Empty url resultset')
    url = extract_text(xpath_results)

    if url.startswith('//'):
        # add http or https to this kind of url //example.com/
        # fall back to 'http' when search_url itself has no scheme
        # (consistent with the other extract_url variant; previously an
        # empty scheme produced an invalid ':...' url)
        parsed_search_url = urlparse(search_url)
        url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
    elif url.startswith('/'):
        # fix relative url to the search engine
        url = urljoin(search_url, url)

    # normalize url
    url = normalize_url(url)

    return url
Beispiel #24
0
def response(resp):
    """Parse results; the base URL depends on the request language."""
    # the host differs per language, so resolve the base url first
    language = locale_to_lang_code(resp.search_params['language'])
    base_url = get_lang_urls(language)['base']

    dom = html.fromstring(resp.text)

    results = []
    for node in dom.xpath(xpath_results):
        anchor = node.xpath(xpath_link)[0]
        results.append({
            'url': urljoin(base_url, anchor.attrib.get('href')),
            'title': extract_text(anchor)
        })

    return results
Beispiel #25
0
def fetch_firefox_versions():
    """Fetch and return released Firefox versions, newest first.

    Scrapes the release listing at the module-level ``URL`` and keeps only
    links under ``RELEASE_PATH`` whose version string matches
    ``NORMAL_REGEX``.

    Returns:
        list of LooseVersion, sorted in descending order.

    Raises:
        Exception: if the HTTP request does not return status 200.
    """
    resp = requests.get(URL, timeout=2.0)
    if resp.status_code != 200:
        # str() is required here: the original concatenated the int status
        # code to a str, which raised TypeError instead of this Exception.
        raise Exception("Error fetching firefox versions, HTTP code " + str(resp.status_code))

    dom = html.fromstring(resp.text)
    versions = []

    for link in dom.xpath('//a/@href'):
        path = urlparse(urljoin(URL, link)).path
        if path.startswith(RELEASE_PATH):
            # strip the release-path prefix and the trailing slash
            version = path[len(RELEASE_PATH):-1]
            if NORMAL_REGEX.match(version):
                versions.append(LooseVersion(version))

    # idiomatic in-place sort instead of the unbound list.sort(versions, ...)
    versions.sort(reverse=True)
    return versions
Beispiel #26
0
def response(resp):
    """Parse results, using the base URL for the request's language."""
    # resolve the language-specific base URL for this request
    language = locale_to_lang_code(resp.search_params['language'])
    base = get_lang_urls(language)['base']

    dom = html.fromstring(resp.text)

    return [
        {
            'url': urljoin(base, anchor.attrib.get('href')),
            'title': extract_text(anchor),
        }
        for anchor in (node.xpath(xpath_link)[0] for node in dom.xpath(xpath_results))
    ]
def fetch_firefox_versions():
    """Fetch released Firefox versions from ``URL``, newest first.

    Returns:
        list of LooseVersion sorted in descending order.

    Raises:
        Exception: if the HTTP request does not return status 200.
    """
    resp = requests.get(URL, timeout=2.0)
    if resp.status_code != 200:
        # str() fixes the original str + int concatenation, which raised
        # TypeError instead of the intended Exception.
        raise Exception("Error fetching firefox versions, HTTP code " +
                        str(resp.status_code))

    dom = html.fromstring(resp.text)
    versions = []

    for link in dom.xpath('//a/@href'):
        path = urlparse(urljoin(URL, link)).path
        if path.startswith(RELEASE_PATH):
            # strip the release-path prefix and the trailing slash
            version = path[len(RELEASE_PATH):-1]
            if NORMAL_REGEX.match(version):
                versions.append(LooseVersion(version))

    # idiomatic in-place sort instead of the unbound list.sort(versions, ...)
    versions.sort(reverse=True)
    return versions
def response(resp):
    """Parse a reddit JSON listing into image and text results.

    Posts whose ``thumbnail`` field is a valid URL (has netloc and path)
    become image results; all other posts become text results with a
    selftext snippet truncated to 500 characters. Image results are
    returned before text results.
    """
    search_results = json.loads(resp.text)

    # no 'data' key means no results
    if 'data' not in search_results:
        return []

    img_results = []
    text_results = []

    for post in search_results['data'].get('children', []):
        data = post['data']

        entry = {
            'url': urljoin(base_url, data['permalink']),
            'title': data['title'],
        }

        thumbnail = data['thumbnail']
        parsed = urlparse(thumbnail)
        # a usable thumbnail URL carries both a netloc and a path
        if parsed.netloc != '' and parsed.path != '':
            entry['img_src'] = data['url']
            entry['thumbnail_src'] = thumbnail
            entry['template'] = 'images.html'
            img_results.append(entry)
        else:
            snippet = data['selftext']
            if len(snippet) > 500:
                snippet = snippet[:500] + '...'
            entry['content'] = snippet
            entry['publishedDate'] = datetime.fromtimestamp(data['created_utc'])
            text_results.append(entry)

    # show images first and text results second
    return img_results + text_results
Beispiel #29
0
def response(resp):
    """Parse torrent results: rows from the HTML, metadata from inline JSON.

    Row elements (``torrent_xpath``) are zipped with the entries of the
    JSON payload embedded in the page's script tag (``script_xpath``).

    Returns:
        list of torrent result dicts sorted by seeder count, descending;
        an empty list when the inline JSON cannot be located or parsed.
    """
    results = []
    dom = html.fromstring(resp.text)
    result_rows = dom.xpath(torrent_xpath)

    try:
        script_element = dom.xpath(script_xpath)[0]
        # the JSON payload starts at the first '{' inside the script tag
        json_string = script_element.text[script_element.text.find('{'):]
        torrents_json = loads(json_string)
    except Exception:
        # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; malformed pages still yield no results
        return []

    # parse results
    for torrent_row, torrent_json in zip(result_rows, torrents_json['data']['list']):
        title = torrent_json['name']
        seed = int(torrent_json['seeds'])
        leech = int(torrent_json['peers'])
        size = int(torrent_json['size'])
        torrent_hash = torrent_json['hash']

        torrentfile = torrent_file_url.format(torrent_hash=torrent_hash)
        magnetlink = 'magnet:?xt=urn:btih:{}'.format(torrent_hash)

        age = extract_text(torrent_row.xpath(age_xpath))
        link = torrent_row.xpath(link_xpath)[0]

        href = urljoin(url, link)

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': age,
                        'seed': seed,
                        'leech': leech,
                        'filesize': size,
                        'torrentfile': torrentfile,
                        'magnetlink': magnetlink,
                        'template': 'torrent.html'})

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)
Beispiel #30
0
def response(resp):
    """Parse tweets from the response DOM into result dicts.

    Tweets missing any mandatory part (link, content, avatar) are
    skipped; a publication date is attached only when the timestamp
    element is present.
    """
    results = []
    dom = html.fromstring(resp.text)

    for tweet in dom.xpath(results_xpath):
        try:
            link = tweet.xpath(link_xpath)[0]
            content = extract_text(tweet.xpath(content_xpath)[0])
            avatar = tweet.xpath(avatar_xpath)[0]
            avatar = avatar.replace('_bigger', '_normal')
        except Exception:
            # skip tweets that lack any of the mandatory parts
            continue

        entry = {
            'url': urljoin(base_url, link.attrib.get('href')),
            'title': extract_text(tweet.xpath(title_xpath)),
            'content': content,
            'img_src': avatar,
        }

        pubdate = tweet.xpath(timestamp_xpath)
        if len(pubdate) > 0:
            ts = float(pubdate[0].attrib.get('data-time'))
            entry['publishedDate'] = datetime.fromtimestamp(ts, None)

        results.append(entry)

    return results
Beispiel #31
0
def response(resp):
    """Parse torrent results from a table-list HTML page."""
    dom = html.fromstring(resp.text)
    results = []

    for row in dom.xpath('//table[contains(@class, "table-list")]/tbody//tr'):
        href = urljoin(url, row.xpath('./td[contains(@class, "name")]/a[2]/@href')[0])
        title = extract_text(row.xpath('./td[contains(@class, "name")]/a[2]'))
        seed = extract_text(row.xpath('.//td[contains(@class, "seeds")]'))
        leech = extract_text(row.xpath('.//td[contains(@class, "leeches")]'))
        # size cell holds "<value> <unit>", e.g. "1.2 GB"
        size_value, size_unit = extract_text(
            row.xpath('.//td[contains(@class, "size")]/text()')).split()

        results.append({
            'url': href,
            'title': title,
            'seed': seed,
            'leech': leech,
            'filesize': get_torrent_size(size_value, size_unit),
            'template': 'torrent.html',
        })

    return results
Beispiel #32
0
def response(resp):
    """Parse tweets into result dicts, with optional publication date.

    A tweet is skipped if its link, content, or avatar cannot be
    extracted.
    """
    results = []
    dom = html.fromstring(resp.text)

    # parse results
    for tweet in dom.xpath(results_xpath):
        try:
            link = tweet.xpath(link_xpath)[0]
            content = extract_text(tweet.xpath(content_xpath)[0])
            img_src = tweet.xpath(avatar_xpath)[0].replace('_bigger', '_normal')
        except Exception:
            continue

        item = {'url': urljoin(base_url, link.attrib.get('href')),
                'title': extract_text(tweet.xpath(title_xpath)),
                'content': content,
                'img_src': img_src}

        pubdate = tweet.xpath(timestamp_xpath)
        if len(pubdate) > 0:
            # attach the publication date only when a timestamp exists
            timestamp = float(pubdate[0].attrib.get('data-time'))
            item['publishedDate'] = datetime.fromtimestamp(timestamp, None)

        results.append(item)

    return results
Beispiel #33
0
def response(resp):
    """Parse translation table rows into results.

    Each data row pairs a source term with its translations; the first
    row (the header) is skipped. Rows that do not contain exactly two
    cells are ignored.
    """
    results = []
    dom = html.fromstring(resp.text)

    for k, result in enumerate(dom.xpath(results_xpath)[1:]):
        try:
            from_result, to_results_raw = result.xpath('./td')
        except ValueError:
            # narrowed from a bare `except:`: only the intended failure
            # (row without exactly two cells) is skipped, and
            # KeyboardInterrupt/SystemExit are no longer swallowed
            continue

        to_results = []
        for to_result in to_results_raw.xpath('./p/a'):
            text = to_result.text_content()
            if text.strip():
                # reuse the already-extracted text instead of calling
                # text_content() a second time
                to_results.append(text)

        results.append({
            'url': urljoin(resp.url, '?%d' % k),
            'title': from_result.text_content(),
            'content': '; '.join(to_results)
        })

    return results
Beispiel #34
0
def response(resp):
    """Parse a JSON photo listing into image results."""
    response_json = loads(resp.text)

    results = []
    for photo in response_json['photos']:
        results.append({
            'url': urljoin(base_url, photo['url']),
            'title': photo['name'],
            # image_url is ordered by resolution: last entry is the largest
            'img_src': photo['image_url'][-1],
            'thumbnail_src': photo['image_url'][0],
            'content': photo['description'] or '',
            'template': 'images.html',
        })

    return results
Beispiel #35
0
def response(resp):
    """Parse translation table rows into results (eval_xpath variant).

    The first matched row (the header) is skipped; rows without exactly
    two cells are ignored.
    """
    results = []
    dom = html.fromstring(resp.text)

    for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]):
        try:
            from_result, to_results_raw = eval_xpath(result, './td')
        except ValueError:
            # narrowed from a bare `except:`: only the intended failure
            # (row without exactly two cells) is skipped, and
            # KeyboardInterrupt/SystemExit are no longer swallowed
            continue

        to_results = []
        for to_result in eval_xpath(to_results_raw, './p/a'):
            text = to_result.text_content()
            if text.strip():
                # reuse the already-extracted text instead of calling
                # text_content() a second time
                to_results.append(text)

        results.append({
            'url': urljoin(resp.url, '?%d' % k),
            'title': from_result.text_content(),
            'content': '; '.join(to_results)
        })

    return results
Beispiel #36
0
def clear_cookies():
    """Delete every cookie sent by the client and redirect to the index page."""
    target = urljoin(settings['server']['base_url'], url_for('index'))
    resp = make_response(redirect(target))
    for cookie_name in request.cookies:
        resp.delete_cookie(cookie_name)
    return resp
Beispiel #37
0
def clear_cookies():
    """Build a redirect to the index page with all client cookies deleted."""
    index_url = urljoin(settings['server']['base_url'], url_for('index'))
    response = make_response(redirect(index_url))
    for name in request.cookies:
        response.delete_cookie(name)
    return response