Beispiel #1
0
    def task_page(self, task, error=None):
        """Handle a fetched project-listing page.

        Re-queues the task when the fetch failed, optionally schedules the
        pagination links that have not been visited yet, and prints every
        project row extracted from the ``items_list`` table.

        :param task: the finished task; ``task.response`` and ``task.html``
            are read here.
        :param error: falsy on success. Any truthy value is treated as a
            failed fetch and the task is yielded again unchanged.
        :returns: a generator of tasks to schedule (the same task on failure,
            new ``page`` tasks for unseen pagination links otherwise).
        """
        # ``error`` is checked before touching ``task.response`` so a failed
        # fetch is retried instead of being parsed (or crashing on a missing
        # response -- presumably the response may be unset then; TODO confirm
        # against the spider framework).
        if error or task.response.status_code != 200:
            yield task
            return

        task.html.make_links_absolute()

        if self.LOOKUP_ALL_PAGES:
            page_urls = task.html.xpath(
                '//div[contains(@class, "pages_list")]/a/@href',
                all=True
            )
            for url in page_urls:
                if url in self.looked_pages:
                    continue
                yield Task(
                    task=task,
                    handler='page',
                    url=url
                )
                # Remember the link so later pages do not schedule it again.
                self.looked_pages.append(url)

        # Every row of the listing table except the first one.
        # NOTE(review): ``x`` presumably scopes the nested fields to the given
        # sub-path and a ``(path, callable)`` pair presumably post-processes
        # the extracted text -- confirm against the helper's definition.
        projects = task.html.structured_xpath(
            '//table[@class="items_list"]/tbody/tr[not(position()=1)]',
            x(
                './td[1]',
                x(
                    './a[1]',
                    name='./text()',
                    url='./@href'
                ),
                time=(
                    './div[1]/noindex[last()]/following-sibling::text()',
                    self.weblancer_time
                )
            ),
            cost='./td[2]/*[1]/text()',
            answers=('./td[3]/text()', int)
        )

        # Single-argument, parenthesised ``print`` produces the same output as
        # the Python 2 print statement and also parses as a function call.
        for project in projects:
            for key, value in project.iteritems():
                print('%-11s: %s' % (key, value))
            print('*' * 20)
    def task_page(self, task, error=None):
        """Handle a fetched page of film mini-cards.

        Re-queues the task when the fetch failed; otherwise extracts every
        ``b-mini-card`` entry from the page and prints its fields.

        :param task: the finished task; ``task.response``, ``task.request``
            and ``task.html`` are read here.
        :param error: falsy on success. Any truthy value is treated as a
            failed fetch and the task is yielded again unchanged.
        :returns: a generator that yields the same task when it has to be
            retried and nothing otherwise.
        """
        # A reported error is handled before ``task.response`` is touched
        # (presumably the response may be unset then -- TODO confirm against
        # the spider framework). It is kept separate from the status check so
        # the "!=200" log message is only written for a real bad status code.
        if error:
            yield task
            return

        if task.response.status_code != 200:
            logger.debug(u'Код возврата неверный (!=200) - повтор задачи')
            yield task
            return

        # Arguments are passed to the logger instead of being %-formatted up
        # front, so the message is only rendered when DEBUG is enabled.
        logger.debug(u'Задача выполнена %s успешно', task.request.url)

        # NOTE(review): ``x`` presumably scopes the nested fields to the given
        # sub-path -- confirm against the helper's definition.
        items = task.html.structured_xpath(
            '//div[@class="b-mini-card"]/div/div',
            x(
                './div[1]',
                kind='./a[1]/text()',
                year='./a[2]/text()',
            ),
            x(
                './div[@class="b-mini-card__body"]',
                x(
                    './h2/a',
                    url='./@href',
                    title='./text()'
                ),
                description='./div[@class="b-mini-card__desc"]/text()',
                tags=x(
                    './div[@class="b-mini-card__tags"]/a',
                    url='./@href',
                    name='./text()'
                )
            ),
            img='./a[1]/img/@src'
        )

        logger.debug(u'Извлечено %d фильмов', len(items))

        # Single-argument, parenthesised ``print`` produces the same output as
        # the Python 2 print statement and also parses as a function call.
        for item in items:
            print('*' * 80)
            for key, value in item.iteritems():
                print('%-11s: %s' % (key, value))
            # Blank separator line after each item.
            print('')
Beispiel #3
0
    def task_lookup(self, task, error=None):
        """Print a chat-log page, or schedule the links of any other page.

        A failed fetch (truthy ``error`` or a non-200 status) yields the task
        back unchanged. A page containing a ``roomtitle`` block is printed:
        first a header line, then one line per ``a.ts`` timestamp anchor. Any
        other page has its links scanned, and each link starting with
        ``LogsScanner.START_URL`` is yielded as a new ``lookup`` task.

        :param task: the finished task; ``task.response`` and ``task.html``
            are read here.
        :param error: falsy on success, truthy when the fetch failed.
        :returns: a generator of tasks to schedule.
        """
        failed = error or task.response.status_code != 200
        if failed:
            yield task
            return

        page = task.html
        page.make_links_absolute()

        # Not a log page: follow every link that stays under the start URL.
        if not page.xpath_exists('//div[@class="roomtitle"]'):
            for href in page.xpath('//a/@href', all=True):
                href = str(href)
                if not href.startswith(LogsScanner.START_URL):
                    continue
                yield Task(handler='lookup', url=href)
            return

        # Log page: header line first.
        room_name = page.xpath('//div[@class="roomtitle"]/text()')
        log_date = page.xpath('//div[@class="logdate"]/text()')
        room_jid = page.xpath('//a[@class="roomjid"]/text()')
        header = '%s (%s), %s' % (room_name, room_jid, log_date)
        print(header)

        # One entry per timestamp anchor, with fields read from the <font>
        # element that follows it.
        entries = page.structured_xpath(
            '//a[@class="ts"]',
            x(
                './following-sibling::font',
                class_='./@class',
                message='./text()',
                text='./following-sibling::text()'
            ),
            time='./@name'
        )

        for entry in entries:
            stamp = entry.time
            css_class = entry.class_
            message = entry.message
            # Only entries of class "mn" get the trailing text appended.
            if css_class == 'mn':
                suffix = ': ' + entry.text
            else:
                suffix = ''
            print('%s %3s | %s%s' % (stamp, css_class, message, suffix))