def task_page(self, task, error=None):
    """Process one project-list page: queue new pagination links, print rows.

    Args:
        task: The finished task. The code reads ``task.response.status_code``
            and ``task.html`` from it.
        error: Truthy when the request itself failed. Treated the same way
            as a non-200 answer.

    Yields:
        The unchanged ``task`` when the request failed or the status is not
        200 (presumably so the caller can re-queue it -- confirm against the
        scheduler). Otherwise one ``Task`` with ``handler='page'`` for every
        pagination URL not yet present in ``self.looked_pages`` (only when
        ``self.LOOKUP_ALL_PAGES`` is truthy).
    """
    # Check `error` before touching `task.response`: a failed request may
    # carry no usable response object at all.
    if error or task.response.status_code != 200:
        yield task
        return

    task.html.make_links_absolute()

    if self.LOOKUP_ALL_PAGES:
        pages = task.html.xpath(
            '//div[contains(@class, "pages_list")]/a/@href', all=True
        )
        for url in pages:
            if url in self.looked_pages:
                continue
            # Record the URL before yielding: the generator is suspended at
            # `yield`, so marking it afterwards would leave the URL unmarked
            # if the consumer stops iterating or looks at `looked_pages`
            # in the meantime.
            self.looked_pages.append(url)
            yield Task(
                task=task,
                handler='page',
                url=url
            )

    # Every table row except the first one is described by a nested
    # structure.  The (xpath, callable) tuples pair a value with
    # `self.weblancer_time` / `int` -- presumably applied as converters by
    # structured_xpath; confirm against its implementation.
    projects = task.html.structured_xpath(
        '//table[@class="items_list"]/tbody/tr[not(position()=1)]',
        x(
            './td[1]',
            x(
                './a[1]',
                name='./text()',
                url='./@href'
            ),
            time=(
                './div[1]/noindex[last()]/following-sibling::text()',
                self.weblancer_time
            )
        ),
        cost='./td[2]/*[1]/text()',
        answers=('./td[3]/text()', int)
    )

    # Single-argument parenthesised print gives the same output as the
    # Python 2 print statement.
    for p in projects:
        for key, value in p.iteritems():
            print('%-11s: %s' % (key, value))
        print('*' * 20)
def task_page(self, task, error=None):
    """Process one page of film mini-cards and print every extracted item.

    Args:
        task: The finished task. The code reads ``task.response.status_code``,
            ``task.request.url`` and ``task.html`` from it.
        error: Truthy when the request itself failed. The task is then
            handed back just like for a non-200 answer.

    Yields:
        The unchanged ``task`` when the request failed or the status is not
        200 (the debug message calls this a retry of the task). Nothing is
        yielded for a successfully parsed page.
    """
    # Check `error` before touching `task.response`: a failed request may
    # carry no usable response object at all.  It gets its own message
    # because the one below specifically blames the return code.
    if error:
        logger.debug(u'Ошибка при выполнении запроса - повтор задачи')
        yield task
        return

    if task.response.status_code != 200:
        logger.debug(u'Код возврата неверный (!=200) - повтор задачи')
        yield task
        return

    logger.debug(u'Задача выполнена %s успешно' % task.request.url)

    # One item per card: header links (kind/year), the body block with the
    # title link, description and tag links, plus the poster image source.
    items = task.html.structured_xpath(
        '//div[@class="b-mini-card"]/div/div',
        x(
            './div[1]',
            kind='./a[1]/text()',
            year='./a[2]/text()',
        ),
        x(
            './div[@class="b-mini-card__body"]',
            x(
                './h2/a',
                url='./@href',
                title='./text()'
            ),
            description='./div[@class="b-mini-card__desc"]/text()',
            tags=x(
                './div[@class="b-mini-card__tags"]/a',
                url='./@href',
                name='./text()'
            )
        ),
        img='./a[1]/img/@src'
    )

    logger.debug(u'Извлечено %d фильмов' % len(items))

    # Single-argument parenthesised print gives the same output as the
    # Python 2 print statement; print('') emits the blank separator line.
    for item in items:
        print('*' * 80)
        for key, value in item.iteritems():
            print('%-11s: %s' % (key, value))
        print('')
def task_lookup(self, task, error=None):
    """Print a chat-log page, or follow links from a page that is not a log.

    Args:
        task: The finished task. The code reads ``task.response.status_code``
            and ``task.html`` from it.
        error: Truthy when the request failed.

    Yields:
        The unchanged ``task`` when ``error`` is truthy or the status is not
        200. For a page without a ``roomtitle`` div, one ``Task`` with
        ``handler='lookup'`` per link that starts with
        ``LogsScanner.START_URL``. A log page yields nothing; its header and
        messages are printed instead.
    """
    request_failed = error or task.response.status_code != 200
    if request_failed:
        yield task
        return

    task.html.make_links_absolute()

    # No room title on the page: it is not a log, so only follow its links.
    if not task.html.xpath_exists('//div[@class="roomtitle"]'):
        for href in task.html.xpath('//a/@href', all=True):
            href = str(href)
            if href.startswith(LogsScanner.START_URL):
                yield Task(
                    url=href,
                    handler='lookup'
                )
        return

    # Log page: header line first, then one line per timestamp anchor.
    name = task.html.xpath('//div[@class="roomtitle"]/text()')
    date = task.html.xpath('//div[@class="logdate"]/text()')
    jid = task.html.xpath('//a[@class="roomjid"]/text()')
    print('%s (%s), %s' % (name, jid, date))

    entries = task.html.structured_xpath(
        '//a[@class="ts"]',
        x(
            './following-sibling::font',
            class_='./@class',
            message='./text()',
            text='./following-sibling::text()'
        ),
        time='./@name'
    )

    for entry in entries:
        # Only entries whose font class is 'mn' get the trailing text.
        if entry.class_ != 'mn':
            tail = ''
        else:
            tail = ': ' + entry.text
        print('%s %3s | %s%s' % (
            entry.time, entry.class_, entry.message, tail
        ))