Esempio n. 1
0
    def parse(self, response):
        """Yield one ``ArticleListItem`` for each of the listing slots 1..10.

        For every slot number, the per-field XPath expressions are read from
        ``Css.IBMBIGDATAHUB[slot]`` and evaluated against *response*.

        Args:
            response: Object providing ``xpath(expr)`` (whose result has
                ``extract()``) and a ``url`` attribute; ``url`` is stored
                as the item's ``site``.

        Yields:
            ArticleListItem: with ``title``, ``author``, ``headImg``,
            ``abstract``, ``url``, ``site``, ``isContentDownload`` and
            ``created`` set. ``headImg`` falls back to ``'no img'`` when
            its expression matches nothing.

        Raises:
            IndexError: while iterating, if the ``title``, ``author``,
                ``abstract``, ``url`` or ``created`` expression of a slot
                yields an empty ``extract()`` result.
        """
        # range() iterates exactly like xrange() here and also exists on
        # Python 3, where xrange() is gone.
        for slot in range(1, 11):
            item = ArticleListItem()
            # Look the slot's expression table up once instead of per field.
            xpaths = Css.IBMBIGDATAHUB[slot]

            item['title'] = response.xpath(xpaths['title']).extract()[0]
            item['author'] = response.xpath(xpaths['author']).extract()[0]

            # The head image is the only optional field: evaluate its
            # expression a single time and reuse the result for both the
            # emptiness check and the value.
            head_imgs = response.xpath(xpaths['headImg']).extract()
            item['headImg'] = head_imgs[0] if head_imgs else 'no img'

            item['abstract'] = response.xpath(xpaths['abstract']).extract()[0]
            # The extracted link is appended to the configured base URL.
            item['url'] = (
                Css.IBMBIGDATAHUB['baseurl']
                + response.xpath(xpaths['url']).extract()[0]
            )
            item['site'] = response.url
            item['isContentDownload'] = False
            item['created'] = Date.get_standard_time(
                response.xpath(xpaths['created']).extract()[0]
            )
            yield item