예제 #1
0
def get_node(text, namespaces=None):
    """Build an XML-mode scrapy ``Selector`` from ``text``.

    When ``namespaces`` is truthy, every entry is indexed as
    ``(prefix, uri)`` and registered on the selector before it is returned.
    """
    selector = Selector(text=text, type="xml")
    # A falsy ``namespaces`` (None, empty list, ...) registers nothing.
    for entry in namespaces or ():
        selector.register_namespace(entry[0], entry[1])
    return selector
예제 #2
0
def get_node(text, namespaces=None):
    """Return a scrapy ``Selector`` that parses ``text`` as XML.

    ``namespaces`` is optional; if given and non-empty, each item's first
    two elements are registered on the selector as prefix and URI.
    """
    xml_selector = Selector(text=text, type="xml")
    if not namespaces:
        # Nothing to register: hand back the bare selector.
        return xml_selector
    for pair in namespaces:
        prefix, uri = pair[0], pair[1]
        xml_selector.register_namespace(prefix, uri)
    return xml_selector
예제 #3
0
    def parse_node(self, response, node):
        """Yield one article item for ``node`` if it passes the date filter.

        Nodes rejected by ``is_todays_article`` produce nothing. For an
        accepted node, a single dict is yielded with the keys ``title``,
        ``link``, ``description``, ``date``, ``categories``, ``source`` and
        ``sentiment``.

        Args:
            response: Passed to ``Selector`` to query the ``wsj:articletype``
                elements (presumably the feed response this node came
                from -- confirm against the caller).
            node: Selector-like object for one feed entry; queried with
                relative XPaths for ``title``, ``link``, ``description`` and
                ``pubDate``.

        Yields:
            dict: The extracted article fields.
        """
        if not is_todays_article(node):
            return

        # ``.get()`` returns None when the element is absent; default to ""
        # so ``.strip()`` cannot raise AttributeError on a sparse entry.
        title = node.xpath('title/text()').get(default='').strip()
        link = node.xpath('link/text()').get(default='').strip()
        description = remove_html(node.xpath('description/text()').get())

        # Build the response-wide selector only for accepted nodes, so
        # rejected entries do not pay for constructing it.
        sel = Selector(response)
        sel.register_namespace("wsj", "http://dowjones.net/rss/")
        # NOTE(review): this XPath is absolute and evaluated against the whole
        # response, so it returns the article types of every entry in the
        # document, not only this node -- confirm that this is intended.
        article_types = sel.xpath('//wsj:articletype/text()').getall()

        yield {
            "title": title,
            "link": link,
            "description": description,
            "date": transform_date(node.xpath('pubDate/text()').get()),
            "categories": self.get_categories(
                article_types, title, self.category_classifier),
            "source": "Wallstreet Journal",
            "sentiment": self.sentiment_classifier.classify(
                "{} {}".format(title, description)),
        }