Ejemplo n.º 1
0
class YahooMarketNewsClassifierPipeline(object):
    """Item pipeline that fills in the ``yahoo_market_news`` field of an item.

    The pipeline only acts for spiders whose ``pipelines`` attribute contains
    ``'yahoo'``; every other item is passed through unchanged.
    """

    def __init__(self):
        """Create the classifier used to categorise item content."""
        self.classifier = YahooClassifier()

    def process_item(self, item, spider):
        """Tag *item* with a ``yahoo_market_news`` category and return it.

        Args:
            item: Mapping-like item; reads ``'source'``, ``'content'`` and
                ``'title'``, and writes ``'yahoo_market_news'``.
            spider: The spider that produced the item. Its optional
                ``pipelines`` attribute decides whether this pipeline runs.

        Returns:
            The same ``item`` object, modified in place when the pipeline
            is enabled for the spider.
        """
        enabled_pipelines = getattr(spider, "pipelines", [])
        if 'yahoo' in enabled_pipelines:
            if item['source'] in ('ABC',):
                # Items from this source skip the classifier and are
                # always assigned category 0.
                item['yahoo_market_news'] = 0
            else:
                # Every other source is categorised from its content.
                category = self.classifier.classify(item['content'])
                item['yahoo_market_news'] = category
                log.msg(
                    "category: %s as %d" % (item['title'], category),
                    level=log.INFO,
                )
        return item
Ejemplo n.º 2
0
 def __init__(self):
     """Create the classifier instance and keep it on ``self.classifier``."""
     classifier = YahooClassifier()
     self.classifier = classifier