class YahooMarketNewsClassifierPipeline(object):
    """Item pipeline that fills in the ``yahoo_market_news`` field of an item.

    The pipeline only acts for spiders whose ``pipelines`` attribute contains
    ``'yahoo'``; items coming from any other spider are returned untouched.
    """

    def __init__(self):
        """Create the ``YahooClassifier`` instance used by ``process_item``."""
        self.classifier = YahooClassifier()

    def process_item(self, item, spider):
        """Set ``item['yahoo_market_news']`` and hand the item back.

        Args:
            item: Mapping-like item; ``'source'`` is always read, and
                ``'content'`` and ``'title'`` are read when the classifier
                is used.
            spider: The spider that produced the item. It opts in to this
                pipeline by listing ``'yahoo'`` in its ``pipelines``
                attribute; a spider without that attribute is treated as
                not opted in.

        Returns:
            The same ``item`` object, modified in place when the pipeline
            applies.
        """
        # Spiders that did not opt in get their items back unchanged.
        if 'yahoo' not in getattr(spider, "pipelines", []):
            return item

        if item['source'] in ('ABC',):
            # Items from this source bypass the classifier and are always 0.
            item['yahoo_market_news'] = 0
        else:
            # Every other source is labelled by the classifier from its content.
            label = self.classifier.classify(item['content'])
            item['yahoo_market_news'] = label
            # NOTE(review): "%d" requires a numeric label -- presumably
            # classify() returns an integer category; confirm.
            message = "category: %s as %d" % (item['title'], label)
            log.msg(message, level=log.INFO)

        return item
def __init__(self):
    """Attach a newly constructed ``YahooClassifier`` to ``self.classifier``."""
    # NOTE(review): this module-level definition repeats the constructor of
    # YahooMarketNewsClassifierPipeline -- presumably a stray duplicate;
    # confirm before relying on or removing it.
    yahoo_classifier = YahooClassifier()
    self.classifier = yahoo_classifier