class AdhocSpider(BfsSpider):
    """Spider registered as ``adhoc`` that builds recipe items from pages.

    On construction it creates a ``CosineSimilarity`` helper (``self.sim``)
    and an ``IngredientsDetector`` (``self.detector``); ``parse_item`` uses
    the detector to decide whether a page yields an item.
    """

    name = 'adhoc'

    def __init__(self, *a, **kwargs):
        """Create the helper objects, then run the ``BfsSpider`` initializer.

        All positional and keyword arguments are forwarded unchanged to the
        parent class.
        """
        self.sim = CosineSimilarity(indexfile=settings.INDEX_FILE,
                                    threshold=settings.RELEVANCY_THRESHOLD)
        self.detector = IngredientsDetector()
        super(AdhocSpider, self).__init__(*a, **kwargs)

    def parse_item(self, response):
        """Build a ``RecipebotItem`` from the ingredients detected in a page.

        The page text is read from ``response.meta['body']`` and passed to
        ``self.detector.extract``.

        Returns:
            ``None`` when the detector returns nothing (the
            ``recipe/filtered_out`` stat is incremented in that case);
            otherwise a ``RecipebotItem`` with ``url`` set to
            ``response.url`` and ``ingredients`` set to the list of
            accepted entries.
        """
        item = RecipebotItem()
        body = response.meta['body']
        result = self.detector.extract(body)
        if len(result) == 0:
            # probably not recipe page
            stats.inc_value('recipe/filtered_out')
            return None

        item['url'] = response.url
        # The loop variable must not be called ``item``: doing so rebinds the
        # RecipebotItem, so the append targets a detector entry and the
        # method returns that entry instead of the recipe item.
        # NOTE(review): entry[0] is presumably the ingredient text and
        # entry[2] a confidence score -- confirm against
        # IngredientsDetector.extract.
        item['ingredients'] = [
            entry[0] for entry in result if entry[2] >= 0.25
        ]
        return item
def __init__(self, *a, **kwargs):
    """Attach the similarity and detector helpers, then initialize the parent.

    Positional and keyword arguments are passed through untouched to the
    parent class initializer.
    """
    similarity_options = {
        'indexfile': settings.INDEX_FILE,
        'threshold': settings.RELEVANCY_THRESHOLD,
    }
    self.sim = CosineSimilarity(**similarity_options)
    self.detector = IngredientsDetector()
    # Helpers are assigned before the parent initializer runs, keeping the
    # original statement order.
    super(AdhocSpider, self).__init__(*a, **kwargs)