Exemple #1
0
 def _spider_templates(self, spider_templates, extractors):
     """
     Find all templates for a legacy spider and combine them into a single
     list.

     Every path in ``spider_templates`` is loaded through
     ``self.read_file(path, deserialize=True)``. Paths for which that call
     returns ``None`` are skipped, but are still recorded in the returned
     set of visited paths.

     The ``'extractors'`` entry of each loaded template is rewritten in
     place as a ``{field: [extractor ids]}`` mapping that only keeps ids
     present in ``extractors``. Two input layouts are accepted:

     * a mapping of field name to an iterable of extractor ids;
     * a list of entries carrying ``'field'`` and ``'id'`` keys, which are
       grouped by field (entries with a missing or empty field are
       ignored).

     :param spider_templates: iterable of template paths to load.
     :param extractors: container of known extractor ids, used for
         membership tests.
     :return: ``(templates, added)`` -- the list of loaded templates and
         the set of every path that was visited.
     """
     templates, added = [], set()
     for template_path in spider_templates:
         added.add(template_path)
         template = self.read_file(template_path, deserialize=True)
         if template is None:
             continue
         # ``or {}`` also covers an explicit ``None`` stored under the key.
         template_extractors = template.get('extractors') or {}
         if not isinstance(template_extractors, dict):
             # List layout: collect every id per field. Mapping a field to
             # a single id would drop all but the last entry and make the
             # filter below iterate over the characters of that id.
             grouped = {}
             for entry in template_extractors:
                 field = entry.get('field')
                 if field:
                     grouped.setdefault(field, []).append(entry.get('id'))
             template_extractors = grouped
         existing = {}
         for field, eids in template_extractors.items():
             # A field without ids yields an empty list instead of failing.
             existing[field] = [eid for eid in eids or ()
                                if eid in extractors]
         template['extractors'] = existing
         templates.append(template)
     return templates, added
Exemple #2
0
 def _spider_templates(self, spider_templates, extractors):
     """
     Find all templates for a legacy spider and combine them into a single
     list.

     Every path in ``spider_templates`` is loaded through
     ``self.read_file(path, deserialize=True)``. Paths for which that call
     returns ``None`` are skipped, but are still recorded in the returned
     set of visited paths.

     The ``'extractors'`` entry of each loaded template is rewritten in
     place as a ``{field: [extractor ids]}`` mapping that only keeps ids
     present in ``extractors``. Two input layouts are accepted:

     * a mapping of field name to an iterable of extractor ids;
     * a list of entries carrying ``'field'`` and ``'id'`` keys, which are
       grouped by field (entries with a missing or empty field are
       ignored).

     :param spider_templates: iterable of template paths to load.
     :param extractors: container of known extractor ids, used for
         membership tests.
     :return: ``(templates, added)`` -- the list of loaded templates and
         the set of every path that was visited.
     """
     templates, added = [], set()
     for template_path in spider_templates:
         added.add(template_path)
         template = self.read_file(template_path, deserialize=True)
         if template is None:
             continue
         # ``or {}`` also covers an explicit ``None`` stored under the key.
         template_extractors = template.get('extractors') or {}
         if not isinstance(template_extractors, dict):
             # List layout: collect every id per field. Mapping a field to
             # a single id would drop all but the last entry and make the
             # filter below iterate over the characters of that id.
             grouped = {}
             for entry in template_extractors:
                 field = entry.get('field')
                 if field:
                     grouped.setdefault(field, []).append(entry.get('id'))
             template_extractors = grouped
         existing = {}
         for field, eids in template_extractors.items():
             # A field without ids yields an empty list instead of failing.
             existing[field] = [eid for eid in eids or ()
                                if eid in extractors]
         template['extractors'] = existing
         templates.append(template)
     return templates, added