Exemple #1
0
 def _get_annotated_template(self, template):
     if (template.get('version', '0.12.0') >= '0.13.0'
             and not template.get('annotated')):
         using_js = self.spider._filter_js_urls(template['url'])
         template['body'] = 'rendered_body' if using_js else 'original_body'
         _build_sample(template)
     return template
Exemple #2
0
def load_project_data(open_func, spiders_list_func, project_dir):
    """Read items, extractors and spiders of a project.

    ``open_func`` is called with ``project_dir`` followed by the path
    components of the resource to load; ``spiders_list_func`` is called
    with ``project_dir`` and yields the spider names to load.

    Returns a ``(schemas, extractors, spiders)`` tuple where ``spiders``
    maps each spider name to an ``(IblSpider, spider_data)`` pair. Spiders
    for which ``open_func`` returns nothing are skipped with a warning.
    """
    schemas = open_func(project_dir, 'items')
    extractors = open_func(project_dir, 'extractors')

    spiders = {}
    for name in spiders_list_func(project_dir):
        spec = open_func(project_dir, 'spiders', name)
        if not spec:
            log.warning('Skipping "%s" spider as there is no data',
                        name)
            continue

        if 'template_names' not in spec:
            # Templates are stored inline with the spider definition.
            for template in spec.get('templates', []):
                _build_sample(template)
        else:
            # Templates are stored separately and referenced by name.
            template_names = spec.get('template_names', [])
            spec['templates'] = []
            for template_name in template_names:
                template = open_func(project_dir, 'spiders', name,
                                     template_name)
                _build_sample(template)
                spec['templates'].append(template)

        instance = IblSpider(name, spec, schemas, extractors, Settings())
        spiders[name] = (instance, spec)
    return schemas, extractors, spiders
Exemple #3
0
 def _get_annotated_template(self, template):
     if (template.get('version', '0.12.0') >= '0.13.0' and
             not template.get('annotated')):
         using_js = self.spider._filter_js_urls(template['url'])
         template['body'] = 'rendered_body' if using_js else 'original_body'
         _build_sample(template)
     return template
Exemple #4
0
def load_project_data(open_func, spiders_list_func, project_dir):
    """Load the items, extractors and spiders stored under ``project_dir``.

    ``open_func(project_dir, *path)`` loads one resource and
    ``spiders_list_func(project_dir)`` lists the spider names. The result
    is ``(schemas, extractors, spiders)``; ``spiders`` maps a spider name
    to a tuple of the ``IblSpider`` built for it and its loaded data.
    Spiders whose data is empty are logged and left out.
    """
    schemas = open_func(project_dir, 'items')
    extractors = open_func(project_dir, 'extractors')

    spiders = {}
    spider_names = spiders_list_func(project_dir)
    for spider_name in spider_names:
        data = open_func(project_dir, 'spiders', spider_name)
        if not data:
            message = 'Skipping "%s" spider as there is no data'
            log.warning(message, spider_name)
            continue

        if 'template_names' in data:
            # Samples live in their own resources; load each one by name.
            names = data.get('template_names', [])
            data['templates'] = []
            for sample_name in names:
                loaded = open_func(
                    project_dir, 'spiders', spider_name, sample_name
                )
                _build_sample(loaded)
                data['templates'].append(loaded)
        else:
            # Samples are already embedded in the spider data.
            for embedded in data.get('templates', []):
                _build_sample(embedded)

        ibl_spider = IblSpider(
            spider_name, data, schemas, extractors, Settings()
        )
        spiders[spider_name] = (ibl_spider, data)
    return schemas, extractors, spiders
Exemple #5
0
 def _get_annotated_template(self, template):
     changed = False
     if template.get('version', '0.12.0') >= '0.13.0':
         using_js = self.spider._filter_js_urls(template['url'])
         body = 'rendered_body' if using_js else 'original_body'
         if template.get('body') != body:
             template['body'] = body
             changed = True
     if changed or not template.get('annotated'):
         _build_sample(template)
     return template
Exemple #6
0
 def _get_annotated_template(self, template):
     changed = False
     if template.get('version', '0.12.0') >= '0.13.0':
         using_js = self.spider._filter_js_urls(template['url'])
         body = 'rendered_body' if using_js else 'original_body'
         if template.get('body') != body:
             template['body'] = body
             changed = True
     if changed or not template.get('annotated'):
         _build_sample(template)
     return template
Exemple #7
0
 def _get_annotated_template(self, template):
     if template.get('version', '0.12.0') >= '0.13.0':
         _build_sample(template)
     return template
Exemple #8
0
 def _get_annotated_template(self, template):
     if template.get('version', '0.12.0') >= '0.13.0':
         _build_sample(template)
     return template