def _get_annotated_template(self, template): if (template.get('version', '0.12.0') >= '0.13.0' and not template.get('annotated')): using_js = self.spider._filter_js_urls(template['url']) template['body'] = 'rendered_body' if using_js else 'original_body' _build_sample(template) return template
def load_project_data(open_func, spiders_list_func, project_dir):
    """Read a project's schemas, extractors and spiders.

    ``open_func`` is called as ``open_func(project_dir, *path_parts)`` for
    every piece of data, and ``spiders_list_func(project_dir)`` supplies the
    spider names to load.

    Returns a ``(schemas, extractors, spiders)`` tuple in which ``spiders``
    maps each spider name to an ``(IblSpider, spider_data)`` pair. A spider
    for which ``open_func`` returns a falsy value is logged and left out.
    """
    # Project-wide item schemas and extractors.
    schemas = open_func(project_dir, 'items')
    extractors = open_func(project_dir, 'extractors')

    spiders = {}
    for name in spiders_list_func(project_dir):
        spec = open_func(project_dir, 'spiders', name)
        if not spec:
            log.warning('Skipping "%s" spider as there is no data', name)
            continue

        if 'template_names' not in spec:
            # Templates are already embedded in the spider data.
            for embedded in spec.get('templates', []):
                _build_sample(embedded)
        else:
            # Templates are referenced by name and fetched one at a time.
            wanted = spec.get('template_names', [])
            spec['templates'] = []
            for template_name in wanted:
                loaded = open_func(project_dir, 'spiders', name,
                                   template_name)
                _build_sample(loaded)
                spec['templates'].append(loaded)

        instance = IblSpider(name, spec, schemas, extractors, Settings())
        spiders[name] = (instance, spec)

    return schemas, extractors, spiders
def load_project_data(open_func, spiders_list_func, project_dir):
    """Load schemas, extractors and spiders for the project in project_dir.

    All data is fetched through ``open_func(project_dir, *path_parts)``;
    the spider names come from ``spiders_list_func(project_dir)``.

    The result is a ``(schemas, extractors, spiders)`` tuple. ``spiders`` is
    a dict keyed by spider name whose values are ``(IblSpider, spider_data)``
    pairs. Spiders whose data is falsy are skipped with a warning.
    """
    def read(*path):
        # Every lookup is rooted at the project directory.
        return open_func(project_dir, *path)

    schemas = read('items')
    extractors = read('extractors')

    loaded_spiders = {}
    for spider_name in spiders_list_func(project_dir):
        spider = read('spiders', spider_name)
        if spider:
            if 'template_names' in spider:
                # Fetch each named template and attach it to the spider.
                sample_names = spider.get('template_names', [])
                spider['templates'] = []
                for sample_name in sample_names:
                    sample = read('spiders', spider_name, sample_name)
                    _build_sample(sample)
                    spider['templates'].append(sample)
            else:
                # Templates (if any) are already part of the spider data.
                for sample in spider.get('templates', []):
                    _build_sample(sample)
            loaded_spiders[spider_name] = (
                IblSpider(spider_name, spider, schemas, extractors,
                          Settings()),
                spider,
            )
        else:
            log.warning(
                'Skipping "%s" spider as there is no data', spider_name
            )
    return schemas, extractors, loaded_spiders
def _get_annotated_template(self, template): changed = False if template.get('version', '0.12.0') >= '0.13.0': using_js = self.spider._filter_js_urls(template['url']) body = 'rendered_body' if using_js else 'original_body' if template.get('body') != body: template['body'] = body changed = True if changed or not template.get('annotated'): _build_sample(template) return template
def _get_annotated_template(self, template): if template.get('version', '0.12.0') >= '0.13.0': _build_sample(template) return template