class IndexPipelineConfig(Component):
    """Pipeline configuration made of two fixed processor providers."""

    # NOTE(review): this class has the same name as the object passed to
    # implements(). Inside the class body the name still resolves to the
    # previously bound object; once the class statement completes, the name
    # is rebound to this class -- confirm the shadowing is intended.
    implements(IndexPipelineConfig)

    def get_pipeline(self, *args, **kwargs):
        """Return the pipeline as an ordered list of provider components.

        Positional and keyword arguments are accepted but not used.

        :return: ``[self.env[Processor1Provider], self.env[Processor2Provider]]``
        """
        provider_types = (Processor1Provider, Processor2Provider)
        return [self.env[provider_type] for provider_type in provider_types]
class ForcePipeline(Component):
    """Pipeline configuration that always yields the same three providers."""

    implements(IndexPipelineConfig)

    def get_pipeline(self, *args, **kwargs):
        """Return the fixed pipeline, looked up in the surrounding ``env``.

        Positional and keyword arguments are accepted but not used. Note that
        ``env`` is resolved from the enclosing scope, not from ``self``.

        :return: list holding the ``IndexAPIForward``, ``LocalDumbIndex`` and
                 ``LocalKV`` components, in that order.
        """
        stages = (IndexAPIForward, LocalDumbIndex, LocalKV)
        return [env[stage] for stage in stages]
class CheckProcessor(Component):
    """Index API provider that builds ``Check`` objects."""

    implements(IndexAPIProvider)

    # Single (``unique=True``) provider the schemas are requested from.
    schema_provider = ExtensionPoint(CheckProcessorSchemaProvider, unique=True)

    def get_index_api(self, **config):
        """Create a ``Check`` configured with the schemas of a service.

        :param config: keyword configuration; must contain the ``'service'``
                       key and is forwarded unchanged to ``Check``.
        :return: a ``Check`` instance.
        :raises KeyError: if ``'service'`` is missing from ``config``.
        """
        service_name = config['service']
        default = self.schema_provider.default_schema(service_name)
        query = self.schema_provider.query_schema(service_name)
        cards = self.schema_provider.card_schemas(service_name)
        # Check takes the card schemas before the query schema.
        return Check(default, cards, query, **config)
class DumbIndexAPIConfiguration(Component):
    """Index API configuration provider that echoes its identifying inputs."""

    implements(IndexAPIConfigurationProvider)

    def get_index_api_conf(self, service, docido_user_id, account_login,
                           config):
        """Return a dict holding the service, user id and account login.

        :param service: stored under the ``'service'`` key.
        :param docido_user_id: stored under the ``'docido_user_id'`` key.
        :param account_login: stored under the ``'account_login'`` key.
        :param config: accepted but not used.
        :return: a new ``dict`` with the three keys above.
        """
        return dict(
            service=service,
            docido_user_id=docido_user_id,
            account_login=account_login,
        )
class MyCrawler(Component):
    """Crawler registered under the ``'fake-crawler'`` service name.

    ``_crawl_task``, ``tasks_count``, ``with_epilogue`` and ``_epilogue`` are
    resolved from the surrounding scope.
    """

    implements(ICrawler)

    def get_service_name(self):
        """Return the service name, ``'fake-crawler'``."""
        return 'fake-crawler'

    def iter_crawl_tasks(self, index, token, logger, full):
        """Describe the crawl as a dict of tasks plus an optional epilogue.

        None of the parameters is used.

        :return: ``{'tasks': [...]}`` holding ``tasks_count`` references to
                 ``_crawl_task``; an ``'epilogue'`` entry set to ``_epilogue``
                 is added when ``with_epilogue`` is truthy.
        """
        tasks = list(repeat(_crawl_task, tasks_count))
        if not with_epilogue:
            return {'tasks': tasks}
        return {'tasks': tasks, 'epilogue': _epilogue}
class ForceConfig(Component):
    """Index API configuration provider pointing local storage at a temp dir."""

    implements(IndexAPIConfigurationProvider)

    def get_index_api_conf(self, service, docido_user_id, account_login,
                           config):
        """Return a ``local_storage`` configuration rooted at ``env.temp_dir``.

        None of the parameters is used; ``env`` is resolved from the
        surrounding scope.

        :return: dict whose ``'local_storage'`` entry gives the same path to
                 both the ``'documents'`` and the ``'kv'`` storage.
        """
        documents_storage = {'path': env.temp_dir}
        kv_storage = {'path': env.temp_dir}
        return {
            'local_storage': {
                'documents': documents_storage,
                'kv': kv_storage,
            },
        }
class MyExactCrawler(Component):
    """Crawler emitting two batches of ``_increment_task`` and an epilogue.

    ``_increment_task`` and ``_epilogue`` are resolved from the surrounding
    scope.
    """

    implements(ICrawler)

    def get_service_name(self):
        """Return the service name, ``'fake-crawler'``."""
        return 'fake-crawler'

    def iter_crawl_tasks(self, index, token, config, logger):
        """Describe the crawl as two task lists followed by an epilogue.

        None of the parameters is used.

        :return: dict whose ``'tasks'`` entry is a list of two lists (10 then
                 13 references to ``_increment_task``) and whose
                 ``'epilogue'`` entry is ``_epilogue``.
        """
        batch_sizes = (10, 13)
        return {
            'tasks': [
                list(repeat(_increment_task, size)) for size in batch_sizes
            ],
            'epilogue': _epilogue,
        }
class MyRetryCrawler(Component):
    """Crawler emitting ``_retry_crawl_task`` twice, once with bound retries.

    ``_retry_crawl_task`` and ``_retry_epilogue`` are resolved from the
    surrounding scope.
    """

    implements(ICrawler)

    def get_service_name(self):
        """Return the service name, ``'fake-crawler'``."""
        return 'fake-crawler'

    def iter_crawl_tasks(self, index, token, config, logger):
        """Describe the crawl as two tasks followed by an epilogue.

        None of the parameters is used.

        :return: dict whose ``'tasks'`` entry holds ``_retry_crawl_task`` and
                 a ``functools.partial`` of it with ``max_retries=2``, and
                 whose ``'epilogue'`` entry is ``_retry_epilogue``.
        """
        limited_retry_task = functools.partial(
            _retry_crawl_task, max_retries=2)
        tasks = [_retry_crawl_task, limited_retry_task]
        return dict(tasks=tasks, epilogue=_retry_epilogue)
class DocidoCheckProcessorSchemaProvider(Component):
    """Build check-processor ``voluptuous`` schemas from indexing config.

    Each schema is produced by merging a "core" description with the
    description found in the configuration of a given service, then handing
    the merged result to ``from_dict`` and ``voluptuous.Schema``.
    """

    implements(CheckProcessorSchemaProvider)

    # Single (``unique=True``) source of the core and per-service config.
    indexing_config = ExtensionPoint(PullCrawlerIndexingConfig, unique=True)

    def _get_config(self, indexing_config):
        """Return the ``check_processor -> schemas`` mapping, or ``{}``.

        :param indexing_config: mapping exposing ``get``.
        """
        check_processor = indexing_config.get('check_processor', {})
        return check_processor.get('schemas', {})

    @lazy
    def _core_config(self):
        """Schemas mapping taken from the core indexing configuration.

        Accessed as an attribute (``self._core_config``); presumably cached
        by the ``lazy`` decorator -- confirm against its implementation.
        """
        return self._get_config(self.indexing_config.core())

    def _crawler_config(self, service):
        """Return the schemas mapping of the configuration of ``service``."""
        return self._get_config(self.indexing_config.service(service))

    def _schema_from_dicts(self, core_conf, crawler_conf):
        """Merge two schema descriptions into one ``voluptuous.Schema``.

        Both inputs are deep-copied before being given to ``merge_dicts``, so
        the objects passed by the caller are never handed to it directly.

        :param core_conf: core schema description.
        :param crawler_conf: service-specific schema description.
        :return: ``voluptuous.Schema`` built from what ``from_dict`` returns
                 (a schema plus keyword options).
        """
        merged = merge_dicts(
            copy.deepcopy(core_conf), copy.deepcopy(crawler_conf))
        schema, options = from_dict(merged)
        return voluptuous.Schema(schema, **options)

    def _get_schemas(self, service):
        """Return one schema per card kind declared in the core config.

        :param service: service whose configuration supplies the overrides.
        :return: dict mapping each kind name to its ``voluptuous.Schema``.
        """
        kind_schemas = self._core_config.get('card', {}).get('kind', {}) or {}
        if not kind_schemas:
            # No kind declared: nothing to build, and no service lookup.
            return {}
        # Looked up once instead of once per kind.
        crawler_conf = self._crawler_config(service)
        # NOTE(review): overrides are read by kind name from the top level of
        # the service schemas mapping, whereas core kinds live under
        # card -> kind -- confirm this asymmetry is intended.
        # No extra deepcopy here: _schema_from_dicts copies its arguments.
        # items() rather than iteritems() so this runs on Python 2 and 3.
        return {
            kind: self._schema_from_dicts(
                core_conf, crawler_conf.get(kind, {}))
            for kind, core_conf in kind_schemas.items()
        }

    def card_schemas(self, service):
        """Return the per-kind card schemas for ``service``."""
        return self._get_schemas(service)

    def default_schema(self, service):
        """Return the schema built from the ``card -> default`` descriptions.

        :param service: service whose configuration supplies the overrides.
        """
        core_default = self._core_config.get('card', {}).get('default', {})
        crawler_default = self._crawler_config(service).get('card', {}).get(
            'default', {})
        return self._schema_from_dicts(core_default, crawler_default)

    def query_schema(self, service):
        """Return the schema built from the ``query`` descriptions.

        :param service: service whose configuration supplies the overrides.
        """
        core_query = self._core_config.get('query', {})
        crawler_query = self._crawler_config(service).get('query', {})
        return self._schema_from_dicts(core_query, crawler_query)
class YamlPullCrawlersIndexingConfig(Component):
    """Indexing and pipeline configuration read from ``docido_config``."""

    implements(PullCrawlerIndexingConfig, IndexPipelineConfig)

    # Every registered index API provider; get_pipeline() selects among them.
    index_api_providers = ExtensionPoint(IndexAPIProvider)

    def service(self, service):
        """Return the ``indexing`` configuration of a crawler.

        Every level (``pull_crawlers``, ``crawlers``, the crawler entry and
        ``indexing`` itself) may be missing or set to a false value such as
        ``None``; an empty dict is returned in all those cases.

        :param service: crawler name used as key under ``crawlers``.
        :return: the ``indexing`` mapping, or ``{}``.
        """
        pull_crawlers = docido_config.get('pull_crawlers') or {}
        crawlers = pull_crawlers.get('crawlers') or {}
        crawler = crawlers.get(service) or {}
        # ``or {}`` (not a .get default) so an explicit None value for
        # 'indexing' is handled like the levels above.
        return crawler.get('indexing') or {}

    def core(self):
        """Return ``docido_config.pull_crawlers.indexing``."""
        return docido_config.pull_crawlers.indexing

    def get_pipeline(self, service):
        """Return the provider components forming the pipeline of a service.

        The pipeline declared in the service configuration is used when it
        has a ``pipeline`` entry; otherwise the core configuration's one is.
        Pipeline entries are matched against provider class names.

        :param service: crawler name.
        :return: list of index API provider components, in pipeline order.
        :raises KeyError: if a pipeline entry matches no provider class name.
        """
        service_config = self.service(service)
        if 'pipeline' in service_config:
            processor_pipeline = service_config.pipeline
        else:
            processor_pipeline = self.core().pipeline
        providers = {
            provider.__class__.__name__: provider
            for provider in self.index_api_providers
        }
        return [providers[name] for name in processor_pipeline]
class Processor2Provider(Component):
    """Index API provider that builds ``Processor2`` objects."""

    implements(IndexAPIProvider)

    def get_index_api(self, parent=None, **config):
        """Create a ``Processor2``.

        :param parent: passed as first positional argument to ``Processor2``.
        :param config: keyword arguments forwarded unchanged.
        :return: the new ``Processor2`` instance.
        """
        processor = Processor2(parent, **config)
        return processor
class FooComponent(Component):
    """Component declaring the ``FooInterface`` and ``Foobar`` interfaces.

    The class body only contains the ``implements`` declaration.
    """

    implements(FooInterface, Foobar)
class LocalKV(Component):
    """Index API provider that builds ``LocalKVProcessor`` objects."""

    implements(IndexAPIProvider)

    def get_index_api(self, **config):
        """Create a ``LocalKVProcessor`` from the given keyword configuration.

        :param config: keyword arguments forwarded unchanged.
        :return: the new ``LocalKVProcessor`` instance.
        """
        processor = LocalKVProcessor(**config)
        return processor
class IndexAPIForward(Component):
    """Index API provider that builds ``IndexAPIProcessor`` objects."""

    implements(IndexAPIProvider)

    def get_index_api(self, **config):
        """Create an ``IndexAPIProcessor`` from the keyword configuration.

        :param config: keyword arguments forwarded unchanged.
        :return: the new ``IndexAPIProcessor`` instance.
        """
        processor = IndexAPIProcessor(**config)
        return processor
class IndexAPIForwardProcessor(Component):
    """Component declaring the ``IndexAPIProcessor`` interface.

    The class body only contains the ``implements`` declaration.
    """

    implements(IndexAPIProcessor)
class BarComponent(Component):
    """Component declaring the ``BarInterface`` and ``Foobar`` interfaces.

    The class body only contains the ``implements`` declaration.
    """

    implements(BarInterface, Foobar)
class Elasticsearch(Component):
    """Index API provider that builds ``ElasticsearchProcessor`` objects."""

    implements(IndexAPIProvider)

    def get_index_api(self, **config):
        """Create an ``ElasticsearchProcessor`` from the keyword configuration.

        :param config: keyword arguments forwarded unchanged.
        :return: the new ``ElasticsearchProcessor`` instance.
        """
        processor = ElasticsearchProcessor(**config)
        return processor