def get_cache(cache_name):
    try:
        # Tests will write to the same database, but in a different table
        # (defined by the testing namespace of the TEST_PARAMS)
        return create_cache(cache_name)
    except OperationalError:
        return
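# A minimal sketch of how the fallback above can be used, assuming a
# pytest-style test suite; the test function, namespace, and data below
# are illustrative, not taken from the source:
import pytest

postgres_cache = get_cache('postgres')

@pytest.mark.skipif(postgres_cache is None,
                    reason='No PostgreSQL connection available')
def test_postgres_cache_roundtrip():
    postgres_cache.set({'rs123': 'some annotation'}, 'test-namespace')
    assert postgres_cache.get(['rs123'], 'test-namespace')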
def test_get_cached_ids():
    mock_cache = create_cache('mock_cache')
    mock_cache.storage = {
        'table-name': {
            'foo': 'foo!',
            'bar': 'bar!',
            'baz': 'baz!',
        }
    }
    assert mock_cache.get_cached_ids(namespace='table-name') == \
        {'foo', 'bar', 'baz'}
import json

def test_set(namespace, test_data, as_json):
    mock_cache = create_cache('mock_cache')

    # Test the set method
    mock_cache.set(test_data, namespace, as_json=as_json)

    # Manually get the test values to compare them, json-load if necessary
    cached_data = mock_cache.storage[namespace]
    if as_json:
        cached_data = {k: json.loads(v) for k, v in cached_data.items()}

    for k in test_data:
        assert cached_data[k] == test_data[k]
def test_get(namespace, test_data, as_json):
    mock_cache = create_cache('mock_cache')

    # Manually set the test values, manually json-dump if necessary:
    values_to_set = test_data
    if as_json:
        values_to_set = {k: json.dumps(v) for k, v in test_data.items()}
    mock_cache.storage[namespace].update(values_to_set)

    # Test the get method, which should automatically json-load if needed:
    test_keys = list(test_data.keys())
    cached_data = mock_cache.get(test_keys, namespace, as_json=as_json)
    for k in test_keys:
        assert cached_data[k] == test_data[k]
def __init__(self, cache='redis', proxies=None, **cache_kwargs):
    """
    Initialize with a cache name ('redis', 'postgres') or a Cache
    instance (RedisCache, PostgresCache). Extra kwargs can be passed to
    the cache initializer.

    Set proxies as a dict like {'http': 'socks5://localhost:9050'}, or
    as an empty dict in case you're using a ParallelWebAnnotator.
    """
    super().__init__()
    self.proxies = proxies
    self.cache_kwargs = cache_kwargs
    if isinstance(cache, Cache):
        self.cache = cache
    else:
        self.cache = create_cache(cache, **cache_kwargs)
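# Two hypothetical instantiations of an annotator built on the
# initializer above. 'MyAnnotator' is an assumed subclass name, not one
# confirmed by the source:
annotator = MyAnnotator(cache='redis',
                        proxies={'http': 'socks5://localhost:9050'})

# Or pass a pre-built Cache instance; extra kwargs reach the cache
# initializer only when a cache *name* is given instead:
redis_cache = create_cache('redis')
annotator = MyAnnotator(cache=redis_cache, proxies={})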
from collections import defaultdict

# chunked() is assumed to come from more_itertools:
from more_itertools import chunked

def renew_cache(ids, annotator_class):
    mysql_cache = create_cache('mysql')
    proxies = {'http': 'socks5://caladan.local:9050'}
    annotator = annotator_class(cache=mysql_cache, proxies=proxies)

    failed_ids = defaultdict(list)

    # The IDs are chunked to avoid a single exception ruining the
    # annotation of all of them. We catch it instead and try to annotate
    # the chunk one ID at a time:
    chunk_size = 1_000
    for group_of_ids in chunked(ids, chunk_size):
        try:
            # This call is enough to renew the cached IDs:
            annotator.annotate(group_of_ids, use_cache=False, parse=False)
        except Exception:
            for id_ in group_of_ids:
                try:
                    annotator.annotate_one(id_, use_cache=False, parse=False)
                except Exception:
                    failed_ids[annotator.SOURCE_NAME].append(id_)

    return dict(failed_ids)
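# Hypothetical call to renew_cache; the IDs and the annotator class
# name are illustrative only:
ids_to_renew = ['rs268', 'rs429358', 'rs1800562']
failed_ids = renew_cache(ids_to_renew, MyAnnotator)
if failed_ids:
    print('Could not re-annotate:', failed_ids)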
def __init__(self, cache, use_cache=True, use_web=True, proxies=None,
             sleep_time=None, clinvar_vcf_path=None,
             genome_assembly='GRCh37.p13', **cache_kwargs):
    """
    Initialize a pipeline with a given set of options. The options will
    be used for any subsequent pipeline.run() actions.

    - cache is mandatory. You can 'manually' instantiate a Cache (either
      PostgresCache or RedisCache) and pass it here, or you can specify
      'redis' or 'postgres' and let the pipeline do that for you.

    - use_cache (default=True): whether to use data found in the cache
      for each variant.

    - use_web (default=True): whether to use web data to annotate the
      variants. If use_cache is also set, the web will be used only to
      annotate the variants not found in the cache. If use_cache=False,
      every variant will be annotated from the web, updating any
      previously cached data for those variants.

    - proxies (default=None) is optional. If set, it should be a
      dictionary of proxies that will be used by the requests library.
      For instance: {'http': 'socks5://localhost:9050'}

    - sleep_time (default=None) is optional. If set, it will override
      every annotator's SLEEP_TIME between queries.

    - clinvar_vcf_path (default=None): optional path to a local ClinVar
      VCF file.

    - genome_assembly: either "GRCh37.p13" or "GRCh38.p7". It will be
      used to generate position tags that identify each variant and
      match them to ClinVar Variation Reports in case the rs ID is not
      enough.

    - **cache_kwargs will be passed to the Cache constructor if the
      cache option is not already a Cache instance.

    See the docstring of Pipeline.run for some usage examples.
    """
    if not isinstance(cache, Cache):
        cache = create_cache(cache, **cache_kwargs)

    self.annotation_kwargs = {
        'cache': cache,
        'use_cache': use_cache,
        'use_web': use_web,
        'proxies': proxies,
        'sleep_time': sleep_time,
    }
    self.clinvar_vcf_path = clinvar_vcf_path
    self.genome_assembly = genome_assembly

    if proxies is None:
        raise NoProxiesException(
            "It's not advisable to run the complete pipeline without "
            "proxies, especially if you're going to annotate a lot of "
            "variants, because OMIM can get your IP banned. Try "
            "installing Tor locally and pass something like this: "
            "proxies={'http': 'socks5://localhost:9050'}. If you still "
            "want to run without proxies, set proxies explicitly as an "
            "empty dict (proxies={})."
        )
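# A minimal sketch of building the Pipeline defined above; the
# arguments mirror the docstring, and the proxies dict assumes a local
# Tor instance:
pipeline = Pipeline(
    cache='postgres',
    proxies={'http': 'socks5://localhost:9050'},
    genome_assembly='GRCh37.p13',
)

# Opting out of proxies must be explicit, or NoProxiesException is raised:
local_pipeline = Pipeline(cache='redis', proxies={})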
def test_only_printable():
    mock_cache = create_cache('mock_cache')
    assert mock_cache._only_printable('foo bar') == 'foo bar'
    # Non-printable characters should be stripped (a NUL byte here):
    assert mock_cache._only_printable('foo\x00bar') == 'foobar'