Ejemplo n.º 1
0
def get_cache(cache_name):
    """Create and return the cache named *cache_name*.

    Returns None when the database cannot be reached (OperationalError).
    """
    # Tests will write to the same database, but in a different table
    # (defined by the testing namespace of the TEST_PARAMS)
    try:
        cache = create_cache(cache_name)
    except OperationalError:
        return None
    return cache
Ejemplo n.º 2
0
def test_get_cached_ids():
    """get_cached_ids() should report every key stored under a namespace."""
    cache = create_cache('mock_cache')
    cache.storage = {
        'table-name': {'foo': 'foo!', 'bar': 'bar!', 'baz': 'baz!'},
    }
    expected_ids = {'foo', 'bar', 'baz'}
    assert cache.get_cached_ids(namespace='table-name') == expected_ids
Ejemplo n.º 3
0
def test_set(namespace, test_data, as_json):
    """Values stored via set() must round-trip back to the originals."""
    cache = create_cache('mock_cache')

    # Exercise the set method
    cache.set(test_data, namespace, as_json=as_json)

    # Read the backing storage directly; undo the JSON encoding first so we
    # compare against the original Python values.
    stored = cache.storage[namespace]
    if as_json:
        stored = {key: json.loads(raw) for key, raw in stored.items()}

    for key, value in test_data.items():
        assert stored[key] == value
Ejemplo n.º 4
0
def test_get(namespace, test_data, as_json):
    """get() must return stored values, json-decoding them when requested."""
    cache = create_cache('mock_cache')

    # Seed the backing storage directly, json-encoding by hand when needed:
    seeded = (
        {key: json.dumps(value) for key, value in test_data.items()}
        if as_json
        else test_data
    )
    cache.storage[namespace].update(seeded)

    # get() should transparently decode whatever we stored:
    keys = list(test_data)
    fetched = cache.get(keys, namespace, as_json=as_json)

    for key in keys:
        assert fetched[key] == test_data[key]
Ejemplo n.º 5
0
    def __init__(self, cache='redis', proxies=None, **cache_kwargs):
        """
        Build the annotator around a cache.

        *cache* may be a cache name ('redis', 'postgres') or an already
        constructed Cache instance (RedisCache, PostgresCache). Any extra
        keyword arguments are forwarded to the cache initializer.

        Set *proxies* as a dict like {'http': 'socks5://localhost:9050'},
        or as an empty dict in case you're using a ParallelWebAnnotator.
        """
        super().__init__()
        self.proxies = proxies
        self.cache_kwargs = cache_kwargs

        # Accept a ready-made Cache as-is; otherwise build one by name.
        self.cache = (
            cache if isinstance(cache, Cache)
            else create_cache(cache, **cache_kwargs)
        )
Ejemplo n.º 6
0
def renew_cache(ids, annotator_class):
    """Re-annotate *ids* with *annotator_class*, refreshing the MySQL cache.

    Annotation is done with use_cache=False so every ID is fetched fresh
    from the web and the cached copy is overwritten.

    Returns a dict mapping the annotator's SOURCE_NAME to the list of IDs
    that could not be annotated even when retried individually.
    """
    mysql_cache = create_cache('mysql')
    proxies = {'http': 'socks5://caladan.local:9050'}

    annotator = annotator_class(cache=mysql_cache, proxies=proxies)
    failed_ids = defaultdict(list)

    # This chunking of IDs is done to avoid a single exception ruining the
    # annotation of all IDs. We catch it instead and try to annotate the
    # chunk one ID at a time:
    chunk_size = 1_000
    for group_of_ids in chunked(ids, chunk_size):
        try:
            # This call is enough to renew the cached ids:
            annotator.annotate(group_of_ids, use_cache=False, parse=False)
        except Exception:
            # Deliberate best-effort retry, one ID at a time; IDs that still
            # fail are recorded and reported to the caller instead of raising.
            for id_ in group_of_ids:
                try:
                    annotator.annotate_one(id_, use_cache=False, parse=False)
                except Exception:
                    failed_ids[annotator.SOURCE_NAME].append(id_)

    return dict(failed_ids)
Ejemplo n.º 7
0
    def __init__(self,
                 cache,
                 use_cache=True,
                 use_web=True,
                 proxies=None,
                 sleep_time=None,
                 clinvar_vcf_path=None,
                 genome_assembly='GRCh37.p13',
                 **cache_kwargs):
        """
        Initialize a pipeline with a given set of options. The options will
        be used for any subsequent pipeline.run() actions.

        - cache is mandatory. You can 'manually' instantiate a Cache
          (either PostgresCache or RedisCache) and pass it here, or you can
          specify 'redis' or 'postgres' and let the pipeline do that for you.
        - use_cache (default=True): whether to use or not data found in cache
          for each variant.
        - use_web (default=True): whether to use or not web data to annotate
          the variants. If use_cache is also set, the web will be used only
          to annotate the ones not found in cache. If use_cache=False, every
          variant will be annotated from web, updating any previous cached
          data for those variants.
        - proxies is required: a dictionary of proxies that will be used by
          the requests library, for instance:
          {'http': 'socks5://localhost:9050'}. Passing None raises
          NoProxiesException; pass an empty dict to explicitly opt out.
        - sleep_time (default=None) is optional. If set, it will be used to
          override all annotators SLEEP_TIME between queries.
        - clinvar_vcf_path (default=None) is optional; it is stored on the
          pipeline for later use when reading ClinVar VCF data.
        - genome_assembly: either "GRCh37.p13" or "GRCh38.p7", it will be used
          to generate position tags to identify each variant and match them
          to ClinVar Variation Reports in case the rs ID is not enough.
        - **cache_kwargs will be passed to the Cache constructor if the cache
          option is not already a Cache instance.

        See the docstring of Pipeline.run for some usage examples.

        Raises NoProxiesException when proxies is None.
        """
        # Fail fast: validate proxies before doing any other setup work
        # (the original checked this only after building the cache).
        if proxies is None:
            raise NoProxiesException(
                "It's not advisable to run the complete pipeline "
                "without proxies, especially if you're going to "
                "annotate a lot of variants, because OMIM can get "
                "your IP banned. Try installing Tor locally and pass "
                "something like this: "
                "proxies={'http': 'socks5://localhost:9050'}. If you "
                "still want to run without proxies, set proxies "
                "explicitly as an empty dict (proxies={}).")

        # Accept a ready-made Cache instance; otherwise build one by name.
        # (The original had a no-op `cache = cache` branch here.)
        if not isinstance(cache, Cache):
            cache = create_cache(cache, **cache_kwargs)

        self.annotation_kwargs = {
            'cache': cache,
            'use_cache': use_cache,
            'use_web': use_web,
            'proxies': proxies,
            'sleep_time': sleep_time,
        }
        self.clinvar_vcf_path = clinvar_vcf_path
        self.genome_assembly = genome_assembly
Ejemplo n.º 8
0
def test_only_printable():
    """_only_printable() drops non-printable chars and keeps plain ASCII."""
    cache = create_cache('mock_cache')
    already_printable = 'foobar'
    assert cache._only_printable('foo – bar') == 'foo  bar'
    assert cache._only_printable(already_printable) == already_printable