Beispiel #1
0
    def __init__(self,
                 seq_repo_path=None,
                 regions_preload=None,
                 preload_pos_margin=500,
                 assembly_name=None):
        '''
        Set up the sequence fetcher, the assembly name map and the optional preloaded regions.

        :param seq_repo_path: Path to local seqrepo directory. If None, read HGVS_SEQREPO_DIR environment variable
        :param regions_preload: Iterable[ChrInterval], optionally preload these genomic regions
        :param preload_pos_margin: adding margin at the end of a preloaded genome
          in order to have data to verify structural variants across the end of a gene
        :param assembly_name: assembly name handed to assemblies.make_name_ac_map;
          self.DEFAULT_ASSY_NAME is used when this is empty or None
        '''

        if not seq_repo_path:
            seq_repo_path = os.environ.get("HGVS_SEQREPO_DIR")

        if seq_repo_path:
            self.seq_repo_fetcher = SeqRepo(seq_repo_path).fetch
        else:
            # logging.warn is a deprecated alias that was removed in Python 3.13
            logging.warning("Using remote sequence provider.")
            self.seq_repo_fetcher = seqfetcher.fetch_seq

        self.assembly_name = assembly_name or self.DEFAULT_ASSY_NAME
        self.assy_map = assemblies.make_name_ac_map(self.assembly_name)

        self.preloaded_regions = {}
        if regions_preload:
            # Each region is fetched with preload_pos_margin extra positions past its end
            self.preloaded_regions = build_interval_trees_by_chr(
                regions_preload,
                lambda c, s, e: self._fetch_seq(c, s, e + preload_pos_margin))
Beispiel #2
0
    def __init__(self):
        """Select the sequence source from environment variables.

        * HGVS_SEQREPO_DIR set: a local SeqRepo is used for *all* sequences.
        * otherwise, HGVS_SEQREPO_URL set: an instance of seqrepo-rest-service
          is used for *all* sequences
          (see https://github.com/biocommons/seqrepo-rest-service for more info).
        * neither set: fall back to remote sequence fetching.
        """
        seqrepo_dir = os.environ.get("HGVS_SEQREPO_DIR")
        seqrepo_url = os.environ.get("HGVS_SEQREPO_URL")

        if not (seqrepo_dir or seqrepo_url):
            self.sr = None
            self.fetcher = bioutils.seqfetcher.fetch_seq
            self.source = "bioutils.seqfetcher (network fetching)"
        elif seqrepo_dir:
            # The local directory takes precedence when both variables are set
            from biocommons.seqrepo import SeqRepo

            self.sr = SeqRepo(seqrepo_dir)
            self.source = "SeqRepo ({})".format(seqrepo_dir)

            def _fetch_seq_seqrepo(ac, start_i=None, end_i=None):
                return self.sr.fetch(ac, start_i, end_i)

            self.fetcher = _fetch_seq_seqrepo
        else:
            from biocommons.seqrepo.dataproxy import SeqRepoRESTDataProxy

            self.sr = SeqRepoRESTDataProxy(seqrepo_url)
            self.source = f"SeqRepo REST ({seqrepo_url})"

            def _fetch_seq_rest(ac, start_i=None, end_i=None):
                return self.sr.get_sequence(ac, start_i, end_i)

            self.fetcher = _fetch_seq_rest
        _logger.info("Fetching sequences with " + self.source)
def test_refseq_translation(tmpdir_factory):
    """An alias stored under NCBI is reported as RefSeq only when translate_ncbi_namespace is True."""
    root = str(tmpdir_factory.mktemp('seqrepo'))

    # Write a single NCBI-namespaced alias, commit it, and drop the writer
    writer = SeqRepo(root, writeable=True)
    writer.store("NCBISEQUENCE", [{"namespace": "NCBI", "alias": "ncbiac"}])
    writer.commit()
    del writer

    # Re-open read-only, first without and then with namespace translation
    for translate, expected_namespace in ((False, "NCBI"), (True, "RefSeq")):
        reader = SeqRepo(root, writeable=False, translate_ncbi_namespace=translate)
        found = list(reader.aliases.find_aliases(alias="ncbiac"))
        assert len(found) == 1
        assert found[0]["namespace"] == expected_namespace
def test_seqrepo_dir_not_exist(tmpdir_factory):
    """Opening a SeqRepo on a path that does not exist must raise OSError."""
    existing = tmpdir_factory.mktemp('seqrepo')
    # Appending a suffix to a fresh temp dir name gives a sibling path that was never created
    missing_dir = str(existing) + "-IDONTEXIST"

    with pytest.raises(OSError) as excinfo:
        SeqRepo(missing_dir, writeable=False)

    message = str(excinfo.value)
    assert "Unable to open SeqRepo directory" in message
Beispiel #5
0
    def get_seqrepo(self, seqrepo_dir) -> SeqRepo:
        """Return SeqRepo instance if seqrepo_dir is an existing directory.

        :param Path seqrepo_dir: Path to seqrepo directory
        :return: SeqRepo instance
        :raise NotADirectoryError: if seqrepo_dir does not exist or exists
            but is not a directory
        """
        root = Path(seqrepo_dir)
        if not root.exists():
            raise NotADirectoryError(f"Could not find {seqrepo_dir}")
        if not root.is_dir():
            # exists() alone lets a regular file through to SeqRepo()
            raise NotADirectoryError(f"{seqrepo_dir} is not a directory")
        return SeqRepo(seqrepo_dir)
Beispiel #6
0
    def __init__(self):
        """Pick the sequence fetcher from the HGVS_SEQREPO_DIR environment variable.

        When the variable is set, a local SeqRepo is used for *all* sequences;
        otherwise we fall back to remote sequence fetching.
        """
        seqrepo_dir = os.environ.get("HGVS_SEQREPO_DIR")
        if not seqrepo_dir:
            self.sr = None
            self.fetcher = bioutils.seqfetcher.fetch_seq
            self.source = "bioutils.seqfetcher (network fetching)"
        else:
            from biocommons.seqrepo import SeqRepo

            self.sr = SeqRepo(seqrepo_dir)
            self.source = "SeqRepo ({})".format(seqrepo_dir)

            def _fetch_seq_seqrepo(ac, start_i=None, end_i=None):
                return self.sr.fetch(ac, start_i, end_i)

            self.fetcher = _fetch_seq_seqrepo
        _logger.info("Fetching sequences with " + self.source)
def make_seqrepo(writeable):
    """Store one sequence in the SeqRepo at /tmp/sr and return a handle to it.

    The repository is always opened writeable first so the sequence can be
    stored. Only when *writeable* is exactly ``False`` is that handle dropped
    and the repository re-opened with ``writeable=False``.

    :param writeable: mode of the returned handle
    :return: the SeqRepo instance (also announced on stdout with the pid)
    """
    repo_dir = "/tmp/sr"
    rose_aliases = [{"namespace": "en", "alias": "rose"}, {"namespace": "fr", "alias": "rose"}]

    sr = SeqRepo(repo_dir, writeable=True)
    sr.store("SMELLASSWEET", rose_aliases)

    if writeable is False:
        # Release the writeable handle before re-opening the same directory
        del sr
        sr = SeqRepo(repo_dir, writeable=writeable)

    announcement = "pid {pid} created {sr}".format(pid=os.getpid(), sr=sr)
    print(announcement)
    return sr
Beispiel #8
0
    def __init__(self):
        """Pick the sequence fetcher from the HGVS_SEQREPO_DIR environment variable.

        When the variable is set, seqrepo is used for *all* sequences;
        otherwise we fall back to remote sequence fetching.
        """
        seqrepo_dir = os.environ.get("HGVS_SEQREPO_DIR")
        if not seqrepo_dir:
            self.fetcher = bioutils.seqfetcher.fetch_seq
            logger.info("Using remote sequence fetching")
            return

        from biocommons.seqrepo import SeqRepo
        repo = SeqRepo(seqrepo_dir)

        def _fetch_seq_seqrepo(ac, start_i=None, end_i=None):
            # The repository handle lives only in this closure, not on self
            return repo.fetch(ac, start_i, end_i)

        self.fetcher = _fetch_seq_seqrepo
        logger.info(
            "Using SeqRepo({}) sequence fetching".format(seqrepo_dir))
Beispiel #9
0
def test_refseq_translation(tmpdir_factory):
    """An alias stored under NCBI is reported as RefSeq only when translate_ncbi_namespace is True."""
    repo_root = str(tmpdir_factory.mktemp('seqrepo'))

    # Write a single NCBI-namespaced alias, commit it, and drop the writer
    rw_repo = SeqRepo(repo_root, writeable=True)
    rw_repo.store("NCBISEQUENCE", [{"namespace": "NCBI", "alias": "ncbiac"}])
    rw_repo.commit()
    del rw_repo

    # Re-open read-only, first without and then with namespace translation
    for translate, expected_namespace in ((False, "NCBI"), (True, "RefSeq")):
        ro_repo = SeqRepo(repo_root, writeable=False, translate_ncbi_namespace=translate)
        matches = list(ro_repo.aliases.find_aliases(alias="ncbiac"))
        assert len(matches) == 1
        assert matches[0]["namespace"] == expected_namespace
Beispiel #10
0
class SeqFetcher(object):
    """This class is intended primarily as a mixin for HGVS data providers
    that don't otherwise have access to sequence data.  It uses the
    fetch_seq() function in this module to fetch sequences from
    several sources; see that function for details.

    >> sf = SeqFetcher()

    >> sf.fetch_seq('NP_056374.2',0,10)
    'MESRETLSSS'

    """
    def __init__(self):
        """Pick the sequence source from the HGVS_SEQREPO_DIR environment variable.

        When the variable is set, seqrepo is used for *all* sequences;
        otherwise we fall back to remote sequence fetching.
        """
        seqrepo_dir = os.environ.get("HGVS_SEQREPO_DIR")
        if not seqrepo_dir:
            self.sr = None
            self.fetcher = bioutils.seqfetcher.fetch_seq
            self.source = "bioutils.seqfetcher (network fetching)"
        else:
            from biocommons.seqrepo import SeqRepo

            self.sr = SeqRepo(seqrepo_dir)
            self.source = "SeqRepo ({})".format(seqrepo_dir)

            def _fetch_seq_seqrepo(ac, start_i=None, end_i=None):
                return self.sr.fetch(ac, start_i, end_i)

            self.fetcher = _fetch_seq_seqrepo
        _logger.info("Fetching sequences with " + self.source)

    def fetch_seq(self, ac, start_i=None, end_i=None):
        """Fetch sequence *ac*, optionally limited by start_i / end_i, via the configured fetcher.

        Any exception raised by the fetcher is re-raised as
        HGVSDataNotAvailableError, with the accession, the source
        description and the original error in the message.
        """
        try:
            seq = self.fetcher(ac, start_i, end_i)
        except Exception as ex:
            message = "Failed to fetch {ac} from {self.source} ({ex})".format(
                ac=ac, ex=ex, self=self)
            raise HGVSDataNotAvailableError(message)
        return seq
def seqrepo_keepcase(tmpdir_factory):
    """Return a writeable SeqRepo, opened with ``upcase=False``, in a fresh temporary directory."""
    root = str(tmpdir_factory.mktemp('seqrepo'))
    repo = SeqRepo(root, upcase=False, writeable=True)
    return repo
def seqrepo_ro(tmpdir_factory):
    """Return a SeqRepo opened with default arguments on a directory that was first opened writeable.

    NOTE(review): the name suggests the default mode is read-only -- confirm
    against the SeqRepo constructor.
    """
    root = str(tmpdir_factory.mktemp('seqrepo'))
    # Open once in writeable mode, then drop the handle before re-opening
    initializer = SeqRepo(root, writeable=True)
    del initializer    # close it
    return SeqRepo(root)
Beispiel #13
0
def _get_seqrepo(cf):
    """Open the SeqRepo whose directory is configured in *cf*, with NCBI namespace translation on.

    :param cf: configuration object; ``cf.get("sequences", "seqrepo")`` must
        yield the repository directory (presumably a ConfigParser -- verify
        against the caller)
    :return: the opened SeqRepo instance
    """
    repo = SeqRepo(root_dir=cf.get("sequences", "seqrepo"),
                   translate_ncbi_namespace=True)
    logger.info("Opened {sr}".format(sr=repo))
    return repo
Beispiel #14
0
    def create_text(self, defn):
        """Build a ``models.Text`` from *defn* and assign its computed identifier.

        :param defn: value used as the ``definition`` of the new Text object
        :return: the Text object, with ``_id`` set to ``ga4gh_identify(obj)``
        """
        text_obj = models.Text(definition=defn)
        # The identifier is computed from the object itself, so set it after construction
        text_obj._id = ga4gh_identify(text_obj)
        return text_obj


if __name__ == "__main__":
    # Demo: build an AnyVar on a SeqRepo-backed data proxy, translate one HGVS
    # allele, store it, read it back, and check that the round trip is lossless.
    import os
    from biocommons.seqrepo import SeqRepo
    from ga4gh.vrs.dataproxy import SeqRepoRESTDataProxy, SeqRepoDataProxy

    if "SEQREPO_DIR" in os.environ:
        # A local SeqRepo directory was requested. The key is known to be
        # present in this branch, so no fallback value is needed.
        seqrepo_dir = os.environ["SEQREPO_DIR"]
        data_proxy = SeqRepoDataProxy(SeqRepo(root_dir=seqrepo_dir))
    else:
        # No local directory configured: use the REST service, whose URL can
        # be overridden through GA4GH_VRS_DATAPROXY_URI.
        seqrepo_url = os.environ.get(
            "GA4GH_VRS_DATAPROXY_URI",
            "https://services.genomicmedlab.org/seqrepo")
        data_proxy = SeqRepoRESTDataProxy(base_url=seqrepo_url)
    object_store = {}

    av = AnyVar(data_proxy=data_proxy, object_store=object_store)

    v = av.translate_allele("NM_000551.3:c.1A>T", fmt="hgvs")
    vid = av.put_object(v)

    v2 = av.get_object(vid, deref=True)
    assert v == v2  # roundtrip test
Beispiel #15
0
def search():
    """Return the ``[780000:780020]`` slice of ``NC_000001.11`` from the local SeqRepo.

    The repository is opened from ``/usr/local/share/seqrepo/latest`` on
    every call.
    """
    from biocommons.seqrepo import SeqRepo
    sr = SeqRepo("/usr/local/share/seqrepo/latest")
    return sr["NC_000001.11"][780000:780020]
Beispiel #16
0
def _get_seqrepo(cf):
    """Open the SeqRepo whose directory is configured in *cf* and log that directory.

    :param cf: configuration object; ``cf.get("sequences", "seqrepo")`` must
        yield the repository directory (presumably a ConfigParser -- verify
        against the caller)
    :return: the opened SeqRepo instance
    """
    root_dir = cf.get("sequences", "seqrepo")
    repo = SeqRepo(root_dir=root_dir)
    logger.info("Opened sequence directory " + root_dir)
    return repo
def seqrepo(tmpdir_factory):
    """Return a writeable SeqRepo rooted in a fresh temporary directory."""
    root = str(tmpdir_factory.mktemp('seqrepo'))
    repo = SeqRepo(root, writeable=True)
    return repo
Beispiel #18
0
def _get_seqrepo(cf):
    """Open the SeqRepo whose directory is configured in *cf* and log the opened instance.

    :param cf: configuration object; ``cf.get("sequences", "seqrepo")`` must
        yield the repository directory (presumably a ConfigParser -- verify
        against the caller)
    :return: the opened SeqRepo instance
    """
    repo = SeqRepo(root_dir=cf.get("sequences", "seqrepo"))
    logger.info("Opened {sr}".format(sr=repo))
    return repo