Python Speciesの例

プログラミング言語: Python

名前空間/パッケージ名: pyensembl.species

クラス/型: Species

hotexamples.comのコード掲載数: 6

Python Species - 6件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのpyensembl.species.Speciesの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

Species(1)

from_dict(1)

from_json(1)

コード例 #1

ファイルを表示

ファイル: species.py プロジェクト: rraadd88/pyensembl

 def register(cls, latin_name, synonyms, reference_assemblies):
     """
     Build a Species from the arguments and index it in the class-level
     lookup dicts.

     The new object is stored under its latin name, under every entry of
     `synonyms`, and under every key of `reference_assemblies`.

     Returns the newly created Species.
     """
     new_species = Species(
         latin_name=latin_name,
         synonyms=synonyms,
         reference_assemblies=reference_assemblies,
     )
     cls._latin_names_to_species[new_species.latin_name] = new_species

     # NOTE: no duplicate check is performed in either loop below, so a
     # synonym or reference name that is already registered is silently
     # re-pointed at the species being registered now.
     for alias in synonyms:
         cls._common_names_to_species[alias] = new_species
     for assembly_name in reference_assemblies:
         cls._reference_names_to_species[assembly_name] = new_species

     return new_species

コード例 #2

ファイルを表示

def test_species_to_json():
    """Check that `human` survives a to_json / from_json round trip."""
    serialized = human.to_json()
    restored = Species.from_json(serialized)
    eq_(human, restored)

コード例 #3

ファイルを表示

def test_species_to_dict():
    """Check that `human` survives a to_dict / from_dict round trip."""
    as_dict = human.to_dict()
    restored = Species.from_dict(as_dict)
    eq_(human, restored)

コード例 #4

ファイルを表示

ファイル: species.py プロジェクト: rraadd88/pyensembl

def collect_all_genomes():
    """
    Register a Species for every genome found in the local pyensembl cache.

    The cache directory is scanned as ``<cache>/<assembly>/<release>/``.
    A release directory counts as an installed genome when it contains
    more than four files. Each installed genome becomes one row of a
    DataFrame with the columns 'latin name', 'release', 'synonymn' and
    'assembly'; the synonym is the part of a file name before the first
    '.', and the latin name is that synonym passed through
    ``normalize_species_name``.

    For every distinct latin name, ``Species.register`` is called with the
    collected synonyms and a dict mapping each assembly to its
    ``(min release, max release)`` tuple.

    The DataFrame is attached to the class as ``Species.dspecies``. No file
    is written here; the table can be exported (e.g. as a tsv) by the
    caller, for instance to install a set of genomes at once.

    :return: the ``Species`` class itself (not an instance).
    :raises ValueError: if a release directory name contains no digits.
    """
    import re
    from glob import glob
    from os.path import basename, dirname

    import datacache
    import pandas as pd
    from pyensembl.species import normalize_species_name, Species

    def str2num(s, cat=False, force=True):
        """
        Converts string to integer
        eg. ensembl92 to 92

        :param s: string
        :param cat: Whether to concatenate detected integers. eg. 20,23 to 2023
        :param force: If True, ignores decimal point error.
        :return: an int when a single run of digits is found (or when
            `cat` is true); otherwise the list of digit strings found.
        """
        if '.' in s and not force:
            raise ValueError(
                f"A string can only be converted to integeres, found a '.' in {s}"
            )
        n = re.findall(r'\d+', s)
        if not n:
            raise ValueError("No digits found in string {}".format(s))
        if len(n) == 1:
            return int(n[0])
        if cat:
            return int(''.join(n))
        return n

    # here's how I get the .cache directory eg. '/home/user/.cache/pyensembl'
    pyensembl_cache_dir = f"{dirname(datacache.get_data_dir())}/pyensembl"  #FIXME if genomes are installed at other places than .cache

    # all the assemblies
    assemblies = [basename(p) for p in glob(f"{pyensembl_cache_dir}/*")]
    # dataframe that contains all the info (and can be exported as a tsv).
    dspecies = pd.DataFrame(
        columns=['latin name', 'release', 'synonymn', 'assembly'])

    # following loop populates the dataframe, one row per installed genome
    genomei = 0
    for assembly in assemblies:
        releases = [
            basename(p) for p in glob(f"{pyensembl_cache_dir}/{assembly}/*")
        ]
        for release in releases:
            # FIXME: a release name with several digit groups (e.g. a
            # float-like "92.1") makes str2num return a list, not an int.
            releasei = str2num(release)
            genome_dir = f"{pyensembl_cache_dir}/{assembly}/{release}"
            # glob order depends on the file system; sort so the file the
            # synonym is derived from is the same on every run.
            genome_files = sorted(glob(f"{genome_dir}/*"))
            #FIXME need more than 4 (.gz) files to be strict
            if len(genome_files) <= 4:
                continue
            synonym = basename(genome_files[0]).split('.')[0]
            dspecies.loc[genomei, 'assembly'] = assembly
            dspecies.loc[genomei, 'release'] = releasei
            dspecies.loc[genomei, 'synonymn'] = synonym
            dspecies.loc[genomei, 'latin name'] = normalize_species_name(
                synonym)
            genomei += 1

    # following loop generates the Species objects
    for spc in dspecies['latin name'].unique():
        species_rows = dspecies.loc[dspecies['latin name'] == spc]
        assembly2releases = {}
        for assembly in species_rows['assembly'].unique():
            assembly_releases = species_rows.loc[
                species_rows['assembly'] == assembly, 'release']
            #FIXME if MAX_ENSEMBL_RELEASE very important and has to be used
            assembly2releases[assembly] = (assembly_releases.min(),
                                           assembly_releases.max())
        Species.register(latin_name=spc,
                         synonyms=species_rows['synonymn'].unique().tolist(),
                         reference_assemblies=assembly2releases)

    # Attach the table once, after the loop, so it is available even when
    # no installed genome was found.
    Species.dspecies = dspecies
    return Species

コード例 #5

ファイルを表示

ファイル: test_serialization.py プロジェクト: yech1990/pyensembl

def test_species_to_json():
    """Serialize `human` to JSON, rebuild it, and compare with the original."""
    json_text = human.to_json()
    round_tripped = Species.from_json(json_text)
    eq_(human, round_tripped)

コード例 #6

ファイルを表示

ファイル: test_serialization.py プロジェクト: yech1990/pyensembl

def test_species_to_dict():
    """Convert `human` to a dict, rebuild it, and compare with the original."""
    fields = human.to_dict()
    round_tripped = Species.from_dict(fields)
    eq_(human, round_tripped)