Python IDConverter 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: genemunge.convert

메소드/함수: IDConverter

hotexamples.com에서의 예제들: 5

Python IDConverter - 5개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python genemunge.convert.IDConverter의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가하면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: test_convert.py 프로젝트: unlearnai/genemunge

def test_converter_convert_list():
    """Check that a list of Ensembl IDs is converted to the expected gene symbols.

    The raw IDs are first passed through ``convert.clean_ensembl_ids`` and the
    cleaned list is then handed to ``IDConverter.convert_list``.
    """
    raw_ensembl_ids = [
        'ENSG00000000003.14',
        'ENSG00000000005.5',
        'ENSG00000000419.12',
    ]
    expected_symbols = ['TSPAN6', 'TNMD', 'DPM1']

    prepared_ids = convert.clean_ensembl_ids(raw_ensembl_ids)
    ensembl_to_symbol = convert.IDConverter('ensembl_gene_id', 'symbol')
    actual_symbols = ensembl_to_symbol.convert_list(prepared_ids)
    assert actual_symbols == expected_symbols

예제 #2

파일 보기

파일: test_convert.py 프로젝트: unlearnai/genemunge

def test_converter_convert():
    """Check that a single Ensembl ID is converted to the expected gene symbol.

    The raw ID is first passed through ``convert.clean_ensembl_id`` and the
    cleaned value is then handed to ``IDConverter.convert``.
    """
    raw_ensembl_id = 'ENSG00000000003.14'
    expected_symbol = 'TSPAN6'

    prepared_id = convert.clean_ensembl_id(raw_ensembl_id)
    ensembl_to_symbol = convert.IDConverter('ensembl_gene_id', 'symbol')
    actual_symbol = ensembl_to_symbol.convert(prepared_id)
    assert actual_symbol == expected_symbol

예제 #3

파일 보기

파일: test_convert.py 프로젝트: unlearnai/genemunge

def test_converter_all_targets():
    """Convert one Ensembl ID into every type listed in ``IDConverter.potential_ids``.

    No result is asserted; the test only requires that building a converter
    for each target type and calling ``convert`` does not raise.
    """
    target_types = convert.IDConverter.potential_ids
    prepared_id = convert.clean_ensembl_id('ENSG00000000003.14')

    for target_type in target_types:
        convert.IDConverter('ensembl_gene_id', target_type).convert(prepared_id)

예제 #4

파일 보기

def make_godict(gofile, force=False):
    """
    Parses the Gene Ontology file and creates a dictionary that is easier
    to work with. Saves the dictionary as a json file (OUTPUTFILE).

    Gene annotations are read from the gzipped, tab-separated ANNOTATIONFILE.
    Each database id is converted from 'uniprot_ids' to 'ensembl_gene_id'
    and appended to the matching term's 'genes' entry for its evidence code.

    Notes:

        Annotation columns used below (0-based index into the split line):
            uniprot id: column 1
            gene symbol: column 2 (not used)
            qualifier: column 3
            GO term id: column 4
            GO Evidence codes: column 6

        Experiment:
            Inferred from Experiment (EXP)
            Inferred from Direct Assay (IDA)
            Inferred from Physical Interaction (IPI)
            Inferred from Mutant Phenotype (IMP)
            Inferred from Genetic Interaction (IGI)
            Inferred from Expression Pattern (IEP)

        Computational:
            Inferred from Sequence or structural Similarity (ISS)
            Inferred from Sequence Orthology (ISO)
            Inferred from Sequence Alignment (ISA)
            Inferred from Sequence Model (ISM)
            Inferred from Genomic Context (IGC)
            Inferred from Biological aspect of Ancestor (IBA)
            Inferred from Biological aspect of Descendant (IBD)
            Inferred from Key Residues (IKR)
            Inferred from Rapid Divergence (IRD)
            Inferred from Reviewed Computational Analysis (RCA)

        Literature:
            Traceable Author Statement (TAS)
            Non-traceable Author Statement (NAS)

        Other:
            Inferred by Curator (IC)
            No biological Data available (ND) evidence code
            Inferred from Electronic Annotation (IEA)

    Args:
        gofile (str): path to the gene ontology file
        force (optional; bool): overwrite the json file if true

    Returns:
        None

    """
    # check if the outputfile already exists; do this first so that nothing
    # is imported or constructed when there is no work to do
    if not force and os.path.exists(OUTPUTFILE):
        return

    from genemunge import convert
    converter = convert.IDConverter('uniprot_ids', 'ensembl_gene_id')

    # id: {name, namespace, def, parents, children, genes}
    # connections (parent/child): 'is_a' or 'part_of'
    # ignore if 'is_obsolete: true'

    # read in the ontology file
    with open(gofile, "r") as go:
        unparsed = [line.rstrip() for line in go]

    # find the indices marking the beginning of each term
    indices = [i for i, x in enumerate(unparsed) if begins_with_pattern(x, "id:")]

    # group the terms; each group runs up to the start of the next one, and
    # the final group runs to the end of the file so it is not dropped
    group_ends = indices[1:] + [len(unparsed)]
    grouped = [unparsed[start:end] for start, end in zip(indices, group_ends)]

    # get rid of obsolete terms
    not_obsolete = [g for g in grouped if first_match(g, obsolete) is None]

    # get rid of any term that doesn't have ids
    has_id = [g for g in not_obsolete if first_match(g, go_id) is not None]

    # create the go dictionary
    godict = {}
    for group in has_id:
        parse_group(group, godict)

    # add the children terms
    for term in godict:
        parents = godict[term]['parents']
        for p in parents:
            if term not in godict[p]['children']:
                godict[p]['children'] += [term]

    # add the annotations
    with gzip.open(ANNOTATIONFILE, 'rb') as annotfile:
        for raw_line in annotfile:
            line = raw_line.decode('utf-8')

            # skip comments, and blank lines (which have no columns to index)
            if line.startswith('!') or not line.strip():
                continue

            parsed = line.strip().split('\t')

            # parsed[0] is the database (currently, this is always UniProtKB)
            # parsed[2] is the symbol (ORF for unnamed)
            # parsed[5] is the database reference
            database_id = parsed[1]
            qualifier = parsed[3]
            go_term = parsed[4]
            evidence = parsed[6]

            # what to do about colocalizes_with and contributes_to?
            if 'NOT' in qualifier:
                continue

            try:
                ensembl = converter.convert(database_id)
                # add the identifier if it is not NaN (NaN != NaN)
                if ensembl == ensembl:
                    godict[go_term]['genes'][evidence] += [ensembl]
            except KeyError:
                # we have filtered out obsolete go terms
                # therefore, we have to catch this exception
                pass

    # write to the file
    with open(OUTPUTFILE, "w") as outfile:
        json.dump(godict, outfile)

예제 #5

파일 보기

파일: test_convert.py 프로젝트: unlearnai/genemunge

def test_converter_construct():
    """Check that an IDConverter from 'symbol' to 'name' can be instantiated."""
    source_type, target_type = 'symbol', 'name'
    convert.IDConverter(source_type, target_type)