Python make_snv_key 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: genomic_mappings

메소드/함수: make_snv_key

hotexamples.com에서의 예제들: 4

Python make_snv_key - 4개의 예제를 찾았습니다. 아래 예제들은 오픈소스 프로젝트에서 추출한 Python genomic_mappings.make_snv_key의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

def test_make_snv_key():
    """Check that make_snv_key yields equal keys for equivalent inputs.

    Each row pairs two (chrom, pos, ref, alt) tuples that differ only in
    the letter case of ref/alt; both must produce the same key.
    """
    equivalent_pairs = [
        # (chrom, pos, ref, alt), (chrom, pos, ref, alt)
        (('1', '211', 'A', 'C'), ('1', '211', 'A', 'c')),
        (('X', '2012', 'T', 'G'), ('X', '2012', 't', 'g')),
    ]
    make_key = genomic_mappings.make_snv_key
    for reference_form, alternate_form in equivalent_pairs:
        assert make_key(*reference_form) == make_key(*alternate_form)

예제 #2

파일 보기

파일: test_import_results.py 프로젝트: KaiLiCn/ActiveDriverDB

def test_mappings():
    """This is a simple inclusion test for genome -> proteome mutation mappings.

    Knowing the data, we demand the items from the right side (of test data)
    to be in the results of queries specified on the left side.

    Raises:
        Exception: when no item stored under the queried key matches the
            expected protein data. The arguments are the list of all
            candidates that were retrieved (possibly empty) and the
            expected tuple.
    """

    test_data = (
        # (chrom, dna_pos, dna_ref, dna_alt), (name, pos, ref, alt)
        (('17', '7572934', 'G', 'A'), ('TP53', 353, 'S', 'L')),
        (('17', '19282215', 't', 'a'), ('MAPK7', 1, 'M', 'K')),
        (('21', '40547520', 'g', 'a'), ('PSMG1', 283, 'T', 'I')),
        (('9', '125616157', 't', 'a'), ('RC3H2', 1064, 'Y', 'F')),
        (('11', '120198175', 'g', 'a'), ('TMEM136', 31, 'V', 'M')),
        (('10', '81838457', 't', 'a'), ('TMEM254', 1, 'M', 'K')),
        (('13', '111267940', 't', 'a'), ('CARKD', 1, 'M', 'K')),
        (('6', '30539266', 't', 'a'), ('ABCF1', 1, 'M', 'K')),
        (('6', '36765430', 'g', 'a'), ('CPNE5', 140, 'L', 'F')),
        (('12', '123464753', 't', 'a'), ('ARL6IP4', 1, 'M', 'K')),
    )

    for genomic_data, protein_data in test_data:

        snv = make_snv_key(*genomic_data)

        items = [decode_csv(item) for item in bdb[snv]]

        # Collected per query so the failure report never refers to an
        # unbound name (no items at all) or to a tuple left over from a
        # previous row of test_data.
        mismatched = []

        for item in items:
            retrieved_data = (Protein.query.get(item['protein_id']).gene.name,
                              item['pos'], item['ref'], item['alt'])
            if retrieved_data == protein_data:
                break
            mismatched.append(retrieved_data)
        else:
            # the loop finished without finding the expected mapping
            raise Exception(mismatched, protein_data)

예제 #3

파일 보기

파일: test_protein_mappings.py 프로젝트: krassowski/ActiveDriverDB

    def test_genome_proteome_mappings(self):
        """Import generated test mappings, then verify lookups and the broken-sequence report."""

        mappings_filename, gene, proteins = create_test_data()

        mappings_dir = path.dirname(mappings_filename)
        mappings_file = path.basename(mappings_filename)
        broken_sequences = import_genome_proteome_mappings(
            proteins, mappings_dir, mappings_file)

        # in some cases it is needed to reload bdb after import
        bdb.reload()

        # same position and alleles, different chromosome: only the
        # chromosome 17 key is expected to hold data
        key_on_chr_1 = make_snv_key('1', 19282216, 'G', 'A')
        assert not bdb[key_on_chr_1]
        key_on_chr_17 = make_snv_key('17', 19282216, 'G', 'A')
        assert bdb[key_on_chr_17]

        reported_refseqs = set(broken_sequences.keys())
        assert reported_refseqs == {'NM_002749'}

        expected_report = [('NM_002749', 'L', 'A', '5', 'Q')]
        assert expected_report in list(broken_sequences.values())

예제 #4

파일 보기

파일: mappings.py 프로젝트: KaiLiCn/ActiveDriverDB

def import_genome_proteome_mappings(
        proteins,
        mappings_dir='data/200616/all_variants/playground',
        mappings_file_pattern='annot_*.txt.gz',
        bdb_dir=''):
    """Rebuild the `bdb` store of genomic variant -> protein mutation mappings.

    The store is reset, closed and reopened, then filled from the lines
    yielded by `read_from_gz_files(mappings_dir, mappings_file_pattern)`.
    Each line is expected to hold five tab-separated fields
    (chrom, pos, ref, alt, prot), where `prot` is a comma-separated list of
    `name:refseq:exon:cdna_mut:prot_mut` annotations.

    Records that cannot be split or decoded, or that `determine_strand`
    rejects, are printed and skipped. Annotations whose refseq is not a
    key of `proteins` are skipped silently. Format expectations that are
    checked with `assert` abort the import with AssertionError
    (NOTE: asserts are not evaluated when Python runs with -O).

    Args:
        proteins: mapping indexed by refseq identifier; the values must
            provide `has_sites_in_range(...)` and an `id` attribute.
        mappings_dir: directory passed to `read_from_gz_files`.
        mappings_file_pattern: file name pattern passed to
            `read_from_gz_files`.
        bdb_dir: when non-empty, the database file is opened in this
            directory (keeping the configured file name) instead of at
            the configured path.

    Returns:
        A `defaultdict(list)` keyed by refseq, holding every truthy value
        returned by `is_sequence_broken` for that refseq.
    """
    print('Importing mappings:')

    chromosomes = get_human_chromosomes()
    broken_seq = defaultdict(list)

    # start from an empty store: reset, then close before reopening it
    bdb.reset()
    bdb.close()

    path = current_app.config['BDB_DNA_TO_PROTEIN_PATH']
    if bdb_dir:
        # keep the configured file name, but place it in the given directory
        path = bdb_dir + '/' + basename(path)

    # 20480 * 8**4 == 83,886,080; the unit is defined by bdb.open
    # (presumably bytes - TODO confirm)
    bdb.open(path, cache_size=20480 * 8 * 8 * 8 * 8)

    for line in read_from_gz_files(mappings_dir, mappings_file_pattern):
        try:
            chrom, pos, ref, alt, prot = line.rstrip().split('\t')
        except ValueError as e:
            # the line does not have exactly five tab-separated fields
            print(e, line)
            continue

        # drop the 'chr' prefix, so that only the chromosome name is left
        assert chrom.startswith('chr')
        chrom = chrom[3:]

        assert chrom in chromosomes
        ref = ref.rstrip()

        # new Coding Sequence Variants to be added to those already
        # mapped from given `snv` (Single Nucleotide Variation)

        # empty entries (e.g. produced by a trailing comma) are filtered out
        for dest in filter(bool, prot.split(',')):
            try:
                name, refseq, exon, cdna_mut, prot_mut = dest.split(':')
            except ValueError as e:
                # only this annotation is skipped, not the whole line
                print(e, line)
                continue
            assert refseq.startswith('NM_')
            # refseq = int(refseq[3:])
            # name and refseq are redundant with respect one to another

            # drop the 'exon' prefix
            assert exon.startswith('exon')
            exon = exon[4:]

            assert cdna_mut.startswith('c')
            try:
                cdna_ref, cdna_pos, cdna_alt = decode_mutation(cdna_mut)
            except ValueError as e:
                print(e, line)
                continue

            try:
                strand = determine_strand(ref, cdna_ref, alt, cdna_alt)
            except DataInconsistencyError as e:
                print(e, line)
                continue

            assert prot_mut.startswith('p')
            # we can check here if a given reference nuc is consistent
            # with the reference amino acid. For example cytosine in
            # reference implies that there shouldn't be a methionine,
            # glutamic acid, lysine nor arginine. The same applies to
            # alternative nuc/aa and their combinations (having
            # references (nuc, aa): (G, K) and alt nuc C defines that
            # the alt aa has to be Asparagine (N) - no other is valid).
            # Note: it could be used to compress the data in memory too
            # NOTE(review): unlike the cDNA decoding above, a ValueError
            # raised here is not caught and would abort the import -
            # confirm that this is intended
            aa_ref, aa_pos, aa_alt = decode_mutation(prot_mut)

            try:
                # try to get it from cache (`proteins` dictionary)
                protein = proteins[refseq]
            except KeyError:
                # annotations of unknown proteins are skipped silently
                continue

            # three consecutive cDNA positions map to one amino acid
            # position (both counted from 1)
            assert aa_pos == (int(cdna_pos) - 1) // 3 + 1

            broken_sequence_tuple = is_sequence_broken(protein, aa_pos, aa_ref,
                                                       aa_alt)

            if broken_sequence_tuple:
                # report the problem to the caller and do not store the item
                broken_seq[refseq].append(broken_sequence_tuple)
                continue

            # checks a window of 7 positions on each side of the mutated
            # position (handling of the bounds is up to has_sites_in_range)
            is_ptm_related = protein.has_sites_in_range(aa_pos - 7, aa_pos + 7)

            # NOTE(review): the key is built from the cDNA ref/alt, not from
            # the `ref`/`alt` columns of the line - confirm against callers
            snv = make_snv_key(chrom, pos, cdna_ref, cdna_alt)

            # add new item, emulating set update
            item = encode_csv(strand, aa_ref, aa_alt, cdna_pos, exon,
                              protein.id, is_ptm_related)

            bdb.add(snv, item)

    return broken_seq