Python get_sequence 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: biotite.structure.io.pdbx

메소드/함수: get_sequence

hotexamples.com에서의 예제들: 3

Python get_sequence - 3개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python의 biotite.structure.io.pdbx.get_sequence 사용 예제 중 가장 높은 평가를 받은 실제 코드들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: test_pdbx.py 프로젝트: Discngine/biotite

def test_get_sequence():
    """
    Read the sequences from two mmCIF files and compare each of the
    first five, in order, against its expected symbols and its exact
    sequence class.
    """
    # Collect the sequences of both files into one list, in file order
    sequences = []
    for cif_name in ("5ugo.cif", "4gxy.cif"):
        cif_path = join(data_dir("structure"), cif_name)
        sequences += pdbx.get_sequence(pdbx.PDBxFile.read(cif_path))

    # (expected symbols, expected sequence class) for each position
    expected = [
        ("CCGACGGCGCATCAGC", seq.NucleotideSequence),
        ("GCTGATGCGCC", seq.NucleotideSequence),
        ("GTCGG", seq.NucleotideSequence),
        (
            "MSKRKAPQETLNGGITDMLTELANFEKNVSQAIHKYN"
            "AYRKAASVIAKYPHKIKSGAEAKKLPGVGTKIAEKIDEFLATGKLRKLEKIRQD"
            "DTSSSINFLTRVSGIGPSAARKFVDEGIKTLEDLRKNEDKLNHHQRIGLKYFGD"
            "FEKRIPREEMLQMQDIVLNEVKKVDSEYIATVCGSFRRGAESSGDMDVLLTHPS"
            "FTSESTKQPKLLHQVVEQLQKVHFITDTLSKGETKFMGVCQLPSKNDEKEYPHR"
            "RIDIRLIPKDQYYCGVLYFTGSDIFNKNMRAHALEKGFTINEYTIRPLGVTGVA"
            "GEPLPVDSEKDIFDYIQWKYREPKDRSE",
            seq.ProteinSequence,
        ),
        (
            "GGCGGCAGGTGCTCCCGACCCTGCGGTCGGGAGTTAA"
            "AAGGGAAGCCGGTGCAAGTCCGGCACGGTCCCGCCACTGTGACGGGGAGTCGCC"
            "CCTCGGGATGTGCCACTGGCCCGAAGGCCGGGAAGGCGGAGGGGCGGCGAGGAT"
            "CCGGAGTCAGGAAACCTGCCTGCCGTC",
            seq.NucleotideSequence,
        ),
    ]

    # Index explicitly so that a missing sequence still raises an error
    # instead of being skipped silently
    for index, (symbols, sequence_type) in enumerate(expected):
        found = sequences[index]
        assert str(found) == symbols
        # The exact class is checked on purpose, subclasses do not count
        assert type(found) is sequence_type

예제 #2

파일 보기

def test_search_sequence():
    """
    Check that every hit of an RCSB sequence search fulfils the
    requested minimum sequence identity.

    The first sequence of the local ``1l2y.cif`` file is used as
    reference for a ``"protein"`` sequence query.
    For each returned ID the FASTA file is fetched, its sequence is
    optimally aligned to the reference (no terminal gap penalty) and
    the identity, computed with ``mode="shortest"``, must not fall below
    the cutoff given to the query.
    """
    IDENTITY_CUTOFF = 0.9
    pdbx_file = pdbx.PDBxFile.read(join(data_dir("structure"), "1l2y.cif"))
    ref_sequence = pdbx.get_sequence(pdbx_file)[0]
    query = rcsb.SequenceQuery(ref_sequence,
                               "protein",
                               min_identity=IDENTITY_CUTOFF)
    test_ids = rcsb.search(query)

    # The substitution matrix does not depend on the hit,
    # so it is created once instead of once per iteration
    matrix = align.SubstitutionMatrix.std_protein_matrix()
    # 'pdb_id' instead of 'id' to avoid shadowing the builtin function
    for pdb_id in test_ids:
        fasta_file = fasta.FastaFile.read(rcsb.fetch(pdb_id, "fasta"))
        test_sequence = fasta.get_sequence(fasta_file)
        # Only the first of the returned optimal alignments is evaluated
        alignment = align.align_optimal(ref_sequence,
                                        test_sequence,
                                        matrix,
                                        terminal_penalty=False)[0]
        identity = align.get_sequence_identity(alignment, mode="shortest")
        assert identity >= IDENTITY_CUTOFF

예제 #3

파일 보기

파일: residue_coevolution.py 프로젝트: thomasnevolianis/biotite

import matplotlib.colors as colors
import biotite
import biotite.structure as struc
import biotite.structure.io.pdbx as pdbx
import biotite.sequence as seq
import biotite.sequence.io.fasta as fasta
import biotite.sequence.align as align
import biotite.sequence.graphics as graphics
import biotite.application.blast as blast
import biotite.application.clustalo as clustalo
import biotite.database.rcsb as rcsb
import biotite.database.entrez as entrez

# Get structure and sequence
# The mmCIF file of entry 1GUU is obtained via 'rcsb.fetch()' at run time
pdbx_file = pdbx.PDBxFile.read(rcsb.fetch("1GUU", "mmcif"))
# Only the first sequence found in the file is used as query sequence
sequence = pdbx.get_sequence(pdbx_file)[0]
# 'use_author_fields' is set to false,
# to ensure that values in the 'res_id' annotation point to the sequence
structure = pdbx.get_structure(pdbx_file, model=1, use_author_fields=False)
# Keep only the atoms selected by the amino acid filter
structure = structure[struc.filter_amino_acids(structure)]

# Identity threshold for a sequence to be counted as homologous sequence
# NOTE(review): the name is misspelled ('THESHOLD'); it is left unchanged
# because code outside of this section may refer to it
IDENTITY_THESHOLD = 0.4
# Find homologous proteins in SwissProt via BLAST
app = blast.BlastWebApp("blastp", sequence, database="swissprot")
app.start()
# Presumably blocks until the BLAST run has finished - TODO confirm
app.join()
alignments = app.get_alignments()
# Hit lists, each starting with an entry for the query itself
# (presumably extended in parallel by later code - TODO confirm)
hit_seqs = [sequence]
hit_ids = ["Query"]
hit_starts = [1]