def test_genbank_utility_gp():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenPept file.

    The file ``bt_lysozyme.gp`` is read from the ``sequence`` test data
    directory and its locus, definition, version, GI, annotation and
    sequence length are compared with hard-coded reference values.
    """
    gp_file = gb.GenBankFile.read(join(data_dir("sequence"), "bt_lysozyme.gp"))
    assert gb.get_locus(gp_file) \
        == ("AAC37312", 147, "", False, "MAM", "27-APR-1993")
    assert gb.get_definition(gp_file) == "lysozyme [Bos taurus]."
    assert gb.get_version(gp_file) == "AAC37312.1"
    assert gb.get_gi(gp_file) == 163334
    annotation = gb.get_annotation(gp_file)
    # Reference feature that must be present in the parsed annotation
    feature = seq.Feature(
        "Site",
        [seq.Location(start, stop) for start, stop in zip(
            [52,55,62,76,78,81,117,120,125],
            [53,55,62,76,78,81,117,120,126]
        )],
        {"note": "lysozyme catalytic cleft [active]", "site_type": "active"}
    )
    # A parsed feature matches if key and locations are equal and every
    # reference qualifier is present with the same value.
    # The membership test must be written as '(key, val) in ...':
    # '(key, val in ...)' would create a non-empty tuple, which is
    # always truthy, so the qualifiers would never actually be checked.
    in_annotation = any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(
            (key, val) in f.qual.items()
            for key, val in feature.qual.items()
        )
        for f in annotation
    )
    assert in_annotation
    assert len(gb.get_sequence(gp_file, format="gp")) == 147
def test_genbank_utility_gb():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenBank file.

    The file ``ec_bl21.gb`` is read from the ``sequence`` test data
    directory and its locus, definition, version, GI, database links,
    CDS annotation and sequence length are compared with hard-coded
    reference values.
    """
    gb_file = gb.GenBankFile.read(join(data_dir("sequence"), "ec_bl21.gb"))
    assert gb.get_locus(gb_file) \
        == ("CP001509", 4558953, "DNA", True, "BCT", "16-FEB-2017")
    assert gb.get_definition(gb_file) \
        == ("Escherichia coli BL21(DE3), complete genome.")
    assert gb.get_version(gb_file) == "CP001509.3"
    assert gb.get_gi(gb_file) == 296142109
    assert gb.get_db_link(gb_file) \
        == {"BioProject" : "PRJNA20713", "BioSample" : "SAMN02603478"}
    annotation = gb.get_annotation(gb_file, include_only=["CDS"])
    # Reference feature that must be present in the parsed annotation
    feature = seq.Feature(
        "CDS",
        [seq.Location(5681, 6457, seq.Location.Strand.REVERSE)],
        {"gene": "yaaA", "transl_table": "11"}
    )
    # A parsed feature matches if key and locations are equal and every
    # reference qualifier is present with the same value.
    # The membership test must be written as '(key, val) in ...':
    # '(key, val in ...)' would create a non-empty tuple, which is
    # always truthy, so the qualifiers would never actually be checked.
    in_annotation = any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(
            (key, val) in f.qual.items()
            for key, val in feature.qual.items()
        )
        for f in annotation
    )
    assert in_annotation
    assert len(gb.get_sequence(gb_file, format="gb")) == 4558953
Exemple #3
0
    "R": -4.5
}

# Look for the Swiss-Prot entry containing the human HCN1 channel
# The three sub-queries are combined with '&', so an entry must satisfy
# all of them.
# NOTE: the organism term must be spelled out in full; a masked or
# misspelled name would not select the intended organism
query =   entrez.SimpleQuery("HCN1", "Gene Name") \
        & entrez.SimpleQuery("homo sapiens", "Organism") \
        & entrez.SimpleQuery("srcdb_swiss-prot", "Properties")
uids = entrez.search(query, db_name="protein")
# Only the first hit is downloaded (as GenPept file into the temporary
# directory)
file_name = entrez.fetch(uids[0],
                         biotite.temp_dir(),
                         "gp",
                         db_name="protein",
                         ret_type="gp")

gp_file = gb.GenBankFile.read(file_name)
hcn1 = seq.ProteinSequence(gb.get_sequence(gp_file, format="gp"))
print(hcn1)

########################################################################
# The positional hydropathy is calculated and smoothed using
# a moving average for clearer visualization.

# Map each symbol of the sequence to its value in the hydropathy table
hydropathies = np.array([hydropathy_dict[symbol] for symbol in hcn1])


def moving_average(data_set, window_size):
    """
    Smooth a one-dimensional data set with an unweighted moving average.

    Parameters
    ----------
    data_set : array-like
        The values to be averaged.
    window_size : int
        The number of consecutive values that are averaged.

    Returns
    -------
    averaged : ndarray
        The result of convolving `data_set` with a uniform kernel of
        length `window_size` in ``'valid'`` mode, i.e. only positions
        where the window and the data overlap completely are kept.
    """
    # Every position in the window contributes equally
    uniform_weight = 1 / window_size
    kernel = np.full(window_size, uniform_weight)
    averaged = np.convolve(data_set, kernel, mode="valid")
    return averaged


# Apply moving average over 15 amino acids for clearer visualization
Exemple #4
0
import biotite.structure.graphics as strucgraphics
import biotite.application.viennarna as viennarna

# Download Escherichia coli BL21 and Salmonella enterica genome
# Both accessions are requested in one call and read as a multi-record
# file.
# NOTE(review): 'None' as target presumably means that the download is
# kept in memory instead of being written to disk - confirm
gb_file = gb.MultiFile.read(
    entrez.fetch_single_file(["CP001509", "CP019649"], None, "nuccore", "gb"))
# The unpacking requires exactly two records
# NOTE(review): assumes the records are yielded in the order the
# accessions were requested - confirm
ec_file, se_file = tuple(gb_file)

# Only 'ncRNA' features are included in the annotation
annot_seq = gb.get_annotated_sequence(ec_file, include_only=["ncRNA"])
# Find M1 gene in E. coli genome via its annotation
# NOTE(review): the loop has no 'break', so the last matching feature
# wins, and 'm1_sequence' stays undefined if no feature matches
for feature in annot_seq.annotation:
    if "product" in feature.qual and "RNase P" in feature.qual["product"]:
        m1_sequence = annot_seq[feature]

# Get S. enterica genome sequence
se_genome = gb.get_sequence(se_file)

# We want to search in the genome sequence and its reverse complement
genomic_seqs = [se_genome, se_genome.reverse().complement()]

########################################################################
# In an initial fast matching step, we look for matching *k-mers*
# between *M1* and the *S. enterica* genome.
# A matching *k-mer* is a length *k* subsequence, that appears in both
# sequences.
# In *Biotite* this task is carried out by a :class:`KmerTable`.
#
# Later we will perform costly gapped alignments at the match positions.
# To reduce the number of triggered gapped alignments at unspecific
# matches, we add some additional filters:
# One extra condition is that two non-overlapping matches must appear