Ejemplo n.º 1
0
def visualize_secondary_structure(sse, first_id):
    """
    Plot helices and sheets of a secondary structure as a feature map.

    The DSSP codes in `sse` are reduced to three classes
    (``"a"``: helix, ``"b"``: sheet, ``"c"``: coil).
    Each run of consecutive helix or sheet residues becomes one
    ``"SecStr"`` feature; coil runs are not drawn.
    The figure is displayed via ``plt.show()``.

    Parameters
    ----------
    sse : sequence of str
        One DSSP code (``I, S, H, E, G, B, T`` or ``C``) per residue.
        The sequence is not modified.
    first_id : int
        Residue ID that corresponds to ``sse[0]``.

    Raises
    ------
    ValueError
        If `sse` is empty.
    KeyError
        If `sse` contains a code that is not a known DSSP code.
    """
    dssp_to_abc = {
        "I": "c",
        "S": "c",
        "H": "a",
        "E": "b",
        "G": "c",
        "B": "b",
        "T": "c",
        "C": "c"
    }

    # Translate into a new list instead of overwriting the input:
    # in-place translation clobbers the caller's data and makes a second
    # call with the same sequence fail with a KeyError
    abc = [dssp_to_abc[code] for code in sse]
    if not abc:
        raise ValueError("'sse' must contain at least one element")

    def _add_sec_str(annotation, first, last, str_type):
        # Only helices and sheets are drawn, coil regions stay empty
        if str_type == "a":
            str_type = "helix"
        elif str_type == "b":
            str_type = "sheet"
        else:
            # coil
            return
        feature = seq.Feature("SecStr", [seq.Location(first, last)],
                              {"sec_str_type": str_type})
        annotation.add_feature(feature)

    # Find the intervals for each secondary structure element
    # and add to annotation
    annotation = seq.Annotation()
    curr_start = 0
    for i in range(1, len(abc)):
        if abc[i] != abc[i - 1]:
            # The element that just ended covers curr_start .. i-1
            _add_sec_str(annotation, curr_start + first_id,
                         i - 1 + first_id, abc[curr_start])
            curr_start = i
    # Add last secondary structure element to annotation:
    # it extends up to and including the final residue
    _add_sec_str(annotation, curr_start + first_id,
                 len(abc) - 1 + first_id, abc[curr_start])

    fig = plt.figure(figsize=(8.0, 3.0))
    ax = fig.add_subplot(111)
    graphics.plot_feature_map(
        ax,
        annotation,
        symbols_per_line=150,
        # 'loc_range' has an exclusive stop
        loc_range=(first_id, first_id + len(abc)),
        show_numbers=True,
        show_line_position=True,
        feature_plotters=[HelixPlotter(), SheetPlotter()])
    fig.tight_layout()
    plt.show()
Ejemplo n.º 2
0
def make_feature_maps(gene):
    """
    Download a GenBank record and save one feature map per feature key.

    The record is fetched from the ``nuccore`` database into the
    temporary directory. For every distinct feature key in the record a
    feature map containing only the features of that key is written to
    ``<cwd>/app/static/images/<key>.png``.
    ``session['valid_gene']`` is set to ``True`` once at least one image
    has been written.

    Parameters
    ----------
    gene : str
        The identifier passed to ``entrez.fetch``.

    Returns
    -------
    None
        Also when fetching or parsing fails; in that case an error
        message is flashed and nothing is plotted.
    """
    try:
        find_id = entrez.fetch(gene,
                               gettempdir(),
                               suffix="gb",
                               db_name="nuccore",
                               ret_type="gb")
        read_file = gb.GenBankFile.read(find_id)
        file_annotation = gb.get_annotation(read_file)
    except Exception:
        # Deliberately broad (download, I/O and parsing can all fail),
        # but unlike a bare 'except' this lets KeyboardInterrupt and
        # SystemExit through
        flash('The entered gene could not found. Please try again.', 'error')
        return None

    features = list(file_annotation)

    # The 'source' feature spans the entire record
    loc_range = None
    for feature in features:
        if feature.key == "source":
            # loc_range has exclusive stop
            loc = list(feature.locs)[0]
            loc_range = (loc.first, loc.last + 1)
    if loc_range is None:
        # No 'source' feature: fall back to the span of all features,
        # so that every plot still shares the same range.
        # If there are no locations at all, None is passed on and the
        # range is left to 'plot_feature_map'
        all_locs = [loc for feature in features for loc in feature.locs]
        if all_locs:
            loc_range = (min(loc.first for loc in all_locs),
                         max(loc.last for loc in all_locs) + 1)

    pwd = os.getcwd()

    unique_keys = np.unique([feature.key for feature in features])
    for key in unique_keys:
        fig, ax = plt.subplots(figsize=(8.0, 2.0))
        graphics.plot_feature_map(ax,
                                  seq.Annotation([
                                      feature for feature in features
                                      if feature.key == key
                                  ]),
                                  multi_line=False,
                                  loc_range=loc_range,
                                  show_line_position=True)

        ax.set_title('This plot is for {} features'.format(key))
        fig.savefig(pwd + '/app/static/images/{}.png'.format(key), dpi=300)
        # Release the figure, otherwise every call leaks one open
        # figure per feature key
        plt.close(fig)

    if len(unique_keys) > 0:
        session['valid_gene'] = True

    return None
Ejemplo n.º 3
0
def visualize_secondary_structure(sse, first_id, linesize=200):
    """
    Draw helices and sheets of a secondary structure as a feature map.

    Each run of consecutive ``"a"`` (helix) or ``"b"`` (sheet) elements
    becomes one ``"SecStr"`` feature; every other value is treated as
    coil and is not drawn.
    The figure is created but neither shown nor returned.

    Parameters
    ----------
    sse : ndarray
        One secondary structure code per residue.
        Only ``sse.shape[0]``, indexing and element comparison are used.
    first_id : int
        Residue ID that corresponds to ``sse[0]``.
    linesize : int, optional
        Number of residues drawn per line (``symbols_per_line``).

    Raises
    ------
    ValueError
        If `sse` is empty.
    """
    length = sse.shape[0]
    if length == 0:
        raise ValueError("'sse' must contain at least one element")

    def _add_sec_str(annotation, first, last, str_type):
        # Only helices and sheets are drawn, coil regions stay empty
        if str_type == "a":
            str_type = "helix"
        elif str_type == "b":
            str_type = "sheet"
        else:
            # coil
            return
        feature = seq.Feature("SecStr", [seq.Location(first, last)],
                              {"sec_str_type": str_type})
        annotation.add_feature(feature)

    # Find the intervals for each secondary structure element
    # and add to annotation
    annotation = seq.Annotation()
    curr_start = 0
    for i in range(1, length):
        if sse[i] != sse[i - 1]:
            # The element that just ended covers curr_start .. i-1
            _add_sec_str(annotation, curr_start + first_id,
                         i - 1 + first_id, sse[curr_start])
            curr_start = i
    # Add last secondary structure element to annotation:
    # it extends up to and including the final residue
    _add_sec_str(annotation, curr_start + first_id,
                 length - 1 + first_id, sse[curr_start])

    fig = plt.figure(figsize=(8.0, 3.0))
    ax = fig.add_subplot(111)
    graphics.plot_feature_map(
        ax,
        annotation,
        symbols_per_line=linesize,
        # The features are placed relative to 'first_id', so the plotted
        # range must start there as well ('loc_range' has exclusive stop)
        loc_range=(first_id, first_id + length),
        show_numbers=True,
        show_line_position=True,
        feature_plotters=[HelixPlotter(), SheetPlotter()])
    fig.tight_layout()
Ejemplo n.º 4
0
# Only 'gene' features are read from the GenBank file
annotation = gb.get_annotation(gb_file, include_only=["gene"])

# Determine the smallest first and the largest last position over all
# locations of the lacA gene
min_loc = seq_length
max_loc = 1
for feature in annotation:
    for loc in feature.locs:
        # Skip features without a gene name and pseudo-genes
        # (e.g. gene fragments)
        if "gene" not in feature.qual or "pseudo" in feature.qual:
            continue
        # Only the lacA gene is of interest
        if feature.qual["gene"] != "lacA":
            continue
        min_loc = min(min_loc, loc.first)
        max_loc = max(max_loc, loc.last)

# Extend the location range by 10000 (arbitrary) in each direction
min_loc -= 10000
max_loc += 10000

# Visualize the region as feature map
fig = plt.figure(figsize=(8.0, 8.0))
ax = fig.add_subplot(111)
graphics.plot_feature_map(
    ax,
    annotation,
    symbols_per_line=2000,
    loc_range=(min_loc, max_loc),
    show_numbers=True,
    show_line_position=True,
)
fig.tight_layout()

plt.show()
Ejemplo n.º 5
0
########################################################################
# Similarly to :class:`Alignment` objects, we can visualize an
# Annotation in a *feature map*.
# In order to avoid overlapping features, we draw only the *CDS* feature.

# The *source* feature provides the range of the entire annotation
for feature in annotation:
    if feature.key != "source":
        continue
    # 'loc_range' has an exclusive stop, hence the '+ 1'
    loc = list(feature.locs)[0]
    loc_range = (loc.first, loc.last + 1)

# Plot only the CDS features on a single line
fig, ax = plt.subplots(figsize=(8.0, 1.0))
graphics.plot_feature_map(
    ax,
    seq.Annotation([f for f in annotation if f.key == "CDS"]),
    loc_range=loc_range,
    multi_line=False,
    show_line_position=True,
)
fig.tight_layout()

########################################################################
# :class:`Annotation` objects can be indexed with slices, that represent
# the start and the stop base/residue of the annotation from which the
# subannotation is created.
# All features, that are not in this range, are not included in the
# subannotation.
# In order to demonstrate this indexing method, we create a
# subannotation that includes only features in range of the gene itself
# (without the regulatory stuff).
Ejemplo n.º 6
0
#     feature_plotters=[ssp.HelixPlotter(), ssp.SheetPlotter()]
# )

#%% Used if 3D structure is not available and secondary structure inferred from model

# CSV file with the exported secondary structure
# (presumably a Jpred prediction for SrpR, judging by the file name)
exported_ss_path = os.path.join(
    dataRootDir,
    ssFolderDir1,
    'SrpR_structure_model',
    'SrpR_Jpred_sec_struct.csv',
)
annotation = ssp.ss_csv_to_annotation(csv_path=exported_ss_path)

# Draw the secondary structure into the subplot at row 4, column 1
graphics.plot_feature_map(
    ax[4, 1],
    annotation,
    feature_plotters=[ssp.HelixPlotter(), ssp.SheetPlotter()],
    # 'loc_range' takes exclusive stop -> length+1 is required
    # BM011: (1, 214); BM010 would be (1, 194)
    loc_range=(1, 214),
    multi_line=False,
    show_numbers=False,
    show_line_position=False,
)

# Optional x-axis restriction, currently disabled:
# ax[4,1].set_xlim(start_aa,end_aa)

# Set the vertical and horizontal spacing between the subplots
plt.gcf().subplots_adjust(wspace=0.1, hspace=0.15)

# Save Figure
# NOTE: despite its name, 'outputDir' is the path of the figure file
outputDir = os.path.join(dataRootDir, ssFolderDir1, figName)
fig.savefig(outputDir)
Ejemplo n.º 7
0
                color=biotite.colors["orange"],
                linewidth=0))


# Try out the drawing functions on a small hand-made annotation:
# a helix at 10-40 and a sheet at 60-90
annotation = seq.Annotation([
    seq.Feature("SecStr", [seq.Location(first, last)],
                {"sec_str_type": sec_str_type})
    for first, last, sec_str_type in [(10, 40, "helix"), (60, 90, "sheet")]
])

fig, ax = plt.subplots(figsize=(8.0, 0.8))
graphics.plot_feature_map(
    ax,
    annotation,
    # Register our drawing functions
    feature_plotters=[HelixPlotter(), SheetPlotter()],
    loc_range=(1, 100),
    multi_line=False,
)
fig.tight_layout()

########################################################################
# Now let us do some serious application.
# We want to visualize the secondary structure of one monomer of the
# homodimeric transketolase (PDB: 1QGD).
# The simplest way to do that is to fetch the corresponding GenBank
# file, extract an `Annotation` object from the file and draw the
# annotation.

# Download the GenBank file of the TK's first chain (1QGD_A) from the
# protein database into biotite's temporary directory;
# the annotation is extracted from this file afterwards
file_name = entrez.fetch(
    "1QGD_A",
    biotite.temp_dir(),
    suffix="gb",
    db_name="protein",
    ret_type="gb",
)
Ejemplo n.º 8
0
                dx,
                dy,
                self._tail_width * bbox.height,
                self._head_width * bbox.height,
                # Create head with 90 degrees tip
                # -> head width/length ratio = 1/2
                head_ratio=0.5,
                draw_head=draw_head,
                color=biotite.colors["orange"],
                linewidth=0))


#%%

# Draw the annotation as a single-line feature map on a flat figure
fig, ax = plt.subplots(figsize=(8.0, 0.8))
graphics.plot_feature_map(
    ax,
    annotation,
    # Register our drawing functions
    feature_plotters=[HelixPlotter(), SheetPlotter()],
    loc_range=(1, 194),
    multi_line=False,
)

# graphics.plot_feature_map(
#     ax[4,1], annotation, multi_line=False,
#     show_numbers=False, show_line_position=False,
#     # 'loc_range' takes exclusive stop -> length+1 is required
#     loc_range=(1,194),
#     feature_plotters=[HelixPlotter(), SheetPlotter()]
# )
Ejemplo n.º 9
0
import biotite.sequence.graphics as graphics

# All features of this synthetic construct lie on the forward strand
strand = Location.Strand.FORWARD

# Promoter
prom = Feature(
    "regulatory",
    [Location(10, 50, strand)],
    {"regulatory_class": "promoter", "note": "T7"},
)
# First ribosome binding site and gene
rbs1 = Feature(
    "regulatory",
    [Location(60, 75, strand)],
    {"regulatory_class": "ribosome_binding_site", "note": "RBS1"},
)
gene1 = Feature("gene", [Location(81, 380, strand)], {"gene": "gene1"})
# Second ribosome binding site and gene
rbs2 = Feature(
    "regulatory",
    [Location(400, 415, strand)],
    {"regulatory_class": "ribosome_binding_site", "note": "RBS2"},
)
gene2 = Feature("gene", [Location(421, 1020, strand)], {"gene": "gene2"})
# Terminator
term = Feature(
    "regulatory",
    [Location(1050, 1080, strand)],
    {"regulatory_class": "terminator"},
)
annotation = Annotation([prom, rbs1, gene1, rbs2, gene2, term])

# Draw all features on a single line
fig, ax = plt.subplots(figsize=(8.0, 0.8))
graphics.plot_feature_map(
    ax,
    annotation,
    loc_range=(1, 1101),
    multi_line=False,
)
fig.tight_layout()
plt.show()