Esempio n. 1
0
def create_feature_annot(loc_range, featuretype, s):
    """Create a new feature annotation at loc_range with featuretype on strand s.

    Args:
        loc_range: Indexable pair; item 0 is used as the start and item 1 as
            the end, each wrapped in an ``ExactPosition``.
        featuretype: Value stored as the feature's ``type``.
        s: Strand of the feature.

    Returns:
        The newly built ``SeqFeature``.
    """
    # The strand is attached to the location instead of being handed to the
    # SeqFeature constructor: the constructor's ``strand`` keyword only ever
    # forwarded the value to the location and is not accepted by recent
    # Biopython releases.
    location = SeqFeature.FeatureLocation(
        SeqFeature.ExactPosition(loc_range[0]),
        SeqFeature.ExactPosition(loc_range[1]),
        strand=s)
    return SeqFeature.SeqFeature(location, type=featuretype)
Esempio n. 2
0
def modify_genbank(gb_file, fasta_file):
    """Write a ``*_modified`` copy of a GenBank file with updated gene/CDS data.

    The genome read from ``fasta_file`` is passed to ``get_final_annotations``;
    the result is looked up by locus tag. For every ``gene``/``CDS`` feature
    whose locus tag is present, the start is replaced by the annotated start
    (shifted by -1) and, for ``CDS`` features, the first ``product`` and
    ``translation`` qualifier values are overwritten. ``gene``/``CDS``
    features whose locus tag is absent are dropped; all other features are
    kept unchanged.

    Args:
        gb_file: Path of the GenBank file; must look like
            ``.../users/.../uploads/<name>.<ext>``.
        fasta_file: Path of a single-record FASTA file with the genome.

    Returns:
        Path of the written file, ``<name>_modified.<ext>``.

    Raises:
        ValueError: If ``gb_file`` does not match the expected path layout.
    """
    # The dot in front of the extension has to be escaped: as a wildcard it
    # swallowed the last character of the name and left the extension empty.
    gb_filename = re.search(r'(.*/users/.*/uploads/.*)\.(\w*)', gb_file)
    if gb_filename is None:
        raise ValueError(
            "Unexpected GenBank upload path: {!r}".format(gb_file))
    out_file = gb_filename.group(1) + '_modified.' + gb_filename.group(2)

    genome = SeqIO.read(fasta_file, "fasta").seq
    final_annotations = get_final_annotations(genome)

    records = []
    with open(gb_file, "r") as gb_handle:
        for record in SeqIO.parse(gb_handle, "genbank"):
            # Start from an empty list for each record so features never
            # leak from one record into the next.
            kept_features = []
            for feature in record.features:
                if feature.type in ("gene", "CDS"):
                    locus_tag = feature.qualifiers["locus_tag"][0]
                    if locus_tag not in final_annotations:
                        continue  # unannotated genes/CDS are not written out
                    annotation = final_annotations[locus_tag]
                    feature.location = SeqFeature.FeatureLocation(
                        SeqFeature.ExactPosition(annotation["start"] - 1),
                        SeqFeature.ExactPosition(
                            feature.location.end.position),
                        feature.location.strand)
                    if feature.type == "CDS":
                        feature.qualifiers["product"][0] = annotation[
                            "function"]
                        feature.qualifiers["translation"][0] = annotation[
                            "translation"]
                kept_features.append(feature)
            record.features = kept_features
            records.append(record)

    # Write every record in one go; reopening the output per record kept
    # only the last one.
    if records:
        with open(out_file, "w") as new_gb:
            SeqIO.write(records, new_gb, "genbank")

    return out_file
Esempio n. 3
0
def convert_annotations(block, gb, start=0):
    """Append one feature to ``gb.features`` per annotation of ``block``.

    Args:
        block: Mapping whose ``block["sequence"]["annotations"]`` is an
            iterable of annotation dicts.
        gb: Object with a ``features`` list that receives the new features.
        start: Offset added to each annotation's ``start``/``end`` before the
            location is built. Defaults to 0, i.e. the coordinates are used
            as given. NOTE(review): presumably the block's offset inside the
            whole construct, as passed by ``add_features`` - confirm.

    For each annotation:
      * the feature type is ``"unknown"``, replaced by a non-empty ``role``,
        and finally by ``notes["genbank"]["type"]`` when that is present;
      * ``color`` is stored as the ``GC_Color`` qualifier;
      * every other entry of ``notes["genbank"]`` and every annotation key
        that is not one of the reserved keys becomes a qualifier;
      * a location is set only when ``start`` is present; ``end`` is treated
        as inclusive (1 is added) and the strand is -1 only when
        ``annotation["strand"] == -1``.
    """
    reserved_keys = ("start", "end", "notes", "strand", "color", "role")

    for annotation in block["sequence"]["annotations"]:
        gb_annot = SeqFeature.SeqFeature()
        annotation_type = "unknown"

        if "role" in annotation and annotation["role"] != "":
            annotation_type = annotation["role"]

        # items() instead of iteritems(): works on Python 2 and Python 3.
        for key, value in annotation.items():
            if key not in reserved_keys:
                gb_annot.qualifiers[key] = value
            elif key == "color":
                gb_annot.qualifiers["GC_Color"] = value
            elif key == "notes":
                for notes_key, notes_value in value.items():
                    if notes_key != "genbank":
                        continue
                    for gb_key, gb_value in notes_value.items():
                        if gb_key == "type":
                            annotation_type = gb_value
                        else:
                            gb_annot.qualifiers[gb_key] = gb_value

        if "start" in annotation:
            strand = 1
            if "strand" in annotation and annotation["strand"] == -1:
                strand = -1
            gb_annot.location = SeqFeature.FeatureLocation(
                start + annotation["start"],
                start + annotation["end"] + 1,
                strand)

        gb_annot.type = annotation_type

        gb.features.append(gb_annot)
Esempio n. 4
0
def read_reference(fname, genemap):
    """Load a reference sequence and translate the regions listed in a gene map.

    Args:
        fname: Path of the reference. It is first read as a single-record
            FASTA file; if that fails for any reason the file is read as
            plain text and its stripped lines are concatenated.
        genemap: Path of a tab-separated annotation file. Columns 4 and 5
            (1-based) are the start and end, column 7 the strand and column 9
            a ``;``-separated list of ``key value`` attributes. Lines starting
            with ``#`` and blank lines are ignored.

    Returns:
        ``{"nuc": <reference string>, "translations": {name: protein string}}``
        where ``name`` is the unquoted ``gene_name`` attribute, or ``None``
        when a line has no such attribute (such entries overwrite each other).
    """
    try:
        ref = str(SeqIO.read(fname, 'fasta').seq)
    except Exception:
        # Deliberate best-effort fallback: treat the file as raw sequence
        # text. ``Exception`` (not a bare except) so that KeyboardInterrupt
        # and SystemExit still propagate.
        with open(fname, 'r') as fh:
            ref = "".join([x.strip() for x in fh])

    translations = {}
    with open(genemap, 'r') as fh:
        for line in fh:
            if not line.strip() or line.startswith('#'):
                continue
            entries = [x.strip() for x in line.strip().split('\t')]
            start = int(entries[3])
            end = int(entries[4])
            strand = entries[6]

            attributes = {}
            for attribute in entries[8].split(';'):
                tokens = attribute.split()
                # A trailing ';' produces an empty chunk; skip it instead of
                # failing on tokens[0].
                if tokens:
                    attributes[tokens[0]] = ' '.join(tokens[1:])

            if 'gene_name' in attributes:
                name = attributes['gene_name'].strip('"')
            else:
                name = None

            # start is shifted by -1 while end is used as is when building
            # the location.
            location = SeqFeature.FeatureLocation(
                start - 1, end, strand=-1 if strand == '-' else 1)
            translation = Seq.translate(
                SeqFeature.SeqFeature(location).extract(ref))
            translations[name] = str(translation)

    return {"nuc": ref, "translations": translations}
Esempio n. 5
0
def create_feature(hit, end):
    '''
    Creates a feature from the hit, setting colours
    based on orientation and noting whether it is a
    left or right end hit.

    hit: indexable; items 1 and 2 are converted to int and used as the
         start and stop of the feature location.
    end: 'five' for a left end hit, 'three' for a right end hit.

    Returns the new SeqFeature.
    Raises ValueError for any other value of end (this used to surface as
    an UnboundLocalError further down).
    '''
    # Annotate with end information and colour accordingly
    quals = {}
    if end == 'five':
        quals['colour'] = '2'
        quals['end'] = 'left_end'
        feat_type = 'left end'
    elif end == 'three':
        quals['colour'] = '7'
        quals['end'] = 'right end'
        feat_type = 'right_end'
    else:
        raise ValueError(
            "end must be 'five' or 'three', got {!r}".format(end))
    # NOTE(review): the underscore/space spelling of the qualifier value and
    # of the feature type is swapped between the two branches; left untouched
    # because downstream consumers may rely on the exact strings - confirm.

    # Set up coordinates
    start = int(hit[1])
    stop = int(hit[2])
    location = SeqFeature.FeatureLocation(start, stop)

    # Create feature
    feature = SeqFeature.SeqFeature(location, type=feat_type, qualifiers=quals)

    return feature
Esempio n. 6
0
 def _parse_feature(element):
     """Convert a feature XML element into a SeqFeature on the parsed record.

     All XML attributes are copied into the qualifiers; ``type`` and ``id``
     additionally set the feature's type and id. A child ``location`` element
     supplies the location, either from a single ``position`` child (used for
     both ends) or from a ``begin``/``end`` pair. The start is parsed with an
     offset of -1. The text of every other child is stored as a qualifier
     under its tag with the namespace prefix removed. The finished feature is
     appended to ``self.ParsedSeqRecord.features``.
     """
     feature = SeqFeature.SeqFeature()
     for k, v in element.attrib.items():
         feature.qualifiers[k] = v
     feature.type = element.attrib.get('type', '')
     if 'id' in element.attrib:
         feature.id = element.attrib['id']
     # Iterate the element directly: Element.getchildren() no longer exists
     # in current Python versions.
     for feature_element in element:
         if feature_element.tag == NS + 'location':
             position_elements = feature_element.findall(NS + 'position')
             if position_elements:
                 position_element = position_elements[0]
                 start_position = _parse_position(position_element, -1)
                 end_position = _parse_position(position_element)
             else:
                 begin_element = feature_element.findall(NS + 'begin')[0]
                 start_position = _parse_position(begin_element, -1)
                 end_element = feature_element.findall(NS + 'end')[0]
                 end_position = _parse_position(end_element)
             feature.location = SeqFeature.FeatureLocation(
                 start_position, end_position)
         else:
             try:
                 feature.qualifiers[feature_element.tag.replace(
                     NS, '')] = feature_element.text
             except Exception:
                 # Best effort: skip unparsable tags, but let
                 # KeyboardInterrupt/SystemExit through.
                 pass
     self.ParsedSeqRecord.features.append(feature)
Esempio n. 7
0
 def add_feature(self, search_str, label):
     """Label every occurrence of search_str on both strands.

     Does not label features that wrap around circular sequence.
     Forward matches get strand 1; matches of the reverse complement get
     strand -1 and are skipped when search_str is its own reverse complement,
     so palindromes are not labelled twice. Nothing happens when there is no
     match.

     NOTE(review): search_str is handed to ``re.finditer`` unescaped, so
     regex metacharacters in it are interpreted as a pattern - confirm that
     this is intended.

     Args:
         search_str (str): string representing feature
         label (str): feature name to display"""
     sequence = str(self.seq)
     f_matches = re.finditer(search_str, sequence, re.IGNORECASE)
     r_matches = ()
     if search_str.lower() != dna.revc(search_str.lower()):
         # Don't label palindromes twice
         r_matches = re.finditer(dna.revc(search_str), sequence,
                                 re.IGNORECASE)
     # Pair each match iterator with its strand explicitly. Zipping the
     # matches against a one-element strand list only gave the strand to the
     # first match and None to all later ones.
     for matches, strand in ((f_matches, 1), (r_matches, -1)):
         for m in matches:
             feature = SeqFeature.SeqFeature(
                 SeqFeature.FeatureLocation(m.start(), m.end(), strand),
                 'misc_feature')
             # Built-in next() works on Python 2 and Python 3 iterators.
             color = next(self.colors)
             feature.qualifiers = {
                 'label': [label],
                 'ApEinfo_fwdcolor': [color],
                 'ApEinfo_revcolor': [color],
                 'ApEinfo_graphicformat':
                 ['arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0']
             }
             self.features.append(feature)
     return
Esempio n. 8
0
 def _parse_feature(element):
     """Build a SeqFeature from a feature XML element and store it.

     XML attributes become qualifiers (``type`` and ``id`` also set the
     feature's type and id). A ``location`` child provides the location from
     either one ``position`` child or a ``begin``/``end`` pair, with the start
     parsed using an offset of -1. The text of any other child is stored as a
     qualifier keyed by its tag without the namespace prefix. The feature is
     appended to ``self.ParsedSeqRecord.features``.
     """
     feature = SeqFeature.SeqFeature()
     feature.qualifiers.update(element.attrib.items())
     feature.type = element.attrib.get("type", "")
     if "id" in element.attrib:
         feature.id = element.attrib["id"]

     for child in element:
         if child.tag != NS + "location":
             try:
                 qualifier_name = child.tag.replace(NS, "")
                 feature.qualifiers[qualifier_name] = child.text
             except Exception:  # TODO - Which exceptions?
                 pass  # skip unparsable tag
             continue

         single_positions = child.findall(NS + "position")
         if single_positions:
             # One position element describes both ends.
             only_position = single_positions[0]
             start_position = _parse_position(only_position, -1)
             end_position = _parse_position(only_position)
         else:
             start_position = _parse_position(
                 child.findall(NS + "begin")[0], -1)
             end_position = _parse_position(child.findall(NS + "end")[0])
         feature.location = SeqFeature.FeatureLocation(
             start_position, end_position
         )

     self.ParsedSeqRecord.features.append(feature)
Esempio n. 9
0
def _retrieve_reference(adaptor, primary_id):
    # XXX dbxref_qualifier_value

    refs = adaptor.execute_and_fetchall(
        "SELECT start_pos, end_pos, "
        " location, title, authors,"
        " dbname, accession"
        " FROM bioentry_reference"
        " JOIN reference USING (reference_id)"
        " LEFT JOIN dbxref USING (dbxref_id)"
        " WHERE bioentry_id = %s"
        " ORDER BY rank", (primary_id,))
    references = []
    for start, end, location, title, authors, dbname, accession in refs:
        reference = SeqFeature.Reference()
        # If the start/end are missing, reference.location is an empty list
        if (start is not None) or (end is not None):
            if start is not None:
                start -= 1  # python counting
            reference.location = [SeqFeature.FeatureLocation(start, end)]
        # Don't replace the default "" with None.
        if authors:
            reference.authors = authors
        if title:
            reference.title = title
        reference.journal = location
        if dbname == 'PUBMED':
            reference.pubmed_id = accession
        elif dbname == 'MEDLINE':
            reference.medline_id = accession
        references.append(reference)
    if references:
        return {'references': references}
    else:
        return {}
def make_genbank_recs(rec):
    """Attach a CDS feature to ``rec`` for each of its predicted proteins.

    Proteins are taken from the module-level ``protein_recs`` and selected by
    id prefix ``<rec.id>_``. Each protein's description is split on ``' # '``;
    fields 1, 2 and 3 are read as start, end and strand.

    Args:
        rec: The nucleotide record. It is modified in place (features are
            appended) and also returned.

    Returns:
        The same record object with the CDS features added.
    """
    new_rec = rec
    prefix = new_rec.id + '_'

    scaffold_recs = [p for p in protein_recs if p.id.startswith(prefix)]

    for protein_rec in scaffold_recs:
        fields = protein_rec.description.split(' # ')
        start = int(fields[1])
        end = int(fields[2])
        strand = int(fields[3])

        # NOTE(review): the ' # '-separated header looks like gene-caller
        # output with 1-based inclusive coordinates - confirm. Under that
        # assumption the start is shifted by -1 to get the 0-based half-open
        # interval a FeatureLocation expects; used unshifted, the feature
        # lost its first base.
        # The strand lives on the location; the SeqFeature constructor's
        # ``strand`` keyword is not accepted by recent Biopython releases.
        rec_location = SeqFeature.FeatureLocation(
            SeqFeature.ExactPosition(start - 1),
            SeqFeature.ExactPosition(end),
            strand=strand)
        rec_feature = SeqFeature.SeqFeature(rec_location, type="CDS")

        # Add ORF name without genome ID
        rec_feature.qualifiers['protein_id'] = protein_rec.id
        rec_feature.qualifiers['translation'] = protein_rec.seq
        rec_feature.qualifiers['locus_tag'] = protein_rec.description

        new_rec.features.append(rec_feature)
    return new_rec
Esempio n. 11
0
def createFEATUREannot(loc_range, featuretype, s):
    """Creates a new SeqFeature with ExactPositions based on range.

    Args:
        loc_range: Indexable pair; item 0 is used as the start and item 1 as
            the end of the location.
        featuretype: Value stored as the feature's ``type``.
        s: Strand of the feature.

    Returns:
        The newly built ``SeqFeature``.
    """
    # The strand is set on the location instead of being passed to the
    # SeqFeature constructor, whose ``strand`` keyword only forwarded the
    # value to the location and is not accepted by recent Biopython releases.
    location = SeqFeature.FeatureLocation(
        SeqFeature.ExactPosition(loc_range[0]),
        SeqFeature.ExactPosition(loc_range[1]),
        strand=s)
    return SeqFeature.SeqFeature(location, type=featuretype)
Esempio n. 12
0
 def ins_tag(self,tag_seq,protease_seq,ins_name,ins_sites,side=5):
     """Replace a stretch of the record with a tag plus protease site.

     The cut points are after ``ins_sites[0]`` bp and after ``ins_sites[1]``
     bp: everything between them is replaced by the insert. ``self.record``
     and ``self.whole_len`` are updated in place and the record name is
     preserved.

     Args:
         tag_seq: Sequence of the tag.
         protease_seq: Sequence of the protease site.
         ins_name: Stored as the ``note`` qualifier of the new "tag" feature.
         ins_sites: Pair of cut positions.
         side: 5 puts the tag before the protease site, 3 puts it after.

     Raises:
         ValueError: If ``side`` is neither 5 nor 3 (raised before the record
             is touched).
     """
     from Bio.Seq import Seq
     from Bio.SeqRecord import SeqRecord
     try:
         from Bio.Alphabet import IUPAC
     except ImportError:
         # Biopython releases without Bio.Alphabet: build a plain Seq.
         IUPAC = None

     if side==5:
         insert_seq=tag_seq+protease_seq
         tag_location=FeatureLocation(0,len(tag_seq))
     elif side==3:
         insert_seq=protease_seq+tag_seq
         tag_location=FeatureLocation(len(protease_seq),len(protease_seq+tag_seq))
     else:
         raise ValueError("side must be 5 or 3, got %r" % (side,))

     # Find the feature whose first note starts with "mcs" (case-insensitive);
     # when several match, the last one wins.
     mcs_start=mcs_end=mcs_qualifiers=None
     for feature in self.record.features:
         if "note" in feature.qualifiers\
         and re.search(r"^mcs",feature.qualifiers["note"][0],re.I):
             mcs_start=int(str(feature.location.start))
             mcs_end=int(str(feature.location.end))
             mcs_qualifiers=feature.qualifiers

     # Only when the insertion lies strictly inside that feature is a resized
     # "mcs" feature prepared; otherwise nothing is re-added below (this used
     # to end in a NameError).
     f_mcs=None
     if mcs_start is not None\
     and ins_sites[0]>mcs_start and ins_sites[1]<mcs_end:
         f_mcs=SeqFeature(FeatureLocation(mcs_start,
             mcs_end+ins_sites[0]-ins_sites[1]+len(tag_seq+protease_seq)),type="mcs")
         f_mcs.qualifiers=mcs_qualifiers

     if IUPAC is not None:
         ins_record=SeqRecord(Seq(insert_seq,IUPAC.ambiguous_dna))
     else:
         ins_record=SeqRecord(Seq(insert_seq))
     f=SeqFeature(tag_location,type="tag")
     f.qualifiers["note"]=[ins_name,]
     ins_record.features=[f]

     old_name=self.record.name
     self.record=self.record[:ins_sites[0]]+ins_record+self.record[ins_sites[1]:]
     self.record.name=old_name
     self.whole_len=len(self.record)
     if f_mcs is not None:
         self.record.features.append(f_mcs)
     self.record.features=sorted(self.record.features,key=lambda x:int(str(x.location.start)))
Esempio n. 13
0
def _make_position(location_string, offset=0):
    """Convert one Swiss location token into a SeqFeature position (PRIVATE).

    ``offset`` is added to the parsed number; start positions pass -1 to get
    Python-style counting. Results are clamped at zero.

    Recognised forms: ``"?"`` (unknown), a plain integer (exact), and an
    integer prefixed by ``<`` (before), ``>`` (after) or ``?`` (uncertain).
    Anything else raises NotImplementedError.
    """
    if location_string == "?":
        return SeqFeature.UnknownPosition()

    # Clamping keeps a feature from 0 to 0 at 0..0 rather than -1..0.
    try:
        return SeqFeature.ExactPosition(max(0, offset + int(location_string)))
    except ValueError:
        pass

    marker = location_string[:1]
    if marker == "<":
        fuzzy_class = SeqFeature.BeforePosition
    elif marker == ">":  # e.g. ">13"
        fuzzy_class = SeqFeature.AfterPosition
    elif marker == "?":  # e.g. "?22"
        fuzzy_class = SeqFeature.UncertainPosition
    else:
        fuzzy_class = None

    if fuzzy_class is not None:
        try:
            return fuzzy_class(max(0, offset + int(location_string[1:])))
        except ValueError:
            pass

    raise NotImplementedError("Cannot parse location '%s'" % location_string)
Esempio n. 14
0
def al_string2feat(queryseq, ampsdict): #lib5pr is subjectseq; t7 is queryseq
    '''
    Mark where a query sequence hits its subject sequence.

    queryseq: pair whose item 0 is the query record (only the length of its
              .seq is used) and whose item 1 is a hit (key, start, end).
    ampsdict: dictionary of subject sequences; the hit's key selects the one
              that is wrapped in a new SeqRecord.

    Returns the new SeqRecord carrying one feature: "cutsite_fwd" on strand
    +1 when start <= end, "cutsite_rev" on strand -1 when start > end.
    (It does not return a string.)
    '''
    hit = queryseq[1]
    subjectseq = SeqRecord(ampsdict[hit[0]])
    locstart = hit[1]
    locend = hit[2]
    query_len = len(queryseq[0].seq)

    # Figure out which strand the BLAST hit is on.
    # The strand is set on the location; the SeqFeature constructor's
    # ``strand`` keyword is not accepted by recent Biopython releases.
    # NOTE(review): the forward end adds an extra 1 that the reverse end
    # does not - coordinates are left exactly as they were; confirm intent.
    if locstart <= locend:
        start = ExactPosition(int(locstart))
        end = ExactPosition(int(locstart + query_len + 1))
        location = FeatureLocation(start, end, strand=+1)
        subjectseq.features.append(SeqFeature(location, type="cutsite_fwd"))
    elif locstart > locend:
        start = ExactPosition(int(locend))
        end = ExactPosition(start + query_len)
        location = FeatureLocation(start, end, strand=-1)
        subjectseq.features.append(SeqFeature(location, type="cutsite_rev"))

    return subjectseq
Esempio n. 15
0
 def _get_feature(self, feature_dict):
     """Retrieve a Biopython feature from our dictionary representation.

     ``feature_dict`` must provide ``location`` (positional arguments for
     ``FeatureLocation``), ``type``, ``id``, ``strand`` and ``quals`` (used
     as the feature's qualifiers).
     """
     location = SeqFeature.FeatureLocation(*feature_dict['location'])
     # Set the strand on the location; the SeqFeature constructor's
     # ``strand`` keyword is not accepted by recent Biopython releases. As
     # with that keyword, a strand of None leaves the location untouched.
     strand = feature_dict['strand']
     if strand is not None:
         location.strand = strand
     new_feature = SeqFeature.SeqFeature(location, feature_dict['type'],
             id=feature_dict['id'])
     new_feature.qualifiers = feature_dict['quals']
     return new_feature
Esempio n. 16
0
def _make_seqfeature(name, from_res, to_res, description, ft_id):
    """Build a SeqFeature from the fields of one parsed feature entry (PRIVATE).

    ``from_res`` is converted with an offset of -1 and ``to_res`` with an
    offset of 0. An empty ``ft_id`` is replaced by ``"<unknown id>"`` and
    ``description`` is stored as the only qualifier.
    """
    start = _make_position(from_res, -1)
    end = _make_position(to_res, 0)
    # "<unknown id>" is the default in the SeqFeature object
    feature_id = ft_id if ft_id else "<unknown id>"
    location = SeqFeature.FeatureLocation(start, end)
    return SeqFeature.SeqFeature(
        location,
        type=name,
        id=feature_id,
        qualifiers={"description": description},
    )
Esempio n. 17
0
def create_feature(sequence, name, start, end, strand=+1):
    """Append a feature of type ``name`` spanning start..end to ``sequence``.

    Args:
        sequence: Object with a ``features`` list; modified in place.
        name: Feature type. Nothing is added when ``str(name)`` is empty.
        start: Start of the feature; 0 is a valid start.
        end: End of the feature. Nothing is added when it is 0.
        strand: Strand stored on the location (default +1).

    Returns:
        None.

    Raises:
        ValueError/TypeError: If ``start`` or ``end`` cannot be converted
            with ``int()``.
    """
    if not str(name):
        return

    # int() is only a sanity check on the coordinates. Its result must not
    # be used as a truth value for ``start``: doing so silently dropped every
    # feature beginning at position 0.
    int(start)
    if int(end) == 0:
        return

    my_feature_location = SeqFeature.FeatureLocation(start,
                                                     end,
                                                     strand=strand)
    my_feature = SeqFeature.SeqFeature(my_feature_location, type=name)
    sequence.features.append(my_feature)
Esempio n. 18
0
def add_features(block, allblocks, gb, start):
    """Add ``block`` and, recursively, its components to ``gb.features``.

    Args:
        block: Block dict to convert.
        allblocks: All block dicts; components and the current option of a
            list block are looked up here by ``id``.
        gb: Object with a ``features`` list that receives the new features.
        start: Position at which this block begins.

    Returns:
        The position at which this block ends, to be used as the start of
        the next sibling.
    """
    # Disregard fillers... don't create features for them
    if is_filler(block):
        return start + block["sequence"]["length"]

    # For handling list blocks!
    if "current_option" in block:
        option = [b for b in allblocks if b["id"] == block["current_option"]][0]
        return add_features(option, allblocks, gb, start)

    # Add Myself as a feature
    sf = SeqFeature.SeqFeature()
    # Set the type based on the original type or the role type
    metadata = block["metadata"]
    if "genbank" in metadata and "type" in metadata["genbank"]:
        sf.type = metadata["genbank"]["type"]
    elif "rules" in block and "role" in block["rules"] and block["rules"]["role"] is not None and block["rules"]["role"] != "":
        sf.type = block["rules"]["role"]
    else:
        sf.type = "misc_feature"

    # Set up the location of the feature
    feature_strand = 1
    if "strand" in metadata:
        feature_strand = metadata["strand"]

    # And copy all the other qualifiers that came originally from genbank
    if "genbank" in metadata:
        # items() instead of iteritems(): works on Python 2 and Python 3.
        for annot_key, annot_value in metadata["genbank"].items():
            if annot_key not in ["name_source", "note"]:
                sf.qualifiers[annot_key] = annot_value

    convert_block_name(sf, block)

    add_GC_info(sf, block, allblocks)

    convert_annotations(block, gb, start)

    # Add my children as features; each child starts where the previous ended
    child_start = start
    for block_id in block["components"]:
        bl = [b for b in allblocks if b["id"] == block_id][0]
        child_start = add_features(bl, allblocks, gb, child_start)

    if child_start != start:
        # The end is where the last child ended...
        end = child_start
    elif "sequence" in block:
        # No children, look at the block's length
        end = start + block["sequence"]["length"]
    else:
        end = start

    sf.location = SeqFeature.FeatureLocation(start, end, strand=feature_strand)
    gb.features.append(sf)

    return end
Esempio n. 19
0
def spacersonly(seqs):
    """Yield the sequence next to the sgRNA constant region of each record.

    Every occurrence of the constant region ``GTTTAAGAG`` (strand +1) and of
    its reverse complement (strand -1) is added to the record as an
    ``sgRNAconst`` feature. The record's features are then scanned in order:
    for a +1 feature the slice before it is taken, for a -1 feature the
    reverse complement of the slice after it. Anything up to and including
    the first ``ACTCACTATAG`` is removed from the candidate. The first
    candidate longer than 10 whose ``phred_quality`` scores are all at least
    30 is yielded, and the generator moves on to the next record, so at most
    one candidate is produced per record.

    Args:
        seqs: Iterable of records with ``phred_quality`` letter annotations.
    """
    sgRNAconst = SeqRecord(Seq("GTTTAAGAG"))
    const_len = len(sgRNAconst)
    fwd_pattern = str(sgRNAconst.seq)
    rev_pattern = str(sgRNAconst.reverse_complement().seq)

    # A plain for loop ends the generator cleanly when the input runs out;
    # calling seqs.next() by hand is Python 2 only and lets StopIteration
    # escape from the generator body.
    for seqrecord in seqs:
        read = str(seqrecord.seq)
        # The strand is set on the location; the SeqFeature constructor's
        # ``strand`` keyword is not accepted by recent Biopython releases.
        for tloc in re.finditer(fwd_pattern, read):
            start = ExactPosition(tloc.start() + 1)
            end = ExactPosition(tloc.start() + const_len)
            location = FeatureLocation(start, end, strand=+1)
            seqrecord.features.append(
                SeqFeature(location, type="sgRNAconst"))
        for tloc in re.finditer(rev_pattern, read):
            start = ExactPosition(tloc.start() + 1)
            end = ExactPosition(start + const_len - 1)
            location = FeatureLocation(start, end, strand=-1)
            seqrecord.features.append(
                SeqFeature(location, type="sgRNAconst"))

        for feat in seqrecord.features:
            strand = feat.location.strand
            if strand == 1:
                tgtstart = int(feat.location.start) - 36  # -21
                tgtend = int(feat.location.start) - 1
                sgtgt = seqrecord[tgtstart:tgtend]
            elif strand == -1:
                tgtend = int(feat.location.end) + 36  # +21
                tgtstart = int(feat.location.end)
                sgtgt = seqrecord[tgtstart:tgtend].reverse_complement()
                sgtgt.name = seqrecord.name
            else:
                # No usable strand: there is no candidate for this feature.
                # (Falling through reused the previous candidate or hit an
                # unbound name.)
                continue

            # Trim everything up to and including the first ACTCACTATAG, if
            # it is present.
            promoter = re.search("ACTCACTATAG", str(sgtgt.seq))
            if promoter:
                sgtgt = sgtgt[promoter.end():]

            low_quality = any(
                score < 30
                for score in sgtgt.letter_annotations["phred_quality"])
            if not low_quality and len(sgtgt) > 10:
                yield sgtgt
                break
Esempio n. 20
0
def translateFeatureLocation(location, region, translation=0):
    """Shift a location by ``translation + 1`` and clip it to ``region``.

    Returns a new ``FeatureLocation``. When the shifted end is negative, a
    debug message is logged and an empty 0..0 location with strand 0 is
    returned. Otherwise the start is clipped to at least 0 and the end to at
    most ``region.end``, keeping the shifted location's strand.
    """
    shifted = location + translation + 1

    if shifted.end < 0:
        logging.debug('Error-prone feature detected: {}'.format(shifted))
        return SeqFeature.FeatureLocation(start=0, end=0, strand=0)

    clipped_start = max(0, shifted.start)
    clipped_end = min(shifted.end, region.end)
    return SeqFeature.FeatureLocation(start=clipped_start,
                                      end=clipped_end,
                                      strand=shifted.strand)
Esempio n. 21
0
def addFeatureComplSTF():
    """Add the current match as a minus-strand feature to ``newRecord``.

    Works entirely on names from the enclosing scope: the match ``m``,
    ``seqLength``, ``feature`` (its ``str()`` is the feature type),
    ``featureName`` (stored as the ``note`` qualifier) and ``newRecord``.

    A match ending within ``seqLength`` becomes a simple location. A match
    running past ``seqLength`` becomes a two-part compound location: the part
    up to ``seqLength`` plus the overhang placed at the start of the sequence.
    """
    if m.end() <= seqLength:
        location = FeatureLocation(m.start(), m.end(), strand=-1)
    else:
        # NOTE(review): assumes the match was found on a sequence extended
        # past seqLength to emulate circularity - confirm against the caller.
        # The overhang is m.end() - seqLength bases long and starts at index
        # 0. The previous (1, seqLength - m.end()) always had a negative end
        # in this branch.
        location = CompoundLocation([
            FeatureLocation(m.start(), seqLength, strand=-1),
            FeatureLocation(0, m.end() - seqLength, strand=-1),
        ])
    newFeature = SeqFeature(location, type=str(feature))
    newFeature.qualifiers['note'] = featureName
    newRecord.features.append(newFeature)
 def test_GenerateFeatLoc__make_start_fuzzy__1(self):
     ''' Test to evaluate function `make_start_fuzzy` of class `GenerateFeatLoc`.
         This test evaluates the case where FeatureLocations are made fuzzy:
         an exact 5..9 location must come back as a FeatureLocation whose
         start is a BeforePosition. '''
     from Bio import SeqFeature
     start_pos = SeqFeature.ExactPosition(5)
     end_pos = SeqFeature.ExactPosition(9)
     location_object = SeqFeature.FeatureLocation(start_pos, end_pos)
     out = GnOps.GenerateFeatLoc().make_start_fuzzy(location_object)
     # Use the locally imported module: `from Bio import SeqFeature` does
     # not bind the name `Bio` in this scope.
     self.assertIsInstance(out, SeqFeature.FeatureLocation) # FeatureLocation
     self.assertIsInstance(out.start, SeqFeature.BeforePosition) # Fuzzy Start
Esempio n. 23
0
def contig_info(contig_id, contig_seq, species_informations):
    """
    Build the SeqRecord of one contig, annotated from species_informations.

    contig_id is used as the record id, name and 'accessions' annotation and
    as the 'scaffold' qualifier of the source feature. The 'description' key
    of species_informations is required; every other key is copied only when
    present. A "source" feature spanning the whole contig is appended.
    """
    record = SeqRecord(contig_seq,
                       id=contig_id,
                       name=contig_id,
                       description=species_informations['description'],
                       annotations={"molecule_type": "DNA"})

    if IUPAC:
        record.seq.alphabet = IUPAC.ambiguous_dna

    annotations = record.annotations
    if 'data_file_division' in species_informations:
        annotations['data_file_division'] = species_informations[
            'data_file_division']
    today = datetime.date.today()
    annotations['date'] = today.strftime('%d-%b-%Y').upper()
    if 'topology' in species_informations:
        annotations['topology'] = species_informations['topology']
    annotations['accessions'] = contig_id
    # Optional record-level annotations, copied verbatim when provided.
    for key in ('organism', 'taxonomy', 'keywords', 'source'):
        if key in species_informations:
            annotations[key] = species_informations[key]

    source_location = sf.FeatureLocation(0, len(contig_seq))
    new_feature_source = sf.SeqFeature(source_location, type="source")
    qualifiers = new_feature_source.qualifiers
    qualifiers['scaffold'] = contig_id
    # Optional source qualifiers, copied verbatim when provided.
    # db_xref corresponds to the taxon NCBI ID.
    # Important if you want to use Pathway Tools after.
    for key in ('isolate', 'db_xref', 'cell_type', 'dev_stage', 'mol_type'):
        if key in species_informations:
            qualifiers[key] = species_informations[key]

    record.features.append(new_feature_source)

    return record
Esempio n. 24
0
        def _parse_dbReference(element):
            """Record a dbReference element as a ``type:id`` cross-reference.

            For EMBL references, each child carrying a "protein sequence ID"
            value additionally yields an ``EMBL-CDS:<value>`` entry. For PDB
            references, a feature is built for every chain range but is
            currently not stored (the append is commented out).
            """
            attributes = element.attrib
            self.ParsedSeqRecord.dbxrefs.append(attributes["type"] + ":" +
                                                attributes["id"])
            if "type" in attributes:
                db_type = attributes["type"]
                # <dbReference type="EMBL" id="U96180">
                #    <property type="protein sequence ID" value="AAB66902.1"/>
                #    <property type="molecule type" value="mRNA"/>
                # </dbReference>
                if db_type == "EMBL":
                    for child in element:
                        child_attrib = child.attrib
                        if ("type" in child_attrib
                                and "value" in child_attrib
                                and child_attrib["type"] ==
                                "protein sequence ID"):
                            self.ParsedSeqRecord.dbxrefs.append(
                                "EMBL-CDS:" + child_attrib["value"])
                # e.g.
                # <dbReference type="PDB" key="11" id="2GEZ">
                #   <property value="X-ray" type="method"/>
                #   <property value="2.60 A" type="resolution"/>
                #   <property value="A/C/E/G=1-192, B/D/F/H=193-325" type="chains"/>
                # </dbReference>
                elif db_type == "PDB":
                    method = ""
                    resolution = ""
                    for child in element:
                        if child.tag != NS + "property":
                            continue
                        dat_type = child.attrib["type"]
                        if dat_type == "method":
                            method = child.attrib["value"]
                        if dat_type == "resolution":
                            resolution = child.attrib["value"]
                        if dat_type == "chains":
                            for chain_spec in child.attrib["value"].split(","):
                                pair = chain_spec.strip().split("=")
                                if pair[1] == "-":
                                    continue
                                # TODO - How best to store these, do SeqFeatures make sense?
                                feature = SeqFeature.SeqFeature()
                                feature.type = attributes["type"]
                                feature.qualifiers["name"] = attributes["id"]
                                feature.qualifiers["method"] = method
                                feature.qualifiers["resolution"] = resolution
                                feature.qualifiers["chains"] = pair[0].split(
                                    "/")
                                bounds = pair[1].split("-")
                                start = int(bounds[0]) - 1
                                end = int(bounds[1])
                                feature.location = SeqFeature.FeatureLocation(
                                    start, end)
                                # self.ParsedSeqRecord.features.append(feature)

            for child in element:
                if child.tag == NS + "property":
                    pass  # this data cannot be fitted in a seqrecord object with a simple list. however at least ensembl and EMBL parsing can be improved to add entries in dbxrefs
Esempio n. 25
0
 def _get_feature(self, feature_dict):
     """Retrieve a Biopython feature from our dictionary representation.

     ``feature_dict`` must provide ``location`` (positional arguments for
     ``FeatureLocation``), ``type``, ``id``, ``strand`` and ``quals`` (used
     as the feature's qualifiers). The returned feature always has a
     ``sub_features`` attribute.
     """
     location = SeqFeature.FeatureLocation(*feature_dict['location'])
     # Set the strand on the location; the SeqFeature constructor's
     # ``strand`` keyword is not accepted by recent Biopython releases. As
     # with that keyword, a strand of None leaves the location untouched.
     strand = feature_dict['strand']
     if strand is not None:
         location.strand = strand
     new_feature = SeqFeature.SeqFeature(location, feature_dict['type'],
             id=feature_dict['id'])
     # Support for Biopython 1.68 and above, which removed sub_features
     if not hasattr(new_feature, "sub_features"):
         new_feature.sub_features = []
     new_feature.qualifiers = feature_dict['quals']
     return new_feature
Esempio n. 26
0
 def _trans_loc(loc):
     """Turn a ``[contig, position, direction, length]`` item into a location.

     ``loc`` is modified in place: its contig entry is set to None when it
     equals the current contig, so the contig ID is only written in the loc
     line for trans-spliced parts. A direction of "-" gives strand -1 and an
     interval ending at the position; anything else gives strand 1 and an
     interval starting one before the position.
     """
     if loc[0] == current_contig_id:
         loc[0] = None

     position, length = loc[1], loc[3]
     if loc[2] == "-":
         start, end, strand = position - length, position, -1
     else:
         start, end, strand = position - 1, position + length - 1, 1
     return SeqFeature.FeatureLocation(start, end, strand, loc[0])
Esempio n. 27
0
def create_genbank(gene_nucleic_seqs, gene_protein_seqs, annot, go_namespaces,
                   go_alternatives, output_path, species_informations):
    """Write a GenBank file holding one record per nucleic sequence.

    Every record receives a "gene" feature and a CDS feature (built by
    create_cds_feature), both tagged with the same locus_tag.

    Args:
        gene_nucleic_seqs (dict): nucleic sequences (key: sequence id, value: sequence)
        gene_protein_seqs (dict): protein sequences (key: sequence id, value: sequence)
        annot (dict): eggnog-mapper annotation (key: gene_id, value: ['GOs','EC', 'Preferred_name'])
        go_namespaces (dict): GO term namespaces (key: GO Term ID, value: namespace associated to GO Term)
        go_alternatives (dict): GO term alternative IDs (key: GO Term ID, value: alternative GO Terms)
        output_path (str): output file or directory
        species_informations (dict): information about the species
    """
    # Records are collected first and handed to the SeqIO writer in one go.
    genbank_records = []

    # Sequence ids are processed in sorted order.
    for seq_id in sorted(gene_nucleic_seqs):
        nucleic_seq = gene_nucleic_seqs[seq_id]
        contig_record = record_info(seq_id, nucleic_seq, species_informations)

        # Derive the locus tag: prefix purely numeric ids, keep the second
        # "|"-separated field of piped ids, otherwise use the id unchanged.
        if seq_id.isnumeric():
            locus_tag = f"gene_{seq_id}"
        elif "|" in seq_id:
            locus_tag = seq_id.split("|")[1]
        else:
            locus_tag = seq_id

        # NOTE(review): Biopython FeatureLocation starts are 0-based, so a
        # start of 1 leaves the first base outside the feature; confirm
        # whether this is intended.
        first, last, orientation = 1, len(nucleic_seq), 0

        gene_location = sf.FeatureLocation(first, last, orientation)
        gene_feature = sf.SeqFeature(gene_location, type="gene")
        gene_feature.qualifiers['locus_tag'] = locus_tag
        contig_record.features.append(gene_feature)

        cds_feature = create_cds_feature(locus_tag, first, last, orientation,
                                         annot, go_namespaces,
                                         go_alternatives, gene_protein_seqs)
        cds_feature.qualifiers['locus_tag'] = locus_tag
        contig_record.features.append(cds_feature)

        genbank_records.append(contig_record)

    # Serialise every collected record into the GenBank output.
    SeqIO.write(genbank_records, output_path, 'genbank')
 def test_GenerateFeatLoc__make_start_fuzzy__3(self):
     ''' Test that calls `make_end_fuzzy` of class `GenerateFeatLoc`
         (despite the `make_start_fuzzy` in this test's name).
         Checks that the end of an exact FeatureLocation is made fuzzy,
         i.e. becomes an AfterPosition. '''
     from Bio import SeqFeature
     exact_location = SeqFeature.FeatureLocation(
         SeqFeature.ExactPosition(5), SeqFeature.ExactPosition(9))
     fuzzy = GnOps.GenerateFeatLoc().make_end_fuzzy(exact_location)
     # The result must still be a FeatureLocation ...
     self.assertIsInstance(fuzzy, Bio.SeqFeature.FeatureLocation)
     # ... whose end position is now fuzzy.
     self.assertIsInstance(fuzzy.end, Bio.SeqFeature.AfterPosition)
Esempio n. 29
0
 def _get_feature(self, feature_dict):
     """Build a Biopython SeqFeature from our dictionary representation.

     The "location" entry is unpacked as the positional arguments of
     FeatureLocation; "type", "id" and "strand" go to the SeqFeature
     constructor and "quals" is installed as the qualifiers mapping.
     """
     span = SeqFeature.FeatureLocation(*feature_dict["location"])
     kind = feature_dict["type"]
     built = SeqFeature.SeqFeature(
         span, kind, id=feature_dict["id"], strand=feature_dict["strand"])
     built.qualifiers = feature_dict["quals"]
     return built
Esempio n. 30
0
def convert_annotations(block, gb, start):
    """Append the annotations of a block to ``gb.features`` as SeqFeatures.

    Does nothing when ``block`` has no "sequence" entry. Otherwise one
    SeqFeature is created per item of ``block["sequence"]["annotations"]``:

    * the feature type defaults to "misc_feature", is replaced by a
      non-empty "role", and finally by ``notes["genbank"]["type"]`` when
      that key is present;
    * annotation keys other than the reserved ones are copied into the
      qualifiers, as are the ``notes["genbank"]`` entries other than
      "type" and "note";
    * the "note" qualifier holds a JSON dump (double quotes replaced by
      single quotes) describing the annotation's name, parent block and
      optional color / genbank note;
    * a location is set only when the annotation has a "start".

    Args:
        block: mapping with an "id" and, optionally, a "sequence" mapping
            that contains an "annotations" list.
        gb: object whose ``features`` list receives the new features.
        start: offset added to the annotation's block-relative start/end.

    Returns:
        None.
    """
    if "sequence" not in block:
        return

    # Keys handled explicitly below; they are never copied verbatim into
    # the qualifiers.
    reserved_keys = ("start", "end", "notes", "strand", "color", "role",
                     "isForward")

    # Add My annotations as features
    for annotation in block["sequence"]["annotations"]:
        gb_annot = SeqFeature.SeqFeature()
        annotation_type = "misc_feature"

        if "role" in annotation and annotation["role"] != "":
            annotation_type = annotation["role"]

        # .items() rather than the Python-2-only .iteritems(), so the loop
        # runs on both Python 2 and Python 3.
        for key, value in annotation.items():
            if key not in reserved_keys:
                gb_annot.qualifiers[key] = value
            elif key == "notes" and "genbank" in annotation["notes"]:
                for gb_key, gb_value in annotation["notes"]["genbank"].items():
                    if gb_key not in ["type", "note"]:
                        gb_annot.qualifiers[gb_key] = gb_value
                    elif gb_key == "type":
                        annotation_type = gb_value

        gc_info = {
            "GC": {
                "name": annotation["name"],
                "type": "annotation",
                "parents": [block["id"]]
            }
        }
        if "color" in annotation:
            gc_info["GC"]["color"] = annotation["color"]
        if "notes" in annotation and "genbank" in annotation[
                "notes"] and "note" in annotation["notes"]["genbank"]:
            gc_info["note"] = annotation["notes"]["genbank"]["note"]
        gb_annot.qualifiers["note"] = json.dumps(gc_info).replace("\"", "'")

        if "start" in annotation:
            strand = 1
            if "isForward" in annotation and annotation["isForward"] == -1:
                strand = -1
            # Remember: annotations start and end are relative to the block
            gb_annot.location = SeqFeature.FeatureLocation(
                annotation["start"] + start, annotation["end"] + start + 1,
                strand)

        gb_annot.type = annotation_type

        gb.features.append(gb_annot)
def add_feature(sequence_rec, start_postion, end_position, strand, name,
                feature_type, feature_id):
    """Build and return a labelled SeqFeature.

    start_postion is decremented by one before it is used as the
    location start (presumably converting a 1-based coordinate to
    Biopython's 0-based one); end_position is used unchanged.

    Note that sequence_rec is not used here: the feature is only
    returned, and attaching it to a record is left to the caller.
    """
    span = SeqFeature.FeatureLocation(start_postion - 1, end_position,
                                      strand=strand)
    labelled = SeqFeature.SeqFeature(span, type=feature_type, id=feature_id)
    labelled.qualifiers["label"] = name
    return labelled
Esempio n. 32
0
 def to_seq_feature(self):
     """Convert this object into a Biopython SeqFeature.

     Qualifiers are gathered from self.qualifiers.all() as a name -> data
     mapping. The strand is 1 for direction 'f', -1 for direction 'r' and
     None for anything else; the location spans self.start to self.end.
     """
     qualifier_map = {q.name: q.data for q in self.qualifiers.all()}
     if self.direction == 'f':
         strand_value = 1
     elif self.direction == 'r':
         strand_value = -1
     else:
         strand_value = None
     span = SeqFeature.FeatureLocation(self.start, self.end)
     return SeqFeature.SeqFeature(location=span,
                                  type=self.type,
                                  strand=strand_value,
                                  qualifiers=qualifier_map)
def make_feature(product, blast_qresult, fragment, hit, hsp, fragstart, count):
    """Build a "DOMAIN" SeqFeature for one HSP and return it with the next count.

    The feature spans hsp.hit_start..hsp.hit_end; for product 'YR' both
    bounds are shifted down by int(fragstart). Qualifiers record the
    unshifted hit location, the product, the incoming count, the search
    program and version, the e-value, the last path component of
    blast_qresult.target, the contig and the translation of the sequence
    extracted from fragment.seq.

    Returns:
        tuple: (feature, count + 1)

    NOTE(review): `contig` is not defined in this function, so it has to
    exist at module level, and the `hit` parameter is never used; confirm
    which one was intended.
    """
    if product == 'YR':
        shift = int(fragstart)
        begin, stop = hsp.hit_start - shift, hsp.hit_end - shift
    else:
        begin, stop = hsp.hit_start, hsp.hit_end

    feature = SeqFeature(FeatureLocation(begin, stop), type="DOMAIN",
                         strand=hsp.hit_strand)
    quals = feature.qualifiers
    quals['loc_on_contig'] = '..'.join((str(hsp.hit_start + 1),
                                        str(hsp.hit_end)))
    quals['product'] = product
    quals['serial_on_frag'] = count
    quals['program'] = blast_qresult.program + "_" + blast_qresult.version
    quals['evalue'] = hsp.evalue
    quals['assembly'] = blast_qresult.target.split('/')[-1]
    quals['contig'] = contig
    quals['translation'] = feature.extract(fragment.seq).translate()
    return (feature, count + 1)
Esempio n. 34
0
def writePBS():
    """Annotate primer matches on both strands of the current record.

    For every variation in featureStatistic_container[feature], the tail
    of the primer sequence is searched in record.seq and in its reverse
    complement, and one SeqFeature per hit is appended to
    newRecord.features with the primer name as its 'note' qualifier.

    Reads the module-level names featureStatistic_container, feature,
    record and newRecord, and rebinds the globals listed in the `global`
    statement below.
    """
    global variation, seqRecordToCheck, seqRecordToCheckComplement, difference, newFeature
    for variation in featureStatistic_container[feature]:
        primerSeq = str(variation.seq)
        primerName = variation.note

        # Trailing 15 characters of the primer. For primers shorter than 15
        # characters the start index becomes negative, so the slice is
        # shorter than the primer itself.
        partialPrimerSeq = primerSeq[len(primerSeq) - 15::]
        seqRecordToCheck = str(record.seq)
        seqRecordToCheckComplement = str(reverse_complement(record.seq))

        # --- forward strand ---
        matchingPrimerPositions = SeqUtils.nt_search(seqRecordToCheck, partialPrimerSeq)

        # Index 0 is skipped: Bio.SeqUtils.nt_search returns the search
        # pattern as the first element, followed by the hit positions.
        if (len(matchingPrimerPositions) > 1):
            difference = len(primerSeq) - len(partialPrimerSeq)
            length = len(matchingPrimerPositions)
            for j in range(1, length):
                # Does the full primer match when the window is extended
                # `difference` characters to the left of the partial hit?
                # NOTE(review): the feature below starts at the partial-hit
                # position, not at position - difference; confirm intent.
                if primerSeq == seqRecordToCheck[matchingPrimerPositions[j] -
                        difference: matchingPrimerPositions[j] - difference + len(primerSeq)]:
                    newFeature = SeqFeature(FeatureLocation(matchingPrimerPositions[j],
                                                            matchingPrimerPositions[j] + len(primerSeq),
                                                            strand=1), type=str(feature))
                    newFeature.qualifiers['note'] = primerName
                    newRecord.features.append(newFeature)

                else:
                    # Only the partial primer matched: mark the end as fuzzy
                    # with AfterPosition.
                    newFeature = SeqFeature(FeatureLocation(matchingPrimerPositions[j], AfterPosition(
                        matchingPrimerPositions[j] + len(primerSeq)), strand=1), type=str(feature))
                    newFeature.qualifiers['note'] = primerName
                    newRecord.features.append(newFeature)

        # --- reverse strand: same procedure on the reverse complement ---
        # NOTE(review): hit positions are indices into the reverse-complement
        # string and are used as-is for the strand=-1 features; confirm
        # whether they should be mapped back to forward coordinates.
        matchingPrimerPositions = SeqUtils.nt_search(seqRecordToCheckComplement, partialPrimerSeq)

        if (len(matchingPrimerPositions) > 1):
            difference = len(primerSeq) - len(partialPrimerSeq)
            length = len(matchingPrimerPositions)
            for j in range(1, length):
                if primerSeq == seqRecordToCheckComplement[matchingPrimerPositions[j] -
                        difference: matchingPrimerPositions[j] - difference + len(primerSeq)]:
                    newFeature = SeqFeature(FeatureLocation(matchingPrimerPositions[j],
                                                            matchingPrimerPositions[j] + len(primerSeq),
                                                            strand=-1), type=str(feature))
                    newFeature.qualifiers['note'] = primerName
                    newRecord.features.append(newFeature)
                else:
                    # Partial match only: fuzzy end, as on the forward strand.
                    newFeature = SeqFeature(FeatureLocation(matchingPrimerPositions[j], AfterPosition(
                        matchingPrimerPositions[j] + len(primerSeq)), strand=-1), type=str(feature))
                    newFeature.qualifiers['note'] = primerName
                    newRecord.features.append(newFeature)
Esempio n. 35
0
    def ins_insert(self, vec_5_site, utr_5_seq, ins_seq, utr_3_seq, vec_3_site, ins_name):
        """Splice an insert with its UTRs into self.record and re-add the MCS feature.

        The record becomes record[:vec_5_site] + (utr_5_seq + ins_seq +
        utr_3_seq) + record[vec_3_site:]. The inserted part carries an
        "insert" feature over ins_seq with ins_name as its note. A feature
        whose first note starts with "mcs" (case-insensitive) is looked up
        beforehand and re-added as an "mcs" feature whose end is shifted by
        the change in length. Features are then sorted by start, the
        original record name is restored and self.whole_len is updated.

        Args:
            vec_5_site: index in the vector where the replaced region starts.
            utr_5_seq: 5' UTR sequence string placed before the insert.
            ins_seq: insert sequence string.
            utr_3_seq: 3' UTR sequence string placed after the insert.
            vec_3_site: index in the vector where the replaced region ends.
            ins_name: name stored in the insert feature's "note" qualifier.

        NOTE(review): when no feature has an "mcs" note, mcs_start /
        mcs_end / mcs_qualifiers are never bound and building the "mcs"
        feature raises after self.record has already been replaced.
        """
        from Bio.Alphabet import IUPAC
        from Bio.Seq import Seq
        from Bio.SeqRecord import SeqRecord
        insert_seq = utr_5_seq + ins_seq + utr_3_seq
        ins_record = SeqRecord(Seq(insert_seq, IUPAC.ambiguous_dna))

        f_i = SeqFeature(FeatureLocation(len(utr_5_seq), len(utr_5_seq) + len(ins_seq)), type="insert")
        f_i.qualifiers["note"] = [ins_name, ]
        ins_record.features = [f_i]
        old_name = self.record.name
        for feature in self.record.features:
            # `in` replaces dict.has_key(), which no longer exists on
            # Python 3; the membership test is identical on Python 2.
            if ("note" in feature.qualifiers
                    and re.search(r"^mcs", feature.qualifiers["note"][0], re.I)):
                mcs_start = int(str(feature.location.start))
                mcs_end = int(str(feature.location.end))
                mcs_qualifiers = feature.qualifiers
        self.record = self.record[:vec_5_site] + ins_record + self.record[vec_3_site:]
        # The MCS end moves by (length of inserted part) - (length of the
        # vector region that was cut out).
        f_mcs = SeqFeature(FeatureLocation(mcs_start,
            vec_5_site + len(insert_seq) + mcs_end - vec_3_site), type="mcs")
        f_mcs.qualifiers = mcs_qualifiers
        self.record.features.append(f_mcs)
        self.record.features = sorted(self.record.features, key=lambda x: int(str(x.location.start)))
        self.record.name = old_name
        self.whole_len = len(self.record)
Esempio n. 36
0
def writeFeature(strand):
    """Append one feature per search hit to newRecord.features.

    Uses the module-level names occurrence, featureSeq, feature,
    variation and newRecord. The first element of occurrence is skipped;
    every remaining element is used as the start of a feature that is
    len(featureSeq) long on the given strand. The product, gene,
    bound_moiety, mobile and note attributes of variation are copied
    into the qualifiers when they are not None. The global newFeature
    is rebound to each feature in turn.
    """
    global newFeature
    if len(occurrence) <= 1:
        return

    optional_qualifiers = ('product', 'gene', 'bound_moiety', 'mobile', 'note')
    for index in range(1, len(occurrence)):
        hit_start = occurrence[index]
        hit_location = FeatureLocation(hit_start, hit_start + len(featureSeq),
                                       strand=strand)
        newFeature = SeqFeature(hit_location, type=str(feature))
        for qualifier_name in optional_qualifiers:
            qualifier_value = getattr(variation, qualifier_name)
            if qualifier_value is not None:
                newFeature.qualifiers[qualifier_name] = qualifier_value
        newRecord.features.append(newFeature)
Esempio n. 37
0
#    .WithinPosition: position between two nucleotides '(1.5)' in this way: position 1 is lower boundary, extension 4 is range to higher boundary
#    .OneOfPosition: any of a list of several numbers
#    .UnknownPosition: position of unknown location.
from Bio import SeqFeature
# Build a location with fuzzy boundaries: start after position 5, end
# somewhere between positions 8 and 9.
start_pos = SeqFeature.AfterPosition(5)
end_pos = SeqFeature.BetweenPosition(9, left=8, right=9)
mylocation = SeqFeature.FeatureLocation(start_pos, end_pos)
# print(...) with a single pre-formatted string gives the same output on
# Python 2 and Python 3 (the bare print statement is Python-2-only).
print("%s %s %s %s" % (mylocation, mylocation.start, mylocation.end,
                       int(mylocation.end)))

for feature in record.features:
    if 4350 in feature:    # if position 4350 is in any feature
        print("%s %s" % (feature.type, feature.qualifiers.get('db_xref')))

from Bio.SeqFeature import SeqFeature, FeatureLocation
seqParent = Seq('ACCGAGACGGCAAAGGCTAGCATAGGTATGAGACTTCCTTCCTGCCAGTGCTGAGGAACTGGGAGCCTAC')
featu = SeqFeature(FeatureLocation(5, 18), type='gene', strand=-1)    # location [5:18] in reverse_complement
print(featu)
# Manual equivalent of featu.extract(): slice the parent, then reverse-complement.
featureSeq = seqParent[featu.location.start:featu.location.end].reverse_complement()
print(featureSeq)
print("%s %s %s %s" % (featu.extract(seqParent), len(featu.extract(seqParent)),
                       len(featu), len(featu.location)))
# extract gets the subseq in location featu from seqParent

# References publications that mention it
# Bio.SeqFeature.Reference
#    journal: book, magazine, journal name
#    title, authors: of the paper
#    medline_id, pubmed_id: IDs in Medline and PubMed
#    comment: about the reference
#    location: to specify location in the sequence mentioned in the paper

# format: method to output as fasta or genbank formatted seq
 if filename == inverted_repeats_file:
     direction = 'inverted'
 lines = open(filename, 'r').readlines()
 readl = 0
 for line in lines:
     if line[0:9] == 'FASTA_HDR': #Get fragment name
         parts = re.split(r'\s+',line)
         fragment = ':'.join(parts[1].split(':')[0:-1])
         if not fragment in repeat_features.keys():
             repeat_features[fragment] = []
     elif line[0:8] == 'FEATURES': #Start to read feature lines
         readl = 1
     elif line[0:6] == 'ORIGIN': #Stop reading feature lines and ...
         readl = 0
         if not end_a == 0:      #If there were features, put the last one in repeat_features
             feature = SeqFeature(FeatureLocation(int(start_a)-1, int(end_a)), type="REPEAT", id = direction + '_' + str(repeat_name) + '.1') #Make a SeqFeature object for the first repeat partenr
             for mod in modifyers.keys():
                 feature.qualifiers[mod] = modifyers[mod] #add the qualifiers to the SeqFeature object
             feature.qualifiers['name'] = direction + '_' + str(repeat_name) + '.1'
             repeat_features[fragment].append(feature)
             feature = SeqFeature(FeatureLocation(int(start_b)-1, int(end_b)), type="REPEAT", id = direction + '_' + str(repeat_name) + '.2') #Make a SeqFeature object for the second repeat partenr
             for mod in modifyers.keys():
                 feature.qualifiers[mod] = modifyers[mod] #add the qualifiers to the SeqFeature object
             feature.qualifiers['name'] = direction + '_' + str(repeat_name) + '.2'
             #print ' Got repeat ' + feature.qualifiers['name'] + ' on fragment ' + fragment 
             repeat_features[fragment].append(feature)
             repeat_name = 1 #Roll back all parameters to null for the next fragment
             start_a = 0
             end_a = 0
             start_b = 0
             end_b = 0