Esempio n. 1
0
def construct_location(raw_start: str, raw_end: str, raw_strand: str,
                       attributes: Dict[str, str], strict: bool = False) -> FeatureLocation:
    """ Converts the raw sections of a GFF line into a FeatureLocation.

        Some attribute keys can modify the location's values: when "partial"
        is "true" and a "start_range" and/or "end_range" key is present, those
        keys are removed from the given attributes and the matching end(s) of
        the location become open-ended (before/after) positions.

        Arguments:
            raw_start: the start coordinate as text, converted to 0-indexed
            raw_end: the end coordinate as text
            raw_strand: the strand as text, interpreted by interpret_strand
            attributes: the attributes of the line, modified in place when
                        partial ranges are consumed
            strict: passed through to interpret_strand

        Returns:
            a FeatureLocation built from the converted start, end and strand

        Raises:
            GFFParseError: if a coordinate is not an integer, or if the
                           0-indexed start or the end is negative
    """

    try:
        start = ExactPosition(int(raw_start) - 1)  # 0-indexed as FeatureLocation expects
        end = ExactPosition(int(raw_end))
    except ValueError as err:
        # chain explicitly so the failed conversion is kept as the cause
        raise GFFParseError("Invalid location values: %s" % str(err)) from err

    if start < 0 or end < 0:
        raise GFFParseError("Invalid location values: %s, %s" % (raw_start, raw_end))

    strand = interpret_strand(raw_strand, strict=strict)

    # handle ambiguous positions as noted in attributes
    if attributes.get("partial") == "true" and ("start_range" in attributes or "end_range" in attributes):
        attributes.pop("partial")
        # a missing range falls back to the plain "start,end" text, which
        # has no "." and therefore leaves that end exact
        start_range = attributes.pop("start_range", "%s,%s" % (start, end))
        end_range = attributes.pop("end_range", "%s,%s" % (start, end))
        if start_range.startswith("."):
            start = BeforePosition(int(start))
        if end_range.endswith("."):
            end = AfterPosition(int(end))

    return FeatureLocation(start, end, strand)
Esempio n. 2
0
def new_compound_location(
    indices: List[Union[Tuple[int, int], Tuple[int, int, int]]], strand: int
) -> "CompoundLocation":
    """Build a CompoundLocation from a list of index tuples.

    Each entry of ``indices`` is either ``(start, end)``, which uses the
    ``strand`` argument, or ``(start, end, strand)``, which carries its own
    strand. One FeatureLocation with exact positions is created per entry.

    :param indices: list of 2- or 3-tuples of integers
    :param strand: strand applied to the 2-tuples
    :return: a CompoundLocation wrapping the created locations
    :raises ValueError: if an entry is not a tuple, has a size other than
        2 or 3, or contains a non-integer (the ``strand`` argument is checked
        too when it is used for a 2-tuple)
    """
    locations = []
    for index in indices:
        if not isinstance(index, tuple):
            # report the type of the offending entry, not of the whole list
            raise ValueError(
                "Expects a tuple of integers size 2 or 3, not a {}".format(
                    index.__class__
                )
            )
        if len(index) == 2:
            i, j = index
            s = strand
        elif len(index) == 3:
            i, j, s = index
        else:
            raise ValueError("Expects a tuple of integers of size 2 or 3")
        if not all(isinstance(value, int) for value in (i, j, s)):
            raise ValueError(
                "Expects a tuple of integers of size 2 or 3. Found {}".format(index)
            )
        locations.append(FeatureLocation(ExactPosition(i), ExactPosition(j), strand=s))
    return CompoundLocation(locations)
        def _get_translation(feature, seq):
            """Translate the nucleotide sequence of ``feature`` and return it as a str.

            The region is extracted from ``seq`` using the feature location and
            translated starting at the ``codon_start`` offset. When the region
            is a whole number of codons and both ends are exact, a strict CDS
            translation is tried first; if it fails, the translation is cut at
            the first stop and ``feature.location`` may be rewritten in place.
            If the first codon is one of the alternative start codons, the
            first residue is replaced by "M".

            NOTE(review): ``tool`` and ``self`` are not parameters; they are
            presumably provided by the enclosing scope - confirm.
            """
            nucseq = feature.location.extract(seq)
            # codon_start is 1-based; the subtraction assumes the qualifier
            # holds a number (a str value would fail here) - TODO confirm
            offset = feature.qualifiers.get("codon_start", [1])[0] - 1
            # 0 when the region after the offset is a whole number of codons,
            # otherwise the negated count of leftover bases (used as a slice end)
            right_offset = -1 * ((len(nucseq) - offset) % 3)
            # a table set on the tool takes precedence over the feature qualifier
            if hasattr(tool, "transl_table"):
                transl_table = tool.transl_table
            else:
                transl_table = feature.qualifiers.get("transl_table", [11])[0]
            if transl_table == 4:
                start_codons = [
                    "TTA", "TTG", "CTG", "ATT", "ATC", "ATA", "GTG"
                ]  # and ATG  for transl_table 4
            else:
                start_codons = ["TTG", "CTG", "ATT", "ATC", "ATA",
                                "GTG"]  # and ATG   for transl_table 11

            if right_offset == 0:
                if isinstance(feature.location.start,
                              ExactPosition) and isinstance(
                                  feature.location.end, ExactPosition):
                    try:
                        # strict translation of a complete CDS
                        translation = nucseq[offset:].translate(
                            table=transl_table, cds=True)
                    except TranslationError:
                        # fall back to translating up to the first stop
                        translation = nucseq[offset:].translate(
                            table=transl_table, to_stop=True)
                        # NOTE(review): if to_stop omits the stop codon from the
                        # result, this inequality also holds for a CDS whose only
                        # stop is its final codon - confirm this is intended.
                        if len(translation) * 3 != len(nucseq[offset:]):
                            self.logger.warning(
                                "Translation error in {}. In-frame stop codon exists. Translation was terminated at the first in-frame stop codon."
                                .format(feature.id))
                            before = str(feature.location)
                            # shrink the feature so that it ends right after the
                            # stop codon following the translated part
                            if feature.location.strand == 1:
                                start = feature.location.start
                                end = ExactPosition(
                                    start + offset + len(translation) * 3 +
                                    3)  # Trailing +3 for stop-codon
                                feature.location = FeatureLocation(
                                    start, end, 1)
                            else:
                                # any strand other than 1 is handled as reverse:
                                # the end is kept and the start is moved
                                end = feature.location.end
                                start = ExactPosition(
                                    end - offset - len(translation) * 3 -
                                    3)  # Trailing -3 for stop-codon
                                feature.location = FeatureLocation(
                                    start, end, -1)
                            after = str(feature.location)
                            self.logger.warning(
                                "CDS[{}] was fixed from {} to {}.".format(
                                    feature.id, before, after))
                else:
                    # at least one end is not exact: translate up to the first stop
                    translation = nucseq[offset:].translate(table=transl_table,
                                                            to_stop=True)
            else:
                # drop the leftover bases at the end before translating
                translation = nucseq[offset:right_offset].translate(
                    table=transl_table)  # , stop_symbol="")
            translation = str(translation)
            first_codon = str(nucseq[offset:offset + 3]).upper()
            # alternative start codons are reported as methionine
            if first_codon in start_codons:
                translation = "M" + translation[1:]
            return translation
Esempio n. 4
0
 def setUp(self):
     """Create the fixture record: a 26-letter sequence carrying four features."""
     source = SeqFeature(
         FeatureLocation(0, 26),
         type="source",
         qualifiers={"mol_type": ["fake protein"]},
     )
     exact = SeqFeature(FeatureLocation(0, ExactPosition(10)))
     # feature whose start lies within a range and whose end is "before"
     fuzzy_start = SeqFeature(
         FeatureLocation(WithinPosition(12, left=12, right=15),
                         BeforePosition(22)))
     # feature whose start is "after" and whose end is one of two choices
     end_choices = [ExactPosition(25), AfterPosition(26)]
     fuzzy_end = SeqFeature(
         FeatureLocation(AfterPosition(16), OneOfPosition(26, end_choices)))

     sequence = Seq("ABCDEFGHIJKLMNOPQRSTUVWZYX", generic_protein)
     self.record = SeqRecord(
         sequence,
         id="TestID",
         name="TestName",
         description="TestDescr",
         dbxrefs=["TestXRef"],
         annotations={"k": "v"},
         letter_annotations={"fake": "X" * 26},
         features=[source, exact, fuzzy_start, fuzzy_end],
     )
Esempio n. 5
0
    def add_point_feature(self,
                          resnum,
                          feat_type=None,
                          feat_id=None,
                          qualifiers=None):
        """Append a single-residue feature to ``self.features``.

        Args:
            resnum (int): Protein sequence residue number; the feature spans
                ``resnum - 1`` to ``resnum``
            feat_type (str, optional): Description of the feature type
                (ie. 'catalytic residue'); a generic description is used
                when this is empty
            feat_id (str, optional): ID of the feature (ie. 'TM1')
            qualifiers (optional): Passed unchanged to the new feature

        Raises:
            ValueError: If ``self.feature_file`` is set

        """
        if self.feature_file:
            raise ValueError(
                'Feature file associated with sequence, please remove file association to append '
                'additional features.')

        description = feat_type or 'Manually added protein sequence single residue feature'
        residue_span = FeatureLocation(ExactPosition(resnum - 1),
                                       ExactPosition(resnum))
        point_feature = SeqFeature(location=residue_span,
                                   type=description,
                                   id=feat_id,
                                   qualifiers=qualifiers)
        self.features.append(point_feature)
Esempio n. 6
0
    def create_genbank_file(self):
        """
        Create a genbank file containing the record augmented with one
        'mRNA' feature per putative operon.

        Every operon in ``self.operons`` is appended to
        ``self.genbank_record.features``, the features are sorted by start
        position, and the record is written in genbank format to
        ``self.analysis.xgenbankfile_path``.

        For more documentation on how to create new features, visit

         - http://biopython.org/\\
                 DIST/docs/api/Bio.SeqRecord.SeqRecord-class.html#__getitem__
         - http://biopython.org/\\
                 DIST/docs/api/Bio.SeqFeature.SeqFeature-class.html

         - http://www.ebi.ac.uk/\\
                 embl/Documentation/FT_definitions/feature_table.html
        """

        log.info("augmenting genbank file %s with putative operons" %
                 self.analysis.genbankfile_name)

        for i, o in enumerate(self.operons):
            location = FeatureLocation(ExactPosition(o.begin),
                                       ExactPosition(o.end))
            self.genbank_record.features.append(
                SeqFeature(location,
                           type='mRNA',
                           strand=o.strand,
                           qualifiers=dict(note='putative, confidence %d%%' %
                                           o.confidence,
                                           operon='rnas-%d' % i)))

        self.genbank_record.features.sort(
            key=lambda f: f.location.start.position)

        # the context manager flushes and closes the handle even if writing fails
        with open(self.analysis.xgenbankfile_path, "w") as xgb_file:
            SeqIO.write(self.genbank_record, xgb_file, "genbank")
Esempio n. 7
0
    def test_pickle(self):
        """Test pickle behaviour of position instances."""
        # setup
        import pickle

        within = WithinPosition(10, left=10, right=13)
        between = BetweenPosition(24, left=20, right=24)
        one_of = OneOfPosition(1888, [ExactPosition(1888), ExactPosition(1901)])

        # test __getnewargs__
        expected_newargs = (
            (within, (10, 10, 13)),
            (between, (24, 20, 24)),
            (one_of, (1888, [ExactPosition(1888), ExactPosition(1901)])),
        )
        for position, newargs in expected_newargs:
            self.assertEqual(position.__getnewargs__(), newargs)

        # test pickle behaviour: every position must survive a round trip
        within_copy = pickle.loads(pickle.dumps(within))
        between_copy = pickle.loads(pickle.dumps(between))
        one_of_copy = pickle.loads(pickle.dumps(one_of))
        for position, copy in ((within, within_copy),
                               (between, between_copy),
                               (one_of, one_of_copy)):
            self.assertEqual(position, copy)
        # the private bounds are restored as well
        for position, copy in ((within, within_copy), (between, between_copy)):
            self.assertEqual(position._left, copy._left)
            self.assertEqual(position._right, copy._right)
        self.assertEqual(one_of.position_choices, one_of_copy.position_choices)
Esempio n. 8
0
    def test_get_mite_gene_location_intron_reverse_lots_of_introns(self):
        # Setup
        mite_span = FeatureLocation(ExactPosition(511777-1),
                                    ExactPosition(512242),
                                    strand=1)
        mite = SeqFeature(mite_span, type='mRNA', id='AT1G02470')

        # (first base, last base, name) of every minus-strand sub feature;
        # starts are converted to 0-based when the SubFeature is built
        spans = [
            (510853, 511011, 'T1'),
            (510853, 511086, 'E7'),
            (511170, 511217, 'E6'),
            (511310, 511358, 'E5'),
            (511474, 511526, 'E4'),
            (511621, 511716, 'E3'),
            (512243, 512342, 'E2'),
            (512428, 512707, 'E1'),
            (512670, 512707, 'F1'),
        ]
        sub_features = [
            SubFeature(strand='-', start=first-1, end=last, name=label)
            for first, last, label in spans
        ]

        # Exercise
        start, end = get_mite_gene_location(mite, sub_features)

        # Verify
        self.assertEqual(start, 'E2')
        self.assertEqual(end, 'I2')
Esempio n. 9
0
    def test_annotate_sub_feature_reverse_correct_annotation_and_counts(self):
        # Setup
        def minus_strand_feature(first, last, **kwargs):
            # first is a 1-based coordinate, stored 0-based in the location
            span = FeatureLocation(ExactPosition(first-1),
                                   ExactPosition(last),
                                   strand=-1)
            return SeqFeature(span, **kwargs)

        sub_features = [
            minus_strand_feature(9761599, 9761802, type='three_prime_UTR'),
            minus_strand_feature(9761599, 9762165, type='exon'),
            minus_strand_feature(9763450, 9764167, type='exon'),
            minus_strand_feature(9764158, 9764167, type='five_prime_UTR'),
        ]

        feature = minus_strand_feature(9762301, 9762350,
                                       type='mRNA', id='AT1G28230',
                                       sub_features=sub_features)

        expected_exon_counts = 2
        names = iter(['F1', 'E1', 'T1', 'E2'])  # possible sort diff if tie?

        # Exercise
        annotate_sub_feature_counts(feature)

        # Verify
        self.assertEqual(feature.exon_count, expected_exon_counts)
        for annotated in feature.sub_features:
            self.assertEqual(annotated.name, next(names))
Esempio n. 10
0
    def test_get_mite_gene_location_exon_forward(self):
        # Setup
        mite_span = FeatureLocation(ExactPosition(14301135-1),
                                    ExactPosition(14301495),
                                    strand=1)
        mite = SeqFeature(mite_span, type='mRNA', id='AT1G38630')

        # (first base, last base, name) of every plus-strand sub feature;
        # starts are converted to 0-based when the SubFeature is built
        spans = [
            (14298853, 14299101, 'F1'),
            (14298853, 14299175, 'E1'),
            (14299460, 14299528, 'E2'),
            (14301089, 14301157, 'E3'),
            (14301443, 14301511, 'E4'),
            (14301621, 14301689, 'E5'),
            (14302679, 14302747, 'E6'),
            (14302843, 14302939, 'E7'),
            (14302892, 14302939, 'T1'),
        ]
        sub_features = [
            SubFeature(strand='+', start=first-1, end=last, name=label)
            for first, last, label in spans
        ]

        # Exercise
        start, end = get_mite_gene_location(mite, sub_features)

        # Verify
        self.assertEqual(start, 'E3')
        self.assertEqual(end, 'E4')
Esempio n. 11
0
def _annotate_feature(
    length: int,
    name: str,
    i: int = None,
    j: int = None,
    cyclic: bool = False,
    feature_type: str = None,
):
    """Create a forward-strand feature named ``name`` over ``i``..``j``.

    ``i`` defaults to 0 and ``j`` to ``length``. For a cyclic sequence whose
    span runs past ``length`` or has ``i > j``, a compound feature made of
    ``(i, length)`` and ``(0, j)`` is built; otherwise a single-location
    feature is built.

    :param length: length of the annotated sequence
    :param name: name given to the feature
    :param i: start index, or None for 0
    :param j: end index, or None for ``length``
    :param cyclic: whether a span may wrap around the end of the sequence
    :param feature_type: passed through to the feature constructor
    :return: the feature returned by new_compound_feature or new_feature
    """
    start = 0 if i is None else i
    stop = length if j is None else j

    wraps_around = cyclic and (stop > length or start > stop)
    if not wraps_around:
        span = FeatureLocation(ExactPosition(start), ExactPosition(stop), strand=1)
        return new_feature(name=name, location=span, feature_type=feature_type)

    # an end beyond the sequence is shifted back by one sequence length
    if stop > length:
        stop = stop - length
    return new_compound_feature(
        name=name,
        indices=[(start, length), (0, stop)],
        strand=1,
        feature_type=feature_type,
    )
Esempio n. 12
0
def test_lcs():
    """lcs() yields the same feature for every supported kind of query."""
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord as BSeqRecord
    from pydna.dseq import Dseq
    from pydna.dseqrecord import Dseqrecord
    from pydna.seqrecord import SeqRecord

    from pydna.seqfeature import SeqFeature
    from Bio.SeqFeature import FeatureLocation, ExactPosition

    s = SeqRecord(Seq("GGATCC"))

    expected = SeqFeature()
    expected.__dict__ = {
        "location": FeatureLocation(ExactPosition(0),
                                    ExactPosition(6),
                                    strand=1),
        "type": "read",
        "id": "<unknown id>",
        "qualifiers": {
            "label": ["sequence"],
            "ApEinfo_fwdcolor": ["#DAFFCF"],
            "ApEinfo_revcolor": ["#DFFDFF"],
        },
    }

    # the same six bases wrapped in each query type
    queries = (
        "GGATCC",
        Seq("GGATCC"),
        BSeqRecord(Seq("GGATCC"), name="sequence"),
        Dseq("GGATCC"),
        Dseqrecord(Dseq("GGATCC"), name="sequence"),
        Dseqrecord("GGATCC", name="sequence"),
    )
    for query in queries:
        assert s.lcs(query, limit=4).__dict__ == expected.__dict__
Esempio n. 13
0
 def get_feature_location(self):
     """Return a FeatureLocation for this object's left/right coordinates.

     A partial end is expressed as a Before/After position instead of an
     exact one.
     """
     # Coordinate is 0-based in Biopython object
     start_type = BeforePosition if self.left_partial else ExactPosition
     start = start_type(self.left - 1)
     end_type = AfterPosition if self.right_partial else ExactPosition
     end = end_type(self.right)
     return FeatureLocation(start, end, strand=self.strand)
Esempio n. 14
0
 def test_all_combos(self):
     """Every start/stop codon pairing gives one ORF over the whole sequence."""
     expected = [
         FeatureLocation(ExactPosition(0), ExactPosition(66), strand=1)
     ]
     filler = "N" * 60
     for start in ('ATG', 'GTG', 'TTG'):
         for stop in ('TAA', 'TAG', 'TGA'):
             self.run_both_dirs(expected, start + filler + stop)
Esempio n. 15
0
 def _exact(csrange):
     ''' An internal static function to generate an exact feature
         location running from the first item of csrange to one past
         its last item. '''
     from Bio.SeqFeature import ExactPosition, FeatureLocation
     first, last = csrange[0], csrange[-1]
     return FeatureLocation(ExactPosition(first), ExactPosition(last + 1))
Esempio n. 16
0
    def partitionLines(self, split_factor=1.05):
        """Split the genome into consecutive rows, breaking at feature boundaries.

        Returns a tuple ``(rowData, avgRowLength, _internal_maxrowlength)``
        where ``rowData`` maps a 1-based row number to a dict with "start"
        and "end" keys, ``avgRowLength`` is the target row length and
        ``_internal_maxrowlength`` is the largest ``end - start`` seen.

        ``split_factor`` scales the number of rows used to compute the
        target row length.
        """
        # target row length: genome length divided by (rows * split_factor)
        avgRowLength = int(
            float(self.genome_length) / float(self.rows * split_factor))

        fake_count = 100

        # zero-width placeholder locations spread evenly over the genome,
        # processed below together with the real feature locations
        items = []
        for i in range(fake_count):
            key = int(float(self.genome_length * i) / fake_count)
            items.append(FeatureLocation(key, key, strand=1))

        # locations of every object belonging to an included class
        for x in self.classes:
            if self.classes[x].included:
                items += [y.location for y in self.classes[x].objects]

        # NOTE(review): longest_last_object is never updated after this
        # assignment, so the max() below always compares against 1.
        longest_last_object = 1
        thisRowEnd = 1 + avgRowLength
        currentRow = 1
        _internal_maxrowlength = 0

        rowData = {1: {"start": ExactPosition(1)}}

        for item in sorted(items, key=lambda x: x.start):
            # the item starts at/after, or reaches past, the end of the row
            if item.start >= thisRowEnd or item.end > thisRowEnd:

                # NOTE(review): the current row has no "end" key yet at this
                # point, so when self.justified is falsy the lookup below
                # looks like it raises KeyError - confirm the intended test.
                if self.justified or item.start >= rowData[currentRow]["end"]:
                    rowData[currentRow]["end"] = thisRowEnd
                else:
                    rowData[currentRow]["end"] = max(longest_last_object,
                                                     item.start)

                _internal_maxrowlength = max(
                    _internal_maxrowlength,
                    rowData[currentRow]["end"] - rowData[currentRow]["start"],
                )

                # open the next row
                currentRow += 1
                rowData[currentRow] = {}

                # start the new row at the item if it begins inside the
                # previous row, otherwise right after the previous row's end
                if item.start <= rowData[currentRow - 1]["end"]:
                    rowData[currentRow]["start"] = item.start
                else:
                    rowData[currentRow]["start"] = rowData[currentRow -
                                                           1]["end"] + 1

                thisRowEnd = avgRowLength + rowData[currentRow]["start"]

        # the last row always runs to one past the genome length
        thisRowEnd = rowData[currentRow]["end"] = ExactPosition(
            self.genome_length + 1)

        _internal_maxrowlength = max(
            _internal_maxrowlength,
            rowData[currentRow]["end"] - rowData[currentRow]["start"],
        )

        return rowData, avgRowLength, _internal_maxrowlength
 def getLocation(self, left, right, strand, partial_flag="00"):
     """Build a FeatureLocation from 1-based left/right coordinates.

     partial_flag = {"00": both ends existing, "10": left end missing,
     "01": right end missing, "11": both ends missing}. A missing end is
     represented by a Before/After position. Any strand other than "+",
     "1" or 1 is treated as -1.
     """
     if strand == "+" or strand == "1" or strand == 1:
         strand = 1
     else:
         strand = -1
     left_type = BeforePosition if partial_flag[0] == "1" else ExactPosition
     left_position = left_type(int(left) - 1)
     right_type = AfterPosition if partial_flag[1] == "1" else ExactPosition
     right_position = right_type(int(right))
     return FeatureLocation(left_position, right_position, strand=strand)
Esempio n. 18
0
 def test_single_contained(self):
     """A single ORF is found with or without flanking bases."""
     orf = "ATG" + "N" * 60 + "TAG"
     for offset, prefix in ((0, ""), (3, "NNN")):
         expected = [
             FeatureLocation(ExactPosition(offset),
                             ExactPosition(offset + 66),
                             strand=1)
         ]
         for suffix in ("", "NNN"):
             self.run_both_dirs(expected, prefix + orf + suffix)
Esempio n. 19
0
	def gb(self):
		"""Return this object rendered as genbank-formatted text.

		The id and name of the record are the first 8 characters of
		self.name; feature starts are shifted to 0-based positions.
		"""
		record = SeqRecord(
			Seq(self.sequence(),IUPAC.IUPACUnambiguousDNA()),
			id=self.name[0:8],
			name=self.name[0:8],
			description=self.description
		)
		converted = []
		for f in self.features():
			span = FeatureLocation(ExactPosition(f.start-1),ExactPosition(f.end))
			quals = dict((q.name, q.data) for q in f.qualifiers.all())
			converted.append(SeqFeature(span, f.type, qualifiers=quals))
		record.features = converted
		return record.format('genbank')
Esempio n. 20
0
 def test_multi_start_single_stop(self):
     """Two in-frame starts sharing one stop give a single ORF."""
     def orf_locations(sequence):
         return [orf.location
                 for orf in find_all_orfs(DummyRecord(seq=sequence))]

     forward = "ATGNNNATG" + "N" * 60 + "TAG"
     expected = [
         FeatureLocation(ExactPosition(0), ExactPosition(72), strand=1)
     ]
     assert expected == orf_locations(forward)

     # same coordinates, opposite strand on the reverse complement
     reverse = str(DummyRecord(seq=forward).seq.reverse_complement())
     expected[0].strand = -1
     assert expected == orf_locations(reverse)
Esempio n. 21
0
def split_gbk(seq_records, outname, format = False):
    """Merge several records into one record with 200 N spacers in between.

    For every "fasta_record" feature (and every "source" feature as long as
    no "fasta_record" feature has been seen), the matching slice of the
    record is appended to the merged record, followed by 200 "N" that are
    annotated as an "assembly_gap" feature. "source" and "fasta_record"
    features are then dropped from the merged record, except one at index 0.
    The id, name, description and annotations are taken from the first record.

    seq_records -- indexable sequence of records; must not be empty
    outname     -- path that is created/truncated; nothing is written to it
    format      -- unused, kept for backward compatibility

    Returns the merged record without the trailing 200 N spacer.
    """
    from Bio.SeqFeature import SeqFeature, FeatureLocation, ExactPosition

    # Keep the historical side effect of creating/truncating the output
    # file, but release the handle right away instead of leaking it.
    with open(outname, "w"):
        pass

    # NOTE(review): starts as a plain string and is expected to turn into a
    # record on the first "+=" (relies on the record type supporting
    # str + record) - confirm.
    merged_record = ''
    fasta_record = False
    for i, record in enumerate(seq_records):
        print(i)

        for feature in record.features:
            if feature.type == "fasta_record":
                fasta_record = True
            elif feature.type != 'source' or fasta_record:
                # "source" features only count until a fasta_record is seen
                continue
            merged_record += record[feature.location.start:feature.location.end]
            merged_record += "N" * 200
            # annotate the spacer that was just appended
            gap_end = len(merged_record)
            gap_location = FeatureLocation(ExactPosition(gap_end - 200),
                                           ExactPosition(gap_end))
            merged_record.features.append(
                SeqFeature(gap_location, type="assembly_gap"))

    to_remove = [n for n, feature in enumerate(merged_record.features)
                 if feature.type in ('source', 'fasta_record')]

    # delete from the back so the remaining indexes stay valid
    for index in sorted(to_remove, reverse=True):
        if index != 0:
            del merged_record.features[index]

    first_annotations = seq_records[0].annotations
    merged_record.id = first_annotations["accessions"][-1]
    try:
        merged_record.description = "%s" % first_annotations["organism"]
    except KeyError:
        # no organism annotation available
        merged_record.description = 'Unkown bacteria'
    merged_record.annotations = first_annotations
    merged_record.name = first_annotations["accessions"][-1]
    return merged_record[0:-200]
Esempio n. 22
0
def test_translate_one1() -> None:
    """A G->A change at position 3 turns codon GGC (G) into AGC (S)."""
    seq = Seq('ACTGGCG')  # ref @ 4 is G
    first_part = FeatureLocation(ExactPosition(0), ExactPosition(6), 1)
    second_part = FeatureLocation(ExactPosition(8), ExactPosition(11), strand=1)
    cds = SeqFeature(location=CompoundLocation([first_part, second_part], 'join'))

    expected = TResult(
        position=3,
        alt='A',
        codon_position=3,
        ref_codon='GGC',
        alt_codon='AGC',
        in_coding_region=True,
        ref_aa='G',
        alt_aa='S',
        synonymous=False,
        alt_is_invalid_stop=False,
    )

    actual_result = translate_one(seq, [cds], 3, 'A')

    failure_message = f"\n\nGiven {seq._data}:\n\nexpected: {expected} \n actual: {actual_result}"
    eq_(expected, actual_result, failure_message)
Esempio n. 23
0
 def test_exact(self):
     """Features: write/read simple exact locations."""
     # Plain integers are accepted as positions; the last case passes
     # explicit ExactPosition objects instead.
     cases = [
         (SeqFeature(FeatureLocation(10, 20), strand=+1, type="CDS"),
          "11..20"),
         (SeqFeature(FeatureLocation(30, 40), strand=-1, type="CDS"),
          "complement(31..40)"),
         (SeqFeature(FeatureLocation(ExactPosition(50), ExactPosition(60)),
                     strand=+1, type="CDS"),
          "51..60"),
     ]
     for feature, location_string in cases:
         self.assertEqual(_insdc_feature_location_string(feature),
                          location_string)
         self.record.features.append(feature)
     self.write_read_check()
Esempio n. 24
0
def add_gaps(gbk_record, start_end_list):
    """Append one "assembly_gap" feature per (start, end) pair.

    The features are added to gbk_record.features in place and the same
    record is returned.
    """
    from Bio.SeqFeature import SeqFeature, FeatureLocation, ExactPosition

    for gap_start, gap_end in start_end_list:
        gap_span = FeatureLocation(ExactPosition(gap_start),
                                   ExactPosition(gap_end))
        gbk_record.features.append(SeqFeature(gap_span, type="assembly_gap"))

    return gbk_record
Esempio n. 25
0
 def parse_position(string: str):
     """ Converts a position from a string into a Position subclass.

         A leading '<' gives a BeforePosition, a leading '>' an
         AfterPosition, the text "UnknownPosition()" an UnknownPosition and
         anything else is read as an integer ExactPosition.
     """
     marker = string[0]
     if marker == '<':
         return BeforePosition(int(string[1:]))
     if marker == '>':
         return AfterPosition(int(string[1:]))
     if string == "UnknownPosition()":
         return UnknownPosition()
     return ExactPosition(int(string))
Esempio n. 26
0
    def test_unknown_position(self):
        """An unknown end position survives the conversion."""
        original = FeatureLocation(ExactPosition(1), UnknownPosition(), strand=1)
        converted = self.convert(original)

        converted_start = converted.start
        assert isinstance(converted_start, ExactPosition)
        assert converted.start == 1

        assert isinstance(converted.end, UnknownPosition)
Esempio n. 27
0
def getgenefromgbk(gbkfile, location):  # change to work with locations
    """parses a genesequence from a gbk file using the gene location
    parameters
    ----------
    gbkfile
        string, path to gbk file + file
    location
        string of comma separated values "scaffold,start,end,strand",
        where start may carry a "<" and end a ">" to mark an open end,
        example: "12,<100,250>,1"
    returns
    ----------
    ret = DNA sequence of housekeepinggene from featurelocation
          coordinates
    abs_loc = validation, contains the location of HG on specific
              scaffold. [scaffold, start, end]
    raises
    ----------
    ValueError
        if location does not have four fields, a field is not numeric, or
        no record in the gbk file matches the scaffold number
    """
    scaff_number, start, end, strand = location.split(",")
    scaff_number = int(scaff_number)

    # Making the FeatureLocation
    f_start = BeforePosition(
        int(start.strip("<"))) if "<" in start else ExactPosition(int(start))
    f_end = AfterPosition(
        int(end.strip(">"))) if ">" in end else ExactPosition(int(end))
    f = FeatureLocation(f_start, f_end, int(strand))

    # the scaffold number is read from the last three characters of the
    # record name (before any "."); the last matching record wins
    DNA = None
    for record in SeqIO.parse(gbkfile, "genbank"):
        record_no = record.name.split(".")[0]
        scaff_check = int(record_no[-3:])  # = scaffold number
        if scaff_check == scaff_number:
            DNA = record.seq
    if DNA is None:
        # fail with a clear message instead of an unbound local variable
        raise ValueError(
            "No record for scaffold %d found in %s" % (scaff_number, gbkfile))
    ret = f.extract(DNA)  # The DNA sequence of the housekeepinggene

    # VALIDATION
    start = int(start.replace(">", "").replace("<", ""))
    end = int(end.replace(">", "").replace("<", ""))
    abs_loc = [scaff_number, start, end]
    return (ret, abs_loc)
Esempio n. 28
0
    def test_after_position(self):
        """An open ("after") end position survives the conversion."""
        original = FeatureLocation(ExactPosition(1), AfterPosition(6), strand=1)
        converted = self.convert(original)

        # (attribute, expected position type, expected value)
        for attribute, position_type, value in (("start", ExactPosition, 1),
                                                ("end", AfterPosition, 6)):
            assert isinstance(getattr(converted, attribute), position_type)
            assert getattr(converted, attribute) == value
Esempio n. 29
0
    def test_before_position(self):
        """An open ("before") start position survives the conversion."""
        original = FeatureLocation(BeforePosition(1), ExactPosition(6), strand=-1)
        converted = self.convert(original)

        # (attribute, expected position type, expected value)
        for attribute, position_type, value in (("start", BeforePosition, 1),
                                                ("end", ExactPosition, 6)):
            assert isinstance(getattr(converted, attribute), position_type)
            assert getattr(converted, attribute) == value
Esempio n. 30
0
def addTLAFeatures(genbank, fragment_list):
    """
    Add one 'tla' feature per fragment to the genbank record.

    Each feature spans frag.start..frag.end and is labelled
    'TLA_Region_<n>', where n counts the fragments from 0. The record is
    modified in place and returned.

    Author's note: SnapGene reportedly displays '1bp long' features as
    2 base pairs; the front of the feature is its location.
    """
    for index, frag in enumerate(fragment_list):
        span = FeatureLocation(ExactPosition(frag.start),
                               ExactPosition(frag.end))
        genbank.features.append(
            SeqFeature(span,
                       type='tla',
                       id='tla',
                       qualifiers={'label': 'TLA_Region_%i' % index}))
    return genbank