def findFragendSites(fasta, resite):
    """Build a dictionary of fragend positions for every FASTA record.

    The restriction site and its reverse complement are searched for in
    each (upper-cased) record sequence with ``nt_search``.

    :param fasta: path of the FASTA file to scan
    :param resite: restriction-site recognition sequence (any case)
    :returns: dict holding ``'resite'`` (the upper-cased site) plus, for
        every record, ``(record_id, '+')`` and ``(record_id, '-')`` keys
        mapping to lists of positions.  '+' values are the match start
        plus ``len(resite)``; '-' values are the match start plus 1.
        A strand without matches maps to an empty list.
    """
    # Process restriction enzyme site and create output dictionary
    resite = resite.upper()
    frags = {'resite': resite}
    # Create sequence objects for the site and its reverse complement
    standard = Seq(resite)
    revcomp = standard.reverse_complement()
    site_length = len(resite)
    # The context manager closes the file even if parsing or searching
    # raises part-way through.
    with open(fasta) as fastaHandle:
        # 'record' rather than 'fasta', so the path argument is not
        # rebound while the file is being read.
        for record in SeqIO.parse(fastaHandle, 'fasta'):
            fName, fSequence = str(record.id), str(record.seq).upper()
            # nt_search returns [pattern, hit, hit, ...]; drop the pattern.
            forward = nt_search(fSequence, standard)[1:]
            frags[(fName, '+')] = [x + site_length for x in forward]
            reverse = nt_search(fSequence, revcomp)[1:]
            frags[(fName, '-')] = [x + 1 for x in reverse]
    return frags
def findFragendSites(fasta, resite):
    """Collect fragend positions for both strands of every FASTA record.

    :param fasta: path of the FASTA file to scan
    :param resite: restriction-site recognition sequence (any case)
    :returns: dict with key ``'resite'`` (upper-cased site) and, per
        record, keys ``(record_id, '+')`` / ``(record_id, '-')`` whose
        values are position lists: match start + ``len(resite)`` for the
        site itself, match start + 1 for its reverse complement.
        Strands with no match get an empty list.
    """
    resite = resite.upper()
    frags = {'resite': resite}
    # Search patterns: the site and its reverse complement
    standard = Seq(resite)
    revcomp = standard.reverse_complement()
    # 'with' guarantees the handle is released if SeqIO or nt_search fails.
    with open(fasta) as fastaHandle:
        # The loop variable is deliberately not called 'fasta': that would
        # overwrite the path parameter.
        for entry in SeqIO.parse(fastaHandle, 'fasta'):
            fName = str(entry.id)
            fSequence = str(entry.seq).upper()
            # Element 0 of an nt_search result is the pattern, not a hit.
            forward = nt_search(fSequence, standard)[1:]
            reverse = nt_search(fSequence, revcomp)[1:]
            # An empty hit list yields an empty list, no branch needed.
            frags[(fName, '+')] = [x + len(resite) for x in forward]
            frags[(fName, '-')] = [x + 1 for x in reverse]
    return frags
def target_seq(genome, query, filetype='fasta'):
    """Find every occurrence of *query* on each chromosome of a genome.

    The species is read from ``arguments().sp`` ('cg', 'sc', 'ca' or 'sp')
    and selects both the features file that supplies the chromosome names
    and the CSV file (under ``dependencies_dir``) the result is written to.

    :param genome: sequence file handed to ``SeqIO.parse``
    :param query: sequence to search for (passed to ``nt_search``)
    :param filetype: ``SeqIO.parse`` format name (default ``'fasta'``)
    :returns: DataFrame with one column per chromosome name holding the
        hit positions (as floats, NaN-padded to the longest column)
    :raises ValueError: if the species flag is not one of the four above
    :raises IndexError: if the file yields more searched records than
        there are chromosome names (columns are assigned by record order)
    """
    print('finding target sequences')
    # Read the flag once instead of re-evaluating arguments() per test.
    species = arguments().sp
    if species == 'cg':
        features_file, output_name = cg_features_file, cg_hermes_on_chr
    elif species == 'sc':
        features_file, output_name = sc_features_file, sc_hermes_on_chr
    elif species == 'ca':
        features_file, output_name = ca_features_file, ca_hermes_on_chr
    elif species == 'sp':
        features_file, output_name = sp_features_file, sp_hermes_on_chr
    else:
        raise ValueError('Unknown species flag specified')

    chromes = list(pd.read_csv(features_file)['chrom'].unique())
    if species == 'ca':
        # Keep names whose 9th character is 'A', truncated to 9 characters.
        # Slicing ([8:9]) instead of indexing ([8]) means names shorter
        # than 9 characters are skipped rather than raising IndexError.
        chromes = [str(chrom)[:9] for chrom in chromes
                   if str(chrom)[8:9] == 'A']
    if species == 'sp':
        chromes = ['I', 'II', 'III']

    # Collect hits first so the frame can be sized to the data.  A fixed
    # 250000-row frame silently discarded every hit beyond row 249999,
    # because column assignment aligns on the frame's index.
    hits_by_column = []
    i = 0
    for record in SeqIO.parse(genome, filetype):
        if species == 'sp' and record.id not in chromes:
            continue
        # nt_search returns [pattern, hit, hit, ...]; keep only the hits.
        hits = nt_search(str(record.seq), query)[1:]
        hits_by_column.append((chromes[i], hits))
        i += 1

    n_rows = max([len(hits) for _, hits in hits_by_column] or [0])
    chroms_locs = pd.DataFrame(np.nan, index=range(n_rows), columns=chromes)
    for column, hits in hits_by_column:
        # dtype=float keeps every column float64, as the NaN-padded
        # columns of the fixed-size frame were.
        chroms_locs[column] = pd.Series(hits, dtype=float)
    chroms_locs = chroms_locs.dropna(how='all')

    chroms_locs.to_csv(dependencies_dir + output_name)
    return chroms_locs
def testPrimerDirection(primers, record):
    """Assert that both primers anneal to *record* in the expected direction.

    The first primer's annealing sequence must be found on the record as
    is; the second primer's annealing sequence must be found after reverse
    complementing.  "Found" means the ``nt_search`` result has at least two
    elements (the pattern plus one hit).

    :param primers: indexable holding at least two primer objects that
        provide ``anneal_seq()``
    :param record: object whose ``seq`` is searched
    """
    # NOTE(review): `self` is not a parameter of this function; presumably
    # it is resolved from an enclosing test method's scope -- confirm.
    require_at_least = self.assertGreaterEqual

    forward_hits = nt_search(
        str(record.seq.upper()),
        str(primers[0].anneal_seq().upper().seq),
    )
    require_at_least(len(forward_hits), 2)

    reverse_hits = nt_search(
        str(record.seq.upper()),
        str(primers[1].anneal_seq().reverse_complement().upper().seq),
    )
    require_at_least(len(reverse_hits), 2)
def find_pam_sites(self):
    """Locate PAM sites on both strands of ``self.reference``.

    Forward hits come from searching the reference for the PAM; reverse
    hits come from searching the complemented reference for the reversed
    PAM string.  Each hit becomes a ``SeqFeature`` of type ``'pam_site'``
    spanning ``len(pam)`` positions from the hit, with strand 1 (forward)
    or -1 (reverse).

    :returns: list of features, forward hits first; the same list is also
        stored on ``self.pam_sites``
    """
    reference_seq = self.reference.seq
    pam = self.pam.seq
    site_length = len(pam)

    def as_feature(position, strand):
        # One pam_site feature covering site_length positions.
        location = FeatureLocation(position, position + site_length)
        return SeqFeature(location, type='pam_site', strand=strand)

    # nt_search returns [pattern, hit, hit, ...]; drop the pattern.
    forward_starts = nt_search(str(reference_seq), str(pam))[1:]
    reverse_starts = nt_search(
        str(reference_seq.complement()), str(pam)[::-1])[1:]

    features = [as_feature(position, 1) for position in forward_starts]
    for position in reverse_starts:
        features.append(as_feature(position, -1))

    self.pam_sites = features
    return features
def find_alignments(kmers):
    """Search every reference for every k-mer using nt_search.

    :param kmers: iterable of objects with a ``seq`` attribute
    :returns: dict mapping each k-mer sequence string to a dict of
        ``{reference id: list of hit positions}``; references come from
        the module-level ``refs``.  K-mers with the same sequence string
        share one entry (the last one processed wins).
    """
    alignments = {}
    for kmer in kmers:
        hits_by_reference = {}
        for reference in refs:
            # Element 0 of an nt_search result is the pattern, not a hit.
            found = nt_search(str(reference.seq), str(kmer.seq))
            hits_by_reference[reference.id] = found[1:]
        alignments[str(kmer.seq)] = hits_by_reference
    return alignments
def freq_appearance(file, query, gffutils_db, filetype='fasta'):
    """
    Frequency of occurrence of a query sequence in a file

    Prints the total sequence length, the number of hits and hits per
    base, and writes the per-chromosome hit positions to
    'gc_seq_locations_on_chromosomes.csv'.

    :param file: sequence file
    :param query: sequence query (string)
    :param gffutils_db: gffutils all_features database
    :param filetype: parameter for SeqIO.parse (default='fasta')
    :returns: tuple ``(chroms_locs, sc_features)`` where ``chroms_locs``
        is a DataFrame with one column per name in ``chromes`` (hit
        positions as floats, NaN-padded) and ``sc_features`` is the first
        value returned by ``get_features``
    :raises IndexError: if the file holds more records than ``chromes``
        has names (columns are assigned by record order)
    """
    sc_features, chromes = get_features(gffutils_db)

    count = 0
    total_length = 0
    # Collect hits first so the frame can be sized to the data.  A fixed
    # 250000-row frame silently discarded every hit beyond row 249999,
    # because column assignment aligns on the frame's index.
    hits_by_column = []
    for i, record in enumerate(SeqIO.parse(file, filetype)):
        # nt_search returns [pattern, hit, hit, ...]; keep only the hits.
        hits = nt_search(str(record.seq), query)[1:]
        count += len(hits)
        total_length += len(record.seq)
        hits_by_column.append((chromes[i], hits))

    print(total_length)
    print(count)
    # An empty input used to crash here with ZeroDivisionError.
    print(float(count) / total_length if total_length else 0.0)

    n_rows = max([len(hits) for _, hits in hits_by_column] or [0])
    chroms_locs = pd.DataFrame(np.nan, index=range(n_rows), columns=chromes)
    for column, hits in hits_by_column:
        # dtype=float keeps every column float64, as the NaN-padded
        # columns of the fixed-size frame were.
        chroms_locs[column] = pd.Series(hits, dtype=float)
    chroms_locs = chroms_locs.dropna(how='all')
    chroms_locs.to_csv('gc_seq_locations_on_chromosomes.csv')
    return chroms_locs, sc_features
def complex_pattern_search(sequence, pattern, outfile, strand='+'):
    """Write one tab-separated line per nt_search hit of *pattern*.

    nt_search accepts ambiguity codes (N = A, C, G or T; R = A or G; ...).

    :param sequence: record providing ``seq``, ``id`` and ``description``
    :param pattern: pattern to search for
    :param outfile: object with a ``write`` method
    :param strand: value written to the last column (default ``'+'``)

    Columns written: id, hit start, hit start + len(pattern),
    description, an empty field, strand.
    """
    pattern_length = len(pattern)
    # Element 0 of the nt_search result is the pattern itself.
    hit_starts = nt_search(str(sequence.seq), pattern)[1:]
    for begin in hit_starts:
        fields = (sequence.id, begin, begin + pattern_length,
                  sequence.description, '', strand)
        outfile.write('%s\t%s\t%s\t%s\t%s\t%s\n' % fields)
def locateWord(ref, area, word):
    """Find every occurrence of *word* inside a region of ``ref.seq``.

    The search is case-insensitive: both the region and the word are
    upper-cased before calling ``nt_search``.

    :param ref: object whose ``seq`` supports slicing and ``upper()``
    :param area: ``(start, end)`` pair delimiting the region, or ``None``
    :param word: sequence to look for
    :returns: list of ``[start, end]`` pairs in ``ref.seq`` coordinates
        (``end`` is ``start + len(word)``), or ``None`` when *area* is
        ``None`` or nothing matches
    """
    # Identity test: `area == None` is elementwise (and ambiguous in an
    # `if`) for array-like areas.
    if area is None:
        return None
    region_start = area[0]
    region = ref.seq[region_start:area[1]]
    # nt_search returns [pattern, hit, hit, ...]; hits are offsets within
    # the region, so shift them back by the region start.
    hits = nt_search(str(region.upper()), word.upper())[1:]
    spans = [[region_start + hit, region_start + hit + len(word)]
             for hit in hits]
    return spans or None
def complex_pattern_search(sequence, pattern, outfile, strand='+'):
    """Report nt_search hits of *pattern* as tab-separated lines.

    Ambiguity codes are honoured by nt_search (N = A/C/G/T, R = A/G, ...).

    :param sequence: record providing ``seq``, ``id`` and ``description``
    :param pattern: pattern to search for
    :param outfile: object with a ``write`` method
    :param strand: value for the final column (default ``'+'``)

    Each line holds: id, hit start, hit start + len(pattern),
    description, an empty field, strand.
    """
    width = len(pattern)
    search_result = nt_search(str(sequence.seq), pattern)
    # Skip element 0: it is the pattern, not a position.
    for position in search_result[1:]:
        row = (
            sequence.id,
            position,
            position + width,
            sequence.description,
            '',
            strand,
        )
        outfile.write('%s\t%s\t%s\t%s\t%s\t%s\n' % row)
def testProduct(self):
    """Check the SS1 region of the stage-5 product against the reference.

    The product is also written to '/tmp/output.gb' in GenBank format.
    The SS1 region is the slice from the first hit of the upstream marker
    to the end of the first hit of the downstream marker.  An IndexError
    is raised if either marker is absent from a record.
    """
    product = self.assembly.product(5)
    product.seq.alphabet = IUPAC.IUPACAmbiguousDNA()  # TODO: use this alphabet for all sequences
    # Close the output file deterministically instead of leaking the handle.
    with open('/tmp/output.gb', 'w') as handle:
        SeqIO.write(product, handle, 'genbank')

    # Compare the SS1 sequence of the predicted product with what is expected
    ss1_up_seq = 'atgatgttgtcaaagagtatgcgtcgttaattttatctcgttgataccgg'.upper()
    ss1_down_seq = 'gcgtcctgcttgccagatgcgatgttgtagcatcttatccagcaaccagg'.upper()

    def ss1_region(record):
        # nt_search(...)[1] is the first hit; [0] is the pattern.
        start = nt_search(str(record.seq), ss1_up_seq)[1]
        end = nt_search(str(record.seq), ss1_down_seq)[1] + len(ss1_down_seq)
        return record[start:end]

    product_ss1_seq = ss1_region(product)
    expected_record = SeqIO.read(
        'sequences_v2/stage-5-ss1-v2-vioabedc-gs-with-increased-vioedc-max-expression.gb',
        'genbank')
    expected_ss1_seq = ss1_region(expected_record)
    self.assertEqual(str(product_ss1_seq.seq), str(expected_ss1_seq.seq))
def search_for_substring(promoter_list, seq_to_search):
    """Run nt_search for each promoter against one sequence.

    :param promoter_list: iterable of patterns to search for
    :param seq_to_search: sequence to search (converted with ``str``)
    :returns: list with one raw ``nt_search`` result per promoter, in
        input order
    """
    return [nt_search(str(seq_to_search), promoter)
            for promoter in promoter_list]
def find_positions(seqfile, queryseq, filetype='fasta'):
    """Find positions of a query sequence in the records of a file.

    Previously the ``SeqIO.parse`` iterator object itself was stringified
    and searched, so no sequence data was ever examined.  Each record's
    sequence is now searched in turn.

    :param seqfile: sequence file handed to ``SeqIO.parse``
    :param queryseq: sequence to search for
    :param filetype: ``SeqIO.parse`` format name (default ``'fasta'``)
    :returns: list in ``nt_search`` form: the search pattern followed by
        the hit positions.  For a file with several records the hits of
        all records are appended in file order, each relative to the
        start of its own record.
    """
    positions = None
    for record in SeqIO.parse(seqfile, filetype):
        hits = nt_search(str(record.seq), queryseq)
        if positions is None:
            # The first result supplies the leading pattern element.
            positions = hits
        else:
            positions.extend(hits[1:])
    if positions is None:
        # No records: still return the [pattern] shape callers expect.
        positions = nt_search('', queryseq)
    return positions
def search_seqs(self, seqrec, in_seq, locus, run=0, partial_ann=None):
    """
    search_seqs - method for annotating a BioPython sequence without alignment

    Every feature sequence of the reference is searched for in the input
    sequence with ``nt_search``. A feature with exactly one hit is mapped,
    a feature with several hits is recorded as ambiguous, and a feature
    with no hit is recorded as missing. Unmapped positions are then passed
    to ``self._resolve_unmapped``.

    :param seqrec: The reference sequence
    :type seqrec: SeqRecord
    :param locus: The gene locus associated with the sequence.
    :type locus: str
    :param in_seq: The input sequence
    :type in_seq: SeqRecord
    :param run: The number of runs that have been done
    :type run: int
    :param partial_ann: A partial annotation from a previous step
    :type partial_ann: :ref:`ann`
    :rtype: :ref:`ann`

    When ``partial_ann`` is given and every feature of ``seqrec`` is
    already present in ``partial_ann.features``, ``partial_ann`` itself is
    returned unchanged.

    Example usage:

        >>> from Bio.Seq import Seq
        >>> from seqann.seq_search import SeqSearch
        >>> inseq = Seq('AGAGACTCTCCCGAGGATTTCGTGTACCAGTTTAAGGCCATGTGCTACTTCACC')
        >>> sqsrch = SeqSearch()
        >>> ann = sqsrch.search_seqs(refseqs, inseq)

    NOTE(review): the example call omits the required ``locus`` argument.
    """
    # Extract out the sequences and feature names
    # from the reference sequences

    # The mapped features will be subtracted from seq_covered
    # so the final seq_covered number will reflect the remaining
    # number of base pairs that haven't been mapped.
    #
    # The coordinates and mapping will help determine what positions
    # in the sequence have been mapped and to what features. The
    # missing blocks variable will be generated using these.
    structures = get_structures()
    seq_covered = len(in_seq.seq)
    # coordinates: keys 0..len(in_seq.seq) inclusive, each set to 1.
    # Keys are deleted as positions get mapped, so the remaining keys
    # are the positions not yet assigned to a feature.
    coordinates = dict(
        map(lambda x: [x, 1], [i for i in range(0, len(in_seq.seq) + 1)]))

    # mapping: same keys; the value 1 is replaced by the feature name
    # once a position has been mapped.
    mapping = dict(
        map(lambda x: [x, 1], [i for i in range(0, len(in_seq.seq) + 1)]))

    ambig_map = {}
    found_feats = {}
    feat_missing = {}

    method = "nt_search" if not partial_ann else partial_ann.method

    # If the partial annotation is provided
    # then make the found_feats equal to
    # what has already been annotated
    feats = get_features(seqrec)
    if partial_ann:

        # found_feats now refers to the same dict as partial_ann.features,
        # so the updates below also change partial_ann.features.
        found_feats = partial_ann.features

        if self.verbose and self.verbosity > 4:
            self.logger.info("Found partial features:")
            for f in found_feats:
                self.logger.info(f)

        # Skip references that only have features
        # that have already been annotated
        if len([f for f in feats if f in found_feats]) == len(feats):
            if self.verbose:
                self.logger.info("Skipping incomplete refseq")
            return partial_ann

        if self.verbose and self.verbosity > 1:
            self.logger.info("Using partial annotation | " + locus + " " +
                             str(len(partial_ann.features)))

        # Rebuild the unmapped-position dict from the flattened blocks
        # of the partial annotation.
        coordinates = dict(
            map(lambda l: [l, 1], [
                item for sublist in partial_ann.blocks for item in sublist
            ]))
        seq_covered = partial_ann.covered
        # Same object as partial_ann.mapping; it is modified in place below.
        mapping = partial_ann.mapping

        if self.verbose and self.verbosity > 2:
            self.logger.info("Partial sequence coverage = " +
                             str(seq_covered))
            self.logger.info("Partial sequence metho = " + method)

    # added_feat / deleted_coords remember what mapping exon_8 and
    # three_prime_UTR changed, so it can be undone further down.
    added_feat = {}
    deleted_coords = {}
    # Features are processed in the order given by structures[locus].
    for feat_name in sorted(feats, key=lambda k: structures[locus][k]):

        # skip if partial annotation is provided
        # and the feat name is not one of the
        # missing features
        if partial_ann and feat_name not in partial_ann.refmissing:
            if self.verbose and self.verbosity > 1:
                self.logger.info("Skipping " + feat_name +
                                 " - Already annotated")
            continue

        if self.verbose and self.verbosity > 1:
            self.logger.info("Running seqsearch for " + feat_name)

        # Search for the reference feature sequence in the
        # input sequence. Record the coordinates if it's
        # found and if it's found in multiple spots. If it
        # is not found, then record that feature as missing.
        # The result's first element is the pattern, so a length of 2
        # means exactly one hit and a length above 2 means several hits.
        seq_search = nt_search(str(in_seq.seq), str(feats[feat_name]))

        if len(seq_search) == 2:

            if self.verbose and self.verbosity > 0:
                self.logger.info("Found exact match for " + feat_name)

            seq_covered -= len(str(feats[feat_name]))
            end = int(len(str(feats[feat_name])) + seq_search[1])

            # A three_prime_UTR is extended to the end of the input
            # sequence when the input runs past the match.
            if feat_name == 'three_prime_UTR' \
                    and len(str(in_seq.seq)) > end:
                end = len(str(in_seq.seq))

            # If the feature is found and it's a five_prime_UTR then
            # the start should always be 0, so insertions at the
            # beginning of the sequence will be found.
            start = seq_search[1] if feat_name != 'five_prime_UTR' else 0
            # si is the first position claimed in coordinates/mapping:
            # one past the hit, except at position 0 or for a
            # five_prime_UTR, where it is 0.
            si = seq_search[1]+1 if seq_search[1] != 0 and \
                feat_name != 'five_prime_UTR' else 0

            # check if this features has already been mapped
            # (mapcheck contains 1 if any position in si..end is no
            # longer in coordinates)
            mapcheck = set(
                [0 if i in coordinates else 1 for i in range(si, end + 1)])

            # Dont map features if they are out of order
            skip = False
            if found_feats and len(found_feats) > 0:
                for f in found_feats:
                    o1 = structures[locus][feat_name]
                    o2 = structures[locus][f]
                    # NOTE(review): loctype is defined elsewhere; from its
                    # use here it presumably reports the relative order of
                    # the two locations -- confirm.
                    loctyp = loctype(found_feats[f].location.start,
                                     found_feats[f].location.end, start, end)

                    if o1 < o2 and loctyp:
                        skip = True
                        if self.verbose:
                            self.logger.info("Skipping map for " + feat_name)
                    elif o2 < o1 and not loctyp:
                        skip = True
                        if self.verbose:
                            self.logger.info("Skipping map for " + feat_name)

            if 1 not in mapcheck and not skip:
                for i in range(si, end + 1):
                    if i in coordinates:
                        # Remember coordinates removed for exon_8 and
                        # three_prime_UTR so they can be restored later.
                        if feat_name == "exon_8" or feat_name == 'three_prime_UTR':
                            deleted_coords.update({i: coordinates[i]})
                        del coordinates[i]
                    else:
                        if self.verbose:
                            self.logger.error(
                                "seqsearch - should't be here " + locus +
                                " - " + " - " + feat_name)
                    mapping[i] = feat_name

                found_feats.update({
                    feat_name:
                    SeqFeature(FeatureLocation(ExactPosition(start),
                                               ExactPosition(end),
                                               strand=1),
                               type=feat_name)
                })

                if feat_name == "exon_8" or feat_name == 'three_prime_UTR':
                    added_feat.update({feat_name: feats[feat_name]})
                if self.verbose and self.verbosity > 3:
                    self.logger.info("Coordinates | Start = " + str(start) +
                                     " - End = " + str(end))

        elif (len(seq_search) > 2):
            if self.verbose and self.verbosity > 1:
                self.logger.info("Found " + str(len(seq_search)) +
                                 " matches for " + feat_name)

            # Keep only the hits whose position (or the position after
            # it) is still unmapped.
            new_seq = [seq_search[0]]
            for i in range(1, len(seq_search)):
                tnp = seq_search[i] + 1
                if seq_search[i] in coordinates or tnp in coordinates:
                    new_seq.append(seq_search[i])

            seq_search = new_seq
            if (partial_ann and feat_name == "exon_8" and run > 0):
                missing_feats = sorted(list(partial_ann.missing.keys()))

                # * HARD CODED LOGIC * #
                # > exon8 in class I maps to multiple spots in a sequence,
                #   often in the 3' UTR. These features need to be mapped
                #   last to make sure it's not mapping exon8 incorrectly.
                if (missing_feats == ['exon_8', 'three_prime_UTR']
                        and len(seq_search) <= 3):
                    if self.verbose and self.verbosity > 0:
                        self.logger.info("Resolving exon_8")

                    seq_covered -= len(str(feats[feat_name]))
                    end = int(len(str(feats[feat_name])) + seq_search[1])

                    # The first remaining hit is used as the location
                    # of exon_8.
                    start = seq_search[1]
                    si = seq_search[1] + 1 if seq_search[1] != 0 else 0

                    # check if this features has already been mapped
                    # NOTE(review): mapcheck is computed here but never
                    # read in this branch.
                    mapcheck = set([
                        0 if i in coordinates else 1
                        for i in range(si, end + 1)
                    ])

                    for i in range(si, end + 1):
                        if i in coordinates:
                            del coordinates[i]
                        else:
                            if self.verbose:
                                self.logger.error(
                                    "seqsearch - should't be here " + locus +
                                    " - " + " - " + feat_name)
                        mapping[i] = feat_name

                    found_feats.update({
                        feat_name:
                        SeqFeature(FeatureLocation(ExactPosition(start),
                                                   ExactPosition(end),
                                                   strand=1),
                                   type=feat_name)
                    })

                    if self.verbose and self.verbosity > 0:
                        self.logger.info("Coordinates | Start = " +
                                         str(start) + " - End = " + str(end))
                else:
                    if self.verbose and self.verbosity > 0:
                        self.logger.info("Adding ambig feature " + feat_name)
                    feat_missing.update({feat_name: feats[feat_name]})
                    ambig_map.update(
                        {feat_name: seq_search[1:len(seq_search)]})
            else:
                if self.verbose and self.verbosity > 0:
                    self.logger.info("Adding ambig feature " + feat_name)
                feat_missing.update({feat_name: feats[feat_name]})
                ambig_map.update(
                    {feat_name: seq_search[1:len(seq_search)]})
        else:
            if self.verbose and self.verbosity > 1:
                self.logger.info("No match for " + feat_name)
            feat_missing.update({feat_name: feats[feat_name]})

    # Whatever is left in coordinates is unmapped; getblocks groups it
    # into blocks (each block is indexed and measured with len() below).
    blocks = getblocks(coordinates)
    exact_matches = list(found_feats.keys())

    # * HARD CODED LOGIC * #
    # >
    #
    #  HLA-DRB1 exon3 exact match - with intron1 and 3 missing
    if ('exon_3' in exact_matches and run == 99 and locus == 'HLA-DRB1'
            and 'exon_2' in feat_missing
            and (len(blocks) == 1 or len(blocks) == 2)):

        for b in blocks:
            x = b[len(b) - 1]
            # The block ending at the largest mapped position becomes
            # intron_3; any other block becomes exon_2.
            if x == max(list(mapping.keys())):
                featname = "intron_3"
                found_feats.update({
                    featname:
                    SeqFeature(FeatureLocation(ExactPosition(b[0] - 1),
                                               ExactPosition(b[len(b) - 1]),
                                               strand=1),
                               type=featname)
                })
            else:
                featname = "exon_2"
                found_feats.update({
                    featname:
                    SeqFeature(FeatureLocation(ExactPosition(b[0]),
                                               ExactPosition(b[len(b) - 1]),
                                               strand=1),
                               type=featname)
                })
            seq_covered -= len(b)

        if self.verbose and self.verbosity > 1:
            self.logger.info(
                "Successfully annotated class DRB1 II sequence")

        return Annotation(features=found_feats,
                          covered=seq_covered,
                          seq=in_seq,
                          missing=feat_missing,
                          ambig=ambig_map,
                          method=method,
                          mapping=mapping,
                          exact_match=exact_matches)

    # If it's a class II sequence and
    # exon_2 is an exact match
    # * HARD CODED LOGIC * #
    # > It's common for exon2 to be fully sequenced
    #   but intron_2 and intron_1 to be partially sequenced,
    #   which can make it hard to annotate those to features.
    #   If there are two missing blocks that is small enough
    #   and they are before and after exon2, then it's very
    #   very likely to be intron_2 and intron_1.
    if 'exon_2' in exact_matches and len(blocks) == 2 \
            and is_classII(locus) and seq_covered < 300:

        if self.verbose and self.verbosity > 1:
            self.logger.info("Running search for class II sequence")

        # r stays True only if the position next to each block (just
        # before the last block, just after any other) is mapped to exon_2.
        r = True
        for b in blocks:
            x = b[len(b) - 1]
            if x == max(list(mapping.keys())):
                x = b[0] - 1
            else:
                x += 1
            f = mapping[x]
            if f != 'exon_2':
                r = False
        if r:
            for b in blocks:
                x = b[len(b) - 1]
                # The block ending at the largest mapped position becomes
                # intron_2; the other block becomes intron_1.
                if x == max(list(mapping.keys())):
                    featname = "intron_2"
                    found_feats.update({
                        featname:
                        SeqFeature(FeatureLocation(
                            ExactPosition(b[0] - 1),
                            ExactPosition(b[len(b) - 1]),
                            strand=1),
                                   type=featname)
                    })
                else:
                    featname = "intron_1"
                    found_feats.update({
                        featname:
                        SeqFeature(FeatureLocation(
                            ExactPosition(b[0]),
                            ExactPosition(b[len(b) - 1]),
                            strand=1),
                                   type=featname)
                    })
                seq_covered -= len(b)

            if self.verbose and self.verbosity > 1:
                self.logger.info(
                    "Successfully annotated class II sequence")

            return Annotation(features=found_feats,
                              covered=seq_covered,
                              seq=in_seq,
                              missing=feat_missing,
                              ambig=ambig_map,
                              method=method,
                              mapping=mapping,
                              exact_match=exact_matches)

    # Hand the remaining blocks, missing features and ambiguous hits to
    # _resolve_unmapped; mb is the set of blocks it leaves unresolved.
    annotated_feats, mb, mapping = self._resolve_unmapped(
        blocks, feat_missing, ambig_map, mapping, found_feats, locus,
        seq_covered)

    # * HARD CODED LOGIC * #
    # If nothing is missing or ambiguous but blocks remain, report those
    # blocks as the missing blocks.
    if (not mb and blocks and len(feat_missing.keys()) == 0
            and len(ambig_map.keys()) == 0):
        mb = blocks

    if mb:

        # Unmap exon 8
        if locus in ['HLA-C', 'HLA-A'] and len(in_seq.seq) < 3000 \
                and 'exon_8' in exact_matches:
            # Restore the positions that mapping exon_8 / three_prime_UTR
            # removed, and put those features back among the missing ones.
            for i in deleted_coords:
                mapping[i] = 1
            coordinates.update(deleted_coords)
            mb = getblocks(coordinates)
            feat_missing.update(added_feat)

            # Delete from found features
            del exact_matches[exact_matches.index('exon_8')]
            del found_feats['exon_8']

            if 'exon_8' in annotated_feats:
                del annotated_feats['exon_8']
            if 'three_prime_UTR' in found_feats:
                del found_feats['three_prime_UTR']
            if 'three_prime_UTR' in annotated_feats:
                del annotated_feats['three_prime_UTR']

        # Features defined for this locus that were not annotated.
        refmissing = [
            f for f in structures[locus] if f not in annotated_feats
        ]

        if self.verbose and self.verbosity > 1:
            self.logger.info("* Annotation not complete *")

        # Print out what features were missing by the ref
        if self.verbose and self.verbosity > 2:
            self.logger.info("Refseq was missing these features = " +
                             ",".join(list(refmissing)))

        # Print out what features were ambig matches
        if self.verbose and self.verbosity > 1 and len(ambig_map) > 1:
            self.logger.info("Features with ambig matches = " +
                             ",".join(list(ambig_map)))

        # Print out what features were exact matches
        if self.verbose and self.verbosity > 2 and len(exact_matches) > 1:
            self.logger.info("Features exact matches = " +
                             ",".join(list(exact_matches)))

        # Print out what features have been annotated
        if self.verbose and self.verbosity > 1 and len(
                annotated_feats) > 1:
            self.logger.info("Features annotated = " +
                             ",".join(list(annotated_feats)))

        # Print out what features are missing
        if self.verbose and self.verbosity > 1 and len(feat_missing) > 1:
            self.logger.info("Features missing = " +
                             ",".join(list(feat_missing)))

        annotation = Annotation(features=annotated_feats,
                                covered=seq_covered,
                                seq=in_seq,
                                missing=feat_missing,
                                ambig=ambig_map,
                                blocks=mb,
                                method=method,
                                refmissing=refmissing,
                                mapping=mapping,
                                exact_match=exact_matches,
                                annotation=None)
    else:

        mb = None
        # Unmap exon 8
        # (here only for inputs shorter than 600 where three_prime_UTR
        # was annotated without being an exact match)
        if locus in ['HLA-C', 'HLA-A'] and len(in_seq.seq) < 600 \
                and 'exon_8' in exact_matches \
                and 'three_prime_UTR' in annotated_feats\
                and 'three_prime_UTR' not in exact_matches:

            for i in deleted_coords:
                mapping[i] = 1

            coordinates.update(deleted_coords)
            mb = getblocks(coordinates)
            feat_missing.update(added_feat)
            del exact_matches[exact_matches.index('exon_8')]
            del found_feats['exon_8']
            if 'exon_8' in annotated_feats:
                del annotated_feats['exon_8']
            if 'three_prime_UTR' in found_feats:
                del found_feats['three_prime_UTR']
            if 'three_prime_UTR' in annotated_feats:
                del annotated_feats['three_prime_UTR']

        if self.verbose:
            self.logger.info("* No missing blocks after seq_search *")

        # Print out what features were ambig matches
        if self.verbose and self.verbosity > 0 and len(ambig_map) > 1:
            self.logger.info("Features with ambig matches = " +
                             ",".join(list(ambig_map)))

        # Print out what features were exact matches
        if self.verbose and self.verbosity > 0 and len(exact_matches) > 1:
            self.logger.info("Features exact matches = " +
                             ",".join(list(exact_matches)))

        # Print out what features have been annotated
        if self.verbose and self.verbosity > 0 and len(
                annotated_feats) > 1:
            self.logger.info("Features annotated = " +
                             ",".join(list(annotated_feats)))

        # Print out what features are missing
        if self.verbose and self.verbosity > 0 and len(feat_missing) > 1:
            self.logger.info("Features missing = " +
                             ",".join(list(feat_missing)))

        annotation = Annotation(features=annotated_feats,
                                covered=seq_covered,
                                seq=in_seq,
                                missing=feat_missing,
                                ambig=ambig_map,
                                method=method,
                                blocks=mb,
                                mapping=mapping,
                                exact_match=exact_matches,
                                annotation=None)

    return annotation
# Running tallies: reads seen, spacers written, reads rejected.
totalreads = 0
totalspacers = 0
totalbadspacers = 0


def my_rev_complement(seq):
    """Return the reverse complement of *seq* as a Seq object."""
    return Seq(seq).reverse_complement()


# For every read, look for repB and then repE.  When both are found, the
# stretch from 9 characters past the first repB hit up to the first repE
# hit is reverse complemented and written as a numbered FASTA entry.
# Every other read is copied to the bad-spacer file.
for seq in input:
    totalreads += 1
    if totalreads % 100000 == 0:
        # progress report
        print(totalreads)

    # nt_search returns [pattern, hit, ...]; more than one element
    # means at least one hit.
    posBf = nt_search(seq, repB)
    if len(posBf) > 1:
        posEf = nt_search(seq, repE)
        has_spacer = len(posEf) > 1
    else:
        has_spacer = False

    if not has_spacer:
        totalbadspacers += 1
        badspacers.write(seq + "\n")
        continue

    spacer = seq[posBf[1] + 9:posEf[1]]
    spacer_rev = my_rev_complement(spacer)
    totalspacers += 1
    output.write(">" + str(totalspacers) + "\n" + str(spacer_rev) + "\n")
def match_target(self, pam):
    """Tell whether *pam* occurs in ``self.get_target(len(pam))``.

    :param pam: pattern handed to ``nt_search``
    :returns: ``True`` if ``nt_search`` reports at least one hit,
        otherwise ``False``
    """
    target_text = str(self.get_target(len(pam)))
    # Element 0 of the result is the pattern; the rest are hit positions.
    hit_positions = nt_search(target_text, pam)[1:]
    return bool(hit_positions)
# NOTE(review): Python 2 fragment (print statements) taken from the inside
# of a larger function; `structure`, `start`, `end`, `aln`, `score`,
# `product`, `verbose` and `pref_code` are defined outside this excerpt.
print structure, start, end
print aln, score
# Now guess the anticodon and possible_aa based on the pref code table
# (a dictionary).
left, right, not_bad = acodon_loop(structure)
# Parallel lists, filled in the same order for every candidate found below.
possible_aa = []
possible_anticodon = []
possible_offset = []
possible_structure = []
no_left_arm = 'S' in product
if not_bad:
    # The bad case happens if the anticodon loop is not found; the record
    # will then not be stored.
    # Try to minimize those by improving the detection.
    if verbose:
        print 'not bad',
    for aa in pref_code:
        # Search only the aln[left:right] window for this entry's pattern.
        hits = nt_search(aln[left:right], pref_code[aa])
        # hits[0] is the pattern, so more than one element means a hit.
        if len(hits) > 1:
            possible_aa.append(aa)
            # The first hit is relative to the window; add `left` to get
            # the offset within aln.
            offset = int(hits[1]) + left
            # offset = int(hits[1]) + left + 1  ## this is not used internally so must be in non-pythonic coordinates! Add "1" for pythonic god!
            possible_offset.append(offset)
            # Three characters starting one past the hit offset.
            possible_anticodon.append(aln[offset + 1:offset + 4])
            if verbose:
                print aa, aln[offset + 1:offset + 4],
# Ended up with 3 lists in the same order:
#   one with the possible aa [0],
#   the second with the respective (real) anticodons [4],
#   the third with their relative positions [6].
# Make a separate record for each valid aa+anticodon combination,
# avoiding the need for sanitization of product.
# Reverse complement of the second sequence; both reverse complements are
# then printed (the first one is computed outside this excerpt).
rev_complemento_sequencia2 = sequencia2.reverse_complement()
print("Complemento reverso da Sequência 1: %s" % rev_complemento_sequencia1)
print("Complemento reverso da Sequência 2: %s" % rev_complemento_sequencia2)

#################### FLOW OF GENETIC INFORMATION ###########################
#####                                                                  #####
################### Transcription #################
rna_sequencia1 = sequencia1.transcribe()
print("RNA da Sequência 1: %s" % rna_sequencia1)

#################### Reverse transcription #################
dna2 = rna_sequencia1.back_transcribe()
# Print the original sequence next to its back-transcribed copy.
print("Sequência 1 original\tSequência 1 após transcrição reversa")
print("%s\t%s" % (sequencia1, dna2))

################### Translation #######################
# Possible errors: the sequence length is not a multiple of 3.
# NOTE(review): the first label below mentions the RNA, but the value
# printed is the translation of sequencia1, not of rna_sequencia1.
proteina_sequencia1 = sequencia1.translate()
print("Sequência de aminoácidos do RNA de Sequência 1: %s" % proteina_sequencia1)
print("Sequência de aminoácidos da Sequência 1: %s" % sequencia1.translate())

## GC content
print(GC(sequencia1))

## searching for a subsequence
print(nt_search(str(sequencia1), "TCGA"))