def check(self, seqs_as_strs, alpha):
    """Create a motif from ``seqs_as_strs`` twice and render a logo for each.

    The motif is built first from ``Seq`` objects and then from the plain
    strings; ``alpha`` is passed to ``motifs.create`` both times.  Each logo
    is written to ``os.devnull``, so nothing is kept from the rendering.
    """
    as_seq_objects = [Seq(text) for text in seqs_as_strs]
    for instances in (as_seq_objects, seqs_as_strs):
        motif = motifs.create(instances, alpha)
        motif.weblogo(os.devnull)
def sequence_content_analysis(metrics, whitelist, barcode="r1", head=5000000):
    """Plot per-position base content of whitelisted vs. other barcodes.

    The first ``head`` rows of ``metrics`` are used; the values of index
    level ``barcode`` are split by membership in ``whitelist``.  Barcodes
    containing "N" are dropped.  Three heatmaps are drawn (whitelisted PWM,
    non-whitelisted PWM, log2 ratio of the two) and saved as an SVG whose
    name starts with the module-level ``args.output_prefix``.
    """
    from Bio import motifs
    from Bio.Seq import Seq

    def pwm_frame(barcodes):
        # PWM of all barcodes without an ambiguous "N".
        motif = motifs.create([Seq(code) for code in barcodes if "N" not in code])
        return pd.DataFrame(motif.pwm)

    observed = metrics.head(head).index.get_level_values(barcode).to_series()
    whitelisted = observed.isin(whitelist.tolist())
    correct_pwm = pwm_frame(observed[whitelisted])
    wrong_pwm = pwm_frame(observed[~whitelisted])
    log_ratio = np.log2(wrong_pwm / correct_pwm)

    fig, axes = plt.subplots(1, 3, figsize=(3 * 3, 3))
    panels = (
        (correct_pwm, "Correct", {"square": True}),
        (wrong_pwm, "Wrong", {"square": True}),
        (
            log_ratio,
            "Difference",
            {"square": True, "cmap": "RdBu_r", "center": 0, "robust": True},
        ),
    )
    for ax, (frame, title, options) in zip(axes, panels):
        sns.heatmap(frame.T, ax=ax, **options)
        ax.set_title(title)
    fig.savefig(
        args.output_prefix + f"barcode_{barcode}.sequence_content.svg",
        bbox_inches="tight",
        dpi=300,
    )
def check(self, seqs_as_strs, alpha):
    """Create a motif from ``seqs_as_strs`` three ways and render each logo.

    1. ``Seq`` objects carrying ``alpha``, with ``alpha`` also passed on.
    2. ``Seq`` objects carrying ``alpha``, without passing an alphabet.
    3. Plain strings, with ``alpha`` passed on.

    Every logo is written to ``os.devnull``.
    """
    def render(instances, *create_args):
        motifs.create(instances, *create_args).weblogo(os.devnull)

    render([Seq(text, alpha) for text in seqs_as_strs], alpha)
    render([Seq(text, alpha) for text in seqs_as_strs])
    render(seqs_as_strs, alpha)
def createMotif(sequences, fname):
    '''
    Creates and saves a motif (logo) from a list of input sequences.

    Input: list of strings with the sequences; file name of the logo, which
    is written inside the ``figure`` directory (created when missing).
    Output: an SVG image file with the logo.

    Sequences shorter than 40 characters are echoed to stdout.  When the
    logo request fails with HTTP status 500 it is retried after a five
    second pause; any other HTTP error is re-raised.
    '''
    print('Generating motif ' + timeStamp())
    try:
        os.makedirs('figure')
    except OSError:
        # Only an already-existing directory is acceptable here.
        if not os.path.isdir('figure'):
            raise
    from Bio.Seq import Seq
    from Bio import motifs
    from Bio.Alphabet import IUPAC
    import urllib2

    instances = []
    for sequence in sequences:
        if len(sequence) < 40:
            print(sequence)
        instances.append(Seq(sequence, alphabet=IUPAC.ambiguous_dna))
    m = motifs.create(instances)
    flogo = 'figure/' + fname
    while True:
        # source: http://stackoverflow.com/a/9986206/1274242
        try:
            m.weblogo(flogo, format='SVG')
            break
        except urllib2.HTTPError as detail:
            # The HTTP status is carried by ``code``; ``errno`` does not hold
            # it, so comparing ``errno`` with 500 never triggers the retry.
            if detail.code == 500:
                time.sleep(5)
                continue
            raise
def create_motif_from_fasta_file(fasta_filename, out_filename, generate_pssm=False):
    """Render a protein motif logo (PDF) from the sequences of a FASTA file.

    :param fasta_filename: FASTA file whose sequences become the motif
        instances (read with ``IUPAC.protein``).
    :param out_filename: path handed to ``weblogo`` for the PDF logo.
    :param generate_pssm: when true, also dump ``m.pwm`` as tab-separated
        text, one matrix row per line, to ``out_filename`` minus its last
        ten characters plus ``_pssm.txt``.
    """
    with open(fasta_filename) as in_handle:
        instances = [Seq(seq, IUPAC.protein)
                     for _title, seq in SimpleFastaParser(in_handle)]
    m = motifs.create(instances, IUPAC.protein)
    m.weblogo(out_filename, show_xaxis=False, show_yaxis=False,
              show_errorbars=False, unit_name='', show_fineprint=False,
              format='pdf')
    if generate_pssm:
        # The context manager closes the file even if a write fails.
        with open(out_filename[:-10] + "_pssm.txt", "w+") as pssm_file:
            for i in range(len(m.pwm)):
                row = m.pwm[i]
                for j in range(len(row)):
                    pssm_file.write(str(row[j]))
                    pssm_file.write("\t")
                pssm_file.write("\n")
def __init__(self, sites, TF, name, pseudocounts=1):
    """Store ``TF`` and ``name`` and build the motif for ``sites``.

    Each site becomes a ``Seq`` with the ``unambiguous_dna`` alphabet.  The
    motif receives ``pseudocounts`` and is named
    ``<TF accession number>(<name>)``.
    """
    self._TF = TF
    self._name = name
    self._motif = motif = motifs.create(
        [Seq(site, unambiguous_dna) for site in sites]
    )
    motif.pseudocounts = pseudocounts
    motif.name = self.TF.accession_number + '(%s)' % self.name
def create_motifs(tf_instance_id, collect_tf_motifs):
    '''
    Build a motif object for one TF instance from its aligned binding sites.

    Parameters
    ----------
    tf_instance_id: String
        Value compared with each entry's 'tf_instance' field.
    collect_tf_motifs: dict
        Its 'all_motifs' item is the list of entries that is searched; each
        entry provides 'tf_instance' and 'aligned_binding_sites'.

    Returns
    -------
    motif: motifs object
        Motif created from the first matching entry's aligned binding
        sites; its name is set to that entry's tf instance id.
    -1: Int
        Returned when no entry matches tf_instance_id.
    '''
    matches = [
        candidate
        for candidate in collect_tf_motifs['all_motifs']
        if candidate['tf_instance'] == tf_instance_id
    ]
    if not matches:
        return -1
    first = matches[0]
    motif = motifs.create(first['aligned_binding_sites'])
    motif.name = first['tf_instance']
    return motif
def compute_positional_weight_matrix(seqs, length=None, pseudocounts=0.5):
    """Return the normalised position matrix of ``seqs`` cut to one length.

    :param seqs: sequences, each truncated to ``length`` characters.
    :param length: truncation length; when falsy, the shortest sequence
        length is used.
    :param pseudocounts: forwarded to ``counts.normalize``.
    """
    width = length if length else min(len(s) for s in seqs)
    protein = IUPAC.protein
    trimmed = [Seq(s[:width], alphabet=protein) for s in seqs]
    motif = motifs.create(trimmed, alphabet=protein)
    return motif.counts.normalize(pseudocounts=pseudocounts)
def createlogo(infiles, outfile, title, scale): """ Creates a sequence logo in PNG format from a textfile of aligned sequences in flat format. Arguments: - infile - name of the flat format alignment file. - outfile - name of the png logo file. - title - title for the logo. - scale - yaxis scale, usually 1.0 return: None """ # input a text file with all sequences aligned in flat format. for fil in infiles: instances = [] for line in fil: # for each line in the file create a seq-object from it # and append this object to a list. instances.append(Seq(line.strip(), alphabet=IUPAC.ambiguous_dna)) # create a motif-object. The last sequence in instances # is empty, so skip it. m = motifs.create(instances[:-1], alphabet=IUPAC.ambiguous_dna) # create the sequence logo m.weblogo(outfile, logo_title=title, yaxis_scale=scale, stack_width="large") #,alphabet='ambiguous_dna_alphabet')
def createPSSM():
    """Build a PSSM from the sequences stored in the ``fastatmp`` FASTA file.

    The first sequence is also submitted to BLAST (``blastn`` against
    ``nt``) and the raw response is saved to ``my_blast.xml``.

    Returns the log-odds matrix of the motif counts normalised with a
    pseudocount of 0.25.  Progress messages and the matrix are printed.
    """
    print("Start PSSM")
    sequences = [
        str(seq_record.seq)
        for seq_record in SeqIO.parse("fastatmp", "fasta", IUPAC.unambiguous_dna)
    ]
    # Blast typical sequence
    result_handle = NCBIWWW.qblast("blastn", "nt", sequences[0])
    try:
        with open("my_blast.xml", "w") as save_file:
            save_file.write(result_handle.read())
    finally:
        # Release the BLAST handle even when saving the result fails.
        result_handle.close()
    m = motifs.create(sequences, alphabet=Gapped(IUPAC.unambiguous_dna))
    print("motif created")
    pwm = m.counts.normalize(pseudocounts=0.25)
    print("PWM done")
    pssm = pwm.log_odds()
    print("PSSM done")
    print(pssm)
    return pssm
def main(*args, **kwargs):
    """Print the consensus and the per-base count rows of a FASTA file.

    The last positional argument is joined onto the current working
    directory and parsed as FASTA; ``kwargs`` is not used.  Output is the
    consensus on one line followed by one row each for A, C, G and T.
    """
    fpath = os.path.join(os.getcwd(),args[-1])
    instances = list()
    for record in SeqIO.parse(str(fpath),'fasta'):
        instances.append(record.seq)
    m = motifs.create(instances)
    consensus = m.consensus
    print consensus
    profile = m.counts
    # Python 2 print statements: the trailing comma suppresses the newline,
    # so each base label and its counts stay on one line; the leading '\n'
    # in the next label starts the following row.
    print 'A:',
    for elem in profile['A']:
        print elem,
    print '\nC:',
    for elem in profile['C']:
        print elem,
    print '\nG:',
    for elem in profile['G']:
        print elem,
    print '\nT:',
    for elem in profile['T']:
        print elem,
def forcemotif(sequences):
    """Return a motif built from ``sequences``, or ``[]`` when it is empty."""
    from Bio import motifs
    from Bio.Seq import Seq

    if len(sequences) == 0:
        return []
    return motifs.create(sequences)
def printAlignmentInfo(alignment, alphabet):
    """Print summary information about ``alignment``.

    Printed, in order: the alignment itself, the first record's description
    and sequence, the alignment length, the normalised motif matrix and its
    consensus, the position specific score matrix from
    ``AlignInfo.SummaryInfo`` (computed against the motif consensus,
    ignoring 'N'), and the ``dumb_consensus`` of the summary.
    """
    sequences = [record.seq for record in alignment]
    pwm = motifs.create(sequences, alphabet).counts.normalize()
    consensus = pwm.consensus

    summary_align = AlignInfo.SummaryInfo(alignment)
    consensus2 = summary_align.dumb_consensus()
    my_pssm = summary_align.pos_specific_score_matrix(
        consensus, chars_to_ignore=['N']
    )

    first_record = alignment
    print(first_record)
    print('first description: %s' % alignment[0].description)
    print('first sequence: %s' % alignment[0].seq)
    print('length %i' % alignment.get_alignment_length())
    print('matrix pwm %s' % pwm)
    print('consensus (motifs) %s' % consensus)
    print('matrix pssm %s' % my_pssm)
    print('consensus (AlignInfo.SummaryInfo) %s' % consensus2)
    return
def motif2bed(motif, fasta, reverse_strand=True, output=()):
    """Report where ``motif`` occurs in the records of a FASTA file.

    Each hit becomes a row ``[chrom, start, stop, motif, ".", strand]`` of
    strings, where ``chrom`` is the record id, ``start`` the position
    reported by ``instances.search`` and ``stop`` is ``start`` plus the hit
    length.  For every record the forward-strand hits ("+") come first,
    followed by the reverse-complement hits ("-") when ``reverse_strand``
    is true.

    :param motif: motif sequence as a string.
    :param fasta: path of the FASTA file to scan.
    :param reverse_strand: also search with the reverse-complemented motif.
    :param output: when truthy, path of a tab-separated file to write the
        rows to; the function then returns ``None``.
    :returns: the list of rows when ``output`` is falsy.
    """
    forward = motifs.create([Seq(motif)])
    # Build the reverse complement once instead of once per record.
    searches = [("+", forward)]
    if reverse_strand:
        searches.append(("-", forward.reverse_complement()))

    motif_bed = []
    # The context manager closes the FASTA handle once parsing is done.
    with open(fasta, "r") as handle:
        for record in SeqIO.parse(handle, "fasta"):
            chrom = str(record.id)
            sequence = str(record.seq)
            for strand, pattern in searches:
                for pos, seq in pattern.instances.search(sequence):
                    motif_bed.append([
                        chrom, str(pos), str(pos + len(seq)),
                        str(motif), ".", strand
                    ])
    if output:
        with open(output, 'w') as file:
            file.writelines('\t'.join(i) + '\n' for i in motif_bed)
    else:
        return motif_bed
def create_nucleotide_frequency_from_fasta(path_to_fasta, sequence_length=None, verbose=False):
    """Create a pandas.DataFrame with the frequency of each nucleotide on each position.

    Sequences are upper-cased, "U" is replaced by "T", and they are stored
    as IUPAC.ambiguous_dna; only the columns for A, C, T and G are returned.

    :param path_to_fasta: path to the fasta file
    :param sequence_length: assumed length of the input sequences. If not
        provided, the length of the first sequence is used (and reported on
        stderr). Sequences of any other length are ignored.
    :param verbose: report every ignored sequence on stderr
    :returns: 2-tuple: pandas.DataFrame, Bio.motifs.Motif
    """
    if not sequence_length:
        for rec in SeqIO.parse(path_to_fasta, 'fasta'):
            sequence_length = len(rec.seq)
            break
        sys.stderr.write("Presumed sequence length was not provided. First encounterd sequence length" +
                         " will be used i.e. %i\n" % sequence_length)

    kept = []
    total = 0
    for total, rec in enumerate(SeqIO.parse(path_to_fasta, 'fasta'), 1):
        if len(rec.seq) == sequence_length:
            normalised = str(rec.seq).upper().replace("U", "T")
            kept.append(Seq.Seq(normalised, IUPAC.ambiguous_dna))
            continue
        if verbose:
            sys.stderr.write("%s has wrong length (%i)." % (rec.id, len(rec.seq)) +
                             " It will be ignored.\n")
    # Every record that was not kept was skipped for its length.
    skipped = total - len(kept)

    motifs_obj = motifs.create(kept, IUPAC.ambiguous_dna)
    frequency_df = DataFrame(motifs_obj.pwm)[["A", "C", "T", "G"]]
    sys.stderr.write("%i sequences out of %i was ignored because of the length issue.\n" %
                     (skipped, total))
    return frequency_df, motifs_obj
def setUp(self):
    """Create the seven-instance RNA motif stored as ``self.m``."""
    sites = ("UACAA", "UACGC", "UACAC", "UACCC", "AACCC", "AAUGC", "AAUGC")
    self.m = motifs.create(
        [Seq(site) for site in sites],
        alphabet=IUPAC.unambiguous_rna,
    )
def setUp(self):
    """Create the seven-instance DNA motif (extended alphabet) as ``self.m``."""
    sites = ("TACAA", "TACGC", "TACAC", "TACCC", "AACCC", "AATGC", "AATGC")
    self.m = motifs.create(
        [Seq(site) for site in sites],
        alphabet=IUPAC.extended_dna,
    )
def recursive_random(instances, motiflength, records):
    """
    Main routine of the sampler: repeatedly leave one instance out, rebuild
    the profile from the rest, replace the left-out instance with the best
    match found in its source sequence, and recurse while the score improves.

    Relies on the module-level ``gapSize``, ``gapList`` and ``Config``.

    NOTE(review): the nesting below was reconstructed from a flattened
    source -- confirm in particular that the scoring/recursion step sits
    after the outer loop and that ``instances[seq_index]`` is assigned
    inside the inner loop.

    :param instances: the current motif instances (modified in place)
    :param motiflength: the motif length you are searching for
    :param records: the original sequences
    :return: tuple ``(motif, total, gapSize)`` once the score stops decreasing
    """
    global gapSize
    old_total = check_solution(instances)
    # One leave-one-out round per instance; the left-out instance is drawn
    # at random, so idx/instance themselves are not used.
    for idx, instance in enumerate(instances):
        train_instances = copy.deepcopy(instances)
        leave_out = random.choice(train_instances)
        seq_index = instances.index(leave_out)
        # Print the left-out instance, with "-" inserted at the recorded gap
        # position unless gaps are disabled or the recorded position is 0 or 8.
        # NOTE(review): meaning of the literal 8 is not visible here -- confirm.
        if Config.max_gapsize == 0 or (gapList[seq_index] == 0 or gapList[seq_index] == 8):
            print("Leaving out %s" % leave_out)
        else:
            print("Leaving out %s" % leave_out[0:gapList[seq_index]] + "-" + leave_out[gapList[seq_index]:])
        train_instances.remove(leave_out)
        train_motifs = motifs.create(train_instances)
        profile = create_pssm(train_motifs)
        # Search only the sequence the left-out instance is associated with.
        leftseqs = [records[seq_index]]
        new_instances = get_best_matches(leftseqs, profile, motiflength, seq_index)
        print("new best instance:")
        for new_instance in new_instances:
            if Config.max_gapsize == 0 or (gapList[seq_index] == 0 or gapList[seq_index] == 8):
                print(new_instance)
            else:
                print("gapsize: " + str(gapSize))
                print((new_instance[0:gapList[seq_index]] + "-" + new_instance[gapList[seq_index]:]))
            instances[seq_index] = new_instance
    # printing the result from this iteration
    total = check_solution(instances)
    profile = create_pssm(motifs.create(instances))
    print("new solution: %d" % total)
    print("new profile: ")
    print(profile)
    # Check if there is no regression, if not continue the recursion else stop the program
    if total < old_total:
        return recursive_random(instances, motiflength, records)
    else:
        motif = motifs.create(instances)
        return (motif, total, gapSize)
def findmotif(s, t):
    """Print every location of ``t`` within ``s`` using 1-based numbering.

    A single-instance motif is built from ``t`` and searched for in ``s``.
    Each hit's start position is printed followed by a space, all on one
    line; nothing is returned.
    """
    pattern = motifs.create([Seq(t)])
    for pos, _match in pattern.instances.search(Seq(s)):
        print(str(pos + 1), end=" ")
def setUp(self):
    """Create the seven-instance protein motif stored as ``self.m``."""
    sites = ("ACDEG", "AYCRN", "HYLID", "AYHEL", "ACDEH", "AYYRN", "HYIID")
    self.m = motifs.create(
        [Seq(site) for site in sites],
        alphabet=IUPAC.extended_protein,
    )
def main():
    """Print the consensus of ``cons.fna`` and ``genCount`` for each base.

    The sequences of the FASTA file form one motif; its consensus is printed
    first, followed by the ``genCount`` result for A, C, G and T in turn.
    """
    seqs = [record.seq for record in SeqIO.parse('cons.fna', 'fasta')]
    m = motifs.create(seqs)
    print(m.consensus)
    for base in ('A', 'C', 'G', 'T'):
        print(genCount(base, seqs, len(m)))
def out():
    """Build a motif from the FASTA file named in ``e1`` and close ``win``.

    Every record is cut to its first ``int(lens)`` positions before the
    motif is created.  ``e1``, ``lens`` and ``win`` are module-level names.
    """
    infile = e1.get()
    list2 = [
        record.seq[0:int(lens)]
        for record in SeqIO.parse(str(infile), "fasta")
    ]
    m = motifs.create(list2)
    win.destroy()
def get_consenus(data):
    """Print the consensus and the count rows for the sequences in ``data``.

    ``data`` is iterated and indexed by each item it yields (a mapping of
    names to sequence strings fits this access pattern).  After the
    consensus, one ``<letter>: <counts>`` line is printed per matrix row.
    """
    m = motifs.create([Seq(data[key]) for key in data])
    print(m.consensus)
    for letter, counts in m.counts.items():
        print(letter + ': ' + ' '.join(map(str, counts)))
def sym_permute_motif(cur_motif_sites):
    """Return a motif built from ``sym_permute_sites(cur_motif_sites)``.

    The permuted sites are wrapped as unambiguous-DNA ``Seq`` objects; the
    new motif gets a pseudocount of 1 and its background set to ``None``.
    """
    permuted = [
        Seq(site, IUPAC.unambiguous_dna)
        for site in sym_permute_sites(cur_motif_sites)
    ]
    new_motif = motifs.create(permuted)
    new_motif.pseudocounts = 1
    new_motif.background = None
    return new_motif
def create_logo_from_fasta(fasta_path, logo_path):
    """Render a logo for the sequences of a FASTA file and return 0.

    Sequences are upper-cased; any sequence containing "N" or "Y" is left
    out.  The logo is written to ``logo_path`` and that path is announced
    on stdout.
    """
    sequences = []
    for record in SeqIO.parse(fasta_path, "fasta"):
        seq = record.seq.upper()
        text = str(seq)
        if "N" in text or "Y" in text:
            continue
        sequences.append(seq)
    motif = motifs.create(sequences)
    motif.weblogo(logo_path)
    print("Logo is available in " + logo_path)
    return 0
def setUp(self):
    """Create the seven-instance DNA motif stored as ``self.m``."""
    sites = ("TACAA", "TACGC", "TACAC", "TACCC", "AACCC", "AATGC", "AATGC")
    self.m = motifs.create([Seq(site) for site in sites])
def set_motifs(self, ppath):
    """Store the A/C/G/T rows of the PWM for the FASTA file ``ppath``.

    The sequences come from ``self.get_sequences_from_fasta``; the result
    is kept in ``self.mot`` as a dict keyed by base letter.
    """
    from Bio import motifs

    pwm_source = motifs.create(self.get_sequences_from_fasta(ppath))
    self.mot = {base: pwm_source.pwm[base] for base in ('A', 'C', 'G', 'T')}
def getMotif(new_rnglist):
    """Build an unambiguous-DNA motif from item 4 of every entry.

    Entries whose sequence contains an upper-case 'N' are skipped; the
    remaining sequences are upper-cased.  The motif's pseudocounts are set
    to 0.5 before it is returned.
    """
    usable = [
        entry[4].upper()
        for entry in new_rnglist
        if 'N' not in entry[4]
    ]
    clip_motif = motifs.create(usable, alphabet=IUPAC.unambiguous_dna)
    clip_motif.pseudocounts = 0.5
    return clip_motif
def create_PWMdict(clusterdict):
    """Map every key of ``clusterdict`` to a one-element list with its PWM.

    Each value is converted with ``trans_motif`` into motif instances; the
    counts are normalised with a pseudocount of 0.
    """
    PWMdict = {}
    for key, value in clusterdict.items():
        m = motifs.create(trans_motif(value))
        # setdefault creates the list the first time a key is seen.
        PWMdict.setdefault(key, []).append(m.counts.normalize(pseudocounts=0))
    return PWMdict
def main(args):
    """Consensus and Profile.

    Reads the FASTA file ``args.dataset`` (as ``generic_dna``), prints the
    motif consensus and then one ``<base>: <counts>`` line for A, C, G, T.
    """
    records = SeqIO.parse(args.dataset, 'fasta', generic_dna)
    profile = motifs.create([record.seq for record in records])
    print(profile.consensus)
    for base in 'ACGT':
        counts = ' '.join(map(str, profile.counts[base]))
        print(base + ':', counts)
def visSeq(entry,degree,name):
    '''
    Visualize one single entry of the format (count,seqes[DEGREE],gaps,dirs).

    For every index below ``degree`` a motif is built from ``entry[1][i]``
    and its logo is written to ``<name><i>.png``.
    '''
    for i in range(degree):
        m = motifs.create([Seq(seq) for seq in entry[1][i]])
        m.weblogo(
            name+str(i)+'.png',
            format='PNG',
            stack_width='large',
            unit_name='probability',
            resolution='300',
            color_scheme='color_classic',
        )
def check_solution(instances):
    """
    Score a list of motif instances.

    :param instances: the motif instances
    :return: the sum of each instance's score against the PSSM built from
             all of them; the lower this score the better
    """
    pssm = create_pssm(motifs.create(instances))
    return sum(pssm.calculate(instance) for instance in instances)
def find(dic_all, ik):
    """
    Search each entry's piRNA for reverse-complemented k-mers of its target.

    take dic from read_fasta()
    -cuts every 'target' sequence into windows of length ``ik``
     (windows starting on 'N' are skipped)
    -builds a motif from the windows, reverse-complements it and searches
     the entry's 'piRNA' sequence with it
    -records the 1-based hit positions per entry key

    Returns a DataFrame with one column named '<ik>-mer' holding the
    recorded positions; also prints the window count and the number of
    recorded hits.

    NOTE(review): the nesting below was reconstructed from a flattened
    source -- confirm it against the original file.
    """
    dic = dic_all
    dic_instances = {}
    # NOTE(review): `it` is not used anywhere in this function.
    it = [2, 8, 14]
    ttemp = pd.DataFrame()
    count = 0
    for key, value in dic.items():
        for t, p in value.items():
            if t == 'piRNA':
                # Assumes 'piRNA' is visited before 'target' within each
                # entry; otherwise `temp` is stale or undefined -- TODO confirm.
                temp = p
            if t == 'target':
                instances = []
                for i in range(len(p)):
                    if i < len(p) - (ik + 1):
                        if p[i] == 'N':
                            pass
                        else:
                            kk = i + ik
                            count += 1
                            instances.append(Seq(p[i:kk]))
                m = motifs.create(instances)
                r = m.reverse_complement()  # reverse
                ll = 0
                for pos, seq in r.instances.search(temp):
                    # First hit is stored under the plain key, later hits
                    # under the key suffixed with a running counter.
                    if key not in dic_instances.keys():
                        dic_instances[key] = pos + 1
                    else:
                        dic_instances[key + str(ll)] = pos + 1
                        ll += 1
    tt = pd.DataFrame.from_dict(dic_instances, orient='index')
    # Rename the single value column 0 to '<ik>-mer'.
    tt['{}-mer'.format(ik)] = tt[0]
    tt = tt.drop(axis=1, labels=0)
    ttemp = pd.concat([ttemp, tt], axis=1, sort=False)
    print('instances:', count)
    print(len(dic_instances))
    return ttemp
def rename_tree2(path):
    """Rename clades of ``tree2`` after the consensus of their leaves.

    For each clade name in ``path`` the matching common ancestor is looked
    up in the module-level ``tree``.  The second "_"-separated field of
    every terminal name is taken as an RNA sequence, and the degenerate
    consensus of those sequences becomes the name of the clade in the
    module-level ``tree2`` that carries the ancestor's name (if any).
    """
    for clade_name in [x.name for x in path]:
        c = tree.common_ancestor({"name": "%s" % clade_name})
        leaf_names = [leaf.name for leaf in c.get_terminals()]
        sequences = [leaf_name.split('_')[1] for leaf_name in leaf_names]
        for sequence in sequences:
            # Result is discarded; only the construction itself is kept.
            Seq(sequence, alphabet=IUPAC.unambiguous_rna)
        m = motifs.create(sequences, alphabet=IUPAC.unambiguous_rna)
        target = tree2.find_any({"name": "%s" % c.name})
        if target is None:
            continue
        target.name = "%s" % m.degenerate_consensus
def calcutaltePssm(self, filePath):
    """Build, print and return the PSSM for the FASTA file ``filePath``.

    The upper-cased sequences form a motif whose logo is written to
    ``<file base name>Logo.png``.  The counts are normalised with a
    pseudocount of 0 and converted to log-odds.
    """
    instance = [
        Seq(str(seq_record.seq).upper())
        for seq_record in SeqIO.parse(filePath, "fasta")
    ]
    # wmm
    m = motifs.create(instance)
    base_name = os.path.splitext(os.path.basename(filePath))[0]
    m.weblogo(base_name + "Logo.png")
    # pssm
    pssm = m.counts.normalize(pseudocounts=0).log_odds()
    print(pssm)
    return pssm
def run(records, motiflength):
    """Start the sampler from random instances and return its result.

    Random instances of size ``motiflength`` are drawn from ``records``;
    the initial motif and its profile are printed, then
    ``recursive_random`` is run and its return value handed back.
    """
    # create random instances of given motif size
    instanceref = get_random_instances(records, motiflength)
    print("found %d sequences" % len(instanceref))
    print("Got random instances:")
    initial_motif = motifs.create(instanceref)
    print(initial_motif)
    print("Starting profile")
    print(create_pssm(initial_motif))
    return recursive_random(instanceref, motiflength, records)
def createMotif(self, file_handle):
    """Print the motif counts for a FASTA handle and save its logo.

    Every record is re-wrapped as a ``Seq``; ``self.sequence`` ends up
    holding the last one.  The logo goes to ``<self.file_session>_logo.png``.
    """
    records = parse(file_handle, "fasta")
    logofile = self.file_session + "_logo.png"
    seqs_motif = []
    for record in records:
        current = Seq(str(record.seq))
        self.sequence = current
        seqs_motif.append(current)
    motif = motifs.create(seqs_motif)
    print(motif.counts)
    motif.weblogo(logofile)
    print("Weblogo saved.")
def motif_find(dna, motif_to_search):
    """Finds all instances of a motif in a string, returns str of start positions.

    Every hit is printed as ``<1-based start>, <matched sequence>``.  The
    start positions are then printed, and returned, as one string in which
    each position is followed by a single space.
    """
    m = motifs.create([Seq(motif_to_search)])
    starts = []
    for pos, seq in m.instances.search(dna):
        print("{0}, {1}".format(pos+1, seq))
        starts.append("{0} ".format(pos+1))
    listOfStartPostitions = "".join(starts)
    print(listOfStartPostitions)
    # Return the string itself: ``return print(...)`` always yielded None.
    return listOfStartPostitions
def create_pwm_arrays_from_grads(self, result_dir, data_dir, window_size=12, batch_size=128):
    """Write a PWM built from the most salient window of each sequence.

    Reads ``saliency.seqs`` (first tab-separated column per line) and
    ``saliency.scores`` (one row of numbers per sequence) from
    ``<result_dir>/best_config``.  For every sequence the window of
    ``window_size`` positions with the largest summed absolute score is
    extracted.  Windows scoring at or above the 80th percentile are turned
    into a motif whose A, C, G and T PWM rows are written, space-separated,
    to ``numpy.pwm`` in the same directory.

    ``data_dir`` and ``batch_size`` are accepted but not used here.
    """
    from Bio import motifs
    from Bio.Seq import Seq

    config_dir = os.path.join(result_dir, 'best_config')

    # load sequences from file
    with open(os.path.join(config_dir, 'saliency.seqs'), 'r') as f:
        seqs = [line[0] for line in csv.reader(f, delimiter='\t')]

    # identify windows of highest saliency for each sequence & extract subsequences
    salient_seqs = []
    max_scores = []
    with open(os.path.join(config_dir, 'saliency.scores'), 'r') as f:
        reader = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONNUMERIC)
        for seq in seqs:
            # next(reader) works on Python 2 and 3; reader.next() only on 2.
            sal_scores = abs(np.array(next(reader)))
            window_scores = np.convolve(sal_scores, np.ones(window_size, dtype=int), 'valid')
            max_ind = np.argmax(window_scores)
            max_score = np.max(window_scores)
            salient_seqs.append(Seq(seq[max_ind:max_ind + window_size]))
            max_scores.append(max_score)

    # keep only the top-scoring windows
    threshold = np.percentile(max_scores, 80)
    print('Threshold:' + str(threshold), 'Median: ' + str(np.median(max_scores)))
    salient_seqs = [
        window
        for window, score in zip(salient_seqs, max_scores)
        if score >= threshold
    ]

    # create motif from subsequences using BioPython
    with open(os.path.join(config_dir, 'numpy.pwm'), 'w') as f:
        writer = csv.writer(f, delimiter=' ')
        motif = motifs.create(salient_seqs)
        for nuc in ['A', 'C', 'G', 'T']:
            writer.writerow(motif.pwm[nuc])
def read_motif(motif_filename, verb=0): """Reads a motif as a collection of sites from a file Reads a motif and uses the biopython.motifs class to store it. If the motif is in FASTA format, it uses the parser directly. Otherwise, it loads and reads a concatenated text file and creates the motif. File type is determined by extension: * FASTA for .fas, .fasta and .fa files * One-per-line text file otherwise Input: * The motif filename; required * Verbose mode (default=0) Returns: * the read motif """ #create file handler for reading file try: motif_file = open(motif_filename,"r") except (IOError, OSError) as file_open_exception: print "*** The file name provided:", motif_filename, " does not exist" print "*** Error: ", file_open_exception.errno, " - ",\ file_open_exception.strerror sys.exit() #Figure out file type based on extension, read sites and create motif extension = motif_filename.split('.')[-1] if extension not in ['fas', 'fasta', 'fa']: if verb: print 'Reading motif... raw text sequence mode assumed \ (one site per line, not FASTA parsing)' sites = [] for line in motif_file: sites.append(Seq(line.rstrip('\n\r'),IUPAC.unambiguous_dna)) mot = motifs.create(sites) if verb: print mot.degenerate_consensus else: if verb: print 'Reading motif... attempting to parse FASTA file' mot = motifs.read(motif_file,'sites') motif_file.close() return mot
def _get_pwm(self, input_motif=list()):
    """Return the normalised count matrix for ``input_motif``.

    ``input_motif`` is a sequence of ``(header, sequence)`` pairs; only the
    sequences are used.  The alphabet follows ``self.alphabet`` ('protein',
    'rna', anything else means DNA) and is wrapped in ``Gapped`` with "-"
    when ``self.gap_in_alphabet`` is True.  ``self.pseudocounts`` is passed
    to ``counts.normalize``.
    """
    # separate headers from sequences
    _headers, sequences = (list(column) for column in zip(*input_motif))

    kind = self.alphabet
    if kind == 'protein':
        base_alphabet = IUPAC.protein
    elif kind == 'rna':
        base_alphabet = IUPAC.unambiguous_rna
    else:
        base_alphabet = IUPAC.unambiguous_dna
    alphabet = Gapped(base_alphabet, "-") if self.gap_in_alphabet is True else base_alphabet

    # motif as Bio.Seq instances
    motif_obj = motifs.create([Seq(text, alphabet) for text in sequences])
    return motif_obj.counts.normalize(self.pseudocounts)
def __init__(self, binding_sites, name="", pseudocounts=1):
    """
    Creates a PSSM scorer object.

    Accepts binding sites in the form of a path to a text file containing
    one site per line, or a list of Biopython Bio.Seq objects.

    When a path is given, ``name`` is replaced by the file's base name
    without extension and ``self.path`` records the path.  When the
    resulting name is empty, a default ``pssm_<length>bp_<count>seqs`` name
    is generated.

    Raises TypeError when ``binding_sites`` is neither a str nor a list.
    """
    self.name = name
    self.alphabet = Alphabet.IUPAC.unambiguous_dna
    self.path = None
    if isinstance(binding_sites, str):
        # Close the sites file as soon as it has been read.
        with open(binding_sites) as handle:
            self.seqs = [Seq.Seq(site.strip(), self.alphabet)
                         for site in handle]
        self.name = os.path.splitext(os.path.basename(binding_sites))[0]
        self.path = binding_sites
    elif isinstance(binding_sites, list):
        self.seqs = binding_sites
    else:
        raise TypeError(
            "binding_sites must be a file path (str) or a list of sequences, "
            "got %s" % type(binding_sites).__name__
        )
    self.n = len(self.seqs)
    # Construct motif
    self.motif = motifs.create(self.seqs)
    self.motif.pseudocounts = pseudocounts
    # Construct PSSM and reverse PSSM
    self.pssm = self.motif.pssm
    self.pssm_r = self.pssm.reverse_complement()
    self.m = self.pssm.length
    self.w = self.pssm.length
    self.length = self.pssm.length
    # Default name. It needs the motif length, so it can only be built once
    # self.m exists.
    if len(self.name) == 0:
        self.name = "pssm_%dbp_%dseqs" % (self.m, len(self.seqs))
    # Fast score primitives
    self.dict_pssm = dict(self.pssm)
    self.dict_pssm_r = dict(self.pssm_r)
    # Bayesian estimator
    self.estimator_initialized = False
def createlogo(infiles, outfile, title, scale): """ Creates a sequence logo in PNG format from a textfile of aligned sequences in flat format. Arguments: - infile - name of the flat format alignment file. - outfile - name of the png logo file. - title - title for the logo. - scale - yaxis scale, usually 1.0 return: None """ # input a text file with all sequences aligned in flat format. for fil in infiles: instances = [] for line in fil: # for each line in the file create a seq-object from it # and append this object to a list. instances.append(Seq(line.strip(), alphabet=IUPAC.ambiguous_dna)) # create a motif-object. The last sequence in instances # is empty, so skip it. m = motifs.create(instances[:-1], alphabet=IUPAC.ambiguous_dna) # create the sequence logo m.weblogo(outfile, logo_title=title, yaxis_scale=scale, stack_width="large")#,alphabet='ambiguous_dna_alphabet')
import pickle

import numpy as np
from Bio import SeqIO
from Bio import motifs

# Read every record of the FASTA file; the handle is closed once parsed.
with open("/home/xc406/data/hg19randomnorepeat.fa", "r") as handle:
    records = list(SeqIO.parse(handle, "fasta"))
instances = [record.seq for record in records]

m = motifs.create(instances)
pwm = m.counts.normalize(pseudocounts={'A':0.2953, 'C': 0.2047, 'G': 0.2047, 'T': 0.2953})

# One tab-separated row per base, preceded by the matrix identifier line.
na = '\t'.join(map(str, pwm['A']))
nc = '\t'.join(map(str, pwm['C']))
ng = '\t'.join(map(str, pwm['G']))
nt = '\t'.join(map(str, pwm['T']))
r1 = '\t'.join(['A:', na])
r2 = '\t'.join(['C:', nc])
r3 = '\t'.join(['G:', ng])
r4 = '\t'.join(['T:', nt])
r = '\n'.join(['M0000_0.80', r1, r2, r3, r4])
with open('randompwm.txt','w') as f:
    f.write(r)

# NOTE: pickle.load executes arbitrary code from the file; only load
# pickles from a trusted source. Binary mode is required on Python 3.
with open('hg19chromdict.txt', 'rb') as f:
    chromdict = pickle.load(f)

r = np.random.rand(10,4)
def add_attI(self):
    """
    Look for attI sites and add them to this integron.

    Three single-sequence motifs (attI1, attI2, attI3) are searched in a
    window of the replicon that extends ``dist_atti`` positions beyond the
    ``left``/``right`` bounds chosen from the integron type.  Every hit is
    appended to ``self.attI`` as a one-row DataFrame.

    NOTE(review): ``left``, ``right`` and ``strand_array`` are only set for
    the types "complete", "In0" and "CALIN"; any other type would fail on
    the first use below -- confirm no other type can occur.
    """
    dist_atti = 500  # margin added on both sides of the searched region

    # attI1
    instances_attI1 = [Seq.Seq('TGATGTTATGGAGCAGCAACGATGTTACGCAGCAGGGCAGTCGCCCTAAAACAAAGTT')]
    attI1 = motifs.create(instances_attI1)
    attI1.name = "attI1"

    # attI2
    instances_attI2 = [Seq.Seq('TTAATTAACGGTAAGCATCAGCGGGTGACAAAACGAGCATGCTTACTAATAAAATGTT')]
    attI2 = motifs.create(instances_attI2)
    attI2.name = "attI2"

    # attI3
    instances_attI3 = [Seq.Seq('CTTTGTTTAACGACCACGGTTGTGGGTATCCGGTGTTTGGTCAGATAAACCACAAGTT')]
    attI3 = motifs.create(instances_attI3)
    attI3.name = "attI3"

    motif_attI = [attI1, attI2, attI3]

    # Choose the region bounds and the strand(s) to search.
    if self.type() == "complete":
        # Compare the two modular distances between integrase and attC
        # array to decide on which side of the attCs the integrase lies.
        if ((self.attC.pos_beg.values[0] - self.integrase.pos_end.values[0]) % self.replicon_size >
                (self.integrase.pos_beg.values[0] - self.attC.pos_end.values[-1]) % self.replicon_size):
            # if integrase after attcs (on the right)
            left = int(self.attC.pos_end.values[-1])
            right = int(self.integrase.pos_beg.values[0])
        else:
            left = int(self.integrase.pos_end.values[-1])
            right = int(self.attC.pos_beg.values[0])
        strand_array = self.attC.strand.unique()[0]
    elif self.type() == "In0":
        # NOTE(review): int() is applied to the whole column here, unlike
        # the ``.values[...]`` indexing used in the other branches --
        # presumably the integrase table has a single row; confirm.
        left = int(self.integrase.pos_beg)
        right = int(self.integrase.pos_end)
        strand_array = "both"
    elif self.type() == "CALIN":
        left = int(self.attC.pos_beg.values[0])
        right = int(self.attC.pos_end.values[-1])
        strand_array = self.attC.strand.unique()[0]

    if left < right:
        seq_attI = self.replicon.seq[left - dist_atti:right + dist_atti]
    else:
        # The region wraps past the end of the replicon: join the tail
        # starting at ``left`` with the head ending at ``right``.
        seq_attI1 = self.replicon.seq[left - dist_atti:self.replicon_size]
        seq_attI2 = self.replicon.seq[:right + dist_atti]
        seq_attI = seq_attI1 + seq_attI2

    for m in motif_attI:
        # Strand 1 searches the motif as given, "both" searches the reverse
        # complement and then the motif, anything else only the reverse
        # complement.
        if strand_array == 1:
            mot = [m]
        elif strand_array == "both":
            mot = [m.reverse_complement(), m]
        else:
            mot = [m.reverse_complement()]

        for sa, mo in enumerate(mot):
            for pos, s in mo.instances.search(seq_attI):
                tmp_df = pd.DataFrame(columns=self._columns)
                tmp_df = tmp_df.astype(dtype=self._dtype)
                # Convert the window offset back to replicon coordinates,
                # wrapping at the replicon size.
                tmp_df["pos_beg"] = [(left - dist_atti + pos) % self.replicon_size]
                tmp_df["pos_end"] = [(left - dist_atti + pos + len(s)) % self.replicon_size]
                # With "both", index 0 (reverse complement) gives -1 and
                # index 1 (motif as given) gives +1.
                tmp_df["strand"] = [strand_array] if strand_array != "both" else [sa * 2 - 1]
                tmp_df["evalue"] = [np.nan]
                tmp_df["type_elt"] = "attI"
                # Last character of the motif name, i.e. "1", "2" or "3".
                tmp_df["annotation"] = "attI_%s" % (m.name[-1])
                tmp_df["model"] = "NA"
                tmp_df.index = [m.name]
                tmp_df["distance_2attC"] = [np.nan]
                self.attI = self.attI.append(tmp_df)
def add_promoter(self):
    """
    Look for known promoters within this integron element.
    It takes 1s for about 13kb.

    Two searches are made and every hit is appended to ``self.promoter`` as
    a one-row DataFrame:

    * the integrase promoter P_intI1, only when the element has an
      integrase, in the ``dist_prom`` positions flanking the integrase;
    * the Pc promoters (variants read from ``variants_Pc_intI1.fst`` in the
      model directory, plus two Pc_int3 sequences) in a window around the
      bounds chosen from the integron type.

    NOTE(review): the nesting was reconstructed from a flattened source;
    the Pc section is placed outside the ``has_integrase`` test because it
    handles the attC-only "CALIN" type -- confirm against the original.
    NOTE(review): ``left``, ``right`` and ``strand_array`` are only set for
    the types "complete", "In0" and "CALIN".
    """
    dist_prom = 500  # pb distance from edge of the element for which we seek promoter

    ######## Promoter of integrase #########

    if self.has_integrase():
        # PintI1
        p_intI1 = motifs.create([Seq.Seq("TTGCTGCTTGGATGCCCGAGGCATAGACTGTACA")])
        p_intI1.name = "P_intI1"

        # PintI2
        # Not known

        # PintI3
        # Not known

        motifs_Pint = [p_intI1]

        # Integrase region extended by dist_prom on both sides.
        seq_p_int = self.replicon.seq[int(self.integrase.pos_beg.min()) - dist_prom:
                                      int(self.integrase.pos_end.max()) + dist_prom]

        for m in motifs_Pint:
            if self.integrase.strand.values[0] == 1:
                # Strand 1: search the first dist_prom positions of the
                # window with the motif as given.
                generator_motifs = m.instances.search(seq_p_int[:dist_prom])
                for pos, s in generator_motifs:
                    tmp_df = pd.DataFrame(columns=self._columns)
                    tmp_df = tmp_df.astype(dtype=self._dtype)
                    tmp_df["pos_beg"] = [self.integrase.pos_beg.values[0] - dist_prom + pos]
                    tmp_df["pos_end"] = [self.integrase.pos_beg.values[0] - dist_prom + pos + len(s)]
                    tmp_df["strand"] = [self.integrase.strand.values[0]]
                    tmp_df["evalue"] = [np.nan]
                    tmp_df["type_elt"] = "Promoter"
                    tmp_df["annotation"] = "Pint_%s" %(m.name[-1])
                    tmp_df["model"] = "NA"
                    tmp_df.index = [m.name]
                    tmp_df["distance_2attC"] = [np.nan]
                    self.promoter = self.promoter.append(tmp_df)
            else:
                # Other strand: search the last dist_prom positions of the
                # window with the reverse-complemented instances.
                generator_motifs = m.instances.reverse_complement().search(seq_p_int[-dist_prom:])
                for pos, s in generator_motifs:
                    tmp_df = pd.DataFrame(columns=self._columns)
                    tmp_df = tmp_df.astype(dtype=self._dtype)
                    tmp_df["pos_beg"] = [self.integrase.pos_end.max() + pos]
                    tmp_df["pos_end"] = [self.integrase.pos_end.max() + pos + len(s)]
                    tmp_df["strand"] = [self.integrase.strand.values[0]]
                    tmp_df["evalue"] = [np.nan]
                    tmp_df["type_elt"] = "Promoter"
                    tmp_df["annotation"] = "Pint_%s" % (m.name[-1])
                    tmp_df["model"] = "NA"
                    tmp_df.index = [m.name]
                    tmp_df["distance_2attC"] = [np.nan]
                    self.promoter = self.promoter.append(tmp_df)

    ######## Promoter of K7 #########

    # Pc-int1
    motifs_Pc = []

    pc = SeqIO.parse(os.path.join(self.cfg.model_dir, "variants_Pc_intI1.fst"), "fasta")
    pseq = [i for i in pc]
    # Group the variants by length: one motif is created per length group.
    d = {len(i): [] for i in pseq}
    _ = [d[len(i)].append(i.seq.upper()) for i in pseq]
    for k, i in d.items():
        motifs_Pc.append(motifs.create(i))
        motifs_Pc[-1].name = "Pc_int1"

    # Pc-int2
    # Not known

    # Pc-int3
    pc_intI3 = motifs.create([Seq.Seq("TAGACATAAGCTTTCTCGGTCTGTAGGCTGTAATG"),
                              Seq.Seq("TAGACATAAGCTTTCTCGGTCTGTAGGATGTAATG")])
    pc_intI3.name = "Pc_int3"
    motifs_Pc.append(pc_intI3)

    # Choose the region bounds and the strand(s) to search.
    if self.type() == "complete":
        # Compare the two modular distances between integrase and attC
        # array to decide on which side of the attCs the integrase lies.
        if ((self.attC.pos_beg.values[0] - self.integrase.pos_end.values[0]) % self.replicon_size >
                (self.integrase.pos_beg.values[0] - self.attC.pos_end.values[-1]) % self.replicon_size):
            # if integrase after attcs (on the right)
            left = int(self.attC.pos_end.values[-1])
            right = int(self.integrase.pos_beg.values[0])
        else:
            left = int(self.integrase.pos_end.values[-1])
            right = int(self.attC.pos_beg.values[0])
        strand_array = self.attC.strand.unique()[0]
    elif self.type() == "In0":
        left = int(self.integrase.pos_beg.values[0])
        right = int(self.integrase.pos_end.values[-1])
        strand_array = "both"
    elif self.type() == "CALIN":
        left = int(self.attC.pos_beg.values[0])
        right = int(self.attC.pos_end.values[-1])
        strand_array = self.attC.strand.unique()[0]

    if left < right:
        seq_Pc = self.replicon.seq[left - dist_prom:right + dist_prom]
    else:
        # The region wraps past the end of the replicon: join the tail
        # starting at ``left`` with the head ending at ``right``.
        seq_Pc1 = self.replicon.seq[left - dist_prom:self.replicon_size]
        seq_Pc2 = self.replicon.seq[:right + dist_prom]
        seq_Pc = seq_Pc1 + seq_Pc2

    for m in motifs_Pc:
        # Strand 1 searches the motif as given, "both" searches the reverse
        # complement and then the motif, anything else only the reverse
        # complement.
        if strand_array == 1:
            mot = [m]
        elif strand_array == "both":
            mot = [m.reverse_complement(), m]
        else:
            mot = [m.reverse_complement()]

        for sa, mo in enumerate(mot):
            for pos, s in mo.instances.search(seq_Pc):
                tmp_df = pd.DataFrame(columns=self._columns)
                tmp_df = tmp_df.astype(dtype=self._dtype)
                # Convert the window offset back to replicon coordinates,
                # wrapping at the replicon size.
                tmp_df["pos_beg"] = [(left - dist_prom + pos) % self.replicon_size]
                tmp_df["pos_end"] = [(left - dist_prom + pos + len(s)) % self.replicon_size]
                # With "both", index 0 (reverse complement) gives -1 and
                # index 1 (motif as given) gives +1.
                tmp_df["strand"] = [strand_array] if strand_array != "both" else [sa * 2 - 1]
                tmp_df["evalue"] = [np.nan]
                tmp_df["type_elt"] = "Promoter"
                # Last character of the motif name, i.e. "1" or "3".
                tmp_df["annotation"] = "Pc_%s" % (m.name[-1])
                tmp_df["model"] = "NA"
                tmp_df.index = [m.name]
                tmp_df["distance_2attC"] = [np.nan]
                self.promoter = self.promoter.append(tmp_df)
def subs(s, t):
    """Print the 1-based start positions of ``t`` in ``s``, space-separated."""
    m = motifs.create([Seq(t)])
    positions = [str(pos + 1) for pos, _seq in m.instances.search(Seq(s))]
    print(' '.join(positions))
def run(argv=None):
    """Produce the score diagram, count diagram and sequence logo.

    Settings come from ``process_command_line``; fragments and their
    per-position scores are loaded with ``format_file_input`` from
    ``settings.reads_fusion``. Depending on which of
    ``settings.score_diagram``, ``settings.count_diagram`` and
    ``settings.logo_file`` are set, the corresponding output is written.

    Args:
        argv: Accepted for interface compatibility.
    """
    import subprocess

    # NOTE(review): ``argv`` is never forwarded -- ``None`` is always passed
    # to process_command_line. Left unchanged because the expected shape of
    # ``argv`` cannot be confirmed from here.
    settings, args = process_command_line(None)
    fragment_as_seq_list, negative_pos_score_list, positive_pos_score_list = \
        format_file_input(settings.reads_fusion)

    # Score diagram
    pos_sum_dct, pos_count_dct = get_position_statistics(
        fragment_as_seq_list,
        negative_pos_score_list,
        positive_pos_score_list)

    stats_dict = {position: float(pos_sum_dct[position]) / float(pos_count_dct[position])
                  for position in pos_sum_dct}

    # Computed before the optional clamping below, as in the original flow.
    ligation_index = abs(min(stats_dict))

    lower_bound = min(stats_dict)
    upper_bound = max(stats_dict)

    if settings.distance:
        lower_bound = max(lower_bound, -settings.distance)
        upper_bound = min(upper_bound, settings.distance)

    # Position 0 is skipped on purpose: only negative and positive offsets
    # are plotted.
    positions = list(range(lower_bound, 0)) + list(range(1, upper_bound + 1))

    # Generate average base score plot
    plt.rcParams.update({"font.size": 18})
    plt.xlim(lower_bound - 1, upper_bound + 1)
    plt.ylim(0, 1)

    # plt.hold(True) was dropped: overlaying is pyplot's default behaviour and
    # the function no longer exists in current matplotlib releases.
    for position in positions:
        plt.plot([position, position], [0, stats_dict[position]], "b")

    if settings.score_diagram is not None:
        plt.savefig(settings.score_diagram)

    # Generate count per position plot
    # NOTE(review): no new figure is created and the current one is not
    # cleared, so the red count lines are drawn on top of the blue score
    # lines -- confirm whether that overlay is intended.
    plt.xlim(lower_bound - 1, upper_bound + 1)
    plt.ylim(0, max(pos_count_dct.values()))

    for position in positions:
        plt.plot([position, position], [0, pos_count_dct[position]], "r")

    if settings.count_diagram is not None:
        plt.savefig(settings.count_diagram)

    # Generate logo
    if settings.logo_file is not None:
        padded_fragments_list = pad_sequences(fragment_as_seq_list)
        merged_fragments_list = merge_frgaments(padded_fragments_list)

        chimera_motifs = motifs.create(merged_fragments_list, ALPHABET)
        print(chimera_motifs)
        print(chimera_motifs.counts)

        create_sequnce_file("logo_data",
                            merged_fragments_list,
                            lower_bound,
                            upper_bound,
                            ligation_index)

        # Same weblogo invocation as before, but passed as an argument list
        # with explicit stdin/stdout redirection instead of a shell string,
        # so an output path containing spaces or shell metacharacters cannot
        # break or alter the command.
        weblogo_cmd = ["/home/users/amirbar/.local/bin/weblogo",
                       "-A", "rna",
                       "-c", "classic",
                       "--resolution", "600",
                       "--errorbars", "NO",
                       "-i", str(lower_bound),
                       "--format", "PNG"]
        with open("logo_data", "rb") as logo_in, \
                open(settings.logo_file, "wb") as logo_out:
            subprocess.call(weblogo_cmd, stdin=logo_in, stdout=logo_out)
# Build and print a motif for the records of dataset/test.fasta.
alphabet = Gapped(IUPAC.ambiguous_dna)
sequences = []

# NOTE(review): the original layout of this snippet was lost; the statements
# from ``sequences = list(record.seq)`` through ``print(m)`` are assumed to
# belong to the loop body -- confirm against the original file.
#
# The file is opened with mode "r" inside a ``with`` block: the former "rU"
# mode is rejected by Python 3.11+, and the handle was never closed.
with open("dataset/test.fasta", "r") as handle:
    #record_index = SeqIO.index("dataset/test.fasta", "fasta")
    for record in SeqIO.parse(handle, "fasta"):
        #print (record.id)
        #print (record.seq)
        #instances = sequences.append(record.seq)
        #m = motifs.create(record.seq)
        # list() over the record's sequence gives its individual letters, so
        # the motif is created from those one-letter items.
        sequences = list(record.seq)
        m = motifs.create(sequences)
        #counts = m.counts
        print(sequences)
        print(m)
        #print (counts, end="")
        #print (m.counts)

#alignment = AlignIO.read(open("dataset/test.fasta"), "fasta")
#alignment = AlignIO.read(dna,"fasta")
#summary_align = AlignInfo.SummaryInfo(alignment)
#consensus = summary_align.dumb_consensus(threshold = 0, ambiguous = 'N', consensus_alpha = alphabet, require_multiple = 2)
def cons(stdin):
    """Print the consensus and the A/C/G/T count rows of a FASTA input.

    All records parsed from ``stdin`` with ``SeqIO.parse(..., 'fasta')`` are
    turned into one motif. The motif's consensus is printed first, followed
    by one line per base in the order A, C, G, T of the form
    ``"<base>: <count> <count> ..."`` taken from ``m.counts``.

    Args:
        stdin: Handle (or anything ``SeqIO.parse`` accepts) with FASTA data.
    """
    m = motifs.create([record.seq for record in SeqIO.parse(stdin, 'fasta')])

    # Parenthesised single-argument prints behave the same on Python 2 and 3;
    # the former print statements only parsed on Python 2.
    print(m.consensus)
    for base in 'ACGT':
        print(base + ': ' + ' '.join(map(str, m.counts[base])))
def build_motif(sites, pseudocounts=0.8):
    """Build a Biopython ``motifs.Motif`` object out of the given sites.

    Args:
        sites: Site sequences handed to ``motifs.create``.
        pseudocounts: Value assigned to the motif's ``pseudocounts``
            attribute. Defaults to 0.8, the value that was previously
            hard-coded, so existing callers are unaffected.

    Returns:
        The motif created from ``sites`` with ``pseudocounts`` set.
    """
    motif = motifs.create(sites)
    # Default of 0.8: http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2647310/
    motif.pseudocounts = pseudocounts
    return motif
def seq_iter(file):
    """Yield sequences from a FASTA/FASTQ file, or from stdin.

    When ``file`` is truthy its name must end in ``.fasta``, ``.fa``,
    ``.fastq`` or ``.fq`` (any letter case), optionally followed by ``.gz``.
    The file is parsed with ``SeqIO.parse`` and each record's ``seq`` is
    yielded. When ``file`` is falsy, every non-blank line of ``sys.stdin``
    is stripped and yielded as a ``Seq``.

    Args:
        file: Path of the input file, or a falsy value to read stdin.

    Yields:
        One sequence per record (file input) or per non-blank line (stdin).

    Raises:
        SystemExit: With exit code 1 when the file name has no recognised
            suffix.
    """
    if not file:
        for line in sys.stdin:
            text = line.strip()
            # Blank lines (for instance a trailing newline) are skipped
            # instead of being turned into empty sequences.
            if text:
                yield Seq(text)
        return

    # The suffix has to follow a literal dot, and ".gz" is matched literally
    # (the dot used to be an unescaped wildcard).
    found = re.search(r'(?i)\.(fasta|fa|fastq|fq)(\.gz)?$', file)
    if not found:
        print("invalid file name suffix.\nfile name should like this: infile.[fasta|fa|fastq|fq][.gz]", file=sys.stderr)
        sys.exit(1)

    seq_format, is_gz = found.groups()
    # The match is case-insensitive, so normalise before comparing and before
    # passing the format name on (e.g. "reads.FA" -> "fasta").
    seq_format = seq_format.lower()
    seq_format = {'fa': 'fasta', 'fq': 'fastq'}.get(seq_format, seq_format)

    opener = gzip.open if is_gz else open
    # The context manager closes the handle even if the consumer stops
    # iterating early or parsing raises.
    with opener(file, 'rt') as fh:
        for record in SeqIO.parse(fh, seq_format):
            yield record.seq


if __name__ == '__main__':
    args = parse_args()
    seqs = seq_iter(args.infile)
    # Sequences containing the ambiguous base N are left out of the motif.
    seqs2 = [seq for seq in seqs if 'N' not in seq]
    m = motifs.create(seqs2)
    print(m.pwm)
    # print(m.pssm)
    # m.weblogo("motif.png")
hba1 = "------------------------------------------------------CATAAACCCTGGC------------------------------------------------------------------------------GCGCTCGCGGCCCGGCACTCTTCTGGTCCCCA-CAGACTCAGAGAGAACCCACCATGGTGC---TGTCTCCTGCCGACAAGACCAACGTCAAGGCCGCCTGGGGTAAGGTCGGCGCGCACGCTGGCGAGTATGGTGCGGAGGCCCTGGAGAGGATGTTCCTGTCCTTCCCCACCACCAAGACCTACTTCCCGCACTTCG---ACCTGAGCCACGGCTCTG---------------CCCAGGTTAAGGGCCACGGCAAGAAGGTGGCCGACGCGCTGACCAACGCCGTGGCGCACGTGGACGACATGCCCAACGCGCTGTCCGCCCTGAGCGACCTGCACGCGCACAAGCTTCGGGTGGACCCGGTCAACTTCAAGCTCCTAAGCCACTGCCTGCTGGTGACCCTGGCCGCCCACCTCCCCGCCGAGTTCACCCCTGCGGTGCACGCCTCCCTGGACAAGTTCCTGGCTTCTGTGAGCACCGTGCTGACCTCCAAATACCGTTAAGCTGGAGCCTCGGTGGCCATGCTTCTTGCCCCTTGGGCCTCCCCCCAGCCCCTCCTCCCCTTCCTGCACCCGTACCCCCGTGGTC-TTTGAATAAAGTCTGAGTGGGCGGCAAAAAAAAAAAAAAAAAAAAAA----------" hbb = "-------------------------------------------------------------------------------------------------------------------------------------------------ACATTTGC-TTCTGACACAACTGTGTTCACTAGCAACCTCAAA---CAGACACCATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTGAACGTGGATGAAG------TTGGTGGTGAGGCCCTGGGCAGGCTGCTGGTGGTCTACCCTTGGACCCAGAGGTTCTTTGAGTCCTTTGGGGATCTGTCCACTCCTGATGCTGTTATGGGCAACCCTAAGGTGAAGGCTCATGGCAAGAAAGTGCTCGGTGCCTTTAGTGATGGCCTGGCTCACCTGGACAACCTCAAGGGCACCTTTGCCACACTGAGTGAGCTGCACTGTGACAAGCTGCACGTGGATCCTGAGAACTTCAGGCTCCTGGGCAACGTGCTGGTCTGTGTGCTGGCCCATCACTTTGGCAAAGAATTCACCCCACCAGTGCAGGCTGCCTATCAGAAAGTGGTGGCTGGTGTGGCTAATGCCCTGGCCCACAAGTATCACTAAGCTCG------CTTTCTTGCTGTCCAATTTCTATTAAAGGTTCCTTTGTTCCCTAAGTCCAACTACTAAACTGGGGGATATTATGAAGGGCCTTGAGCATCTGGATTCTGCCTAATAAAAAACATTTATTTTCATTGC-----" hbd = 
"AGGGCAAGTTAAGGGAATAGTGGAATGAAGGTTCATTTTTCATTCTCACAAACTAATGAAACCCTGCTTATCTTAAACCAACCTGCTCACTGGAGCAGGGAGGACAGGACCAGCATAAAAGGCAGGGCAGAGTCGACTGTTGCTTACACTTTC-TTCTGACATAACAGTGTTCACTAGCAACCTCAAA---CAGACACCATGGTGCATCTGACTCCTGAGGAGAAGACTGCTGTCAATGCCCTGTGGGGCAAAGTGAACGTGGATGCAG------TTGGTGGTGAGGCCCTGGGCAGATTACTGGTGGTCTACCCTTGGACCCAGAGGTTCTTTGAGTCCTTTGGGGATCTGTCCTCTCCTGATGCTGTTATGGGCAACCCTAAGGTGAAGGCTCATGGCAAGAAGGTGCTAGGTGCCTTTAGTGATGGCCTGGCTCACCTGGACAACCTCAAGGGCACTTTTTCTCAGCTGAGTGAGCTGCACTGTGACAAGCTGCACGTGGATCCTGAGAACTTCAGGCTCTTGGGCAATGTGCTGGTGTGTGTGCTGGCCCGCAACTTTGGCAAGGAATTCACCCCACAAATGCAGGCTGCCTATCAGAAGGTGGTGGCTGGTGTGGCTAATGCCCTGGCTCACAAGTACCATTGAGATC-------CTGGACTGTTTCCTGATAACCATAAGAAGACCCTATTTCCCTAGATTCTATTTTCTGAACTTGGGAACACAATG-CCTACTTCAAGGGTATGGCTTCTGCCTAATAAAGAATGTTCAGCTCAACTTCCTGAT" hbg1 = "-------------------------------------------------------------------------------------------------------------------------------------------------ACACTCGC-TTCTGGAACGTCTGAGGTTATCAATAAGCTCCTAGTCCAGACGCCATGGGTCATTTCACAGAGGAGGACAAGGCTACTATCACAAGCCTGTGGGGCAAGGTGAATGTGGAAGATG------CTGGAGGAGAAACCCTGGGAAGGCTCCTGGTTGTCTACCCATGGACCCAGAGGTTCTTTGACAGCTTTGGCAACCTGTCCTCTGCCTCTGCCATCATGGGCAACCCCAAAGTCAAGGCACATGGCAAGAAGGTGCTGACTTCCTTGGGAGATGCCACAAAGCACCTGGATGATCTCAAGGGCACCTTTGCCCAGCTGAGTGAACTGCACTGTGACAAGCTGCATGTGGATCCTGAGAACTTCAAGCTCCTGGGAAATGTGCTGGTGACCGTTTTGGCAATCCATTTCGGCAAAGAATTCACCCCTGAGGTGCAGGCTTCCTGGCAGAAGATGGTGACTGCAGTGGCCAGTGCCCTGTCCTCCAGATACCACTGAGCTC-------ACTGCCCATGATTCAGAGCTTTCAAGGATAGGCTTTATTCTGCAAGC----------------------------AATACAAATAATAAATCTATTCTGCTGAGAGATCAC---------------------" ins = 
"------------------AGCCCTCCAGGACAGGCTGC-ATCAGAAGAGGCCATCAAGCA-GATCACTGTCCTTCTGCCATGGCCCTGTGGATGCGCCTCCTGCCCCTGCTGGCGCTGCTGGCCCTCTGGGGACCTGACCCAGCCGCAGCCTTTGTGAACCAACACCTGTGCGGCTCACACCTGGTGGAAGCTCTCTACCTAGTGTGCGGGGAACGAGGCTTCTTCTACACACCCAAGACCCGCCGGGAGGCAGAGGACCTGCAGGTGGGGCAGGTGGAGCTGGGCGGGGGCCCTGGTGCAGGCAGCCTGCAGCCCTTGGCCCTGGAGGGGTCCCTGCAGAAGCGTGGCATTGTGGAACAATGCTGTACCAGCATCTGCTCCCTCTACCAGCTGGAGAACTACTGCAACTAGACGCAGCCCGCAGGCAGCCCCACACCCGCCGCCTCCTGCACCGAGAGAGATGGAATAAAGCCCTTGAACCAGC----AAAA" ins2= "GGGGACCCAGTAACCACCAGCCCTAAGTGATCCGCTACAATCAAAAACCATCAGCAAGCAGGAAGGTTATTGTTTCAACATGGCCCTGTGGATGCGCTTCCTGCCCCTGCTGGCCCTGCTCTTCCTCTGGGAGTCCCACCCCACCCAGGCTTTTGTCAAGCAGCACCTTTGTGGTTCCCACCTGGTGGAGGCTCTCTACCTGGTGTGTGGGGAGCGTGGCTTCTTCTACACACCCATGTCCCGCCGTGAAGTGGAGGACCCACAAGTGGCACAACTGGAGCTGGGTGGAGGCCCGGGAGCAGGTGACCTTCAGACCTTGGCACTGGAGGTGGCCCAGCAGAAGCGTGGCATTGTAGATCAGTGCTGCACCAGCATCTGCTCCCTCTACCAGCTGGAGAACTACTGCAACTAGACCCA--CCACTACCCAGCCTAC--------CCCTCTGCAAT----------GAATAAAACCTTTGAATGAGCACAAAAAA" exercise5instances = [ Seq("AGATAA"), Seq("TGATAA"), Seq("AGATAG"), Seq("TGATAG"), Seq("TGATCA"), Seq("TTATCA"), ] ex5motif = motifs.create(exercise5instances) ex5PWM = ex5motif.pwm ex5logo_heights = ex5motif.counts.normalize(pseudocounts = 1.0) ex5PSSM = ex5logo_heights.log_odds()
# from Bio.Seq import Seq
# Convert a motif report into TRANSFAC-style count rows.
# NOTE(review): the markers searched for below ('E-value', 'sites sorted by
# position p-value') look like MEME text output -- confirm the input format.
with open(argv[1], 'r') as input_file:
    # The first two pieces before an 'E-value' marker are discarded.
    contents = input_file.read().split('E-value')[2:]

lines = '-' * 80
stars = '*' * 80

for item in contents:
    evalue = float(item.split(stars)[0].strip().split('=')[1].strip())
    # print item
    # print evalue
    if evalue <= 0.05:
        list_of_motifs = item.split('sites sorted by position p-value')[
            1].split(lines)[1].split('Site')[1].strip()
        # print list_of_motifs
        # %-formatting yields the same text as the former Python 2
        # ``print ">matrix", evalue`` and also runs on Python 3.
        print(">matrix %s" % evalue)

        motif_list = []
        for line in list_of_motifs.split('\n')[1:]:
            # Equivalent to filter(None, ...) but always a list, so it can be
            # indexed on Python 3 where filter() returns an iterator.
            line_list = [token for token in line.split(' ') if token]
            motif_list.append(line_list[-2])
        # print len(motif_list)

        m = motifs.create(motif_list)
        trans = m.format("transfac")
        # print trans
        for l in trans.split('\n')[1:-3]:
            # Drop the first and the last column of each row.
            numbers = [token for token in l.split(' ') if token][1:-1]
            print(' '.join(numbers))
# For every species and every length bin, collect the first five bases of the
# ``front`` column of the matching rows and print the consensus of the
# resulting motif.
# NOTE(review): the original layout of this snippet was lost; the nesting
# below is reconstructed from the statements themselves -- confirm.
print('species peak min max consensus')
for sp in speciesA:
    fname = os.path.join(base, sp + '.iesdb')
    for lbi in range(len(lb)):  # for each length bin
        peakNo = "%0.3d" % lbi
        figName = os.path.join(baseOut, sp + '.' + peakNo + '.png')
        fbA = []
        # ``with`` closes the file even when a row fails to parse.
        with open(fname, "r") as f:
            f.readline()  # header
            for line in f:
                # The former bare ``line.strip()`` discarded its result; the
                # line ending is removed here so the last column is clean.
                (idv, scaffold, altSeqNo, startLocs, endLocs, upstreamFlank,
                 downstreamFlank, length, isFloating, merged, inCDS, inInter,
                 inIntron, front, back, seq) = line.rstrip("\r\n").split("\t")
                l = int(length)
                if isFloating == '0' and merged == '0' and lengthMin[lbi] <= l < lengthMax[lbi]:
                    # exclude if have an N
                    if 'N' in front:
                        continue
                    fbA.append(Seq(front[0:5]))
        fbm = motifs.create(fbA)
        title = sp + ': ' + str(lb[lbi]) + '[' + str(lengthMin[lbi]) + '-' + str(lengthMax[lbi]) + ']'
        print(sp + ' ' + str(lb[lbi]) + ' ' + str(lengthMin[lbi]) + ' ' + str(lengthMax[lbi]) + ' ' + fbm.consensus)
        if os.path.exists(figName):
            continue  # do not redo figures
        # print(title)
        # fbm.weblogo(figName, logo_title = title)
        # quit()