def run_hmmtop(self):
    """Scan the master sequence file and fill in per-position ratios.

    Side effects:
        * ``self.tms`` is set to the raw results of the ``hmmtop.tools`` scan.
        * ``self.hmmtop`` is replaced by a list holding one median position
          per reported segment.
        * ``self.tms_ratio`` (which must already exist) receives one
          ``Decimal`` entry per position ``1..len(self.master_seq[0])``.

    Returns:
        Always ``True``.
    """
    ht = hmmtop.tools()
    ht.add_library('res', self.master_seq_file)
    ht.scan_libraries()
    self.tms = ht.results
    self.hmmtop = []
    # Sequences are read from 'hmmtop.in', resolved against the current
    # working directory, and indexed by record id.
    gap_seqs = {}
    for i in SeqIO.parse('hmmtop.in', 'fasta'):
        gap_seqs[i.id] = str(i.seq)
    # i is a sequence id, j its result mapping, k one segment whose first two
    # items are passed through self.get_pos together with the stored sequence.
    # Presumably get_pos maps raw coordinates onto the gapped sequence --
    # TODO confirm against get_pos.
    for i, j in ht.results['res'].items():
        for k in j.values():
            korrect = [
                self.get_pos(k[0], gap_seqs[i]),
                self.get_pos(k[1], gap_seqs[i])
            ]
            # Inclusive range of positions covered by the segment.
            point = range(korrect[0], korrect[1] + 1)
            self.hmmtop.append(median(point))
    graph = {}
    # NOTE(review): the original indentation of this section was lost; the
    # inner loop is assumed to be nested here. It never uses ``tms``, so as
    # written every position ends up with the same count,
    # len(self.hmmtop). Confirm the intended nesting and index.
    for i in range(1, len(self.master_seq[0]) + 1):
        graph[i] = 0
        for tms in self.hmmtop:
            graph[i] += 1
    # Ratio = count at the position divided by len(self.master_seq).
    for key, hits in graph.items():
        ratio = Decimal(hits) / Decimal(len(self.master_seq))
        self.tms_ratio[key] = ratio
    return True
def hmmtop(self):
    """Load cached scan results or compute and cache them.

    The results are stored on ``self.tms``. When ``self.debug`` is truthy and
    ``<indir>/hmmtop.db`` already exists, the pickled results are reused.
    Otherwise ``<indir>/myqueries.faa`` and ``<indir>/mytcdb.faa`` are scanned
    with ``hmmtop.tools`` and the results are pickled to
    ``<indir>/hmmtop.db``.

    Returns:
        None.
    """
    db = self.indir + '/hmmtop.db'
    if os.path.exists(db) and self.debug:
        # The cache is written in binary mode below, so it has to be read
        # back in binary mode as well; text mode corrupts or rejects pickle
        # data on Python 3 and on platforms that translate line endings.
        # NOTE: unpickling can execute arbitrary code -- only point ``indir``
        # at directories whose hmmtop.db was written by this tool.
        with open(db, 'rb') as cache:
            self.tms = pickle.load(cache)
        return
    ht = hmmtop.tools()
    ht.add_library('queries', self.indir + "/myqueries.faa")  # Genome
    ht.add_library('tcdb', self.indir + "/mytcdb.faa")
    ht.scan_libraries()
    # Close the handle explicitly so the cache is fully flushed to disk.
    with open(db, 'wb') as cache:
        pickle.dump(ht.results, cache)
    self.tms = ht.results
def __call__(self):
    """Run the whole pipeline: search, HMMTOP, global alignments, report.

    Steps, in order:
        1. Create ``self.outdir`` if missing and run ``self.tssearch()``.
        2. Optionally swap the subject/target files, names and restrictions.
        3. Write the sequences referenced by ``self.tssdata`` to
           ``subjects.faa`` / ``targets.faa`` in ``self.outdir`` and scan
           both with ``hmmtop.tools`` (results stored on ``self.tms``).
        4. Run ``self.run_global_alignments()`` and normalise every
           ``gsat_score`` in ``self.globaldata`` to a non-negative int.
        5. Call ``self.generate_report()``.

    Returns:
        None.
    """
    if not os.path.exists(self.outdir):
        os.mkdir(self.outdir)
    self.tssearch()

    # Swap subjects & targets if needed - subject is always shorter
    if self.swap:
        (self.subject_file, self.target_file) = (self.target_file, self.subject_file)
        (self.subject_name, self.target_name) = (self.target_name, self.subject_name)
        (self.srestrict, self.trestrict) = (self.trestrict, self.srestrict)

    # Set subject & target names (default: file name without directories)
    if self.subject_name is None:
        self.subject_name = self.subject_file.split('/')[-1]
    if self.target_name is None:
        self.target_name = self.target_file.split('/')[-1]

    # Write selected subjects & targets, then run HMMTOP
    subject_ids = self.tssdata.keys()
    target_ids = []
    for i in self.tssdata.values():
        target_ids.extend([x['target_symbol'] for x in i])
    target_ids = list(set(target_ids))  # each target is written only once
    subjects = SeqIO.parse(self.subject_file, 'fasta')
    self.subjects_dict = SeqIO.to_dict(subjects)
    targets = SeqIO.parse(self.target_file, 'fasta')
    self.targets_dict = SeqIO.to_dict(targets)
    # NOTE(review): 'wb+' hands SeqIO.write a binary handle; confirm the mode
    # if this code is ever run under Python 3.
    with open(self.outdir + '/subjects.faa', 'wb+') as sf:
        for sid in subject_ids:
            SeqIO.write(self.subjects_dict[sid], sf, 'fasta')
    with open(self.outdir + '/targets.faa', 'wb+') as tf:
        for tid in target_ids:
            SeqIO.write(self.targets_dict[tid], tf, 'fasta')
    hmt = hmmtop.tools()
    hmt.add_library('subjects', self.outdir + '/subjects.faa')
    hmt.add_library('targets', self.outdir + '/targets.faa')
    hmt.scan_libraries()
    self.tms = hmt.results

    # Run GSAT on all of these
    self.run_global_alignments()

    # Sort global data. The float sort runs first so that, because list.sort
    # is stable, rows with equal integer scores keep their float ordering
    # after the second sort.
    self.globaldata.sort(key=lambda x: float(x['gsat_score']), reverse=True)
    for row in self.globaldata:
        try:
            score = max(int(row['gsat_score']), 0)  # clamp negatives to 0
        except (TypeError, ValueError, OverflowError):
            # Only the failures int() can raise are treated as "no score";
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            score = 0
        row['gsat_score'] = score
    self.globaldata.sort(key=lambda x: int(x['gsat_score']), reverse=True)

    # Generate HTML report
    self.generate_report()
def __call__(self, seq, gap):
    """Annotate a gapped alignment string for one sequence.

    Args:
        seq: Either something ``SeqIO.read(seq, 'fasta')`` can parse, or an
            object used as-is when that read fails (it must expose ``.id``).
        gap: The gapped sequence; stored on ``self.gap`` as ``str(gap)``.

    Returns:
        ``self.annotation`` after ``parse_gap()`` and ``build_label()`` have
        run, or ``None`` when no TMS ranges could be obtained for the
        sequence.
    """
    # NOTE(review): this resets ``self.annotate`` but the method returns
    # ``self.annotation``; presumably the latter is produced by
    # build_label() -- confirm the two names are both intended.
    self.annotate = []
    self.gap = str(gap)

    # Best effort: fall back to the raw argument when it is not readable as
    # FASTA. ``except Exception`` keeps that fallback without also trapping
    # KeyboardInterrupt/SystemExit like the former bare ``except`` did.
    try:
        self.seq = SeqIO.read(seq, 'fasta')
    except Exception:
        self.seq = seq

    try:
        if self.hmmtop is False:
            # No precomputed predictions were supplied: scan ``seq`` now.
            hmt = hmmtop.tools()
            hmt.add_library("gap", seq)
            hmt.scan_libraries()
            self.global_tms_ranges = hmt.results['gap'][self.seq.id]
        else:
            self.global_tms_ranges = self.hmmtop[self.seq.id]
    except Exception:
        # Any failure to obtain ranges (scan error, unknown id, ...) means
        # there is nothing to annotate.
        return None

    self.parse_gap()
    self.build_label()
    return self.annotation
def __call__(self):
    """Execute the full comparison, from HMMTOP scan to final report.

    Scans the subject and target files with ``hmmtop.tools``, derives
    ``self.subject_loops`` / ``self.target_loops`` via ``build_flanks``,
    makes sure ``self.outdir`` exists, writes the FASTA files, fills in
    default display names and finally builds the matrices and the report.

    Returns:
        None.
    """
    # HMMTOP comes first: everything else depends on its predictions.
    predictor = hmmtop.tools()
    predictor.add_library('subject', self.subject_file)
    predictor.add_library('target', self.target_file)
    predictor.scan_libraries()

    # Build the TMS sequence for each side from its predictions.
    self.subject_loops = self.build_flanks(
        self.subject_file, predictor.results['subject']
    )
    self.target_loops = self.build_flanks(
        self.target_file, predictor.results['target']
    )

    # The output directory must exist before the glued loops are written.
    if not os.path.exists(self.outdir):
        os.mkdir(self.outdir)
    self.write_fastas()

    # Unnamed units default to the last path component of their file name.
    if self.subjectname is None:
        subject_path = self.mysubject_file.name
        self.subjectname = subject_path.rsplit('/', 1)[-1]
    if self.targetname is None:
        target_path = self.mytarget_file.name
        self.targetname = target_path.rsplit('/', 1)[-1]

    self.build_target_matrix()
    self.cross_matrix()
    self.construct_report()
def calculate(fasta, label, out):
    """Tabulate and plot how many sequences have each result length.

    Every sequence in ``fasta`` is scanned with ``hmmtop.tools``; the length
    of a sequence's entry in the results (presumably its number of predicted
    TMSs) is its count. Sequences absent from the results count as zero.
    A bar chart of the distribution is saved as a PNG.

    Args:
        fasta: Path of the FASTA file to scan.
        label: Title drawn on the plot.
        out: Destination handed to ``plt.savefig``.

    Returns:
        A ``(tabs, tms)`` tuple. ``tabs`` maps each count from 0 up to the
        largest observed count to the number of sequences with that count;
        ``tms`` is the raw result mapping for the scanned library.
    """
    # os.environ['BIOV_DEBUG'] = 'True'
    ht = hmmtop.tools()
    ht.add_library('c', fasta)
    ht.scan_libraries()
    tms = ht.results['c']

    # Count records by header lines only: a '>' inside a description must not
    # be mistaken for another sequence. The context manager closes the file.
    with open(fasta, 'r') as handle:
        total = sum(1 for line in handle if line.startswith('>'))
    zeros = total - len(tms)

    tabs = {}
    for segments in tms.values():
        count = len(segments)
        tabs[count] = tabs.get(count, 0) + 1
    # Sequences missing from the results are added on top of any entries
    # that were reported with zero length, instead of being dropped.
    tabs[0] = tabs.get(0, 0) + zeros

    # Make the histogram contiguous: every count below the maximum gets a bar.
    biggest = max(tabs)
    for count in range(biggest):
        tabs.setdefault(count, 0)

    # Read the heights in count order so each bar sits on its own tick,
    # independent of the dict's iteration order.
    heights = [tabs[count] for count in range(biggest + 1)]
    xlocations = na.array(range(biggest + 1))
    plt.xticks(xlocations + 0.3 / 2, xlocations)
    plt.bar(xlocations, heights, width=0.3)
    plt.title(label)
    plt.savefig(out, dpi=80, format="png")
    return tabs, tms
#fig.frameon = False #fig.add_axes([0,0,0,0],frameon=False) fig.set_size_inches(width, 5.5) self.outfile = title_in plt.savefig(self.outfile, dpi=80, format="png", bbox_inches='tight', pad_inches=0.003) else: plt.show() if __name__ == '__main__': ht = hmmtop.tools() ht.add_library('HT', fastafile.name) ht.scan_libraries() try: tms = ht.results['HT']['what'].values() except: tms = [] eh = what(seq, window, angle) eh.graph(tms, 'Hydropathy & Amphipathicity', True) print '<HTML><head><title>Hydropathy & Amphipathicty plot</title></head><body>' print "<img src ='%s'><br><br>" % eh.outfile print '<font color="Blue">Blue lines denote Hydropathy</font><br>' print '<font color="Red">Red lines denote Amphipathicity</font><br>' print '<font color="#C5860D">Orange bars mark transmembrane segments as predicted by HMMTOP</font>' print '</body></HTML>'