def map_libraries(self):
    """Map to arbitrary rna sequence libraries.

    Passes ``self.files`` and ``self.libraries`` to ``base.map_rnas`` and
    writes the two returned tables to ``rna_found.csv`` and
    ``rna_counts.csv`` inside ``self.output``, then calls ``plot_results``.

    Returns:
        None. Returns early (after printing a message) when no libraries
        are configured or when ``base.map_rnas`` returns no result table.
    """

    out = self.output
    libraries = self.libraries
    # Truthiness covers None as well as '' and empty sequences; the previous
    # len() check raised TypeError when libraries was None.
    if not libraries:
        print('no libraries to map to')
        return

    # map to provided libraries
    print('mapping to these libraries: %s' % libraries)
    res, counts = base.map_rnas(self.files, libraries, self.temp_path,
                                aligner=self.aligner,
                                samplelabels=self.labels,
                                params=self.aligner_params)
    if res is None:
        print('empty data returned. did alignments run?')
        return

    res.to_csv(os.path.join(out, 'rna_found.csv'), index=False)
    counts.to_csv(os.path.join(out, 'rna_counts.csv'), index=False)
    # Announce success only once the files have actually been written.
    print('results saved to rna_counts.csv')
    plot_results(res, out)
    return
def map_libraries(self):
    """Map to arbitrary rna sequence libraries.

    Hands ``self.files`` and ``self.libraries`` to ``base.map_rnas``; the
    first returned table is saved as ``rna_found.csv`` and the second as
    ``rna_counts.csv`` under ``self.output``. The first table is then passed
    to ``plot_results``.

    Returns:
        None. Nothing is written when ``self.libraries`` is empty/None or
        when ``base.map_rnas`` returns ``None`` as its first result.
    """

    out = self.output
    libraries = self.libraries
    # `not libraries` also catches None, which the old
    # `libraries == '' or len(libraries) == 0` test crashed on.
    if not libraries:
        print('no libraries to map to')
        return

    # map to provided libraries
    print('mapping to these libraries: %s' % libraries)
    res, counts = base.map_rnas(
        self.files,
        libraries,
        self.temp_path,
        aligner=self.aligner,
        samplelabels=self.labels,
        params=self.aligner_params,
    )
    if res is None:
        print('empty data returned. did alignments run?')
        return

    res.to_csv(os.path.join(out, 'rna_found.csv'), index=False)
    counts.to_csv(os.path.join(out, 'rna_counts.csv'), index=False)
    # Message moved after the writes so it is never printed for files that
    # failed to save.
    print('results saved to rna_counts.csv')
    plot_results(res, out)
    return
def map_mirnas(self):
    """Map miRNAs using mirbase with isomir counts and do novel prediction
    if a reference genome and index is provided.

    Steps, in order:

    1. ``base.map_mirbase`` is run on ``self.files``; its results are written
       to ``results.csv``, ``mirbase_mature_counts.csv`` and
       ``all_counts.csv`` in ``self.output`` and plotted via
       ``plot_results``.
    2. ``base.map_isomirs`` is run and, when it yields counts,
       ``isomir_counts.csv`` is written.
    3. If ``self.ref_fasta`` exists, a reference index is available and
       ``check_viennarna()`` does not report False, novel miRNAs are
       predicted with ``novel.find_mirnas``; ``novel_mirna.csv``,
       ``novel.fa``, ``novel.html`` and ``novel_mirna_counts.csv`` are
       written.

    Side effects:
        Sets ``self.aligner_params['mirbase-<species>']`` and
        ``novel.VERBOSE``.

    Returns:
        None.
    """

    out = self.output
    libraries = self.libraries
    temp = self.temp_path
    ref_name = self.ref_name
    mat_name = 'mirbase-%s' % self.species
    self.aligner_params[mat_name] = self.mirna_params
    novel.VERBOSE = self.verbose

    # NOTE(review): explicit comparison kept on purpose; a None return from
    # check_index would not be treated as "missing" - confirm its contract
    # before simplifying to `not ...`.
    if self.check_index(ref_name) == False:
        print('no index for reference genome')
        # An empty name doubles as the "no genome index" marker below.
        ref_name = ''

    print('mapping miRNAs..')
    res, counts = base.map_mirbase(self.files, outpath=temp, indexes=libraries,
                                   species=self.species, ref_genome=ref_name,
                                   pad5=self.pad5, pad3=self.pad3,
                                   aligner=self.aligner,
                                   samplelabels=self.labels,
                                   params=self.aligner_params,
                                   verbose=self.verbose)

    # separate out mature counts and save
    matcounts = counts[counts.ref == mat_name]
    res.to_csv(os.path.join(out, 'results.csv'), index=False)
    # Rows whose ref equals the genome index name are dropped before plotting.
    res = res[res.ref != ref_name]
    matcounts.to_csv(os.path.join(out, 'mirbase_mature_counts.csv'),
                     index=False, float_format='%.1f')
    counts.to_csv(os.path.join(out, 'all_counts.csv'),
                  index=False, float_format='%.1f')
    plot_results(res, out)

    # isomir counting
    print()
    print('counting isomirs..')
    _, isocounts = base.map_isomirs(self.files, temp, self.species,
                                    samplelabels=self.labels)
    # Guard against a missing counts table instead of failing on
    # None.to_csv(); this matches the handling in the other revision of
    # this method.
    if isocounts is not None:
        isocounts.to_csv(os.path.join(out, 'isomir_counts.csv'),
                         index=False, float_format='%.1f')
    else:
        print('no isomirs could be counted')

    # novel prediction
    if self.ref_fasta == '' or not os.path.exists(self.ref_fasta):
        print('no reference genome file, skipping novel mirna step')
    elif ref_name is None or ref_name == '':
        print('no index for ref genome, required for novel mirna step')
    elif check_viennarna() == False:
        print('Vienna RNA package not installed')
        print('see https://www.tbi.univie.ac.at/RNA/')
    else:
        print()
        print('predicting novel mirnas..')
        start = time.time()
        # change map_rnas so it can use remaining files from previous run....?
        allreads = utils.combine_aligned_reads(temp, idx=ref_name)
        new, cl = novel.find_mirnas(allreads, self.ref_fasta,
                                    species=self.species,
                                    score_cutoff=float(self.score_cutoff),
                                    read_cutoff=int(self.read_cutoff),
                                    cpus=self.cpus)
        if new is None or len(new) == 0:
            print('could not find any novel mirnas at this score cutoff')
            return
        if self.strict == True:
            # Strict mode keeps only rows whose mature_check column is 'ok'.
            new = new[new.mature_check == 'ok']
            print('filtered %s' % len(new))
        new.to_csv(os.path.join(out, 'novel_mirna.csv'), index=False)

        # pad mature novel and write to fasta for counting
        novpad = base.get_mature_padded(new, idkey='mature_id', seqkey='mature')
        novpad = novpad.drop_duplicates('name')
        utils.dataframe_to_fasta(novpad, os.path.join(out, 'novel.fa'),
                                 seqkey='sequence', idkey='name')
        novel.create_report(new, cl, self.species,
                            outfile=os.path.join(out, 'novel.html'))

        # now count novel mirnas for all samples
        build_indexes(os.path.join(out, 'novel.fa'), self.index_path)
        _, nc = base.map_rnas(self.files, ['novel'], self.temp_path,
                              aligner=self.aligner, samplelabels=self.labels)
        nc.to_csv(os.path.join(out, 'novel_mirna_counts.csv'), index=False)
        end = round(time.time() - start, 1)
        print('took %s seconds' % str(end))
    return
def map_mirnas(self):
    """Run the miRNA workflow: mirbase mapping, isomir counting and,
    when a reference genome and its index are available, novel prediction.

    Files written into ``self.output`` along the way: ``results.csv``,
    ``mirbase_mature_counts.csv``, ``all_counts.csv``, ``isomir_counts.csv``
    (only if isomir counts were returned) and, for the novel step,
    ``novel_mirna.csv``, ``novel.fa``, ``novel.html`` and
    ``novel_mirna_counts.csv``.

    Side effects:
        Stores the raw mapping table in ``self.results`` and the per-sample
        fractions in ``self.samples``; sets
        ``self.aligner_params['mirbase-<species>']`` and ``novel.VERBOSE``;
        calls ``novel.create_classifier()`` before deciding whether the
        novel step can run.

    Returns:
        None.
    """

    outdir = self.output
    indexes = self.libraries
    tmpdir = self.temp_path
    genome_idx = self.ref_name
    mature_key = 'mirbase-%s' % self.species
    self.aligner_params[mature_key] = self.mirna_params
    novel.VERBOSE = self.verbose

    def outfile(name):
        # Build a path inside the output folder.
        return os.path.join(outdir, name)

    # Keyword set shared by every count table written with one decimal.
    count_fmt = dict(index=False, float_format='%.1f')

    if self.check_index(genome_idx) == False:
        print('no index for reference genome')
        genome_idx = ''

    print('mapping miRNAs..')
    mapped, all_counts = base.map_mirbase(
        self.files,
        outpath=tmpdir,
        indexes=indexes,
        species=self.species,
        ref_genome=genome_idx,
        pad5=self.pad5,
        pad3=self.pad3,
        aligner=self.aligner,
        samplelabels=self.labels,
        params=self.aligner_params,
        verbose=self.verbose,
    )
    self.results = mapped

    # separate out mature counts and save
    mature_counts = all_counts[all_counts.ref == mature_key]
    mapped.to_csv(outfile('results.csv'), index=False)
    # From here on only rows not assigned to the genome index are used.
    mapped = mapped[mapped.ref != genome_idx]
    mature_counts.to_csv(outfile('mirbase_mature_counts.csv'), **count_fmt)
    all_counts.to_csv(outfile('all_counts.csv'), **count_fmt)

    # get fractions per sample and plot results
    pivoted = base.pivot_count_data(mapped, idxcols=['name', 'ref'])
    fractions = base.get_fractions_mapped(mapped)
    self.samples = fractions
    print(fractions)
    plot_results(fractions, pivoted, outdir)

    # isomir counting
    print()
    print('counting isomirs..')
    iso, iso_counts = base.map_isomirs(self.files, tmpdir, self.species,
                                       samplelabels=self.labels)
    if iso_counts is None:
        print('no isomirs could be counted')
    else:
        iso_counts.to_csv(outfile('isomir_counts.csv'), **count_fmt)

    # novel prediction
    # train classifier first if not present
    novel.create_classifier()

    # Each unmet prerequisite prints its reason and ends the method.
    if self.ref_fasta == '' or not os.path.exists(self.ref_fasta):
        print('no reference genome file, skipping novel mirna step')
        return
    if genome_idx in (None, ''):
        print('no index for ref genome, required for novel mirna step')
        return
    if check_viennarna() == False:
        print('Vienna RNA package not installed')
        print('see https://www.tbi.univie.ac.at/RNA/')
        return

    print()
    print('predicting novel mirnas..')
    started = time.time()
    # change map_rnas so it can use remaining files from previous run....?
    reads = utils.combine_aligned_reads(tmpdir, idx=genome_idx)
    candidates, clusters = novel.find_mirnas(
        reads,
        self.ref_fasta,
        species=self.species,
        score_cutoff=float(self.score_cutoff),
        read_cutoff=int(self.read_cutoff),
        cpus=self.cpus,
    )
    if candidates is None or len(candidates) == 0:
        print('Could not find any novel mirnas.')
        print('There may not be sufficient aligned reads or the score cutoff is too high.\n')
        return
    if self.strict == True:
        # Strict mode keeps only rows whose mature_check column is 'ok'.
        candidates = candidates[candidates.mature_check == 'ok']
        print('filtered %s' % len(candidates))
    candidates.to_csv(outfile('novel_mirna.csv'), index=False)

    # pad mature novel and write to fasta for counting
    novel_fasta = outfile('novel.fa')
    padded = base.get_mature_padded(candidates, idkey='mature_id',
                                    seqkey='mature')
    padded = padded.drop_duplicates('name')
    utils.dataframe_to_fasta(padded, novel_fasta,
                             seqkey='sequence', idkey='name')
    novel.create_report(candidates, clusters, self.species,
                        outfile=outfile('novel.html'))

    # now count novel mirnas for all samples
    build_indexes(novel_fasta, self.index_path)
    novel_res, novel_counts = base.map_rnas(self.files, ['novel'],
                                            self.temp_path,
                                            aligner=self.aligner,
                                            samplelabels=self.labels)
    novel_counts.to_csv(outfile('novel_mirna_counts.csv'), index=False)
    elapsed = round(time.time() - started, 1)
    print('took %s seconds' % elapsed)
    return