예제 #1
0
    def map_libraries(self):
        """Map to arbitrary rna sequence libraries.

        Passes ``self.files`` and ``self.libraries`` to ``base.map_rnas`` and
        writes the two returned tables to ``rna_found.csv`` and
        ``rna_counts.csv`` inside ``self.output``, then plots the results.

        Only prints a message and returns when no libraries are configured
        (``None``, ``''`` or an empty sequence) or when the mapping step
        returns no data.

        Returns:
            None
        """

        out = self.output
        libraries = self.libraries
        # Truthiness covers None as well as '' and empty sequences; the
        # previous len() based check raised TypeError for None.
        if not libraries:
            print('no libraries to map to')
            return

        #map to provided libraries
        print('mapping to these libraries: %s' % libraries)
        res, counts = base.map_rnas(self.files,
                                    libraries,
                                    self.temp_path,
                                    aligner=self.aligner,
                                    samplelabels=self.labels,
                                    params=self.aligner_params)
        if res is None:
            print('empty data returned. did alignments run?')
            return
        res.to_csv(os.path.join(out, 'rna_found.csv'), index=False)
        counts.to_csv(os.path.join(out, 'rna_counts.csv'), index=False)
        # announce the save only once both files have actually been written
        print('results saved to rna_counts.csv')
        plot_results(res, out)
        return
예제 #2
0
파일: app.py 프로젝트: dmnfarrell/mirnaseq
    def map_libraries(self):
        """Map to arbitrary rna sequence libraries.

        Hands ``self.files`` and the configured ``self.libraries`` to
        ``base.map_rnas``. When data comes back, the first returned table is
        saved as ``rna_found.csv`` and the second as ``rna_counts.csv`` under
        ``self.output``, and ``plot_results`` is called on the first table.

        Nothing is written when there are no libraries (``None``, ``''`` or
        an empty sequence) or when ``base.map_rnas`` returns ``None`` as its
        first result; a message is printed instead.

        Returns:
            None
        """

        out = self.output
        libraries = self.libraries
        if not libraries:
            # also catches None, which the old `len(libraries) == 0` test
            # could not handle (TypeError)
            print ('no libraries to map to')
            return

        #map to provided libraries
        print ('mapping to these libraries: %s' %libraries)
        res, counts = base.map_rnas(self.files, libraries, self.temp_path,
                                    aligner=self.aligner,
                                    samplelabels=self.labels,
                                    params=self.aligner_params)
        if res is None:
            print ('empty data returned. did alignments run?')
            return

        found_path = os.path.join(out, 'rna_found.csv')
        counts_path = os.path.join(out, 'rna_counts.csv')
        res.to_csv(found_path, index=False)
        counts.to_csv(counts_path, index=False)
        # report success after the files exist rather than before
        print ('results saved to rna_counts.csv')
        plot_results(res, out)
        return
예제 #3
0
파일: app.py 프로젝트: dmnfarrell/mirnaseq
    def map_mirnas(self):
        """Map miRNAs using mirbase with isomir counts and do novel prediction
           if a reference genome and index is provided.

        Files written into ``self.output``:

        * ``results.csv``, ``mirbase_mature_counts.csv``, ``all_counts.csv``
          from the mirbase mapping step,
        * ``isomir_counts.csv`` when isomir counts are returned,
        * ``novel_mirna.csv``, ``novel.fa``, ``novel.html`` and
          ``novel_mirna_counts.csv`` when the novel prediction step runs and
          finds candidates.

        The novel prediction step is skipped (with a printed message) when
        ``self.ref_fasta`` is empty or missing on disk, when there is no index
        for the reference genome, or when ``check_viennarna()`` reports False.

        Returns:
            None
        """

        out = self.output
        libraries = self.libraries
        temp = self.temp_path
        ref_name = self.ref_name
        mat_name = 'mirbase-%s' %self.species
        # aligner settings for the mirbase reference are stored under its label
        self.aligner_params[mat_name] = self.mirna_params
        novel.VERBOSE = self.verbose

        if self.check_index(ref_name) == False:
            print ('no index for reference genome')
            # an empty name is passed on as ref_genome and later disables the
            # novel prediction branch below
            ref_name = ''

        print ('mapping miRNAs..')
        res, counts = base.map_mirbase(self.files, outpath=temp, indexes=libraries,
                                       species=self.species, ref_genome=ref_name,
                                       pad5=self.pad5, pad3=self.pad3, aligner=self.aligner,
                                       samplelabels=self.labels,
                                       params=self.aligner_params,
                                       verbose=self.verbose)

        #separate out mature counts and save
        matcounts = counts[counts.ref==mat_name]
        # the full result table is saved before reference-genome rows are removed
        res.to_csv( os.path.join(out, 'results.csv'),index=False )
        res = res[res.ref!=ref_name]
        matcounts.to_csv( os.path.join(out, 'mirbase_mature_counts.csv'), index=False,
                            float_format='%.1f' )
        counts.to_csv( os.path.join(out, 'all_counts.csv'), index=False, float_format='%.1f')
        plot_results(res, out)

        #isomir counting
        print ()
        print ('counting isomirs..')
        _, isocounts = base.map_isomirs(self.files, temp, self.species,
                                        samplelabels=self.labels)
        # map_isomirs may hand back None for the counts; guard the save so the
        # rest of the workflow still runs
        if isocounts is not None:
            isocounts.to_csv( os.path.join(out, 'isomir_counts.csv'),
                                index=False, float_format='%.1f')
        else:
            print ('no isomirs could be counted')

        #novel prediction
        if self.ref_fasta == '' or not os.path.exists(self.ref_fasta):
            print ('no reference genome file, skipping novel mirna step')
        elif ref_name == None or ref_name == '':
            print ('no index for ref genome, required for novel mirna step')
        elif check_viennarna() == False:
            print ('Vienna RNA package not installed')
            print ('see https://www.tbi.univie.ac.at/RNA/')
        else:
            print ()
            print ('predicting novel mirnas..')
            start = time.time()
            #change map_rnas so it can use remaining files from previous run....?

            allreads = utils.combine_aligned_reads(temp, idx=ref_name)
            new,cl = novel.find_mirnas(allreads, self.ref_fasta, species=self.species,
                                       score_cutoff=float(self.score_cutoff),
                                       read_cutoff=int(self.read_cutoff),
                                       cpus=self.cpus)
            if new is None or len(new) == 0:
                print ('could not find any novel mirnas at this score cutoff')
                return
            if self.strict == True:
                # keep only candidates whose mature_check column equals 'ok'
                new = new[new.mature_check=='ok']
                print ('filtered %s' %len(new))
            new.to_csv(os.path.join(out,'novel_mirna.csv'), index=False)

            #pad mature novel and write to fasta for counting
            novpad = base.get_mature_padded(new, idkey='mature_id', seqkey='mature')
            novpad = novpad.drop_duplicates('name')
            utils.dataframe_to_fasta(novpad,os.path.join(out,'novel.fa'),
                                     seqkey='sequence', idkey='name')
            novel.create_report(new, cl, self.species, outfile=os.path.join(out, 'novel.html'))

            #now count novel mirnas for all samples
            build_indexes(os.path.join(out,'novel.fa'), self.index_path)
            _, nc = base.map_rnas(self.files, ['novel'], self.temp_path,
                                  aligner=self.aligner,
                                  samplelabels=self.labels)
            nc.to_csv( os.path.join(out, 'novel_mirna_counts.csv'), index=False )
            end = round(time.time()-start,1)
            print ('took %s seconds' %str(end))
        return
예제 #4
0
    def map_mirnas(self):
        """Run the mirbase mapping workflow, isomir counting and, when
        possible, novel miRNA prediction.

        Side effects visible in this method:

        * ``self.results`` is set to the table returned by
          ``base.map_mirbase`` and ``self.samples`` to the output of
          ``base.get_fractions_mapped``.
        * ``results.csv``, ``mirbase_mature_counts.csv`` and
          ``all_counts.csv`` are written to ``self.output``;
          ``isomir_counts.csv`` is written when isomir counts are returned.
        * If a reference fasta exists, a reference index is available and
          ``check_viennarna()`` does not report False, novel prediction runs
          and may write ``novel_mirna.csv``, ``novel.fa``, ``novel.html`` and
          ``novel_mirna_counts.csv``.

        Returns:
            None
        """

        outdir = self.output
        indexes = self.libraries
        tmpdir = self.temp_path
        genome_idx = self.ref_name
        mature_ref = 'mirbase-%s' % self.species

        def outfile(filename):
            # all result files are placed directly inside the output folder
            return os.path.join(outdir, filename)

        self.aligner_params[mature_ref] = self.mirna_params
        novel.VERBOSE = self.verbose

        index_found = self.check_index(genome_idx)
        if index_found == False:
            print('no index for reference genome')
            genome_idx = ''

        print('mapping miRNAs..')
        res, counts = base.map_mirbase(
            self.files,
            outpath=tmpdir,
            indexes=indexes,
            species=self.species,
            ref_genome=genome_idx,
            pad5=self.pad5,
            pad3=self.pad3,
            aligner=self.aligner,
            samplelabels=self.labels,
            params=self.aligner_params,
            verbose=self.verbose,
        )
        # stored before the reference-genome rows are filtered out below
        self.results = res

        # pull out the rows labelled with the mirbase reference and save everything
        mature_counts = counts[counts.ref == mature_ref]
        res.to_csv(outfile('results.csv'), index=False)
        res = res[res.ref != genome_idx]
        mature_counts.to_csv(outfile('mirbase_mature_counts.csv'),
                             index=False, float_format='%.1f')
        counts.to_csv(outfile('all_counts.csv'),
                      index=False, float_format='%.1f')

        # per-sample fractions and pivoted counts feed the plots
        pivoted = base.pivot_count_data(res, idxcols=['name', 'ref'])
        fractions = base.get_fractions_mapped(res)
        self.samples = fractions
        print(fractions)
        plot_results(fractions, pivoted, outdir)

        # isomir counting
        print()
        print('counting isomirs..')
        _, isocounts = base.map_isomirs(self.files, tmpdir, self.species,
                                        samplelabels=self.labels)
        if isocounts is None:
            print('no isomirs could be counted')
        else:
            isocounts.to_csv(outfile('isomir_counts.csv'),
                             index=False, float_format='%.1f')

        # novel prediction: the classifier is created up front, before any of
        # the prerequisite checks
        novel.create_classifier()

        if self.ref_fasta == '' or not os.path.exists(self.ref_fasta):
            print('no reference genome file, skipping novel mirna step')
            return
        if genome_idx is None or genome_idx == '':
            print('no index for ref genome, required for novel mirna step')
            return
        if check_viennarna() == False:
            print('Vienna RNA package not installed')
            print('see https://www.tbi.univie.ac.at/RNA/')
            return

        print()
        print('predicting novel mirnas..')
        started = time.time()
        #change map_rnas so it can use remaining files from previous run....?

        aligned = utils.combine_aligned_reads(tmpdir, idx=genome_idx)
        candidates, clusters = novel.find_mirnas(
            aligned,
            self.ref_fasta,
            species=self.species,
            score_cutoff=float(self.score_cutoff),
            read_cutoff=int(self.read_cutoff),
            cpus=self.cpus,
        )
        if candidates is None or len(candidates) == 0:
            print('Could not find any novel mirnas.')
            print('There may not be sufficient aligned reads or the score cutoff is too high.\n')
            return
        # explicit comparison kept: only a value equal to True turns filtering on
        if self.strict == True:
            candidates = candidates[candidates.mature_check == 'ok']
            print('filtered %s' % len(candidates))
        candidates.to_csv(outfile('novel_mirna.csv'), index=False)

        # pad the mature novel sequences and write them to fasta for counting
        padded = base.get_mature_padded(candidates, idkey='mature_id', seqkey='mature')
        padded = padded.drop_duplicates('name')
        novel_fasta = outfile('novel.fa')
        utils.dataframe_to_fasta(padded, novel_fasta,
                                 seqkey='sequence', idkey='name')
        novel.create_report(candidates, clusters, self.species,
                            outfile=outfile('novel.html'))

        # count the novel miRNAs across all samples
        build_indexes(outfile('novel.fa'), self.index_path)
        _, novel_counts = base.map_rnas(self.files, ['novel'], self.temp_path,
                                        aligner=self.aligner,
                                        samplelabels=self.labels)
        novel_counts.to_csv(outfile('novel_mirna_counts.csv'), index=False)
        elapsed = round(time.time() - started, 1)
        print('took %s seconds' % str(elapsed))
        return