Ejemplo n.º 1
0
    def analyze_assembly_rna(self, query=None, name=None):
        """
        Run run_rna_analysis.py and then analyze_rna_hits.py for a query.

        Neither command is run when no query is given.

        query -- positional argument handed to run_rna_analysis.py
        name  -- prefix for the output files; defaults to get_output_header()
        """
        self.print_start_of_process_pid()

        if not query:
            return

        prefix = name if name else self.get_output_header()

        def work_path(suffix):
            # File in the work directory named from the prefix and suffix.
            return os.path.join(
                self.get_work_dir(),
                self.__concatenate_file_name(prefix, suffix))

        rnammer_hits = work_path('rnammer.out')
        rdp_hits = work_path('rdp.out')
        table_prefix = os.path.join(self.get_table_dir(), prefix)

        # The second command reads the two files written by the first.
        commands = (
            ['run_rna_analysis.py', '-c', '-f', rnammer_hits, '-r',
             rdp_hits, query],
            ['analyze_rna_hits.py', '-c', '-r', rdp_hits, '-o',
             table_prefix, rnammer_hits],
        )
        for argv in commands:
            self.print_and_run_command(RunCommand(argv))

        self.print_end_of_process_pid()
Ejemplo n.º 2
0
def main():
    # set up command
    cmd = None
    task = options.task

    if options.make_blastdb:
        type = 'nucl'
        if options.is_protein:
            type = 'prot'
        cmd_list = [constant.MAKEBLASTDB, '-in', args[0], '-dbtype', type]
        rc = RunCommand(cmd_list)
        print "Running command:  " + rc.get_command() + '\n'
        rc.run_command()

    if options.vec_screen:
        cmd = NcbiblastnCommandline(query=args[1], db=args[0], evalue=700, outfmt=options.outfmt, reward=1,
            penalty=-5, gapopen=3, gapextend=3, dust='yes', searchsp=1750000000000,
            out=options.output, task=task, num_threads=options.threads)
    elif options.ncbi_screen:
        # Note: new NCBI requirements say to use lcase_masking (See GAAG-510 for documentation)
        cmd = NcbiblastnCommandline(query=args[1], db=args[0], outfmt=options.outfmt, dust='yes',
            perc_identity=90, lcase_masking='true', task='megablast',
            out=options.output, num_threads=options.threads)
        #Blast documentation says soft_masking option if not specified defaults to true so even though
        #this biopython class does not support it the feature should be enabled by default
    elif options.mito_screen:
        # Note: new NCBI requirements say to use lcase_masking (See GAAG-510 for documentation)
        cmd = NcbiblastnCommandline(query=args[1], db=args[0], outfmt=options.outfmt, dust='yes',
            perc_identity=98.6, lcase_masking='true', task='blastn',
            out=options.output, num_threads=options.threads)
        #Blast documentation says soft_masking option if not specified defaults to true so even though
        #this biopython class does not support it the feature should be enabled by default
    elif options.rRNA_screen:
        cmd = NcbiblastnCommandline(query=args[1], db=args[0], outfmt=options.outfmt, dust='yes',
            perc_identity=95, lcase_masking='', task='megablast',
            out=options.output, num_threads=options.threads, evalue=1e-9, window_size=120, gapextend=2, gapopen=4,
            no_greedy='', penalty=-4, reward=3, word_size=12, xdrop_gap=20)
            #Options not supported: in_pssm='',soft_masking='true', matrix=5000000, max_intron_length=18, db_gencode=3,
    else:
        if options.task in TASKS:
            program = TASKS[options.task]
            if re.search("Ncbiblastn", str(program)):
                cmd = program(query=args[1], db=args[0], evalue=options.evalue,
                    outfmt=options.outfmt, out=options.output,
                    num_threads=options.threads, max_target_seqs=options.max_targets,
                    task=task)
            else:
                cmd = program(query=args[1], db=args[0], evalue=options.evalue,
                    outfmt=options.outfmt, out=options.output,
                    num_threads=options.threads, max_target_seqs=options.max_targets)
        else:
            print "Unrecognized blast task, " + options.task
            sys.exit(-1)

    print "Running BLAST command:  " + str(cmd) + '\n'
    out, err = cmd()
#    print "out: out\terr: err"

    return 0
Ejemplo n.º 3
0
def main():
    """
    Build the insert size command for args[0] and run it.

    The output header is options.output when set, otherwise it is derived
    from args[0] with __make_prefix_from_files.  Returns 0.
    """
    output_header = options.output or __make_prefix_from_files(args[0])

    insert_size_cmd = __build_insert_size_command(
        args[0], output_header, options.insert_size, options.std_dev)
    RunCommand(insert_size_cmd).run_command()

    return 0
Ejemplo n.º 4
0
def main():

    delta = args[0]

    # get information about our inputs
    reference, query = _get_query_and_reference_from_delta_file(delta)
    query_seqs = _get_query_sequences(query)
    reference_list, reference_lengths = _get_reference_details(reference)

    # get alignment information
    command = _get_show_tilings_command(options.output, options.id,
                                        options.coverage, options.circular,
                                        delta)
    reference_tilings = _get_tilings_information(command)

    # see if we need circular query information
    circular_queries = {}
    if options.circular:
        command = _get_circular_show_tilings_command(options.output, delta)
        circular_queries = _get_circular_alignments(command)

    print "Ordering and orienting using", delta
    print "Reference", reference
    print "Query", query

    # get ono information from our gathered data
    ono_sequences_list, ono_details_list = _parse_tilings(
        reference_list, reference_lengths, reference_tilings, query_seqs,
        circular_queries)

    # print out our output
    _write_details_to_file(ono_details_list,
                           options.output + ".ono.details.txt")
    interim_fasta = options.output + ".interim.fasta"
    SeqIO.write(ono_sequences_list, interim_fasta, "fasta")
    make_assembly_command = [
        "make_standard_assembly_files.py", "-S", interim_fasta, "-o",
        options.output + ".ono"
    ]

    if options.rename:
        make_assembly_command += ['-r']

    rc = RunCommand(make_assembly_command)
    print "Executing", rc.get_command()
    rc.run_command()

    return 0
Ejemplo n.º 5
0
    def generate_bam_plots(self,
                           bam_files=[],
                           ref=None,
                           name=None,
                           ref_header='reference',
                           window_size=1000):
        """
        Run generate_bam_plots.py for a list of BAM files and a reference.

        Nothing is run unless both bam_files and ref are supplied.

        bam_files   -- list appended to the end of the command line
        ref         -- value passed with -g
        name        -- output prefix; defaults to get_output_header()
        ref_header  -- label joined onto name to form the analysis prefix
        window_size -- value passed with -w
        """
        self.print_start_of_process_pid()

        if not (bam_files and ref):
            return

        if not name:
            name = self.get_output_header()
        analysis_prefix = self.__concatenate_file_name(name, ref_header)

        # Data dump goes to the work directory, plots to the chart directory.
        details_path = os.path.join(
            self.get_work_dir(),
            self.__concatenate_file_name(analysis_prefix,
                                         'gc_cvg.details.txt'))
        plot_prefix = os.path.join(self.get_chart_dir(), analysis_prefix)
        histogram = self.__concatenate_file_name(plot_prefix, 'gc_cvg')

        fixed_args = [
            "generate_bam_plots.py",
            "-g", ref,
            "-d", details_path,
            "-w", str(window_size),
            "-o", plot_prefix,
            "-hi", histogram,
        ]
        self.print_and_run_command(RunCommand(fixed_args + bam_files))
        self.print_end_of_process_pid()
Ejemplo n.º 6
0
    def align_reads(self,
                    unmapped_bam=None,
                    ref=None,
                    threads=None,
                    aligner='BWA',
                    ref_header='reference',
                    make_index=True,
                    align_type='-s'):
        """
        Run align_reads.py on an unmapped BAM against a reference.

        Nothing is run (and None is returned) unless both unmapped_bam and
        ref are supplied.

        unmapped_bam -- input passed with -i; its last "unmapped.bam" is
                        replaced by ref_header to form the output header
        ref          -- value passed with -r
        threads      -- value passed with -t; defaults to get_num_threads()
        aligner      -- value passed with -a
        ref_header   -- label substituted into the output header
        make_index   -- when false, '-x' is added to the command
        align_type   -- flag inserted verbatim after the aligner

        Returns the output header joined with 'bam' by
        __concatenate_file_name.
        """
        self.print_start_of_process_pid()
        if unmapped_bam and ref:
            if not threads:
                threads = self.get_num_threads()

            # Replace only the last literal "unmapped.bam".  The previous
            # re.sub treated "." as a wildcard and rewrote every match, so a
            # directory component containing the text was altered as well.
            head, matched, tail = unmapped_bam.rpartition("unmapped.bam")
            if matched:
                output_header = head + ref_header + tail
            else:
                output_header = unmapped_bam

            cmd_list = [
                'align_reads.py', '-i', unmapped_bam, '-o', output_header,
                '-r', ref, '-a', aligner, align_type, '-t',
                str(threads), '-T',
                self.get_work_dir()
            ]
            if not make_index:
                cmd_list += ['-x']
            rc = RunCommand(cmd_list)
            self.print_and_run_command(rc)
            self.print_end_of_process_pid()

            return self.__concatenate_file_name(output_header, 'bam')
Ejemplo n.º 7
0
    def revert_to_bam(self, files=[], output_base='reads', direction='fr'):
        """
        Produce an unmapped BAM in the work directory from the given files.

        If any entry of files is a BAM that is not aligned (according to
        __is_bam and __is_aligned_bam) the output path is made a symlink to
        the first such entry.  Otherwise read_format_converter.py is run on
        all the files.

        files       -- input files; nothing happens when empty
        output_base -- prefix joined with 'unmapped.bam' for the output name
        direction   -- value passed with -d when set

        Returns the output path, or None when no files were given or the
        converter did not leave a file at that path.
        """
        self.print_start_of_process_pid()

        if not files:
            return None

        output_bam = os.path.join(
            self.get_work_dir(),
            self.__concatenate_file_name(output_base, 'unmapped.bam'))

        # An existing unaligned BAM is linked rather than converted.
        for candidate in files:
            if not self.__is_bam(candidate):
                continue
            if self.__is_aligned_bam(candidate):
                continue
            os.symlink(candidate, output_bam)
            self.print_end_of_process_pid()
            return output_bam

        converter_argv = ['read_format_converter.py', '-o', output_bam]
        if direction:
            converter_argv.extend(['-d', direction])
        converter_argv.extend(files)

        self.print_and_run_command(RunCommand(converter_argv))
        self.print_end_of_process_pid()

        return output_bam if os.path.exists(output_bam) else None
Ejemplo n.º 8
0
    def run_nucmer(self,
                   query=None,
                   ref=None,
                   name=None,
                   extension='ref_vs_assembly'):
        """
        Run run_nucmer.py (with --mummerplot) for a reference and a query.

        Nothing is run (and None is returned) unless both query and ref are
        supplied.  After the run, the prefix's 'png' file is passed to
        link_files together with the chart directory.

        name      -- output prefix; defaults to get_output_header()
        extension -- label joined onto name to form the work-dir prefix

        Returns the prefix joined with 'coords' by __concatenate_file_name.
        """
        self.print_start_of_process_pid()

        if not (query and ref):
            return

        base_name = name or self.get_output_header()
        prefix = os.path.join(
            self.get_work_dir(),
            self.__concatenate_file_name(base_name, extension))

        nucmer = RunCommand(
            ['run_nucmer.py', '--mummerplot', '-p', prefix, ref, query])
        self.print_and_run_command(nucmer)

        chart_dir = self.get_chart_dir()
        plot_png = self.__concatenate_file_name(prefix, 'png')
        self.link_files(chart_dir, [plot_png])

        self.print_end_of_process_pid()

        return self.__concatenate_file_name(prefix, 'coords')
Ejemplo n.º 9
0
    def make_detailed_table(self, name=None, aligned_bam_dict=None, contigs=None, agp=None, taxonomy_output=None):
        """
        Run make_detailed_assembly_table.py for a contigs file and an AGP.

        Nothing is run (and None is returned) unless both contigs and agp
        are supplied.

        name             -- table prefix; defaults to get_output_header()
        aligned_bam_dict -- read as aligned_bam_dict[type][group]['file'];
                            each type adds a "--<type>_bam" option whose value
                            is that type's files joined by commas
        contigs          -- last positional argument of the command
        agp              -- value passed with -a
        taxonomy_output  -- value passed with -t when set

        Returns the table prefix joined with 'contig_detail.table.txt' by
        __concatenate_file_name.
        """
        self.print_start_of_process_pid()

        if not (contigs and agp):
            return

        if not name:
            name = self.get_output_header()
        out_header = os.path.join(self.get_table_dir(), name)

        table_argv = ['make_detailed_assembly_table.py',
                      '-c', out_header,
                      '-s', out_header,
                      '-a', agp]
        if taxonomy_output:
            table_argv += ['-t', taxonomy_output]

        if aligned_bam_dict:
            for bam_type in aligned_bam_dict.keys():
                option = "--" + str(bam_type) + "_bam"
                groups = aligned_bam_dict[bam_type]
                joined_files = ','.join(
                    groups[group]['file'] for group in groups.keys())
                table_argv += [option, joined_files]

        table_argv.append(contigs)

        self.print_and_run_command(RunCommand(table_argv))
        self.print_end_of_process_pid()
        return self.__concatenate_file_name(out_header, 'contig_detail.table.txt')
Ejemplo n.º 10
0
    def run_blast(self,
                  query=None,
                  db=constant.BLAST_NT,
                  threads=None,
                  extension='blast.xml',
                  name=None,
                  blast_task='megablast'):
        """
        Run run_blast.py for a query against a database.

        Nothing is run (and None is returned) when no query is given.

        db         -- database argument placed before the query
        threads    -- value passed with -t; defaults to get_num_threads()
        extension  -- label joined onto the work-dir prefix for the output
        name       -- output prefix; defaults to get_output_header()
        blast_task -- value passed with -b

        Returns the path passed with -o.
        """
        self.print_start_of_process_pid()

        if not query:
            return

        thread_count = threads if threads else self.get_num_threads()
        base_name = name if name else self.get_output_header()

        work_prefix = os.path.join(self.get_work_dir(), base_name)
        blast_output = self.__concatenate_file_name(work_prefix, extension)

        blast_argv = ['run_blast.py',
                      '-o', blast_output,
                      '-b', blast_task,
                      '-t', str(thread_count),
                      db, query]
        self.print_and_run_command(RunCommand(blast_argv))

        self.print_end_of_process_pid()

        return blast_output
Ejemplo n.º 11
0
    def standardize_file_inputs(self, scaffolds=None, contigs=None, agp=None, minScaffSize=1, minConSize=1, minGapSize=10, rename=True):
        # We will make the assembly files from the inputs to ensure minimum NCBI gap size is enforce
        #if contigs and agp and scaffolds:
        #    return self.copy_files(self.get_work_dir(),[scaffolds,contigs,agp])

        self.print_start_of_process_pid()
        
        if not contigs and not scaffolds:
            print "Must give a fasta file for make_standard_assembly_files."
            sys.exit(-1)
        
        out_header = os.path.join(self.get_work_dir(),self.get_output_header())
        cmd_list = ['make_standard_assembly_files.py','-o',out_header,'-s', str(minScaffSize),'-c', str(minConSize), '-g', str(minGapSize)]

        if rename:
            cmd_list += ['--rename']
         
        if contigs:
            cmd_list += ['-C', contigs]
        if agp:
            cmd_list += ['-A', agp]
        if scaffolds:
            cmd_list += ['-S', scaffolds]            

        rc = RunCommand(cmd_list)
        self.print_and_run_command(rc)
        self.print_end_of_process_pid()

        return self.__concatenate_file_name(out_header,"scaffolds.fasta"),\
               self.__concatenate_file_name(out_header, "contigs.fasta"), \
               self.__concatenate_file_name(out_header, "agp")
Ejemplo n.º 12
0
    def get_simple_bam_stats(self, bam_files=[], name=None, ref_header='reference'):
        """
        Run get_simple_bam_stats.py on a list of BAM files.

        Nothing is run when bam_files is empty.

        bam_files  -- list appended to the end of the command line
        name       -- output prefix; defaults to get_output_header()
        ref_header -- label joined onto name for the table-dir output
        """
        self.print_start_of_process_pid()

        if not bam_files:
            return

        if not name:
            name = self.get_output_header()
        stats_prefix = os.path.join(
            self.get_table_dir(),
            self.__concatenate_file_name(name, ref_header))

        stats_argv = ['get_simple_bam_stats.py', '-o', stats_prefix] + bam_files
        self.print_and_run_command(RunCommand(stats_argv))
        self.print_end_of_process_pid()
Ejemplo n.º 13
0
    def parse_blast_xml(self, blast_xml=None):
        """
        Run parse_blast_xml.py on a BLAST XML file.

        Nothing is run (and None is returned) when blast_xml is not given.

        blast_xml -- input path; the output path is this path with its last
                     "xml" replaced by "parsed.txt"

        Returns the output path passed with -o.
        """
        self.print_start_of_process_pid()

        if blast_xml:
            # Rewrite only the last "xml" (normally the extension).  The
            # previous re.sub replaced every occurrence, so a directory or
            # file stem containing "xml" produced a wrong output path.
            head, matched, tail = blast_xml.rpartition("xml")
            if matched:
                parse_output = head + "parsed.txt" + tail
            else:
                parse_output = blast_xml

            rc = RunCommand(
                ['parse_blast_xml.py', '-o', parse_output, blast_xml])
            self.print_and_run_command(rc)

            self.print_end_of_process_pid()
            return parse_output
Ejemplo n.º 14
0
def main():

    if __check_inputs(options.classify, options.rdp_out):
        rnammer_cmd = __build_rnammer_cmd(args[0], options.rnammer_out,
                                          options.gene, options.superkingdom)
        rc = RunCommand(rnammer_cmd)
        print "RUNNING:  " + rc.get_command()
        rc.run_command()

        if options.classify:
            rdp_cmd = __build_rdp_cmd(options.rnammer_out, options.rdp_out)
            rc = RunCommand(rdp_cmd)
            print "RUNNING:  " + rc.get_command()
            rc.run_command()

        return 0

    else:
        print "If classifying hits, must supply classify flag and rdp output file."
        sys.exit(-1)
Ejemplo n.º 15
0
def _get_circular_show_tilings_command(output_header, delta):
    """
    Return a RunCommand for the circular show-tilings invocation.

    The argument list is fixed apart from the '-u' value, which is
    output_header + '.unplaced'; it is combined with the nucmer delta file
    by _build_showtiling_command.
    """
    unplaced = output_header + '.unplaced'
    circular_args = ['-R', '-a', '-v 5', '-g -1', '-V 0', '-u', unplaced, '-c']
    showtiling_argv = _build_showtiling_command(delta, circular_args)
    return RunCommand(showtiling_argv)
Ejemplo n.º 16
0
    def get_kmer_copy_number(self, fasta=None, name=None, kmer_size=29):
        """
        Run kmer_copy_number.py on a fasta file.

        Nothing is run when no fasta is given.

        name      -- table-dir output name; defaults to get_output_header()
        kmer_size -- value passed with -k
        """
        self.print_start_of_process_pid()

        if not fasta:
            return

        base_name = name if name else self.get_output_header()
        table_prefix = os.path.join(self.get_table_dir(), base_name)

        copy_number_argv = ['kmer_copy_number.py',
                            '-k', str(kmer_size),
                            '-o', table_prefix,
                            fasta]
        self.print_and_run_command(RunCommand(copy_number_argv))

        self.print_end_of_process_pid()
Ejemplo n.º 17
0
    def run_kmer_coverage(self, ref=None, query=None, name=None, kmer_size=29):
        """
        Run kmer_coverage.py for a reference and a query.

        Nothing is run unless both ref and query are supplied.

        name      -- table-dir output name; defaults to get_output_header()
        kmer_size -- value passed with -k
        """
        self.print_start_of_process_pid()

        if not (ref and query):
            return

        base_name = name if name else self.get_output_header()
        table_prefix = os.path.join(self.get_table_dir(), base_name)

        coverage_argv = ['kmer_coverage.py',
                         '-k', str(kmer_size),
                         '-o', table_prefix,
                         ref, query]
        self.print_and_run_command(RunCommand(coverage_argv))

        self.print_end_of_process_pid()
Ejemplo n.º 18
0
    def plot_insert_size(self, insert_size_files=[], direction='fr', ref_header='reference', output_base='reads'):
        """
        Run plot_insert_size.py on a list of insert size files.

        Nothing is run when insert_size_files is empty.

        insert_size_files -- list appended to the end of the command line
        direction         -- value passed with -d
        ref_header        -- label joined onto output_base for output names
        output_base       -- prefix of the output names

        The same joined name is used under the table directory (passed with
        -m) and under the chart directory (passed with -o).
        """
        self.print_start_of_process_pid()

        if not insert_size_files:
            return

        table_target = os.path.join(
            self.get_table_dir(),
            self.__concatenate_file_name(output_base, ref_header))
        chart_target = os.path.join(
            self.get_chart_dir(),
            self.__concatenate_file_name(output_base, ref_header))

        fixed_args = ['plot_insert_size.py',
                      '-o', chart_target,
                      '-m', table_target,
                      '-d', direction]
        self.print_and_run_command(RunCommand(fixed_args + insert_size_files))
        self.print_end_of_process_pid()
Ejemplo n.º 19
0
    def run_scaffold_accuracy(self, ref=None, query=None, name=None):
        """
        Run run_scaffold_accuracy.py for a reference and a query.

        Nothing is run unless both ref and query are supplied.

        name -- output name; defaults to get_output_header().  It is placed
                under the work directory for -o and under the table directory
                for -t.
        """
        self.print_start_of_process_pid()

        if not (ref and query):
            return

        base_name = name if name else self.get_output_header()
        work_target = os.path.join(self.get_work_dir(), base_name)
        table_target = os.path.join(self.get_table_dir(), base_name)

        accuracy_argv = ['run_scaffold_accuracy.py',
                         '-o', work_target,
                         '-t', table_target,
                         ref, query]
        self.print_and_run_command(RunCommand(accuracy_argv))

        self.print_end_of_process_pid()
Ejemplo n.º 20
0
    def get_bam_coverage_stats(self, bam_files=[], name=None, ref_header='reference', want_phys_cvg=False):
        """
        Run get_bam_coverage_stats.py on a set of BAM files.

        Nothing is run when bam_files is empty.

        bam_files     -- appended to the end of the command line
        name          -- output prefix; defaults to get_output_header()
        ref_header    -- label joined onto name for the table-dir output
        want_phys_cvg -- when true, '-p' is added to the command
        """
        self.print_start_of_process_pid()

        if not bam_files:
            return

        if not name:
            name = self.get_output_header()
        stats_prefix = os.path.join(
            self.get_table_dir(),
            self.__concatenate_file_name(name, ref_header))

        coverage_argv = ['get_bam_coverage_stats.py', '-o', stats_prefix]
        if want_phys_cvg:
            coverage_argv.append('-p')
        coverage_argv.extend(bam_files)

        self.print_and_run_command(RunCommand(coverage_argv))
        self.print_end_of_process_pid()
Ejemplo n.º 21
0
    def compare_to_reference(self, coords_file=None, name=None):
        """
        Run compare_to_reference.py on a coords file.

        Nothing is run when no coords_file is given.

        coords_file -- value passed with -c
        name        -- table-dir output name; defaults to get_output_header()
        """
        self.print_start_of_process_pid()

        if not coords_file:
            return

        base_name = name if name else self.get_output_header()
        table_prefix = os.path.join(self.get_table_dir(), base_name)

        compare_argv = ['compare_to_reference.py',
                        '-o', table_prefix,
                        '-n',
                        '-c', coords_file]
        self.print_and_run_command(RunCommand(compare_argv))

        self.print_end_of_process_pid()
Ejemplo n.º 22
0
    def identify_coverage_anomalies(self, bam_files=[], name=None, ref_header='reference', window_size=1000):
        """
        Run identify_coverage_anomalies.py on a list of BAM files.

        Nothing is run unless both bam_files and ref_header are non-empty.

        bam_files   -- list placed between the options and the output path
        name        -- output prefix; defaults to get_output_header()
        ref_header  -- label joined onto name to form the analysis prefix
        window_size -- value passed with --window_size

        The last argument is the chart-dir path built from the analysis
        prefix joined with "coverage_anomalies".
        """
        self.print_start_of_process_pid()

        if not (bam_files and ref_header):
            return

        if not name:
            name = self.get_output_header()
        analysis_prefix = self.__concatenate_file_name(name, ref_header)
        anomalies_path = os.path.join(
            self.get_chart_dir(),
            self.__concatenate_file_name(analysis_prefix, "coverage_anomalies"))

        options_part = ['identify_coverage_anomalies.py',
                        '--window_size', str(window_size)]
        anomalies_argv = options_part + bam_files + [anomalies_path]

        self.print_and_run_command(RunCommand(anomalies_argv))
        self.print_end_of_process_pid()
Ejemplo n.º 23
0
    def parse_blast_xml(self, blast_xml=None, no_blast_filter=False ):
        """
        Run parse_blast_xml.py on a BLAST XML file.

        Nothing is run (and None is returned) when blast_xml is not given.

        blast_xml       -- input path; the output path is this path with its
                           last "xml" replaced by "parsed.txt"
        no_blast_filter -- when true, '--no_filter' is added to the command

        Returns the output path passed with -o.
        """
        self.print_start_of_process_pid()

        if blast_xml:
            # Rewrite only the last "xml" (normally the extension).  The
            # previous re.sub replaced every occurrence, so a directory or
            # file stem containing "xml" produced a wrong output path.
            head, matched, tail = blast_xml.rpartition("xml")
            if matched:
                parse_output = head + "parsed.txt" + tail
            else:
                parse_output = blast_xml

            cmd = ['parse_blast_xml.py', '-o', parse_output]
            if no_blast_filter:
                cmd += ['--no_filter']
            rc = RunCommand(cmd + [blast_xml])
            self.print_and_run_command(rc)

            self.print_end_of_process_pid()
            return parse_output
Ejemplo n.º 24
0
def _get_show_tilings_command(output_header, id, coverage, circular, delta):
    """
    Return a RunCommand for the basic show-tilings invocation.

    output_header -- '-u' receives output_header + '.unplaced'
    id            -- identity cutoff, passed with -i
    coverage      -- coverage cutoff, passed with -v
    circular      -- when true, '-c' is added for circular alignments
    delta         -- nucmer delta file handed to _build_showtiling_command
    """
    unplaced = output_header + '.unplaced'
    tiling_args = ['-R', '-u', unplaced, "-i", id, "-v", coverage]
    if circular:
        tiling_args.append("-c")

    showtiling_argv = _build_showtiling_command(delta, tiling_args)
    return RunCommand(showtiling_argv)
Ejemplo n.º 25
0
    def analyze_gap_ends(self, name=None, contigs=None, agp=None, extension='analyze_gap_ends'):
        """
        Run analyze_gap_ends.py for a contigs file and an AGP.

        Nothing is run unless both contigs and agp are supplied.

        name      -- output name; defaults to get_output_header()
        extension -- label joined onto the chart-dir prefix passed with -c

        The table-dir path for name is passed with -t.
        """
        self.print_start_of_process_pid()

        if not (contigs and agp):
            return

        base_name = name if name else self.get_output_header()
        table_target = os.path.join(self.get_table_dir(), base_name)
        chart_prefix = os.path.join(self.get_chart_dir(), base_name)
        chart_target = self.__concatenate_file_name(chart_prefix, extension)

        gap_argv = ['analyze_gap_ends.py',
                    '-c', chart_target,
                    '-t', table_target,
                    contigs, agp]
        self.print_and_run_command(RunCommand(gap_argv))
        self.print_end_of_process_pid()
Ejemplo n.º 26
0
 def blast_bubbles(self, name=None, taxonomy_output=None, contig_detail=None):
     """
     Run blast_bubbles.py for a contig detail file and a taxonomy output.

     Nothing is run unless both taxonomy_output and contig_detail are
     supplied.

     name -- output name; defaults to get_output_header().  It is placed
             under the chart directory for -o and under the work directory
             for -v.
     """
     self.print_start_of_process_pid()

     if not (taxonomy_output and contig_detail):
         return

     base_name = name if name else self.get_output_header()
     chart_target = os.path.join(self.get_chart_dir(), base_name)
     detail_target = os.path.join(self.get_work_dir(), base_name)

     bubbles_argv = ['blast_bubbles.py',
                     '-v', detail_target,
                     '-o', chart_target,
                     contig_detail, taxonomy_output]
     self.print_and_run_command(RunCommand(bubbles_argv))

     self.print_end_of_process_pid()
Ejemplo n.º 27
0
    def get_blast_hit_taxonomy(self, parsed_blast=None, query=None, nodes=constant.BLAST_NODES, names=constant.BLAST_NAMES, name=None):
        """
        Run get_blast_hit_taxonomy.py for a parsed BLAST file and a query.

        Nothing happens (and None is returned) unless both parsed_blast and
        query are supplied.

        parsed_blast -- input path; the heatmap path is this path with its
                        last "txt" replaced by "heatmap"
        query        -- last positional argument of the command
        nodes, names -- only checked for truthiness here: the command is run
                        when both are set; neither is placed on the command
                        line
        name         -- table-dir output name; defaults to
                        get_output_header()

        Returns the heatmap path, including when the command was skipped
        because nodes or names was empty.
        """
        self.print_start_of_process_pid()

        if parsed_blast and query:
            if not name:
                name = self.get_output_header()
            tax_output = os.path.join(self.get_table_dir(), name)

            # Rewrite only the last "txt" (normally the extension).  The
            # previous re.sub replaced every occurrence, so a directory or
            # file stem containing "txt" produced a wrong heatmap path.
            head, matched, tail = parsed_blast.rpartition("txt")
            if matched:
                taxonomy_heatmap = head + "heatmap" + tail
            else:
                taxonomy_heatmap = parsed_blast

            if nodes and names:
                rc = RunCommand(['get_blast_hit_taxonomy.py', '-o', tax_output,'-m', taxonomy_heatmap, parsed_blast, query])
                self.print_and_run_command(rc)

                self.print_end_of_process_pid()

            return taxonomy_heatmap
Ejemplo n.º 28
0
    def blast_map(self, taxonomy_heatmap=None, agp=None, name=None):
        """
        Run blast_map.py on a taxonomy heatmap file.

        Nothing is run when no taxonomy_heatmap is given.

        taxonomy_heatmap -- last positional argument of the command
        agp              -- value passed with -g when set
        name             -- chart-dir output name; defaults to
                            get_output_header()
        """
        self.print_start_of_process_pid()

        if not taxonomy_heatmap:
            return

        base_name = name if name else self.get_output_header()
        chart_target = os.path.join(self.get_chart_dir(), base_name)

        map_argv = ['blast_map.py', '-o', chart_target]
        if agp:
            map_argv.extend(['-g', agp])
        map_argv.append(taxonomy_heatmap)

        self.print_and_run_command(RunCommand(map_argv))

        self.print_end_of_process_pid()
Ejemplo n.º 29
0
    def run_insert_size(self, bam_file=None, insert_size=None, std_dev=None):
        """
        Run run_insert_size_from_bam.py on a BAM file.

        Nothing is run (and None is returned) when no bam_file is given.

        bam_file    -- input BAM; always the last argument of the command.
                       The output header is this path with its last ".bam"
                       removed.
        insert_size -- value passed with -i when set
        std_dev     -- value passed with -s; only used when insert_size is
                       also set

        Returns the output header joined with 'insert_size.metrics' by
        __concatenate_file_name.
        """
        self.print_start_of_process_pid()

        if bam_file:
            # Remove only the last literal ".bam".  The previous re.sub
            # treated "." as a wildcard and removed every match, so a path
            # component such as "/bam" was stripped from the output header.
            head, matched, tail = bam_file.rpartition(".bam")
            output_header = head + tail if matched else bam_file

            cmd_list = ['run_insert_size_from_bam.py', '-o', output_header]
            if insert_size:
                cmd_list += ['-i', str(insert_size)]
                if std_dev:
                    cmd_list += ['-s', str(std_dev)]

            # The input file must be passed whether or not an insert size
            # was supplied; it used to be appended only inside the
            # insert_size branch, leaving the command without an input.
            cmd_list += [bam_file]

            rc = RunCommand(cmd_list)
            self.print_and_run_command(rc)
            self.print_end_of_process_pid()

            return self.__concatenate_file_name(output_header, 'insert_size.metrics')
Ejemplo n.º 30
0
    def get_basic_assembly_stats(self, name=None, contigs=None, agp=None, assembler='assembler', extension='cumulative_sizes', extra_args = []):
        self.print_start_of_process_pid()

        if not name:
            name = self.get_output_header()
        output = os.path.join(self.get_table_dir(), name)
        chart = self.__concatenate_file_name(os.path.join(self.get_chart_dir(), name), extension)        
        cmd_list = ['basic_assembly_stats.py','-n',name,'-a',assembler,'-o', output,'-C','-S','-t',chart] + extra_args

        if contigs and agp:
            cmd_list += ['-f', agp, contigs]
        elif not agp:
            cmd_list += [contigs]
        else:
            print "No file given for assembly stats."
            return
        
        rc = RunCommand(cmd_list)
        self.print_and_run_command(rc)

        self.print_end_of_process_pid()