Beispiel #1
0
def main() :
    """
    Command line entry point.

    Reads the command name from argv[1], validates it, parses and checks
    the remaining arguments and then runs the WorkFlow method that has the
    same name as the command.

    Returns the value to use as the exit status: 1 when usage is printed,
    the command is unknown or has no WorkFlow method, 0 after the 'test'
    command, otherwise whatever the WorkFlow method returns.
    """
    help_flags = ('-h', '--help', 'help')

    if len(argv) < 2 or argv[1] in help_flags :
        usage()
        return 1

    command = argv[1]

    if command not in get_commands() :
        print >> stderr, "Error: unknown command '%s'" % command
        usage()
        return 1

    # 'test' only reports on the system, it needs no options
    if command == 'test' :
        test_system(output=True)
        return 0

    options = parse_args(command, argv[2:])
    logger = setup_logging(options['verbose'])
    check_options(command, options, logger)

    test_system(command, options, exit_on_failure=True)

    # some objects need this set
    System.tempdir(options['outdir'])

    workflow = WorkFlow(options)

    # each of these commands is implemented by the WorkFlow method of the same name
    workflow_steps = (
        'preprocess',
        'summary',
        'cluster',
        'label',
        'showcounts',
        'showlabels',
        'phylogeny',
        'heatmap',
        'wasabi',
    )

    for step in workflow_steps :
        if command == step :
            return getattr(workflow, step)()

    # a command that get_commands() knows about but that has no step above
    print >> stderr, "'%s' appears to be partially implemented!" % command
    return 1
Beispiel #2
0
    def alignment_similarity(self, seq1, seq2, homopolymer_correction) :
        """
        Align two sequences and score the resulting pairwise alignment.

        Both sequences are written in FASTA format to a temporary file,
        aligned with Pagan and the aligned sequences are passed to
        self.distance2().

        seq1, seq2              -- sequence objects providing a fasta() method
        homopolymer_correction  -- when true Pagan().get_454_alignment() is
                                   used instead of Pagan().get_alignment();
                                   the flag is also forwarded to distance2()

        Returns 0.0 if the alignment did not contain exactly two sequences
        (ignoring the consensus), otherwise the value of self.distance2().

        The temporary input file and the alignment file are removed even
        if writing, aligning or reading raises an exception.
        """
        # write out
        f = open(System.tempfilename(ext='cluster'), 'w')
        fq = None
        aligned = []

        try :
            # 'with' closes the file even if one of the writes fails
            with f :
                print >> f, seq1.fasta()
                print >> f, seq2.fasta()

            # align
            if homopolymer_correction :
                fq = Pagan().get_454_alignment(f.name)
            else :
                fq = Pagan().get_alignment(f.name)

            fq.open()

            try :
                for seq in fq :
                    if seq.id == ">consensus" :
                        continue

                    aligned.append(seq.sequence)
            finally :
                fq.close()

        finally :
            # delete tmp files, whether or not the alignment succeeded
            os.remove(f.name)

            # the alignment file may be missing if something went wrong
            # before it was written, do not let that hide the real error
            if fq is not None :
                alignment_file = fq.get_filename()
                if os.path.exists(alignment_file) :
                    os.remove(alignment_file)

        # if things are really dissimilar they do not align
        # so just give up here for this cluster
        if len(aligned) != 2 :
            return 0.0

        return self.distance2(aligned, homopolymer_correction)
Beispiel #3
0
def check_options(command, options, log) :
    """
    Validate the options given for a command, exiting with status 1 on
    the first problem found.

    command -- name of the command being run
    options -- dictionary of option values, apply_prefix() is called on it
               before anything is checked
    log     -- logger whose error() method is used to report problems that
               are detected directly in this function

    Some checks exit without logging here; NOTE(review): presumably the
    System.check_* methods report the problem themselves, confirm.
    """
    system = System()

    apply_prefix(options, command)

    def fail(message=None) :
        # report the problem (if there is anything to say) and quit
        if message is not None :
            log.error(message)
        exit(1)

    def check_blast_labels() :
        # labelling with 'blastlocal' needs an existing FASTA file for the database
        if options['labels'] == 'blastlocal' :
            if not options['labels_db'] :
                fail("'blastlocal' requires you specify a FASTA file to use as a blast database using the --dbfile option")

            if not system.check_file(options['labels_db']) :
                fail("could not find %s" % options['labels_db'])

    # every command except 'preprocess' needs the output directory to exist already
    if not (command == 'preprocess' or system.check_directory(options['outdir'])) :
        fail()

    if command == 'preprocess' :
        # 'preprocess' is allowed to create the output directory
        if not system.check_directory(options['outdir'], create=True) :
            fail()

        if not system.check_files(options['input-files']) :
            fail()

        if options['denoise'] and not options['forwardprimer'] :
            fail("for denoising you must specify the forward primer!")

    elif command == 'cluster' :
        # the metadata file is optional, but if given it must exist
        metadata = options['metadata']
        if metadata is not None and not system.check_file(metadata) :
            fail()

        thresholds = ('duplicate-threshold', 'total-duplicate-threshold', 'sample-threshold')
        for name in thresholds :
            if options[name] <= 0 :
                fail("%s must be > 0 (read %d)" % (name, options[name]))

        # i think pagan's sensitivity limit is ~80%
        similarity = options['otu-similarity']
        if similarity < 0.8 or similarity > 1.0 :
            fail("similarity must be between 0.8 and 1.0 (read %.2f)" % options['otu-similarity'])

        if options['labels'] :
            check_blast_labels()

    elif command == 'summary' :
        if not system.check_file(options['summary-file']) :
            fail("could not find %s, did you run the 'preprocess' command yet?" % options['summary-file'])

    elif command == 'label' :
        fasta_check = system.check_file(options['cluster-fasta'])
        biom_check  = system.check_file(options['cluster-biom'])

        if not (fasta_check and biom_check) :
            if fasta_check ^ biom_check :
                # exactly one file is missing, name just that one
                missing = "'%s'" % (options['cluster-biom'] if fasta_check else options['cluster-fasta'])
            else :
                missing = "'%s' or '%s'" % (options['cluster-fasta'], options['cluster-biom'])

            fail("could not find %s, did you run the 'cluster' command yet?" % missing)

        if not options['labels'] :
            fail("you must specify a labelling method")

        check_blast_labels()

    elif command in ('showcounts', 'showlabels') :
        if not system.check_file(options['cluster-biom']) :
            fail("could not find '%s', did you run the 'cluster' command yet?" % options['cluster-biom'])

    elif command == 'phylogeny' :
        if not system.check_file(options['cluster-fasta']) :
            fail()

        # without --denovo both the reference alignment and tree are required
        if not options['denovo'] :
            if not (options['silva-fasta'] and options['silva-tree']) :
                fail("you must either specify the location of the reference alignment and phylogeny or else use the --denovo option")
            elif not system.check_files([options['silva-fasta'], options['silva-tree']]) :
                fail()

    elif command == 'heatmap' :
        if not system.check_files([options['cluster-biom']]) :
            fail()

    elif command == 'wasabi' :
        if not options['wasabi-user'] :
            fail("you must specify your wasabi username!")

        if not system.check_files([options['phylogeny-xml']]) :
            fail()
Beispiel #4
0
def test_system(command=None, options=None, exit_on_failure=False, output=False) :
    """
    Check that the external programs a command depends on are available,
    as reported by System.is_installed().

    command         -- only check the dependencies of this command, if not
                       given the dependencies of every command are checked
    options         -- option dictionary, when given a dependency listed
                       under an option name is only checked if that option
                       has a true value ('*' dependencies are always checked)
    exit_on_failure -- call exit(1) if any checked program was not found
    output          -- print the complete report to stdout, otherwise only
                       the 'not found' lines are printed (to stderr)
    """
    binaries = { 
        'preprocess' : {
            '*'         : ['sff2fastq'],
            'chimeras'  : ['uchime'],
            'denoise'   : ['PyroDist', 'FCluster', 'PyroNoise'],
        },
        'cluster' : {
            '*'         : ['pagan'],
            'labels'    : ['blastn', 'makeblastdb']
        },
        'label' : {
            '*'         : ['blastn', 'makeblastdb']
        },
        'summary' : {},
        'showcounts' : {},
        'showlabels' : {},
        'phylogeny' : {
            '*'         : ['pagan', 'exonerate', 'bppphysamp'],
            'denovo'    : ['raxml']
        },
        'heatmap' : {},
        'wasabi' : {},
        'test' : {}
    }

    def print_out(s) :
        # in quiet mode only missing programs are reported, on stderr
        if output :
            print(s)
        else :
            if 'not found' in s :
                print >> stderr, s.lstrip()

    fail = False

    for b in [command] if command else binaries :
        if binaries[b] :
            print_out("checking system for %s command dependancies :" % bold(b))
            for o in binaries[b] :
                if o == '*' or not options or options[o] :
                    for p in binaries[b][o] :
                        installed = System.is_installed(p)
                        print_out("    %s %s%s" % (p, "" if o == "*" else "(needed for --%s) " % o, 
                            bold_green("found.") if installed else bold_red("not found!")))

                        # must be tested for every program, not just the
                        # last one in the list, or a missing program can
                        # go unnoticed
                        if not installed :
                            fail = True
            print_out("")

    # for some reason using pip to install cairo always fails
    # so check for it here instead
#    print "checking for python modules :"
#    try :
#        import cairo
#        print "    pycairo (needed by 'heatmap' command) " + bold_green("found.")
#    except ImportError :
#        print "    pycairo (needed by 'heatmap' command) " + bold_red("not found!")
#        fail = True
#    print ""

    if exit_on_failure and fail :
        exit(1)