コード例 #1
0
# Start every run with a clean log: remove a log file left by an earlier run.
log_name = args.logfile
if os.path.isfile(log_name):
    os.remove(log_name)

# Set up logging (via lib.setupLogging), then record the exact command line
# this run was started with.
lib.setupLogging(log_name)
# Write handle on the null device, kept open under the name FNULL.
FNULL = open(os.devnull, 'w')
cmd_args = '{0}\n'.format(' '.join(sys.argv))
lib.log.debug(cmd_args)

# Name of the per-process scratch directory (keyed by PID) that stores the
# split FASTA files and the output files.
TMPDIR = 'phobius_' + str(os.getpid())

# Split the input FASTA into TMPDIR.
# NOTE(review): presumably lib.splitFASTA also creates TMPDIR - confirm.
lib.splitFASTA(args.input, TMPDIR)

# Collect the bare file names (not joined with TMPDIR) of the '.fa' files.
# A comprehension avoids shadowing the `file` builtin name.
proteins = [name for name in os.listdir(TMPDIR) if name.endswith('.fa')]

# Pick the worker and the degree of parallelism: the local runner with one job
# per CPU when lib.which finds 'phobius.pl', otherwise the remote runner.
if lib.which('phobius.pl'):
    phobius_runner = runPhobiusLocal
    phobius_jobs = multiprocessing.cpu_count()
else:
    phobius_runner = runPhobiusRemote
    phobius_jobs = 29  # max is 30 jobs at a time
lib.runMultiProgress(phobius_runner, proteins, phobius_jobs)

# Collect all results
phobius = []
コード例 #2
0
            # Run the phobius-multiproc.py script found under <parentdir>/util,
            # passing Proteins, phobius_out and phobiusLog via -i / -o / -l.
            # NOTE: the exit status returned by subprocess.call is not checked.
            phobius_cmd = [
                os.path.join(parentdir, 'util', 'phobius-multiproc.py'),
                '-i', Proteins,
                '-o', phobius_out,
                '-l', phobiusLog,
            ]
            subprocess.call(phobius_cmd)

if 'interproscan' in args.methods or 'all' in args.methods:
    # Every InterProScan path used here lives under <outputdir>/annotate_misc.
    annotate_misc_dir = os.path.join(outputdir, 'annotate_misc')
    IPRCombined = os.path.join(annotate_misc_dir, 'iprscan.xml')
    IPROUT = os.path.join(annotate_misc_dir, 'iprscan')
    PROTS = os.path.join(annotate_misc_dir, 'protein_tmp')
    # Create whichever of the two working directories does not exist yet.
    for needed_dir in (IPROUT, PROTS):
        if os.path.exists(needed_dir):
            continue
        os.makedirs(needed_dir)
    # Split the input proteins into individual files under PROTS.
    lib.splitFASTA(Proteins, PROTS)
    # Full paths of every '.fa' file in PROTS; per the original note these are
    # iterated with a pool, up to 25 submissions at a time. A comprehension
    # avoids rebinding the loop variable and shadowing the `file` builtin name.
    proteins = [
        os.path.join(PROTS, name)
        for name in os.listdir(PROTS)
        if name.endswith('.fa')
    ]

    # Count the XML results already present in IPROUT. os.listdir replaces the
    # undocumented glob.glob1 helper (deprecated since Python 3.13); dot-files
    # are skipped because the '*.xml' glob pattern never matched them either.
    num_files = sum(
        1 for name in os.listdir(IPROUT)
        if name.endswith('.xml') and not name.startswith('.'))
    num_prots = len(proteins)
    lib.log.info(
        'Now running InterProScan search remotely using EBI servers on '
        '{0:,} proteins'.format(num_prots))
    #build in a check before running (in case script gets stopped and needs to restart
    finished = []
    for file in os.listdir(IPROUT):
コード例 #3
0
    # Report the count returned by lib.line_count for signalp_out
    # (presumably one annotation per line - confirm against lib).
    num_annotations = lib.line_count(signalp_out)
    lib.log.info('{0:,} annotations added'.format(num_annotations))
else:
    lib.log.info("SignalP not installed, skipping")

if not args.skip_iprscan:
    if not args.iprscan:
        # Working directories for the InterProScan run, both located under
        # <outputdir>/annotate_misc.
        annotate_misc_dir = os.path.join(outputdir, 'annotate_misc')
        IPROUT = os.path.join(annotate_misc_dir, 'iprscan')
        PROTS = os.path.join(annotate_misc_dir, 'protein_tmp')
        # Create whichever of the two directories does not exist yet.
        for needed_dir in (IPROUT, PROTS):
            if os.path.exists(needed_dir):
                continue
            os.makedirs(needed_dir)
        # Split the input proteins into individual files under PROTS.
        lib.splitFASTA(Proteins, PROTS)

        # Full paths of every '.fa' file in PROTS; per the original note these
        # are iterated with a pool, up to 25 submissions at a time. A
        # comprehension avoids rebinding the loop variable and shadowing the
        # `file` builtin name.
        proteins = [
            os.path.join(PROTS, name)
            for name in os.listdir(PROTS)
            if name.endswith('.fa')
        ]
        
        # Count the XML results already present in IPROUT. os.listdir replaces
        # the undocumented glob.glob1 helper (deprecated since Python 3.13);
        # dot-files are skipped because the '*.xml' glob pattern never matched
        # them either.
        num_files = sum(
            1 for name in os.listdir(IPROUT)
            if name.endswith('.xml') and not name.startswith('.'))
        num_prots = len(proteins)
        lib.log.info(
            'Now running InterProScan search remotely using EBI servers on '
            '{0:,} proteins'.format(num_prots))
        while (num_files < num_prots):
            #build in a check before running (in case script gets stopped and needs to restart
            finished = []
            for file in os.listdir(IPROUT):