def gather():
    """Copy the outputs of a fragment run from the scratch job dir to the results dir.

    Reads the module-level ``results``, ``name`` and ``scr_job``.  For every
    expected output (the fragment-library pattern plus the ``.fasta``,
    ``.psipred`` and ``.psipred_ss2`` names) the matching paths under
    ``scr_job`` are looked up with ``paths.find`` and copied into ``results``.
    Paths matched by the fragment-library pattern are first rewritten in place
    (each line cut to its first 47 characters, stripped, and prefixed with one
    space) and, after the copy, ``bzip2 <dest>`` is handed to ``util.system``.

    Raises:
        IOError: if ``paths.find`` returns nothing for one of the patterns.
            (The original ``raise "Missing file"`` was a string exception,
            which Python 2.6+ rejects with a TypeError.)

    Any exception raised while gathering is re-raised after the results
    directory has been removed.
    """
    util.debug("Gathering result")
    job_results = results
    paths.ensure(job_results)

    # Expected contents of the results directory, e.g.:
    #   aazj00103_05.075_v1_3.bz2  zj001.fasta    zj001.psipred_ss2
    #   aazj00109_05.075_v1_3.bz2  zj001.psipred
    # Raw string: same characters as before, without relying on Python keeping
    # unknown escapes such as "\_" and "\." untouched.
    # NOTE(review): the pattern looks like a regular expression (alternation,
    # "$" anchor); presumably paths.find treats it as one -- confirm.
    aa_pattern = "aa" + name + r"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    patterns.extend(name + suf for suf in (".fasta", ".psipred", ".psipred_ss2"))

    try:
        for pattern in patterns:
            print("Pattern  %s" % pattern)
            match = paths.find(pattern, scr_job)
            if not match:
                raise IOError(
                    "Missing file: nothing in %s matches %r" % (scr_job, pattern))

            is_frag_library = pattern == aa_pattern
            for path in match:
                if is_frag_library:
                    print("Filtering Columns %s" % path)
                    # With inplace=1, fileinput redirects stdout into the file
                    # being read, so each print below rewrites one line of it.
                    for line in fileinput.input(path, inplace=1):
                        print(" " + line[:47].strip())
                    fileinput.close()
                print("Path  %s" % path)
                util.copy(path, job_results)
                if is_frag_library:
                    dest = paths.join(job_results, paths.getFile(path))
                    util.system("bzip2 %s" % dest)
    except:
        # Deliberately bare: whatever went wrong (including an interrupt),
        # remove the partially filled results directory, then re-raise.
        paths.removerf(job_results)
        raise
def gather():
    """Copy the outputs of a fragment run from the scratch job dir to the results dir.

    Reads the module-level ``results``, ``name`` and ``scr_job``.  For every
    expected output (the fragment-library pattern plus the ``.fasta``,
    ``.psipred`` and ``.psipred_ss2`` names) the matching paths under
    ``scr_job`` are looked up with ``paths.find`` and copied into ``results``.
    Paths matched by the fragment-library pattern are first rewritten in place
    (each line cut to its first 47 characters, stripped, and prefixed with one
    space) and, after the copy, ``bzip2 <dest>`` is handed to ``util.system``.

    Raises:
        IOError: if ``paths.find`` returns nothing for one of the patterns.
            (The original ``raise "Missing file"`` was a string exception,
            which Python 2.6+ rejects with a TypeError.)

    Any exception raised while gathering is re-raised after the results
    directory has been removed.
    """
    util.debug("Gathering result")
    job_results = results
    paths.ensure(job_results)

    # Expected contents of the results directory, e.g.:
    #   aazj00103_05.075_v1_3.bz2  zj001.fasta    zj001.psipred_ss2
    #   aazj00109_05.075_v1_3.bz2  zj001.psipred
    # Raw string: same characters as before, without relying on Python keeping
    # unknown escapes such as "\_" and "\." untouched.
    # NOTE(review): the pattern looks like a regular expression (alternation,
    # "$" anchor); presumably paths.find treats it as one -- confirm.
    aa_pattern = "aa" + name + r"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    patterns.extend(name + suf for suf in (".fasta", ".psipred", ".psipred_ss2"))

    try:
        for pattern in patterns:
            print("Pattern  %s" % pattern)
            match = paths.find(pattern, scr_job)
            if not match:
                raise IOError(
                    "Missing file: nothing in %s matches %r" % (scr_job, pattern))

            is_frag_library = pattern == aa_pattern
            for path in match:
                if is_frag_library:
                    print("Filtering Columns %s" % path)
                    # With inplace=1, fileinput redirects stdout into the file
                    # being read, so each print below rewrites one line of it.
                    for line in fileinput.input(path, inplace=1):
                        print(" " + line[:47].strip())
                    fileinput.close()
                print("Path  %s" % path)
                util.copy(path, job_results)
                if is_frag_library:
                    dest = paths.join(job_results, paths.getFile(path))
                    util.system("bzip2 %s" % dest)
    except:
        # Deliberately bare: whatever went wrong (including an interrupt),
        # remove the partially filled results directory, then re-raise.
        paths.removerf(job_results)
        raise
def options(argv):
    """Parse the command line and fill in the module-level run settings.

    Args:
        argv: argument list without the program name.  Accepted flags are
            ``-t <n>`` (task id), ``-p <file>`` (passed to ``loadProps``),
            ``-c`` (sets ``cleanup`` to False) and ``-n`` (sets ``no_homs`` to
            True), followed by three positionals: code, fasta file, results
            directory.

    Sets the globals ``home``, ``shareware``, ``code``, ``fasta_file``,
    ``results``, ``task_id``, ``name``, ``scr_job`` and, depending on the
    flags, ``cleanup`` / ``no_homs``.  ``task_id`` and ``scr`` are read before
    being assigned here, so they must already exist at module level.

    Calls ``usage()`` and exits with status 2 when fewer than three
    positional arguments are supplied.
    """
    global home, shareware, scr, scr_job, fasta_file, code, results, name, cleanup, task_id, no_homs
    if len(argv) < 3:
        usage()
        sys.exit(2)

    home = os.getcwd()
    shareware = paths.join(home, "tools")

    opts, args = getopt.getopt(argv, "t:p:cn")
    # The first check counts flags too (e.g. "-t 1 code" has three items), so
    # re-check the positionals that are left once getopt has removed the flags;
    # otherwise the indexing below fails with an IndexError.
    if len(args) < 3:
        usage()
        sys.exit(2)
    code = args[0]
    fasta_file = args[1]
    results = args[2]

    for o, a in opts:
        if o == '-t':
            task_id = '%03d' % int(a)
        elif o == '-p':
            loadProps(a)
        elif o == '-c':
            cleanup = False
        elif o == '-n':
            no_homs = True

    if task_id is None:
        # No -t given: fall back to the PBS array index, shifted down by one.
        try:
            task_id = '%03d' % (int(os.environ['PBS_ARRAYID']) - 1)
            print("ID %s" % task_id)
        except (KeyError, ValueError):
            # Variable unset, or not an integer.
            util.error("NO TASK ID")

    name = "%s%s" % (code, task_id)
    print("%s   %s   %s" % (code, task_id, name))

    # A "%s" placeholder in the fasta path is replaced by the job name; a path
    # without the placeholder is left unchanged.
    fasta_file = fasta_file.replace("%s", name)
    results = paths.join(results, code, name)
    scr_job = paths.join(scr, "jobs", name)
def options(argv):
    """Parse the command line and fill in the module-level run settings.

    Args:
        argv: argument list without the program name.  Accepted flags are
            ``-t <n>`` (task id), ``-p <file>`` (passed to ``loadProps``),
            ``-c`` (sets ``cleanup`` to False) and ``-n`` (sets ``no_homs`` to
            True), followed by three positionals: code, fasta file, results
            directory.

    Sets the globals ``home``, ``shareware``, ``code``, ``fasta_file``,
    ``results``, ``task_id``, ``name``, ``scr_job`` and, depending on the
    flags, ``cleanup`` / ``no_homs``.  ``task_id`` and ``scr`` are read before
    being assigned here, so they must already exist at module level.

    Calls ``usage()`` and exits with status 2 when fewer than three
    positional arguments are supplied.
    """
    global home, shareware, scr, scr_job, fasta_file, code, results, name, cleanup, task_id, no_homs
    if len(argv) < 3:
        usage()
        sys.exit(2)

    home = os.getcwd()
    shareware = paths.join(home, "tools")

    opts, args = getopt.getopt(argv, "t:p:cn")
    # The first check counts flags too (e.g. "-t 1 code" has three items), so
    # re-check the positionals that are left once getopt has removed the flags;
    # otherwise the indexing below fails with an IndexError.
    if len(args) < 3:
        usage()
        sys.exit(2)
    code = args[0]
    fasta_file = args[1]
    results = args[2]

    for o, a in opts:
        if o == '-t':
            task_id = '%03d' % int(a)
        elif o == '-p':
            loadProps(a)
        elif o == '-c':
            cleanup = False
        elif o == '-n':
            no_homs = True

    if task_id is None:
        # No -t given: fall back to the PBS array index, shifted down by one.
        try:
            task_id = '%03d' % (int(os.environ['PBS_ARRAYID']) - 1)
            print("ID %s" % task_id)
        except (KeyError, ValueError):
            # Variable unset, or not an integer.
            util.error("NO TASK ID")

    name = "%s%s" % (code, task_id)
    print("%s   %s   %s" % (code, task_id, name))

    # A "%s" placeholder in the fasta path is replaced by the job name; a path
    # without the placeholder is left unchanged.
    fasta_file = fasta_file.replace("%s", name)
    results = paths.join(results, code, name)
    scr_job = paths.join(scr, "jobs", name)
def replicate_fasta():
    """Make sure the input fasta file is present in the scratch job directory.

    Sets the global ``scr_fasta`` to ``scr_job`` joined with the file name of
    ``fasta_file``.  When nothing exists at that path yet, ``fasta_file`` is
    copied there.  ``paths.existsOrFail`` is then called on the scratch path.
    """
    global scr_fasta
    fasta_name = paths.getFile(fasta_file)
    scr_fasta = paths.join(scr_job, fasta_name)

    already_replicated = paths.exists(scr_fasta)
    if not already_replicated:
        util.debug("Replicating fasta file")
        util.copy(fasta_file, scr_fasta)

    # Final check on the scratch copy, whether it was just made or found.
    paths.existsOrFail(scr_fasta)
def gather():
    """Copy the outputs of a fragment run from the scratch job dir to ``results_dir``.

    Reads the module-level ``code``, ``task_id``, ``results_dir`` and
    ``scr_job_dir``.  For every expected output (the fragment-library pattern
    plus the ``.fasta``, ``.psipred`` and ``.psipred_ss2`` names) the matching
    paths under ``scr_job_dir`` are looked up with ``paths.find`` and copied
    into ``results_dir``.  Paths matched by the fragment-library pattern are
    first rewritten in place (each line cut to its first 47 characters,
    stripped, and prefixed with one space) and, after the copy,
    ``bzip2 <dest>`` is handed to ``utilities.system``.

    Raises:
        IOError: if ``paths.find`` returns nothing for one of the patterns.
            (The original ``raise "Missing file"`` was a string exception,
            which Python 2.6+ rejects with a TypeError.)

    Any exception raised while gathering is re-raised after ``results_dir``
    has been removed.
    """
    utilities.debug("Gathering result")
    paths.ensure(results_dir)

    # Contents of the results directory (3' library, 9' library, fasta,
    # psipred and psipred_ss2 files), e.g.:
    #   aazj00103_05.075_v1_3.bz2  zj001.fasta    zj001.psipred_ss2
    #   aazj00109_05.075_v1_3.bz2  zj001.psipred
    job_name = code + task_id
    # Raw string: same characters as before, without relying on Python keeping
    # unknown escapes such as "\_" and "\." untouched.
    aa_pattern = "aa" + job_name + r"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    patterns.extend(job_name + suf for suf in (".fasta", ".psipred", ".psipred_ss2"))

    try:
        for pattern in patterns:
            print("Pattern  %s" % pattern)
            # Find the result files for this pattern in the scratch job dir.
            match = paths.find(pattern, scr_job_dir)
            if not match:
                raise IOError(
                    "Missing file: nothing in %s matches %r" % (scr_job_dir, pattern))

            is_frag_library = pattern == aa_pattern
            for path in match:
                if is_frag_library:
                    print("Filtering Columns of file %s" % path)
                    # With inplace=1, fileinput backs the file up and redirects
                    # stdout into it, so each print rewrites one line as
                    # " " + the first 47 characters (stripped).
                    for line in fileinput.input(path, inplace=1):
                        print(" " + line[:47].strip())
                    fileinput.close()
                print("Path  %s" % path)
                utilities.copy(path, results_dir)
                if is_frag_library:
                    # Fragment libraries are bzipped in the results directory.
                    dest = paths.join(results_dir, paths.getFile(path))
                    utilities.system("bzip2 %s" % dest)
    except:
        # Deliberately bare: whatever went wrong (including an interrupt),
        # remove the partially filled results directory, then re-raise.
        paths.removerf(results_dir)
        raise
def gather():
    """Copy the outputs of a fragment run from the scratch job dir to ``results_dir``.

    Reads the module-level ``code``, ``task_id``, ``results_dir`` and
    ``scr_job_dir``.  For every expected output (the fragment-library pattern
    plus the ``.fasta``, ``.psipred`` and ``.psipred_ss2`` names) the matching
    paths under ``scr_job_dir`` are looked up with ``paths.find`` and copied
    into ``results_dir``.  Paths matched by the fragment-library pattern are
    first rewritten in place (each line cut to its first 47 characters,
    stripped, and prefixed with one space) and, after the copy,
    ``bzip2 <dest>`` is handed to ``utilities.system``.

    Raises:
        IOError: if ``paths.find`` returns nothing for one of the patterns.
            (The original ``raise "Missing file"`` was a string exception,
            which Python 2.6+ rejects with a TypeError.)

    Any exception raised while gathering is re-raised after ``results_dir``
    has been removed.
    """
    utilities.debug("Gathering result")
    paths.ensure(results_dir)

    # Contents of the results directory (3' library, 9' library, fasta,
    # psipred and psipred_ss2 files), e.g.:
    #   aazj00103_05.075_v1_3.bz2  zj001.fasta    zj001.psipred_ss2
    #   aazj00109_05.075_v1_3.bz2  zj001.psipred
    job_name = code + task_id
    # Raw string: same characters as before, without relying on Python keeping
    # unknown escapes such as "\_" and "\." untouched.
    aa_pattern = "aa" + job_name + r"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    patterns.extend(job_name + suf for suf in (".fasta", ".psipred", ".psipred_ss2"))

    try:
        for pattern in patterns:
            print("Pattern  %s" % pattern)
            # Find the result files for this pattern in the scratch job dir.
            match = paths.find(pattern, scr_job_dir)
            if not match:
                raise IOError(
                    "Missing file: nothing in %s matches %r" % (scr_job_dir, pattern))

            is_frag_library = pattern == aa_pattern
            for path in match:
                if is_frag_library:
                    print("Filtering Columns of file %s" % path)
                    # With inplace=1, fileinput backs the file up and redirects
                    # stdout into it, so each print rewrites one line as
                    # " " + the first 47 characters (stripped).
                    for line in fileinput.input(path, inplace=1):
                        print(" " + line[:47].strip())
                    fileinput.close()
                print("Path  %s" % path)
                utilities.copy(path, results_dir)
                if is_frag_library:
                    # Fragment libraries are bzipped in the results directory.
                    dest = paths.join(results_dir, paths.getFile(path))
                    utilities.system("bzip2 %s" % dest)
    except:
        # Deliberately bare: whatever went wrong (including an interrupt),
        # remove the partially filled results directory, then re-raise.
        paths.removerf(results_dir)
        raise
def runScript(no_homs=False):
    """Export the tool/database locations and run the fragment script.

    Args:
        no_homs: when true, ``-nohoms`` is appended to the script invocation.

    The environment variables set here are BLAST_DIR, NR_DIR, NNMAKEDB_DIR,
    NNMAKE_SHORT_DIR, PSIPRED_DIR, JUFO_DIR, SAM_DIR and SAM_2ND_DIR.  Each
    location is passed to ``paths.existsOrFail`` before it is exported.  The
    script itself is ``<home>/scripts/<frag_script>`` and is run from
    ``scr_job_dir`` through ``utilities.system``, always with
    ``-verbose -nosam``.
    """
    utilities.debug("Running fragmentation script")

    # Variable name -> location; tools live under tools_dir, databases under
    # scr_dir.
    locations = {
        'BLAST_DIR': '%s/blast' % tools_dir,
        'NR_DIR': '%s/db/nr' % scr_dir,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr_dir,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % tools_dir,
        'PSIPRED_DIR': '%s/psipred' % tools_dir,
        'JUFO_DIR': '%s/jufo' % tools_dir,
        'SAM_DIR': '%s/sam' % tools_dir,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % tools_dir,
    }
    for var_name, location in locations.items():
        # Check the location first, then export it via os.environ.
        paths.existsOrFail(location)
        os.environ[var_name] = location

    launcher = paths.join(home, "scripts", frag_script)
    if no_homs:
        launcher = "%s -nohoms" % launcher

    fasta_name = paths.getFile(fasta_file)
    command = "cd %s && %s -verbose -nosam %s" % (scr_job_dir, launcher, fasta_name)
    utilities.system(command)
def runScript(no_homs=False):
    """Export the tool/database locations and run the fragment script.

    Args:
        no_homs: when true, ``-nohoms`` is appended to the script invocation.

    The environment variables set here are BLAST_DIR, NR_DIR, NNMAKEDB_DIR,
    NNMAKE_SHORT_DIR, PSIPRED_DIR, JUFO_DIR, SAM_DIR and SAM_2ND_DIR.  Each
    location is passed to ``paths.existsOrFail`` before it is exported.  The
    script itself is ``<home>/scripts/<frag_script>`` and is run from
    ``scr_job_dir`` through ``utilities.system``, always with
    ``-verbose -nosam``.
    """
    utilities.debug("Running fragmentation script")

    # Variable name -> location; tools live under tools_dir, databases under
    # scr_dir.
    locations = {
        'BLAST_DIR': '%s/blast' % tools_dir,
        'NR_DIR': '%s/db/nr' % scr_dir,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr_dir,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % tools_dir,
        'PSIPRED_DIR': '%s/psipred' % tools_dir,
        'JUFO_DIR': '%s/jufo' % tools_dir,
        'SAM_DIR': '%s/sam' % tools_dir,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % tools_dir,
    }
    for var_name, location in locations.items():
        # Check the location first, then export it via os.environ.
        paths.existsOrFail(location)
        os.environ[var_name] = location

    launcher = paths.join(home, "scripts", frag_script)
    if no_homs:
        launcher = "%s -nohoms" % launcher

    fasta_name = paths.getFile(fasta_file)
    command = "cd %s && %s -verbose -nosam %s" % (scr_job_dir, launcher, fasta_name)
    utilities.system(command)
def runScript():
    """Export the tool/database locations and run the fragment script.

    The environment variables set here are BLAST_DIR, NR_DIR, NNMAKEDB_DIR,
    NNMAKE_SHORT_DIR, PSIPRED_DIR, JUFO_DIR, SAM_DIR and SAM_2ND_DIR.  Each
    location is passed to ``paths.existsOrFail`` before it is exported.  The
    script is ``<home>/scripts/<pl_script>``; ``-nohoms`` is appended when the
    module-level ``no_homs`` is true.  It is run from ``scr_job`` through
    ``util.system``, always with ``-verbose -nosam``, on the file name of
    ``scr_fasta``.
    """
    util.debug("Running fragmentation script")

    # Variable name -> location; tools live under shareware, databases under
    # scr.
    locations = {
        'BLAST_DIR': '%s/blast' % shareware,
        'NR_DIR': '%s/db/nr' % scr,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % shareware,
        'PSIPRED_DIR': '%s/psipred' % shareware,
        'JUFO_DIR': '%s/jufo' % shareware,
        'SAM_DIR': '%s/sam' % shareware,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % shareware,
    }
    for var_name, location in locations.items():
        # Check the location first, then export it via os.environ.
        paths.existsOrFail(location)
        os.environ[var_name] = location

    launcher = paths.join(home, "scripts", pl_script)
    if no_homs:
        launcher = "%s -nohoms" % launcher

    fasta_name = paths.getFile(scr_fasta)
    command = "cd %s && %s -verbose -nosam %s" % (scr_job, launcher, fasta_name)
    util.system(command)
def runScript():
    """Export the tool/database locations and run the fragment script.

    The environment variables set here are BLAST_DIR, NR_DIR, NNMAKEDB_DIR,
    NNMAKE_SHORT_DIR, PSIPRED_DIR, JUFO_DIR, SAM_DIR and SAM_2ND_DIR.  Each
    location is passed to ``paths.existsOrFail`` before it is exported.  The
    script is ``<home>/scripts/<pl_script>``; ``-nohoms`` is appended when the
    module-level ``no_homs`` is true.  It is run from ``scr_job`` through
    ``util.system``, always with ``-verbose -nosam``, on the file name of
    ``scr_fasta``.
    """
    util.debug("Running fragmentation script")

    # Variable name -> location; tools live under shareware, databases under
    # scr.
    locations = {
        'BLAST_DIR': '%s/blast' % shareware,
        'NR_DIR': '%s/db/nr' % scr,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % shareware,
        'PSIPRED_DIR': '%s/psipred' % shareware,
        'JUFO_DIR': '%s/jufo' % shareware,
        'SAM_DIR': '%s/sam' % shareware,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % shareware,
    }
    for var_name, location in locations.items():
        # Check the location first, then export it via os.environ.
        paths.existsOrFail(location)
        os.environ[var_name] = location

    launcher = paths.join(home, "scripts", pl_script)
    if no_homs:
        launcher = "%s -nohoms" % launcher

    fasta_name = paths.getFile(scr_fasta)
    command = "cd %s && %s -verbose -nosam %s" % (scr_job, launcher, fasta_name)
    util.system(command)
def main():
    """Entry point: parse the command line, then replicate, fragment and gather.

    Sets the globals ``task_id``, ``fasta_file``, ``results_dir``,
    ``scr_job_dir``, ``home`` and ``tools_dir``; ``code`` and ``scr_dir`` are
    read and must already be set (presumably by ``parse_positional_args`` or
    at module level -- not visible from this function).

    Raises:
        Exception: when no task id is given with the option and
            ``PBS_ARRAYID`` is unset or not an integer.

    Exits with status 1 when a properties file is specified (not supported).
    """
    # The original declared "scr_job_dir_dir" here, which left scr_job_dir a
    # local of main(); the sibling functions read it as a global.
    global code, task_id, fasta_file, results_dir, scr_dir, scr_job_dir, home, tools_dir

    # Create a parser and add command-line options.
    parser = OptionParser(usage=usage_str)
    set_options(parser)

    # parse_args() works on sys.argv[1:]; positionals are handled separately.
    (options, args) = parser.parse_args()
    parse_positional_args(args, parser)

    # Task id: taken from the option when given, otherwise from PBS_ARRAYID
    # shifted down by one; always formatted as three digits.
    # "is not None" rather than truthiness so that task id 0 is accepted.
    # NOTE(review): assumes set_options leaves the default at optparse's None.
    if options.task_id is not None:
        task_id = '%03d' % int(options.task_id)
    else:
        try:
            task_id = '%03d' % (int(os.environ['PBS_ARRAYID']) - 1)
        except (KeyError, ValueError):
            raise Exception("No Task ID specified or in system.")

    # If a properties file is specified, print usage and exit (not supported).
    if options.properties_file:
        print("Specifying a properties file not currently supported. See usage.")
        parser.print_help()
        sys.exit(1)

    job_name = code + task_id

    # REPLACE THIS with getting fasta file to work on from tasks.pickle. How?
    # Replace '%s' in the input fasta_file with code+task_id.  str.replace
    # does not fail when the placeholder is absent, so the old try/except
    # around it could never report a missing "%s"; such a path is used as is.
    fasta_file = fasta_file.replace("%s", job_name)

    # results_dir becomes <results_dir>/<code>/<code><task_id>
    results_dir = os.path.join(results_dir, code, job_name)
    # scr_job_dir becomes <scr_dir>/job/<code><task_id>
    scr_job_dir = os.path.join(scr_dir, "job", job_name)

    # TODO: Get rid of all this scr_fasta stuff, and just do the copy (see notes).
    # NOTE(review): scr_fasta is a local here and nothing in main() reads it.
    scr_fasta = os.path.join(scr_job_dir, paths.getFile(fasta_file))

    # home is the CWD and tools_dir is CWD/tools (hardcoded).
    # TODO: when make_fragments.local is incorporated, no tools will be used
    # this way (instead, they'll all come from the Env.)
    home = os.getcwd()
    tools_dir = paths.join(home, "tools")

    # Run main fragmenting functionality.
    try:
        # Copy fasta input to scratch dir
        replicate()
        # Run fragment making script
        runScript(no_homs=options.nohoms)
        # Copy results to results dir
        gather()
    finally:
        # Clean up whether or not the run succeeded.
        if options.cleanup:
            cleanup()
    utilities.debug("FINISHED!")
def main():
    """Entry point: parse the command line, then replicate, fragment and gather.

    Sets the globals ``task_id``, ``fasta_file``, ``results_dir``,
    ``scr_job_dir``, ``home`` and ``tools_dir``; ``code`` and ``scr_dir`` are
    read and must already be set (presumably by ``parse_positional_args`` or
    at module level -- not visible from this function).

    Raises:
        Exception: when no task id is given with the option and
            ``PBS_ARRAYID`` is unset or not an integer.

    Exits with status 1 when a properties file is specified (not supported).
    """
    # The original declared "scr_job_dir_dir" here, which left scr_job_dir a
    # local of main(); the sibling functions read it as a global.
    global code, task_id, fasta_file, results_dir, scr_dir, scr_job_dir, home, tools_dir

    # Create a parser and add command-line options.
    parser = OptionParser(usage=usage_str)
    set_options(parser)

    # parse_args() works on sys.argv[1:]; positionals are handled separately.
    (options, args) = parser.parse_args()
    parse_positional_args(args, parser)

    # Task id: taken from the option when given, otherwise from PBS_ARRAYID
    # shifted down by one; always formatted as three digits.
    # "is not None" rather than truthiness so that task id 0 is accepted.
    # NOTE(review): assumes set_options leaves the default at optparse's None.
    if options.task_id is not None:
        task_id = '%03d' % int(options.task_id)
    else:
        try:
            task_id = '%03d' % (int(os.environ['PBS_ARRAYID']) - 1)
        except (KeyError, ValueError):
            raise Exception("No Task ID specified or in system.")

    # If a properties file is specified, print usage and exit (not supported).
    if options.properties_file:
        print("Specifying a properties file not currently supported. See usage.")
        parser.print_help()
        sys.exit(1)

    job_name = code + task_id

    # REPLACE THIS with getting fasta file to work on from tasks.pickle. How?
    # Replace '%s' in the input fasta_file with code+task_id.  str.replace
    # does not fail when the placeholder is absent, so the old try/except
    # around it could never report a missing "%s"; such a path is used as is.
    fasta_file = fasta_file.replace("%s", job_name)

    # results_dir becomes <results_dir>/<code>/<code><task_id>
    results_dir = os.path.join(results_dir, code, job_name)
    # scr_job_dir becomes <scr_dir>/job/<code><task_id>
    scr_job_dir = os.path.join(scr_dir, "job", job_name)

    # TODO: Get rid of all this scr_fasta stuff, and just do the copy (see notes).
    # NOTE(review): scr_fasta is a local here and nothing in main() reads it.
    scr_fasta = os.path.join(scr_job_dir, paths.getFile(fasta_file))

    # home is the CWD and tools_dir is CWD/tools (hardcoded).
    # TODO: when make_fragments.local is incorporated, no tools will be used
    # this way (instead, they'll all come from the Env.)
    home = os.getcwd()
    tools_dir = paths.join(home, "tools")

    # Run main fragmenting functionality.
    try:
        # Copy fasta input to scratch dir
        replicate()
        # Run fragment making script
        runScript(no_homs=options.nohoms)
        # Copy results to results dir
        gather()
    finally:
        # Clean up whether or not the run succeeded.
        if options.cleanup:
            cleanup()
    utilities.debug("FINISHED!")