Example #1
0
def gather():
    util.debug("Gathering result")
    job_results = results
    paths.ensure(job_results)

    #aazj00103_05.075_v1_3.bz2  zj001.fasta    zj001.psipred_ss2
    #aazj00109_05.075_v1_3.bz2  zj001.psipred
    aa_pattern = "aa" + name + "(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    for suf in [".fasta", ".psipred", ".psipred_ss2"]:
        patterns.append(name + suf)

    try:
        for pattern in patterns:
            print "Pattern ", pattern
            match = paths.find(pattern, scr_job)
            if not match:
                raise "Missing file"
            for path in match:
                if pattern == aa_pattern:
                    print "Filtering Columns %s" % path
                    for line in fileinput.input(path, inplace=1):
                        print " " + line[:47].strip()
                    fileinput.close()
                print "Path ", path
                file = paths.getFile(path)
                dest = paths.join(job_results, file)
                util.copy(path, job_results)
                if pattern == aa_pattern:
                    util.system("bzip2 %s" % dest)
    except:
        paths.removerf(job_results)
        raise
Example #2
0
def gather():
    util.debug("Gathering result")
    job_results = results
    paths.ensure(job_results)
    
    
    #aazj00103_05.075_v1_3.bz2  zj001.fasta    zj001.psipred_ss2
    #aazj00109_05.075_v1_3.bz2  zj001.psipred
    aa_pattern = "aa"+name+"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    for suf in [".fasta", ".psipred", ".psipred_ss2"]:
        patterns.append(name+suf)
    
    try:
        for pattern in patterns:
            print "Pattern ",pattern
            match = paths.find(pattern, scr_job)
            if not match:
                raise "Missing file"
            for path in match:
                if pattern == aa_pattern:
                    print "Filtering Columns %s" % path
                    for line in fileinput.input(path, inplace=1):
                        print " "+line[:47].strip()
                    fileinput.close()
                print "Path ", path
                file = paths.getFile(path)
                dest = paths.join(job_results, file)
                util.copy(path, job_results)
                if pattern == aa_pattern:
                    util.system("bzip2 %s"  % dest)
    except:
        paths.removerf(job_results)
        raise
Example #3
0
def options(argv):
    global home, shareware,scr, scr_job, fasta_file, code, results, name, cleanup, task_id, no_homs

    if len(argv) < 3:
        usage()
        sys.exit(2)
        
    home = os.getcwd()
    shareware = paths.join(home, "tools")
    
    opts, args = getopt.getopt(argv, "t:p:cn")
    code = args[0]
    fasta_file = args[1]
    results = args[2]
    
    for o,a in opts:
        if o == '-t':
            id ='%(#)03d' % {"#": int(a)}
            task_id = id
        if o == '-p':
            loadProps(a)
        if o == '-c':
            cleanup = False
        if o == '-n':
            no_homs = True
    
    if task_id == None:
        try:
            id ='%(#)03d' % {"#": int(os.environ['PBS_ARRAYID'])-1}
            print "ID",id
            task_id = id
        except:
            util.error("NO TASK ID")

    name = "%s%s" % (code, task_id)
    print code," ",task_id," ",name
    try:
        fasta_file = fasta_file.replace("%s", name)
    except:
        raise
    
    results = paths.join(results, code, name)
    scr_job = paths.join(scr, "jobs", name)
    return
Example #4
0
def options(argv):
    global home, shareware, scr, scr_job, fasta_file, code, results, name, cleanup, task_id, no_homs

    if len(argv) < 3:
        usage()
        sys.exit(2)

    home = os.getcwd()
    shareware = paths.join(home, "tools")

    opts, args = getopt.getopt(argv, "t:p:cn")
    code = args[0]
    fasta_file = args[1]
    results = args[2]

    for o, a in opts:
        if o == '-t':
            id = '%(#)03d' % {"#": int(a)}
            task_id = id
        if o == '-p':
            loadProps(a)
        if o == '-c':
            cleanup = False
        if o == '-n':
            no_homs = True

    if task_id == None:
        try:
            id = '%(#)03d' % {"#": int(os.environ['PBS_ARRAYID']) - 1}
            print "ID", id
            task_id = id
        except:
            util.error("NO TASK ID")

    name = "%s%s" % (code, task_id)
    print code, " ", task_id, " ", name
    try:
        fasta_file = fasta_file.replace("%s", name)
    except:
        raise

    results = paths.join(results, code, name)
    scr_job = paths.join(scr, "jobs", name)
    return
Example #5
0
def replicate_fasta():
    """Ensure the input fasta file is available inside the job scratch dir.

    Sets the global ``scr_fasta`` to ``scr_job`` joined with
    ``paths.getFile(fasta_file)``, copies ``fasta_file`` there when that path
    does not exist yet, and finally hands the path to paths.existsOrFail().
    """
    global scr_fasta

    scr_fasta = paths.join(scr_job, paths.getFile(fasta_file))

    already_present = paths.exists(scr_fasta)
    if not already_present:
        util.debug("Replicating fasta file")
        util.copy(fasta_file, scr_fasta)

    # Final check runs whether or not a copy was attempted.
    paths.existsOrFail(scr_fasta)
Example #6
0
def replicate_fasta():
    """Ensure the input fasta file is available inside the job scratch dir.

    Sets the global ``scr_fasta`` to ``scr_job`` joined with
    ``paths.getFile(fasta_file)``, copies ``fasta_file`` there when that path
    does not exist yet, and finally hands the path to paths.existsOrFail().
    """
    global scr_fasta

    scr_fasta = paths.join(scr_job, paths.getFile(fasta_file))

    already_present = paths.exists(scr_fasta)
    if not already_present:
        util.debug("Replicating fasta file")
        util.copy(fasta_file, scr_fasta)

    # Final check runs whether or not a copy was attempted.
    paths.existsOrFail(scr_fasta)
Example #7
0
def gather():
    global code, task_id

    utilities.debug("Gathering result")
    paths.ensure(results_dir)
    
    # Contents of results directory (3' library, 9' library, fasta, psipred, and psipred_ss2 files). EG:
    #aazj00103_05.075_v1_3.bz2  zj001.fasta    zj001.psipred_ss2
    #aazj00109_05.075_v1_3.bz2  zj001.psipred
    
    # Set "patterns" for results files.
    aa_pattern = "aa"+code+task_id+"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    for suf in [".fasta", ".psipred", ".psipred_ss2"]:
        patterns.append(code+task_id+suf)
    
    try:
        for pattern in patterns:
            print "Pattern ",pattern
            
            # Find the results file in scr_job_dir
            match = paths.find(pattern, scr_job_dir)
            
            # If a results file matching pattern is not found in the scr_jobs_dir, raise exception.
            if not match:
                raise "Missing file"
            
            # Copy all results files in scr_job_dir to results_dir. 
            for path in match:
                # If the results file is a frag library, format the frag library file.
                if pattern == aa_pattern:
                    print "Filtering Columns of file %s" % path
                    # fileinput.input opens a file for iteration. Defining inplace=1 allows to given file to be
                    # altered "in place": file is backed up, and stdout is then directed to the file. 
                    # Filters the file: writes " " + first 47 chars of the line. (Specific formatting).
                    for line in fileinput.input(path, inplace=1):
                        print " "+line[:47].strip()
                    fileinput.close()
                print "Path ", path
                
                # Copy the result file to results_dir.
                utilities.copy(path, results_dir)
                
                # If the results file is a frag library, bzip it in the results_dir.
                if pattern == aa_pattern:
                    dest = paths.join(results_dir, paths.getFile(path))
                    utilities.system("bzip2 %s"  % dest)
                    
    # If gathering the results files fails, delete the results_dir and exit.
    except:
        paths.removerf(results_dir)
        raise
Example #8
0
def gather():
    global code, task_id

    utilities.debug("Gathering result")
    paths.ensure(results_dir)

    # Contents of results directory (3' library, 9' library, fasta, psipred, and psipred_ss2 files). EG:
    #aazj00103_05.075_v1_3.bz2  zj001.fasta    zj001.psipred_ss2
    #aazj00109_05.075_v1_3.bz2  zj001.psipred

    # Set "patterns" for results files.
    aa_pattern = "aa" + code + task_id + "(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    for suf in [".fasta", ".psipred", ".psipred_ss2"]:
        patterns.append(code + task_id + suf)

    try:
        for pattern in patterns:
            print "Pattern ", pattern

            # Find the results file in scr_job_dir
            match = paths.find(pattern, scr_job_dir)

            # If a results file matching pattern is not found in the scr_jobs_dir, raise exception.
            if not match:
                raise "Missing file"

            # Copy all results files in scr_job_dir to results_dir.
            for path in match:
                # If the results file is a frag library, format the frag library file.
                if pattern == aa_pattern:
                    print "Filtering Columns of file %s" % path
                    # fileinput.input opens a file for iteration. Defining inplace=1 allows to given file to be
                    # altered "in place": file is backed up, and stdout is then directed to the file.
                    # Filters the file: writes " " + first 47 chars of the line. (Specific formatting).
                    for line in fileinput.input(path, inplace=1):
                        print " " + line[:47].strip()
                    fileinput.close()
                print "Path ", path

                # Copy the result file to results_dir.
                utilities.copy(path, results_dir)

                # If the results file is a frag library, bzip it in the results_dir.
                if pattern == aa_pattern:
                    dest = paths.join(results_dir, paths.getFile(path))
                    utilities.system("bzip2 %s" % dest)

    # If gathering the results files fails, delete the results_dir and exit.
    except:
        paths.removerf(results_dir)
        raise
Example #9
0
def runScript(no_homs=False):
    """Export the tool/database locations and run the fragment script.

    The following environment variables are set before the script runs:
    BLAST_DIR, NR_DIR, NNMAKEDB_DIR, NNMAKE_SHORT_DIR, PSIPRED_DIR,
    JUFO_DIR, SAM_DIR and SAM_2ND_DIR. Tool locations are derived from
    ``tools_dir`` and database locations from ``scr_dir``; each location is
    passed to paths.existsOrFail() before it is exported.

    The script ``<home>/scripts/<frag_script>`` is then run from
    ``scr_job_dir`` with ``-verbose -nosam`` on the file name of
    ``fasta_file``.

    Args:
        no_homs: when true, ``-nohoms`` is appended to the script invocation.
    """
    utilities.debug("Running fragmentation script")

    env = {
        'BLAST_DIR': '%s/blast' % tools_dir,
        'NR_DIR': '%s/db/nr' % scr_dir,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr_dir,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % tools_dir,
        'PSIPRED_DIR': '%s/psipred' % tools_dir,
        'JUFO_DIR': '%s/jufo' % tools_dir,
        'SAM_DIR': '%s/sam' % tools_dir,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % tools_dir
    }

    # Validate each location, then publish it through the process
    # environment so the script (run as a child process) can see it.
    for variable, location in env.items():
        paths.existsOrFail(location)
        os.environ[variable] = location

    invocation = paths.join(home, "scripts", frag_script)
    if no_homs:
        invocation = "%s -nohoms" % invocation

    # The script is given only the fasta file name because it runs from
    # inside scr_job_dir. The SAM step is always disabled via -nosam.
    fasta_name = paths.getFile(fasta_file)
    utilities.system(
        "cd %s && %s -verbose -nosam %s" % (scr_job_dir, invocation, fasta_name))
Example #10
0
def runScript(no_homs=False):
    """Export the tool/database locations and run the fragment script.

    The following environment variables are set before the script runs:
    BLAST_DIR, NR_DIR, NNMAKEDB_DIR, NNMAKE_SHORT_DIR, PSIPRED_DIR,
    JUFO_DIR, SAM_DIR and SAM_2ND_DIR. Tool locations are derived from
    ``tools_dir`` and database locations from ``scr_dir``; each location is
    passed to paths.existsOrFail() before it is exported.

    The script ``<home>/scripts/<frag_script>`` is then run from
    ``scr_job_dir`` with ``-verbose -nosam`` on the file name of
    ``fasta_file``.

    Args:
        no_homs: when true, ``-nohoms`` is appended to the script invocation.
    """
    utilities.debug("Running fragmentation script")

    env = {
        'BLAST_DIR': '%s/blast' % tools_dir,
        'NR_DIR': '%s/db/nr' % scr_dir,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr_dir,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % tools_dir,
        'PSIPRED_DIR': '%s/psipred' % tools_dir,
        'JUFO_DIR': '%s/jufo' % tools_dir,
        'SAM_DIR': '%s/sam' % tools_dir,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % tools_dir
    }

    # Validate each location, then publish it through the process
    # environment so the script (run as a child process) can see it.
    for variable, location in env.items():
        paths.existsOrFail(location)
        os.environ[variable] = location

    invocation = paths.join(home, "scripts", frag_script)
    if no_homs:
        invocation = "%s -nohoms" % invocation

    # The script is given only the fasta file name because it runs from
    # inside scr_job_dir. The SAM step is always disabled via -nosam.
    fasta_name = paths.getFile(fasta_file)
    utilities.system(
        "cd %s && %s -verbose -nosam %s" % (scr_job_dir, invocation, fasta_name))
Example #11
0
def runScript():
    """Export the tool/database locations and run the fragment script.

    The following environment variables are set before the script runs:
    BLAST_DIR, NR_DIR, NNMAKEDB_DIR, NNMAKE_SHORT_DIR, PSIPRED_DIR,
    JUFO_DIR, SAM_DIR and SAM_2ND_DIR. Tool locations are derived from
    ``shareware`` and database locations from ``scr``; each location is
    passed to paths.existsOrFail() before it is exported.

    The script ``<home>/scripts/<pl_script>`` is then run from ``scr_job``
    with ``-verbose -nosam`` on the file name of ``scr_fasta``; ``-nohoms``
    is appended to the invocation when the global ``no_homs`` is true.
    """
    util.debug("Running fragmentation script")

    env = {
        'BLAST_DIR': '%s/blast' % shareware,
        'NR_DIR': '%s/db/nr' % scr,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % shareware,
        'PSIPRED_DIR': '%s/psipred' % shareware,
        'JUFO_DIR': '%s/jufo' % shareware,
        'SAM_DIR': '%s/sam' % shareware,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % shareware
    }

    # Validate each location, then publish it through the process
    # environment so the script (run as a child process) can see it.
    for variable, location in env.items():
        paths.existsOrFail(location)
        os.environ[variable] = location

    invocation = paths.join(home, "scripts", pl_script)
    if no_homs:
        invocation = "%s -nohoms" % invocation

    # The script is given only the fasta file name because it runs from
    # inside scr_job. The SAM step is always disabled via -nosam.
    fasta_name = paths.getFile(scr_fasta)
    util.system(
        "cd %s && %s -verbose -nosam %s" % (scr_job, invocation, fasta_name))
Example #12
0
def runScript():
    """Export the tool/database locations and run the fragment script.

    The following environment variables are set before the script runs:
    BLAST_DIR, NR_DIR, NNMAKEDB_DIR, NNMAKE_SHORT_DIR, PSIPRED_DIR,
    JUFO_DIR, SAM_DIR and SAM_2ND_DIR. Tool locations are derived from
    ``shareware`` and database locations from ``scr``; each location is
    passed to paths.existsOrFail() before it is exported.

    The script ``<home>/scripts/<pl_script>`` is then run from ``scr_job``
    with ``-verbose -nosam`` on the file name of ``scr_fasta``; ``-nohoms``
    is appended to the invocation when the global ``no_homs`` is true.
    """
    util.debug("Running fragmentation script")

    env = {
        'BLAST_DIR': '%s/blast' % shareware,
        'NR_DIR': '%s/db/nr' % scr,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % shareware,
        'PSIPRED_DIR': '%s/psipred' % shareware,
        'JUFO_DIR': '%s/jufo' % shareware,
        'SAM_DIR': '%s/sam' % shareware,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % shareware
    }

    # Validate each location, then publish it through the process
    # environment so the script (run as a child process) can see it.
    for variable, location in env.items():
        paths.existsOrFail(location)
        os.environ[variable] = location

    invocation = paths.join(home, "scripts", pl_script)
    if no_homs:
        invocation = "%s -nohoms" % invocation

    # The script is given only the fasta file name because it runs from
    # inside scr_job. The SAM step is always disabled via -nosam.
    fasta_name = paths.getFile(scr_fasta)
    util.system(
        "cd %s && %s -verbose -nosam %s" % (scr_job, invocation, fasta_name))
Example #13
0
def main():
    global code, task_id, fasta_file, results_dir, scr_dir, scr_job_dir_dir, home, tools_dir
    
    # Create a parser and add command-line options.
    parser = OptionParser(usage=usage_str)
    set_options(parser)
    
    # Parse command-line options and arguments (does automatically on sys.argv[1:]).
    (options, args) = parser.parse_args()
    
    # Parse required positional arguments (stored in args via parse_args()).
    parse_positional_args(args, parser)

    # If task_id is specified, set to a 3-digit integer. If not, try to grab task_id from PBS env. var.
    if (options.task_id):
        task_id = '%(#)03d' % {"#": options.task_id}
    else:
        try:
            task_id = '%(#)03d' % {"#": int(os.environ['PBS_ARRAYID'])-1}
        except:
            raise Exception("No Task ID specified or in system.")
    
    # If properties file is specified, print usage and exit (not supported).
    if (options.properties_file):
        print "Specifying a properties file not currently supported. See usage."
        parser.print_help()
        sys.exit(1)
    
    
    # REPLACE THIS with getting fasta file to work on from tasks.pickle. How?
    # Replace '%s' in input fasta_file with code+task_id.
    try:
        fasta_file = fasta_file.replace("%s", code+task_id)
    except:
        print "Error: fasta_file {0} does not contain required \%s.".format(fasta_file)
        raise
    
    # Set results_dir to format: <results_dir>/<code>/<code>+<task_id>
    results_dir = os.path.join(results_dir, code, code+task_id)

    # Set job scratch directory to format: <scr_dir>/job/<code>+<task_id>
    scr_job_dir = os.path.join(scr_dir, "job", code+task_id)

    # TODO: Get rid of all this scr_fasta stuff, and just do the copy (see notes).
    # Set scr_fasta to <scr_job_dir><fasta_file filename only>
    scr_fasta = os.path.join(scr_job_dir, paths.getFile(fasta_file))

    # Sets home to CWD and tools_dir to CWD/tools. Hardcoded.
    # TODO: when make_fragments.local is incorporated, no tools will be used this way (instead, they'll all come from the Env.)
    home = os.getcwd()
    tools_dir = paths.join(home, "tools")
    
    # Run main fragmenting functionality.
    try:
        # Copy fasta input to scratch dir
        replicate()
        
        # Run fragment making script
        runScript(no_homs = options.nohoms)
        
        # Copy results to results dir
        gather()
        
    finally:
        if (options.cleanup):
            cleanup()
    
    utilities.debug("FINISHED!")
Example #14
0
def main():
    global code, task_id, fasta_file, results_dir, scr_dir, scr_job_dir_dir, home, tools_dir

    # Create a parser and add command-line options.
    parser = OptionParser(usage=usage_str)
    set_options(parser)

    # Parse command-line options and arguments (does automatically on sys.argv[1:]).
    (options, args) = parser.parse_args()

    # Parse required positional arguments (stored in args via parse_args()).
    parse_positional_args(args, parser)

    # If task_id is specified, set to a 3-digit integer. If not, try to grab task_id from PBS env. var.
    if (options.task_id):
        task_id = '%(#)03d' % {"#": options.task_id}
    else:
        try:
            task_id = '%(#)03d' % {"#": int(os.environ['PBS_ARRAYID']) - 1}
        except:
            raise Exception("No Task ID specified or in system.")

    # If properties file is specified, print usage and exit (not supported).
    if (options.properties_file):
        print "Specifying a properties file not currently supported. See usage."
        parser.print_help()
        sys.exit(1)

    # REPLACE THIS with getting fasta file to work on from tasks.pickle. How?
    # Replace '%s' in input fasta_file with code+task_id.
    try:
        fasta_file = fasta_file.replace("%s", code + task_id)
    except:
        print "Error: fasta_file {0} does not contain required \%s.".format(
            fasta_file)
        raise

    # Set results_dir to format: <results_dir>/<code>/<code>+<task_id>
    results_dir = os.path.join(results_dir, code, code + task_id)

    # Set job scratch directory to format: <scr_dir>/job/<code>+<task_id>
    scr_job_dir = os.path.join(scr_dir, "job", code + task_id)

    # TODO: Get rid of all this scr_fasta stuff, and just do the copy (see notes).
    # Set scr_fasta to <scr_job_dir><fasta_file filename only>
    scr_fasta = os.path.join(scr_job_dir, paths.getFile(fasta_file))

    # Sets home to CWD and tools_dir to CWD/tools. Hardcoded.
    # TODO: when make_fragments.local is incorporated, no tools will be used this way (instead, they'll all come from the Env.)
    home = os.getcwd()
    tools_dir = paths.join(home, "tools")

    # Run main fragmenting functionality.
    try:
        # Copy fasta input to scratch dir
        replicate()

        # Run fragment making script
        runScript(no_homs=options.nohoms)

        # Copy results to results dir
        gather()

    finally:
        if (options.cleanup):
            cleanup()

    utilities.debug("FINISHED!")