Example #1
def parallel_pick_otus_process_run_results_f(f):
    """ Copy each list of infiles to each outfile and delete infiles
    
        f: file containing one set of mapping instructions per line
        
        example f:
         f1.txt f2.txt f3.txt f_combined.txt
         f1.log f2.log f3.log f_combined.log
         f1_failures.txt f2_failures.txt f3_failures.txt f_failures.txt
         
        If f contained the three lines above, this function would 
         merge f1.txt, f2.txt, and f3.txt into f_combined.txt and
         concatenate the log and failures files into f_combined.log
         and f_failures.txt, respectively.
    """
    lines = list(f)
    # handle catting of log files and failure files
    basic_process_run_results_f([lines[1]])
    try:
        basic_process_run_results_f([lines[2]])
    except IndexError:
        # no failures files were generated (BLAST
        # doesn't create these)
        pass
    # handle merging of otu maps
    fields = lines[0].strip().split()
    infiles_list = fields[:-1]
    out_filepath = fields[-1] 
    try:
        of = open(out_filepath, 'w')
    except IOError:
        raise IOError(
            "Poller can't open final output file: %s" % out_filepath +
            "\nLeaving individual jobs' output.\nDo you have write access?")

    unique_otu_map = {}
    for fp in infiles_list:
        for line in open(fp):
            fields = line.strip().split()
            try:
                # current otu_id already exists, so append this
                # set of seq_ids
                unique_otu_map[fields[0]] += fields[1:]
            except KeyError:
                # current otu_id has not been seen yet, so 
                # create it with the current set of otus
                unique_otu_map[fields[0]] = fields[1:]
    
    for otu_id, seq_ids in unique_otu_map.items():
        of.write('\t'.join([otu_id] + seq_ids))
        of.write('\n')            
    of.close()
    
    # It is a good idea to have your clean_up_callback return True.
    # That way, if you get mixed up and pass it as check_run_complete_callback, 
    # you'll get an error right away rather than going into an infinite loop
    return True
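The merge loop above collapses the per-job OTU maps by otu_id. A minimal, self-contained sketch of just that step, using hypothetical in-memory maps instead of files (setdefault is an idiomatic alternative to the try/except KeyError pattern used above):

job_maps = [
    ["otu1\tseqA\tseqB", "otu2\tseqC"],  # first job's OTU map
    ["otu1\tseqD", "otu3\tseqE"],        # second job's OTU map
]

unique_otu_map = {}
for map_lines in job_maps:
    for line in map_lines:
        fields = line.strip().split()
        # append to an existing otu_id, or create it on first sight
        unique_otu_map.setdefault(fields[0], []).extend(fields[1:])

for otu_id, seq_ids in sorted(unique_otu_map.items()):
    print('\t'.join([otu_id] + seq_ids))
# otu1    seqA    seqB    seqD
# otu2    seqC
# otu3    seqE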
Example #2
def parallel_uclust_ref_process_run_results_f(f):
    """ Copy each list of infiles to each outfile and delete infiles
    
        f: file containing one set of mapping instructions per line
        
        example f:
         f1.txt f2.txt f3.txt f_combined.txt
         f1.log f2.log f3.log f_combined.log
         f1_failures.txt f2_failures.txt f3_failures.txt f_failures.txt
         
        If f contained the three lines above, this function would 
         merge f1.txt, f2.txt, and f3.txt into f_combined.txt and
         concatenate the log and failures files into f_combined.log
         and f_failures.txt, respectively.
    """
    lines = list(f)
    # handle catting of log files and failure files
    basic_process_run_results_f([lines[1], lines[2]])
    # handle merging of otu maps
    fields = lines[0].strip().split()
    infiles_list = fields[:-1]
    out_filepath = fields[-1]
    try:
        of = open(out_filepath, 'w')
    except IOError:
        raise IOError(
            "Poller can't open final output file: %s" % out_filepath +
            "\nLeaving individual jobs' output.\nDo you have write access?")

    unique_otu_map = {}
    for fp in infiles_list:
        for line in open(fp):
            fields = line.strip().split()
            try:
                # current otu_id already exists, so append this
                # set of seq_ids
                unique_otu_map[fields[0]] += fields[1:]
            except KeyError:
                # current otu_id has not been seen yet, so
                # create it with the current set of otus
                unique_otu_map[fields[0]] = fields[1:]

    for otu_id, seq_ids in unique_otu_map.items():
        of.write('\t'.join([otu_id] + seq_ids))
        of.write('\n')
    of.close()

    # It is a good idea to have your clean_up_callback return True.
    # That way, if you get mixed up and pass it as check_run_complete_callback,
    # you'll get an error right away rather than going into an infinite loop
    return True
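uclust_ref always produces failures files, so lines[1] and lines[2] are catted in a single call rather than guarded by try/except as in Example #1. A hedged sketch of the kind of mappings file this function consumes (all file names here are made up for illustration; basic_process_run_results_f is assumed to come from the same QIIME poller module):

# Hypothetical mappings file: line 0 lists per-job OTU maps plus the
# merged output; lines 1 and 2 list the log and failures files.
mappings = (
    "job0_otus.txt job1_otus.txt combined_otus.txt\n"
    "job0.log job1.log combined.log\n"
    "job0_failures.txt job1_failures.txt combined_failures.txt\n"
)

with open('uclust_ref_mappings.txt', 'w') as out:
    out.write(mappings)

# With the per-job files in place, the poller would then run:
# parallel_uclust_ref_process_run_results_f(open('uclust_ref_mappings.txt'))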
Example #3
def parallel_pick_otus_trie_process_run_results_f(f):
    """ Copy each list of infiles to each outfile and delete infiles

        f: file containing one set of mapping instructions per line

        example f:
         f1.txt f2.txt f3.txt f_combined.txt
         f1.log f2.log f3.log f_combined.log
         f1_failures.txt f2_failures.txt f3_failures.txt f_failures.txt

         If f contained the three lines above, this function would
         merge f1.txt, f2.txt, and f3.txt into f_combined.txt and
         concatenate the log and failures files into f_combined.log
         and f_failures.txt, respectively.

         Note: this is mostly copied from
               parallel_pick_otus_process_run_results_f
               The only difference is how the otu_maps are combined.
               Since the otu_maps produced by the parallel trie OTU
               pickers contain disjoint sets of sequences, the otu_ids
               have to be made disjoint as well.
    """
    lines = list(f)
    # handle catting of log files and failure files
    basic_process_run_results_f([lines[1]])
    try:
        basic_process_run_results_f([lines[2]])
        basic_process_run_results_f([lines[3]])
    except IndexError:
        # no failures or blast6 files were generated
        # (not all OTU pickers create these)
        pass
    # handle merging of otu maps
    fields = lines[0].strip().split()
    infiles_list = fields[:-1]
    out_filepath = fields[-1]
    try:
        of = open(out_filepath, 'w')
    except IOError:
        raise IOError(
            "Poller can't open final output file: %s" % out_filepath +
            "\nLeaving individual jobs output.\n Do you have write access?")

    otu_id = 0
    for fp in infiles_list:
        for line in open(fp):
            fields = line.strip().split()
            of.write('\t'.join(["%d" % otu_id] + fields[1:]))
            of.write('\n')
            otu_id += 1
    of.close()

    # It is a good idea to have your clean_up_callback return True.
    # That way, if you get mixed up and pass it as check_run_complete_callback,
    # you'll get an error right away rather than going into an infinite loop
    return True
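Each trie picker numbers its OTUs independently, so two per-job maps can both contain the ids 0 and 1; merging by otu_id as in Example #1 would wrongly pool unrelated OTUs. A self-contained sketch of the renumbering, with hypothetical in-memory maps:

job_maps = [
    ["0\tseqA\tseqB", "1\tseqC"],    # first trie picker's map
    ["0\tseqD", "1\tseqE\tseqF"],    # second picker reuses ids 0 and 1
]

otu_id = 0
for map_lines in job_maps:
    for line in map_lines:
        fields = line.strip().split()
        # discard the per-job id and assign a fresh, globally unique one
        print('\t'.join(["%d" % otu_id] + fields[1:]))
        otu_id += 1
# 0    seqA    seqB
# 1    seqC
# 2    seqD
# 3    seqE    seqF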