Example no. 1
 def test_job_complete(self):
     """check if a job is complete"""
     t = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
     self.assertFalse(job_complete(t))
     self.assertFalse(job_complete(t.Children[0]))
     self.assertFalse(job_complete(t.Children[1].Children[1]))
     
     self.assertRaises(JobError, job_complete, t.Children[0].Children[0])
 
     f = 'test_parallel_merge_otus_JOB_COMPLETE_TEST.poll'
     self.assertFalse(os.path.exists(f))
     
     testf = open(f,'w')
     testf.write('0\n')
     testf.close()
     t.PollPath = f
     t.StartTime = 10
     
     self.assertTrue(job_complete(t))
     self.assertNotEqual(t.EndTime, None)
     self.assertNotEqual(t.TotalTime, None)
     
     testf = open(f,'w')
     testf.write('1\n')
     testf.close()
     
     self.assertRaises(JobError, job_complete, t)
     t.Processed = False
     self.assertRaises(JobError, job_complete, t)
     
     os.remove(f)
Example no. 2
    def test_job_complete(self):
        """check if a job is complete"""
        t = mergeorder(["A", "B", "C", "D", "E"], "foo")
        self.assertFalse(job_complete(t))
        self.assertFalse(job_complete(t.Children[0]))
        self.assertFalse(job_complete(t.Children[1].Children[1]))

        self.assertRaises(JobError, job_complete, t.Children[0].Children[0])

        f = "test_parallel_merge_otus_JOB_COMPLETE_TEST.poll"
        self.assertFalse(os.path.exists(f))

        testf = open(f, "w")
        testf.write("0\n")
        testf.close()
        t.PollPath = f
        t.StartTime = 10

        self.assertTrue(job_complete(t))
        self.assertNotEqual(t.EndTime, None)
        self.assertNotEqual(t.TotalTime, None)

        testf = open(f, "w")
        testf.write("1\n")
        testf.close()

        self.assertRaises(JobError, job_complete, t)
        t.Processed = False
        self.assertRaises(JobError, job_complete, t)

        os.remove(f)
Example no. 3
    def test_job_complete(self):
        """check if a job is complete"""
        t = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
        self.assertFalse(job_complete(t))
        self.assertFalse(job_complete(t.Children[0]))
        self.assertFalse(job_complete(t.Children[1].Children[1]))

        self.assertRaises(JobError, job_complete, t.Children[0].Children[0])

        f = 'test_parallel_merge_otus_JOB_COMPLETE_TEST.poll'
        self.assertFalse(os.path.exists(f))

        testf = open(f, 'w')
        testf.write('0\n')
        testf.close()
        t.PollPath = f
        t.StartTime = 10

        self.assertTrue(job_complete(t))
        self.assertNotEqual(t.EndTime, None)
        self.assertNotEqual(t.TotalTime, None)

        testf = open(f, 'w')
        testf.write('1\n')
        testf.close()

        self.assertRaises(JobError, job_complete, t)
        t.Processed = False
        self.assertRaises(JobError, job_complete, t)

        os.remove(f)
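
For context, the assertions in these test_job_complete examples imply roughly the following shape for job_complete. This is a sketch reconstructed from the tests, not the actual implementation: the JobError messages, the use of time(), and the exact attribute handling are assumptions.

import os
from time import time

class JobError(Exception):
    """Assumed to be defined alongside these helpers."""
    pass

def job_complete(node):
    """Check whether the job assigned to a node has finished (sketch)."""
    if node.istip() or node.Processed:
        # tips never run a merge job, and a node already marked processed
        # should not be polled again
        raise JobError("Node %s does not need to be processed!" % node.Name)

    # no poll file yet means the wrapped command has not written its exit status
    if node.PollPath is None or not os.path.exists(node.PollPath):
        return False

    # the poll file holds the exit status of the wrapped command
    exit_status = int(open(node.PollPath).read().strip())
    if exit_status != 0:
        raise JobError("Node %s did not complete correctly!" % node.Name)

    node.EndTime = time()
    node.TotalTime = node.EndTime - node.StartTime
    node.Processed = True
    return True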
Example no. 4
 def test_initial_nodes_to_merge(self):
     """determine the first nodes to merge"""
     t = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
     exp = set([t.Children[0], t.Children[1].Children[1]])
     obs = initial_nodes_to_merge(t)
     self.assertEqual(obs, exp)
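
The expected set above suggests that the first nodes to merge are simply the internal nodes whose children are all tips. A minimal sketch, assuming PyCogent-style nontips()/istip() traversal:

def initial_nodes_to_merge(tree):
    """Return the internal nodes whose children are all tips (sketch)."""
    to_merge = set()
    for node in tree.nontips(include_self=True):
        if all(child.istip() for child in node.Children):
            to_merge.add(node)
    return to_merge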
Example no. 5
 def test_mergeorder(self):
     """recursively build and join all the subtrees"""
     exp = "((A,B)0,(C,(D,E)1)2)3;"
     obs = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
     self.assertEqual(obs.getNewick(escape_name=False), exp)
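
The expected Newick string suggests that mergeorder recursively bisects the input list and then numbers the internal nodes in postorder. The sketch below reproduces that behaviour; the PyCogent TreeNode import, the FilePath/PollPath/Processed attributes, and the output-file naming are assumptions drawn from the other tests and from main() below, not the actual implementation.

from cogent.core.tree import TreeNode

def _bisect(items):
    """Recursively pair the inputs into a binary join tree (sketch)."""
    if len(items) == 1:
        return TreeNode(Name=items[0])
    mid = len(items) // 2
    node = TreeNode()
    node.append(_bisect(items[:mid]))
    node.append(_bisect(items[mid:]))
    return node

def mergeorder(items, wdir):
    """Determine the order in which the tables get merged (sketch)."""
    tree = _bisect(items)

    # internal nodes are named 0, 1, 2, ... in postorder; each one writes its
    # merged table (and, later, its poll file) under the working directory
    count = 0
    for node in tree.postorder():
        node.Processed = False
        node.PollPath = None
        if node.istip():
            node.FilePath = node.Name
        else:
            node.Name = str(count)
            node.FilePath = '%s/%s.biom' % (wdir, node.Name)
            count += 1
    return tree

With ['A', 'B', 'C', 'D', 'E'] this sketch yields ((A,B)0,(C,(D,E)1)2)3;, matching the expected string.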
Example no. 6
 def test_start_job(self):
     """start a job"""
     exp = 'echo "y -i A.biom,B.biom -o foo/0.biom; echo $? > foo/0.biom.poll" | qsub -k oe -N MOTU -q ignored'
     t = mergeorder(['A.biom', 'B.biom', 'C', 'D', 'E'], 'foo')
     start_job(t.Children[0], 'y', 'ignored', torque_job, False)
     self.assertEqual(t.Children[0].FullCommand, exp)
Example no. 7
 def test_initial_has_dependencies(self):
     """determine initial has_dependencies"""
     t = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
     exp = [t, t.Children[1]]
     obs = initial_has_dependencies(t, initial_nodes_to_merge(t))
     self.assertEqual(obs, exp)
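
initial_has_dependencies appears to collect the remaining internal nodes, i.e. those that must wait for child merges to finish. A minimal sketch consistent with the expected list, assuming nontips() yields nodes in preorder:

def initial_has_dependencies(tree, to_process):
    """Return the internal nodes that still depend on other merges (sketch)."""
    return [n for n in tree.nontips(include_self=True) if n not in to_process]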
Example no. 8
 def test_initial_nodes_to_merge(self):
     """determine the first nodes to merge"""
     t = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
     exp = set([t.Children[0], t.Children[1].Children[1]])
     obs = initial_nodes_to_merge(t)
     self.assertEqual(obs, exp)
Example no. 9
 def test_mergeorder(self):
     """recursively build and join all the subtrees"""
     exp = "((A,B)0,(C,(D,E)1)2)3;"
     obs = mergeorder(["A", "B", "C", "D", "E"], "foo")
     self.assertEqual(obs.getNewick(escape_name=False), exp)
Example no. 10
 def test_start_job(self):
     """start a job"""
     exp = 'echo "x y -i A.biom,B.biom -o foo/0.biom; echo $? > foo/0.biom.poll" | qsub -k oe -N MOTU -q ignored'
     t = mergeorder(['A.biom', 'B.biom', 'C', 'D', 'E'], 'foo')
     start_job(t.Children[0], 'x', 'y', 'ignored', torque_job, False)
     self.assertEqual(t.Children[0].FullCommand, exp)
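
The expected FullCommand shows how the torque wrapper is assembled: the merge command, followed by a write of its exit status to a poll file, is echoed into qsub. Below is a sketch of torque_job consistent with that string; the parameter names and order are guesses, only the resulting format comes from the test.

def torque_job(cmd, pollpath, name, queue):
    """Wrap a command so its exit status is captured in a poll file (sketch)."""
    return 'echo "%s; echo $? > %s" | qsub -k oe -N %s -q %s' % (
        cmd, pollpath, name, queue)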
Example no. 11
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    input_fps = opts.input_fps
    output_dir = opts.output_dir
    seconds_to_sleep = opts.seconds_to_sleep
    verbose = opts.verbose

    merge_otus_serial_script = 'merge_otu_tables.py'
    created_temp_paths = []

    # set the job_prefix either based on what the user passed in,
    # or a random string beginning with MOTU
    job_prefix = opts.job_prefix or get_random_job_prefix('MOTU')

    # A temporary output directory is created in output_dir named
    # job_prefix. Output files are then moved from the temporary
    # directory to the output directory when they are complete, allowing
    # a poller to detect when runs complete by the presence of their
    # output files.
    working_dir = '%s/%s' % (output_dir, job_prefix)
    try:
        makedirs(working_dir)
    except OSError:
        # working dir already exists
        pass

    import os.path
    # wrapper log output contains run details
    log_fp = os.path.join(output_dir, 'parallel_merge_otus.log')

    wrapper_log_output = open(log_fp, 'w')
    wrapper_log_output.write("Parallel merge output\n\n")

    # construct the dependency tree
    import os

    for f in input_fps:
        if not os.path.exists(f):
            raise IOError("%f does not exist!" % f)

    tree = mergeorder(input_fps, working_dir)

    if verbose:
        print tree.asciiArt()

    wrapper_log_output.write('Dependency tree:\n')
    wrapper_log_output.write(tree.asciiArt())
    wrapper_log_output.write('\n\n')
    wrapper_log_output.flush()

    to_process = initial_nodes_to_merge(tree)
    has_dependencies = initial_has_dependencies(tree, to_process)

    # loop until the whole shebang is done
    pending = []  # jobs that are currently running

    while not tree.Processed:
        # check if we have nodes to process, if so, shoot them off
        for node in to_process:
            if opts.cluster:
                start_job(node,
                          merge_otus_serial_script,
                          qiime_config['torque_queue'],
                          wrap_call=torque_job)
            else:
                start_job(node,
                          merge_otus_serial_script,
                          qiime_config['torque_queue'],
                          wrap_call=local_job)

            wrapper_log_output.write(node.FullCommand)
            wrapper_log_output.write('\n')
            wrapper_log_output.flush()

            pending.append(node)
        to_process = set([])

        # check running jobs
        current_pending = []
        for pending_node in pending:
            # if we're complete, update state
            if job_complete(pending_node):
                wrapper_log_output.write(
                    "Node %s completed in %f seconds" %
                    (pending_node.Name, pending_node.TotalTime))
                wrapper_log_output.write('\n')
                wrapper_log_output.flush()
            else:
                current_pending.append(pending_node)
        pending = current_pending

        # check for new jobs to add
        current_dependencies = []
        for dep_node in has_dependencies:
            # if children are satisfied, then allow for processing
            # the logic here is odd to handle the case where an internal node
            # has both a tip that is a child and a child that is an internal node
            children_are_complete = [(c.Processed or c.istip())
                                     for c in dep_node.Children]
            if all(children_are_complete):
                to_process.add(dep_node)
            else:
                current_dependencies.append(dep_node)
        has_dependencies = current_dependencies

        sleep(seconds_to_sleep)
    os.rename(tree.FilePath, "%s/%s" % (output_dir, "merged.biom"))
Example no. 12
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    input_fps = opts.input_fps
    output_dir = opts.output_dir
    seconds_to_sleep = opts.seconds_to_sleep
    verbose = opts.verbose

    merge_otus_serial_script = 'merge_otu_tables.py'
    created_temp_paths = []

    # set the job_prefix either based on what the user passed in,
    # or a random string beginning with MOTU
    job_prefix = opts.job_prefix or get_random_job_prefix('MOTU')

    # A temporary output directory is created in output_dir named
    # job_prefix. Output files are then moved from the temporary
    # directory to the output directory when they are complete, allowing
    # a poller to detect when runs complete by the presence of their
    # output files.
    working_dir = '%s/%s' % (output_dir, job_prefix)
    try:
        makedirs(working_dir)
    except OSError:
        # working dir already exists
        pass

    import os.path
    # wrapper log output contains run details
    log_fp = os.path.join(output_dir, 'parallel_merge_otus.log')

    wrapper_log_output = open(log_fp, 'w')
    wrapper_log_output.write("Parallel merge output\n\n")

    # construct the dependency tree
    import os

    for f in input_fps:
        if not os.path.exists(f):
            raise IOError("%f does not exist!" % f)

    tree = mergeorder(input_fps, working_dir)

    if verbose:
        print tree.asciiArt()

    wrapper_log_output.write('Dependency tree:\n')
    wrapper_log_output.write(tree.asciiArt())
    wrapper_log_output.write('\n\n')
    wrapper_log_output.flush()

    to_process = initial_nodes_to_merge(tree)
    has_dependencies = initial_has_dependencies(tree, to_process)

    # loop until the whole shebang is done
    pending = []  # jobs that are currently running

    while not tree.Processed:
        # check if we have nodes to process, if so, shoot them off
        for node in to_process:
            if opts.cluster:
                start_job(node, merge_otus_serial_script,
                          qiime_config['torque_queue'], wrap_call=torque_job)
            else:
                start_job(node, merge_otus_serial_script,
                          qiime_config['torque_queue'], wrap_call=local_job)

            wrapper_log_output.write(node.FullCommand)
            wrapper_log_output.write('\n')
            wrapper_log_output.flush()

            pending.append(node)
        to_process = set([])

        # check running jobs
        current_pending = []
        for pending_node in pending:
            # if we're complete, update state
            if job_complete(pending_node):
                wrapper_log_output.write("Node %s completed in %f seconds" %
                                         (pending_node.Name, pending_node.TotalTime))
                wrapper_log_output.write('\n')
                wrapper_log_output.flush()
            else:
                current_pending.append(pending_node)
        pending = current_pending

        # check for new jobs to add
        current_dependencies = []
        for dep_node in has_dependencies:
            # if children are satisfied, then allow for processing
            # the logic here is odd to handle the case where an internal node
            # has both a tip that is a child and a child that is an internal node
            children_are_complete = [(c.Processed or c.istip())
                                     for c in dep_node.Children]
            if all(children_are_complete):
                to_process.add(dep_node)
            else:
                current_dependencies.append(dep_node)
        has_dependencies = current_dependencies

        sleep(seconds_to_sleep)
    os.rename(tree.FilePath, "%s/%s" % (output_dir, "merged.biom"))
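
main() dispatches to either torque_job or local_job through start_job's wrap_call argument, but the local wrapper is not shown in these examples. A minimal sketch, assuming it shares torque_job's signature and simply appends the poll-file write without submitting to a queue (name and queue would then be ignored):

def local_job(cmd, pollpath, name, queue):
    """Wrap a command for local execution (sketch); name and queue are unused."""
    return '%s; echo $? > %s' % (cmd, pollpath)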
Example no. 13
 def test_initial_has_dependencies(self):
     """determine initial has_dependencies"""
     t = mergeorder(["A", "B", "C", "D", "E"], "foo")
     exp = [t, t.Children[1]]
     obs = initial_has_dependencies(t, initial_nodes_to_merge(t))
     self.assertEqual(obs, exp)
Example no. 14
 def test_initial_nodes_to_merge(self):
     """determine the first nodes to merge"""
     t = mergeorder(["A", "B", "C", "D", "E"], "foo")
     exp = set([t.Children[0], t.Children[1].Children[1]])
     obs = initial_nodes_to_merge(t)
     self.assertEqual(obs, exp)
Example no. 15
 def test_initial_has_dependencies(self):
     """determine initial has_dependencies"""
     t = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
     exp = [t, t.Children[1]]
     obs = initial_has_dependencies(t, initial_nodes_to_merge(t))
     self.assertEqual(obs, exp)
Example no. 16
 def test_mergeorder(self):
     """recursively build and join all the subtrees"""
     exp = "((A,B)0,(C,(D,E)1)2)3;"
     obs = mergeorder(['A', 'B', 'C', 'D', 'E'], 'foo')
     self.assertEqual(obs.getNewick(escape_name=False), exp)
Example no. 17
 def test_start_job(self):
     """start a job"""
     exp = 'echo "y -i A.biom,B.biom -o foo/0.biom; echo $? > foo/0.biom.poll" | qsub -k oe -N MOTU -q ignored'
     t = mergeorder(["A.biom", "B.biom", "C", "D", "E"], "foo")
     start_job(t.Children[0], "y", "ignored", torque_job, False)
     self.assertEqual(t.Children[0].FullCommand, exp)