예제 #1
0
def getTestSuite():
    """
    Build the unittest suite for this module's test case.

    :rtype: a :class:`unittest.TestSuite` containing the tests loaded from
        ``TestSingleSnpAllPlusSelect``.
    """
    loader = unittest.TestLoader()
    loaded_tests = loader.loadTestsFromTestCase(TestSingleSnpAllPlusSelect)
    return unittest.TestSuite([loaded_tests])


if __name__ == '__main__':

    # The runner needs the test case importable under its full package path.
    from fastlmm.association.tests.test_single_snp_all_plus_select import TestSingleSnpAllPlusSelect
    suites = unittest.TestSuite([getTestSuite()])

    # Manual switch: flip the literal to False to take the cluster-style path.
    if True:  # Standard test run -- stop at the first failing test
        unittest.TextTestRunner(failfast=True).run(suites)
    else:  # Cluster test run

        from pysnptools.util.mapreduce1.runner import Local, LocalMultiProc
        logging.basicConfig(level=logging.INFO)

        from pysnptools.util.mapreduce1.distributabletest import DistributableTest

        # Alternative runners, kept for reference:
        #   LocalMultiProc(taskcount=20,mkl_num_threads=5)
        #   LocalInParts(1,2,mkl_num_threads=1)  # For debugging the cluster runs
        runner = Local()
        print(runner.run(DistributableTest(suites, "temp_test")))

    logging.info("done with testing")
예제 #2
0
def map_reduce(input_seq,
               mapper=_identity,
               reducer=list,
               input_files=None,
               output_files=None,
               name=None,
               runner=None,
               nested=None):
    """
    Applies a mapper to every item of a sequence and then hands all the mapped results to a reducer.
    Calls can be nested, and the work can be run on a cluster.

    :param input_seq: the inputs to map over. Must support ``len`` and indexing, e.g. a list or ``range(100)``.
    :type input_seq: a sequence

    :param mapper: (optional) the function applied to each input. Defaults to the identity function.
    :type mapper: a function

    :param reducer: (optional) the function that combines the mapper results into one value.
        Defaults to collecting the results in a list.
    :type reducer: a function that takes a sequence

    :param input_files: (optional) the input files the work needs. Items may be file names (strings),
        None (ignored), or objects such as :class:`.SnpReader`'s that can self-report their input files.
    :type input_files: a list

    :param output_files: (optional) the output files the work will produce. Items may be file names (strings),
        None (ignored), or objects such as :class:`.SnpReader`'s that can self-report their output files.
    :type output_files: a list

    :param name: the name displayed when the work is done on a cluster.
    :type name: a string

    :param runner: (optional) a :class:`.Runner` that says how to run: locally, multi-processor, or on a cluster.
        If not given, the work is run locally -- unless this call is made inside a nested map_reduce,
        in which case the work is returned unrun instead of being executed here.
    :type runner: :class:`.Runner`

    :param nested: a mapper function that is itself a map_reduce. Some runners can efficiently clusterize
        such nested mappers.
    :type nested: a function

    :rtype: The results from the reducer.


    :Example:

    Square the numbers 0 to 99 and report their sum, locally:

        >>> from pysnptools.util.mapreduce1 import map_reduce
        >>> from six.moves import range #Python 2 & 3 compatibility
        >>> map_reduce(range(100), 
        ...        mapper=lambda x: x*x,
        ...        reducer=sum)
        328350

    Compute it again, this time run on four processors:

        >>> from pysnptools.util.mapreduce1.runner import LocalMultiProc
        >>> from six.moves import range #Python 2 & 3 compatibility
        >>> map_reduce(range(100),
        ...        mapper=lambda x: x*x,
        ...        reducer=sum,
        ...        runner=LocalMultiProc(4))
        328350

    Compute it using named functions, again using four processors:

        >>> def holder1(n,runner):
        ...     def mapper1(x):
        ...         return x*x
        ...     def reducer1(sequence):
        ...        return sum(sequence)
        ...     return map_reduce(range(n),mapper=mapper1,reducer=reducer1,runner=runner)
        >>> holder1(100,LocalMultiProc(4))
        328350

    """

    # Describe the work first; nothing is executed until a runner runs it.
    work = _MapReduce(input_seq,
                      mapper=mapper,
                      nested=nested,
                      reducer=reducer,
                      input_files=input_files,
                      output_files=output_files,
                      name=name)

    # An explicitly supplied runner always wins.
    if runner is not None:
        return runner.run(work)

    # No runner and we are inside a nested call: return the unrun work itself.
    if _is_in_nested():
        return work

    # No runner and not nested: run locally.
    return Local().run(work)
예제 #3
0
    # this import is needed for the runner
    from fastlmm.association.tests.test_single_snp_select import TestSingleSnpSelect
    suites = unittest.TestSuite([getTestSuite()])

    # Manual switch: flip the literal to False to take the cluster-style path.
    if True: #Standard test run
        r = unittest.TextTestRunner(failfast=False)
        r.run(suites)
    else: #Cluster test run

        from pysnptools.util.mapreduce1.runner import Local, LocalMultiProc
        logging.basicConfig(level=logging.INFO)

        from pysnptools.util.mapreduce1.distributabletest import DistributableTest

        # Alternative runners, kept for reference; swap one in for the Local() line below.
        #runner = HPC(10, 'RR1-N13-09-H44',r'\\msr-arrays\Scratch\msr-pool\Scratch_Storage4\Redmond',
        #                remote_python_parent=r"\\msr-arrays\Scratch\msr-pool\Scratch_Storage4\REDMOND\carlk\Source\carlk\july_7_14\tests\runs\2014-07-24_15_02_02_554725991686\pythonpath",
        #                update_remote_python_parent=True,
        #                priority="AboveNormal",mkl_num_threads=1)
        runner = Local()
        #runner = LocalMultiProc(taskcount=20,mkl_num_threads=5)
        #runner = LocalInParts(1,2,mkl_num_threads=1) # For debugging the cluster runs
        #runner = Hadoop(100, mapmemory=8*1024, reducememory=8*1024, mkl_num_threads=1, queue="default")
        distributable_test = DistributableTest(suites,"temp_test")
        # Call form of print: the Python 2 print statement is a SyntaxError on
        # Python 3, while a single parenthesized argument prints the same on both.
        print(runner.run(distributable_test))

    logging.info("done with testing")