def getTestSuite():
    """Return a ``unittest.TestSuite`` wrapping the tests loaded from
    ``TestSingleSnpAllPlusSelect``.

    :rtype: unittest.TestSuite
    """
    loader = unittest.TestLoader()
    return unittest.TestSuite(
        [loader.loadTestsFromTestCase(TestSingleSnpAllPlusSelect)])


if __name__ == '__main__':

    # This import is needed for the runner.
    from fastlmm.association.tests.test_single_snp_all_plus_select import TestSingleSnpAllPlusSelect

    suites = unittest.TestSuite([getTestSuite()])

    # Hand-edited switch: flip the condition to use the cluster-style run instead.
    if True:  # Standard test run
        unittest.TextTestRunner(failfast=True).run(suites)
    else:  # Cluster test run
        from pysnptools.util.mapreduce1.runner import Local, LocalMultiProc
        logging.basicConfig(level=logging.INFO)
        from pysnptools.util.mapreduce1.distributabletest import DistributableTest

        # Alternative runners (swap in by hand):
        #runner = LocalMultiProc(taskcount=20,mkl_num_threads=5)
        #runner = LocalInParts(1,2,mkl_num_threads=1) # For debugging the cluster runs
        runner = Local()
        distributable_test = DistributableTest(suites, "temp_test")
        print(runner.run(distributable_test))

    logging.info("done with testing")
def map_reduce(input_seq, mapper=_identity, reducer=list, input_files=None, output_files=None, name=None, runner=None, nested=None):
    """
    Applies a mapper to every item of a sequence and then applies a reducer to the mapped results.
    Calls can be nested and clusterized.

    :param input_seq: the inputs to process. The sequence must support the len function and be
        indexable, e.g. a list or a range.
    :type input_seq: a sequence

    :param mapper: The function applied to each input (optional). Defaults to the identity function.
    :type mapper: a function

    :param reducer: The function that turns the mapper results into a single value (optional).
        Defaults to creating a list of the results.
    :type reducer: a function that takes a sequence

    :param input_files: An optional list telling which input files are needed. Entries may be file names
        (strings), None (ignored), or objects such as :class:`.SnpReader`'s that can self-report their input files.
    :type input_files: a list

    :param output_files: An optional list telling which output files will be produced. Entries may be file names
        (strings), None (ignored), or objects such as :class:`.SnpReader`'s that can self-report their output files.
    :type output_files: a list

    :param name: A name to be displayed if this work is done on a cluster.
    :type name: a string

    :param runner: a :class:`.Runner`, optional: Tells how to run locally, multi-processor, or on a cluster.
        If not given, the function is run locally.
    :type runner: :class:`.Runner`

    :param nested: a mapper function that is itself a map_reduce. Some runners can efficiently clusterize
        such nested mappers.
    :type nested: a function

    :rtype: The results from the reducer.

    :Example:

    Square the numbers 0 to 99 and report their sum, locally:

        >>> from pysnptools.util.mapreduce1 import map_reduce
        >>> from six.moves import range #Python 2 & 3 compatibility
        >>> map_reduce(range(100),
        ...        mapper=lambda x: x*x,
        ...        reducer=sum)
        328350

    Compute it again, this time run on four processors:

        >>> from pysnptools.util.mapreduce1.runner import LocalMultiProc
        >>> from six.moves import range #Python 2 & 3 compatibility
        >>> map_reduce(range(100),
        ...        mapper=lambda x: x*x,
        ...        reducer=sum,
        ...        runner=LocalMultiProc(4))
        328350

    Compute it using named functions, again using four processors:

        >>> def holder1(n,runner):
        ...     def mapper1(x):
        ...         return x*x
        ...     def reducer1(sequence):
        ...        return sum(sequence)
        ...     return map_reduce(range(n),mapper=mapper1,reducer=reducer1,runner=runner)
        >>> holder1(100,LocalMultiProc(4))
        328350

    """
    job = _MapReduce(
        input_seq,
        mapper=mapper,
        nested=nested,
        reducer=reducer,
        input_files=input_files,
        output_files=output_files,
        name=name,
    )

    if runner is None:
        # With no explicit runner inside a nested map_reduce, return the
        # un-run job object itself rather than executing it here.
        if _is_in_nested():
            return job
        # Otherwise fall back to running locally.
        runner = Local()

    return runner.run(job)
# NOTE(review): these statements read like the body of an
# ``if __name__ == '__main__':`` guard whose header is not visible in this
# excerpt -- confirm the enclosing indentation when merging.

# This import is needed for the runner.
from fastlmm.association.tests.test_single_snp_select import TestSingleSnpSelect

suites = unittest.TestSuite([getTestSuite()])

# Hand-edited switch: flip the condition to use the cluster-style run instead.
if True:  # Standard test run
    r = unittest.TextTestRunner(failfast=False)
    r.run(suites)
else:  # Cluster test run
    from pysnptools.util.mapreduce1.runner import Local, LocalMultiProc
    logging.basicConfig(level=logging.INFO)
    from pysnptools.util.mapreduce1.distributabletest import DistributableTest

    # Alternative runners (swap in by hand):
    #runner = HPC(10, 'RR1-N13-09-H44',r'\\msr-arrays\Scratch\msr-pool\Scratch_Storage4\Redmond',
    #             remote_python_parent=r"\\msr-arrays\Scratch\msr-pool\Scratch_Storage4\REDMOND\carlk\Source\carlk\july_7_14\tests\runs\2014-07-24_15_02_02_554725991686\pythonpath",
    #             update_remote_python_parent=True,
    #             priority="AboveNormal",mkl_num_threads=1)
    runner = Local()
    #runner = LocalMultiProc(taskcount=20,mkl_num_threads=5)
    #runner = LocalInParts(1,2,mkl_num_threads=1) # For debugging the cluster runs
    #runner = Hadoop(100, mapmemory=8*1024, reducememory=8*1024, mkl_num_threads=1, queue="default")
    distributable_test = DistributableTest(suites, "temp_test")
    # Call form of print: the Python 2 print statement is a SyntaxError on
    # Python 3, while print(<single value>) prints the same on both.
    print(runner.run(distributable_test))

logging.info("done with testing")