def test_local(self):
    from pysnptools.util.mapreduce1 import map_reduce
    from pysnptools.util.mapreduce1.runner import Local

    def holder1(n, runner):
        def mapper1(x):
            return x * x

        def reducer1(sequence):
            return sum(sequence)

        return map_reduce(range(n), mapper=mapper1, reducer=reducer1, runner=runner)

    assert 328350 == holder1(100, Local())
if __name__ == '__main__':
    import logging
    import unittest
    from fastlmm.association.tests.testepistasis import TestEpistasis

    suites = unittest.TestSuite([getTestSuite()])

    if True:  # Standard test run
        r = unittest.TextTestRunner(failfast=False)
        r.run(suites)
    else:  # Cluster test run
        from pysnptools.util.mapreduce1.runner import Local, HPC
        from pysnptools.util.mapreduce1.distributabletest import DistributableTest

        runner = HPC(10, 'RR1-N13-09-H44',
                     r'\\msr-arrays\Scratch\msr-pool\Scratch_Storage4\Redmond',
                     remote_python_parent=r"\\msr-arrays\Scratch\msr-pool\Scratch_Storage4\REDMOND\carlk\Source\carlk\july_7_14\tests\runs\2014-07-24_15_02_02_554725991686\pythonpath",
                     update_remote_python_parent=True,
                     priority="AboveNormal",
                     mkl_num_threads=1)
        runner = Local()
        #runner = LocalMultiProc(taskcount=20, mkl_num_threads=5)
        #runner = LocalInParts(1, 2, mkl_num_threads=1)  # For debugging the cluster runs
        #runner = Hadoop(100, mapmemory=8*1024, reducememory=8*1024, mkl_num_threads=1, queue="default")
        distributable_test = DistributableTest(suites, "temp_test")
        print(runner.run(distributable_test))

    logging.info("done with testing")
def map_reduce(input_seq, mapper=_identity, reducer=list, input_files=None,
               output_files=None, name=None, runner=None, nested=None):
    """
    Runs a function on a sequence of inputs and runs a second function on the results. Can be nested and clusterized.

    :param input_seq: a sequence of inputs. The sequence must support the len function and be indexable. e.g. a list, xrange(100)
    :type input_seq: a sequence

    :param mapper: A function to apply to each set of inputs (optional). Defaults to the identity function.
    :type mapper: a function

    :param reducer: A function to turn the results from the mapper into a single value (optional). Defaults to creating a list of the results.
    :type reducer: a function that takes a sequence

    :param input_files: An optional list that tells what input files are needed. The list can contain the names of files (strings), None (ignored), or objects such as :class:`.SnpReader`'s that can self-report their input files.
    :type input_files: a list

    :param output_files: An optional list that tells what output files will be produced. The list can contain the names of files (strings), None (ignored), or objects such as :class:`.SnpReader`'s that can self-report their output files.
    :type output_files: a list

    :param name: A name to be displayed if this work is done on a cluster.
    :type name: a string

    :param runner: a :class:`.Runner`, optional: Tells how to run locally, multi-processor, or on a cluster. If not given, the function is run locally.
    :type runner: :class:`.Runner`

    :param nested: a mapper function that is itself a map_reduce. Some runners can efficiently clusterize such nested mappers.
    :type nested: a function

    :rtype: The results from the reducer.

    :Example:

    Square the numbers 0 to 99 and report their sum, locally:

    >>> from pysnptools.util.mapreduce1 import map_reduce
    >>> from six.moves import range #Python 2 & 3 compatibility
    >>> map_reduce(range(100),
    ...     mapper=lambda x: x*x,
    ...     reducer=sum)
    328350

    Compute it again, this time running on four processors:

    >>> from pysnptools.util.mapreduce1.runner import LocalMultiProc
    >>> map_reduce(range(100),
    ...     mapper=lambda x: x*x,
    ...     reducer=sum,
    ...     runner=LocalMultiProc(4))
    328350

    Compute it using named functions, again with four processors:

    >>> def holder1(n, runner):
    ...     def mapper1(x):
    ...         return x*x
    ...     def reducer1(sequence):
    ...         return sum(sequence)
    ...     return map_reduce(range(n), mapper=mapper1, reducer=reducer1, runner=runner)
    >>> holder1(100, LocalMultiProc(4))
    328350

    """
    dist = _MapReduce(input_seq, mapper=mapper, nested=nested, reducer=reducer,
                      input_files=input_files, output_files=output_files, name=name)

    if runner is None and _is_in_nested():
        return dist

    if runner is None:
        runner = Local()

    result = runner.run(dist)
    return result
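

# --- Hedged example (not in the original source): the docstring above documents
# the `nested` parameter without showing it in use. In this sketch the inner
# map_reduce is given no runner, so when called inside a nested context it
# returns a distributable for the outer runner to clusterize.
# `_nested_example` is a hypothetical name.
def _nested_example(runner=None):
    def inner(i):
        # No runner here: inside `nested`, this returns a distributable
        # rather than running immediately.
        return map_reduce(range(i), mapper=lambda x: x * x, reducer=sum)

    return map_reduce(range(5), nested=inner, reducer=sum, runner=runner)

# Expected: _nested_example(Local()) == 0 + 0 + 1 + 5 + 14 == 20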
def mf_to_runner_function(mf):
    excluded_nodes = []
    # previously excluded nodes:
    # 'GCRCM07B20','GCRCM11B05','GCRCM10B06','GCRCM02B07','GCRCM02B11','GCRCM03B07',
    # 'GCRCM22B06','GCRCN0383','GCRCM02B07','GCRCN0179','GCRCM37B13','GCRCN0376','GCRCN0456','gcrcn0231',
    # "MSR-HDP-DN0316","MSR-HDP-DN0321","MSR-HDP-DN0336","MSR-HDP-DN0377","MSR-HDP-DN0378","MSR-HDP-DN0314",
    # "MSR-HDP-DN0335","MSRQC073","MSRQC002","MSRQC015"
    remote_python_parent = r"\\GCR\Scratch\RR1\escience\carlk\data\carlk\pythonpath10262016"
    clean_up = False

    if mf == "debug":
        runner_function = lambda ignore: LocalInParts(
            215, 215, mkl_num_threads=20, result_file="result.p",
            run_dir=r"C:\deldir\test\outputx")
    elif mf == "local":
        runner_function = lambda ignore: Local()
    elif mf == "local1":
        runner_function = lambda ignore: Local(1)
    elif mf == "lmp":
        runner_function = lambda ignore: LocalMultiProc(22, 5)
    elif mf == "lmt":
        runner_function = lambda ignore: LocalMultiThread(22, 5)
    elif mf == "lmtl":
        runner_function = lambda ignore: LocalMultiThread(22, 5, just_one_process=True)
    elif mf == "lmp4":
        runner_function = lambda ignore: LocalMultiProc(4, 5)
    elif mf == "lmpl":
        runner_function = lambda taskcount: LocalMultiProc(taskcount, taskcount, just_one_process=True)
    elif mf == "nodeP":
        runner_function = lambda taskcount: HPC(
            min(taskcount, 30100), 'GCR', r"\\GCR\Scratch\RR1\escience",
            remote_python_parent=remote_python_parent,
            unit='node',  # core, socket, node
            update_remote_python_parent=True,
            template="Preemptable",
            priority="Lowest",
            excluded_nodes=excluded_nodes,
            #mkl_num_threads=20,
            nodegroups="Preemptable",
            runtime="0:11:0",  # day:hour:min
            #min=10,  # max(1, min(taskcount, 110) // 20)
            #max=min(taskcount, 500),
            clean_up=clean_up,
        )
    elif mf == "nodeP99":
        runner_function = lambda taskcount: HPC(
            min(taskcount, 30100), 'GCR', r"\\GCR\Scratch\RR1\escience",
            remote_python_parent=remote_python_parent,
            unit='node',  # core, socket, node
            update_remote_python_parent=True,
            template="Preemptable",
            priority="Lowest",
            excluded_nodes=excluded_nodes,
            #mkl_num_threads=20,
            nodegroups="Preemptable,B99",
            runtime="0:11:0",  # day:hour:min
            #min=10,  # max(1, min(taskcount, 110) // 20)
            #max=min(taskcount, 500),
            clean_up=clean_up,
        )
    elif mf == "nodeL99":
        runner_function = lambda taskcount: HPC(
            min(taskcount, 30100), 'GCR', r"\\GCR\Scratch\RR1\escience",
            remote_python_parent=remote_python_parent,
            unit='node',  # core, socket, node
            update_remote_python_parent=True,
            template="LongRunQ",
            priority="Lowest",
            excluded_nodes=excluded_nodes,
            #mkl_num_threads=20,
            nodegroups="LongRunQ,B99",
            runtime="11:0:0",  # day:hour:min
            #min=10,  # max(1, min(taskcount, 110) // 20)
            #max=min(taskcount, 500),
            clean_up=clean_up,
        )
    elif mf == "socketP":
        runner_function = lambda taskcount: HPC(
            min(taskcount, 30100), 'GCR', r"\\GCR\Scratch\RR1\escience",
            remote_python_parent=remote_python_parent,
            unit='socket',  # core, socket, node
            update_remote_python_parent=True,
            template="Preemptable",
            priority="Lowest",
            excluded_nodes=excluded_nodes,
            mkl_num_threads=10,
            nodegroups="Preemptable",
            runtime="0:11:0",  # day:hour:min
            #min=max(1, min(taskcount, 110) // 20),
            clean_up=clean_up,
        )
    elif mf == "coreP":
        runner_function = lambda taskcount: HPC(
            min(taskcount, 1000), 'GCR', r"\\GCR\Scratch\RR1\escience",
            remote_python_parent=remote_python_parent,
            unit='core',  # core, socket, node
            update_remote_python_parent=True,
            template="Preemptable",
            priority="Lowest",
            excluded_nodes=excluded_nodes,
            mkl_num_threads=1,
            runtime="0:11:0",  # day:hour:min
            nodegroups="Preemptable",
            #min=min(taskcount, 1100),
            min=1,
            max=200 * 20,
            clean_up=clean_up,
        )
    elif mf == "coreP99":
        runner_function = lambda taskcount: HPC(
            min(taskcount, 1000), 'GCR', r"\\GCR\Scratch\RR1\escience",
            remote_python_parent=remote_python_parent,
            unit='core',  # core, socket, node
            update_remote_python_parent=True,
            template="Preemptable",
            priority="Lowest",
            excluded_nodes=excluded_nodes,
            mkl_num_threads=1,
            runtime="0:11:0",  # day:hour:min
            nodegroups="Preemptable,B99",
            #min=min(taskcount, 1100),
            min=1,
            max=200 * 20,
            clean_up=clean_up,
        )
    elif mf == "coreAz":
        runner_function = lambda taskcount: HPC(
            min(taskcount, 1000), 'GCR', r"\\GCR\Scratch\AZ-USCentral\escience",
            remote_python_parent=r"\\GCR\Scratch\AZ-USCentral\escience\carlk\data\carlk\pythonpath",
            unit='core',  # core, socket, node
            update_remote_python_parent=True,
            template="Azure IaaS USCentral",
            mkl_num_threads=1,
            runtime="0:8:0",  # day:hour:min
            clean_up=clean_up,
        )
    elif mf == "nodeE":
        runner_function = lambda taskcount: HPC(
            min(taskcount, 10100), 'GCR', r"\\GCR\Scratch\RR1\escience",
            remote_python_parent=remote_python_parent,
            unit='node',  # core, socket, node
            update_remote_python_parent=True,
            template="ExpressQ",
            priority="Normal",
            #node_local=False,
            #mkl_num_threads=20,
            runtime="0:4:0",  # day:hour:min
            #min=min(taskcount, 100),
            clean_up=clean_up,
        )
    elif mf == "50tasks":
        runner_function = lambda taskcount: HPC(
            50, 'GCR', r"\\GCR\Scratch\RR1\escience",
            remote_python_parent=remote_python_parent,
            unit='node',  # core, socket, node
            update_remote_python_parent=True,
            template="ExpressQ",
            priority="Normal",
            #mkl_num_threads=20,
            runtime="0:4:0",  # day:hour:min
            #min=min(taskcount, 100),
            clean_up=clean_up,
        )
    elif mf == "coreE":
        runner_function = lambda taskcount: HPC(
            min(taskcount, 10100), 'GCR', r"\\GCR\Scratch\RR1\escience",
            remote_python_parent=remote_python_parent,
            unit='core',  # core, socket, node
            update_remote_python_parent=True,
            template="ExpressQ",
            priority="Normal",
            mkl_num_threads=1,
            runtime="0:4:0",  # day:hour:min
            #min=min(taskcount, 100),
            clean_up=clean_up,
        )
    elif mf == "nodeA":
        runner_function = lambda taskcount: HPC(
            min(taskcount, 30100), 'GCR', r"\\GCR\Scratch\RR1\escience",
            remote_python_parent=remote_python_parent,
            unit='node',  # core, socket, node
            update_remote_python_parent=True,
            template="Admin Template",
            clean_up=clean_up,
        )
    elif mf == "socketA":
        runner_function = lambda taskcount: HPC(
            min(taskcount, 30100), 'GCR', r"\\GCR\Scratch\RR1\escience",
            remote_python_parent=remote_python_parent,
            unit='socket',  # core, socket, node
            update_remote_python_parent=True,
            template="Admin Template",
            clean_up=clean_up,
        )
    elif mf == "coreA":
        runner_function = lambda taskcount: HPC(
            min(taskcount, 30100), 'GCR', r"\\GCR\Scratch\RR1\escience",
            remote_python_parent=remote_python_parent,
            unit='core',  # core, socket, node
            update_remote_python_parent=True,
            template="Admin Template",
            clean_up=clean_up,
        )
    elif mf == "nodeH":
        runner_function = lambda taskcount: Hadoop2(
            min(taskcount, 100000), mapmemory=58 * 1024, reducememory=8 * 1024,
            min_alloc=2048, xmx=3072, mkl_num_threads=14, queue="shared",
            skipdatacheck=True, skipsourcecheck=True)
    elif mf == "coreH":
        runner_function = lambda taskcount: Hadoop2(
            min(taskcount, 100000), mapmemory=8 * 1024, reducememory=8 * 1024,
            min_alloc=2048, xmx=3072, mkl_num_threads=1, queue="shared",
            skipdatacheck=True, skipsourcecheck=True)
    else:
        raise Exception("unknown mf=" + mf)

    return runner_function
import logging
import unittest


def getTestSuite():
    suite1 = unittest.TestLoader().loadTestsFromTestCase(TestSingleSnpAllPlusSelect)
    return unittest.TestSuite([suite1])


if __name__ == '__main__':
    # this import is needed for the runner
    from fastlmm.association.tests.test_single_snp_all_plus_select import TestSingleSnpAllPlusSelect

    suites = unittest.TestSuite([getTestSuite()])

    if True:  # Standard test run
        r = unittest.TextTestRunner(failfast=True)
        r.run(suites)
    else:  # Cluster test run
        from pysnptools.util.mapreduce1.runner import Local, LocalMultiProc
        logging.basicConfig(level=logging.INFO)
        from pysnptools.util.mapreduce1.distributabletest import DistributableTest

        runner = Local()
        #runner = LocalMultiProc(taskcount=20, mkl_num_threads=5)
        #runner = LocalInParts(1, 2, mkl_num_threads=1)  # For debugging the cluster runs
        distributable_test = DistributableTest(suites, "temp_test")
        print(runner.run(distributable_test))

    logging.info("done with testing")
def _get_runner(self):
    if self.process_count == 1:
        return Local()
    else:
        return LocalMultiThread(self.process_count, just_one_process=False)
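

# --- Hedged sketch (not in the original source): _get_runner reads
# self.process_count, so it presumably lives on a class like the hypothetical
# one below; one process stays in-process with Local(), more fan out across
# threads with LocalMultiThread.
class _RunnerChoiceExample:  # hypothetical owner of process_count
    def __init__(self, process_count):
        self.process_count = process_count

    _get_runner = _get_runner  # reuse the function defined above as a method

# _RunnerChoiceExample(1)._get_runner()  -> a Local instance
# _RunnerChoiceExample(4)._get_runner()  -> a LocalMultiThread instance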
blob_service.create_blob_from_bytes(
    'my_container_name',
    'my_blob_name',
    b'<center><h1>Hello World!</h1></center>',
    content_settings=ContentSettings('text/html'))
print(blob_service.make_blob_url('my_container_name', 'my_blob_name'))


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    #from onemil.azure_copy import TestAzureShardContainer  #!!! having this here lets us run on Azure, but stops us from using breakpoints
    suites = unittest.TestLoader().loadTestsFromTestCase(TestAzureShardContainer)

    if True:  # Standard test run
        r = unittest.TextTestRunner(failfast=True)  #!!! by default should be False
        r.run(suites)
    else:  # Runner test run
        logging.basicConfig(level=logging.INFO)
        from pysnptools.util.mapreduce1.runner import Local
        from pysnptools.util.mapreduce1.distributabletest import DistributableTest

        runner = Local()  #LocalMultiProc(taskcount=22, mkl_num_threads=5, just_one_process=True)
        distributable_test = DistributableTest(suites, "temp_test")
        print(runner.run(distributable_test))

    logging.info("done")