Esempio n. 1
0
    def test_local(self):
        """Run map_reduce on the Local runner with named inner functions.

        Squares the integers 0..99 with a named mapper, sums them with a
        named reducer, and checks the total.
        """
        from pysnptools.util.mapreduce1 import map_reduce
        from pysnptools.util.mapreduce1.runner import Local

        def sum_of_squares(count, runner):
            # The mapper and reducer are named inner functions rather than lambdas.
            def square(value):
                return value * value

            def total(values):
                return sum(values)

            return map_reduce(range(count),
                              mapper=square,
                              reducer=total,
                              runner=runner)

        # 0**2 + 1**2 + ... + 99**2 == 328350
        local_runner = Local()
        assert 328350 == sum_of_squares(100, local_runner)
Esempio n. 2
0

# Script entry point: builds the module's test suite and runs it, either with
# the plain unittest text runner or (disabled branch) through a pysnptools runner.
if __name__ == '__main__':

    # NOTE(review): TestEpistasis is not referenced below; presumably it is imported
    # so the test class can be found by name when a runner re-loads this module --
    # confirm before removing.
    from fastlmm.association.tests.testepistasis import TestEpistasis
    suites = unittest.TestSuite([getTestSuite()])

    # Developer toggle: the condition is hard-coded, so only the first branch runs.
    if True:  # Standard test run: keep going after failures (failfast=False)
        r = unittest.TextTestRunner(failfast=False)
        r.run(suites)
    else:  # Cluster test run: wrap the suite in a DistributableTest and hand it to a runner
        from pysnptools.util.mapreduce1.distributabletest import DistributableTest

        # NOTE(review): this HPC runner is constructed and then immediately replaced
        # by Local() below, so it is never used to run anything. It looks like a
        # left-over template for a site-specific cluster -- confirm whether
        # constructing it has side effects before deleting or commenting it out.
        runner = HPC(
            10,
            'RR1-N13-09-H44',
            r'\\msr-arrays\Scratch\msr-pool\Scratch_Storage4\Redmond',
            remote_python_parent=
            r"\\msr-arrays\Scratch\msr-pool\Scratch_Storage4\REDMOND\carlk\Source\carlk\july_7_14\tests\runs\2014-07-24_15_02_02_554725991686\pythonpath",
            update_remote_python_parent=True,
            priority="AboveNormal",
            mkl_num_threads=1)
        runner = Local()
        # Alternative runners, kept for reference:
        #runner = LocalMultiProc(taskcount=20,mkl_num_threads=5)
        #runner = LocalInParts(1,2,mkl_num_threads=1) # For debugging the cluster runs
        #runner = Hadoop(100, mapmemory=8*1024, reducememory=8*1024, mkl_num_threads=1, queue="default")
        distributable_test = DistributableTest(suites, "temp_test")
        print(runner.run(distributable_test))

    logging.info("done with testing")
Esempio n. 3
0
def map_reduce(input_seq,
               mapper=_identity,
               reducer=list,
               input_files=None,
               output_files=None,
               name=None,
               runner=None,
               nested=None):
    """
    Applies a function to every element of a sequence and then applies a second function to the collected results.
    Calls can be nested and can be run on a cluster.

    :param input_seq: the inputs. The sequence must support the len function and be indexable, e.g. a list or range(100).
    :type input_seq: a sequence

    :param mapper: The function applied to each input (optional). Defaults to the identity function.
    :type mapper: a function

    :param reducer: The function that turns the mapper's results into a single value (optional).
        Defaults to creating a list of the results.
    :type reducer: a function that takes a sequence

    :param input_files: An optional list telling which input files are needed. The list can contain the names of files (strings),
        None (ignored), or objects such as :class:`.SnpReader`'s that can self-report their input files.
    :type input_files: a list

    :param output_files: An optional list telling which output files will be produced. The list can contain the names of files
        (strings), None (ignored), or objects such as :class:`.SnpReader`'s that can self-report their output files.
    :type output_files: a list

    :param name: A name to be displayed if this work is done on a cluster.
    :type name: a string

    :param runner: a :class:`.Runner`, optional: Tells how to run locally, multi-processor, or on a cluster.
        If not given, a ``Local()`` runner is used -- unless this call is made from inside a nested map_reduce,
        in which case the work is not run here and the un-run work object is returned instead.
    :type runner: :class:`.Runner`

    :param nested: a mapper function that is itself a map_reduce. Some runners can efficiently clusterize such nested mappers.
    :type nested: a function

    :rtype: The results from the reducer.


    :Example:

    Square the numbers 0 to 99 and report their sum, locally:

        >>> from pysnptools.util.mapreduce1 import map_reduce
        >>> from six.moves import range #Python 2 & 3 compatibility
        >>> map_reduce(range(100),
        ...        mapper=lambda x: x*x,
        ...        reducer=sum)
        328350

    Compute it again, this time run on four processors:

        >>> from pysnptools.util.mapreduce1.runner import LocalMultiProc
        >>> from six.moves import range #Python 2 & 3 compatibility
        >>> map_reduce(range(100),
        ...        mapper=lambda x: x*x,
        ...        reducer=sum,
        ...        runner=LocalMultiProc(4))
        328350

    Compute it using named functions, again using four processors:

        >>> def holder1(n,runner):
        ...     def mapper1(x):
        ...         return x*x
        ...     def reducer1(sequence):
        ...        return sum(sequence)
        ...     return map_reduce(range(n),mapper=mapper1,reducer=reducer1,runner=runner)
        >>> holder1(100,LocalMultiProc(4))
        328350

    """

    # Describe the work first; whether and how it runs is decided below.
    work = _MapReduce(input_seq,
                      mapper=mapper,
                      nested=nested,
                      reducer=reducer,
                      input_files=input_files,
                      output_files=output_files,
                      name=name)

    if runner is None:
        # Inside a nested map_reduce, return the un-run work object.
        if _is_in_nested():
            return work
        runner = Local()

    return runner.run(work)
Esempio n. 4
0
def mf_to_runner_function(mf):
    """
    Look up a named runner configuration and return a factory for it.

    :param mf: the name of the configuration. One of "debug", "local", "local1", "lmp", "lmt", "lmtl",
        "lmp4", "lmpl", "nodeP", "nodeP99", "nodeL99", "socketP", "coreP", "coreP99", "coreAz", "nodeE",
        "50tasks", "coreE", "nodeA", "socketA", "coreA", "nodeH", "coreH".
    :type mf: a string

    :rtype: a function of one argument that constructs the runner when called. For most configurations the
        argument is the task count (usually capped at a per-configuration maximum). The configurations
        "debug", "local", "local1", "lmp", "lmt", "lmtl", "lmp4" and "50tasks" do not use the argument.

    :raises Exception: if ``mf`` does not name a known configuration (including when it is not a string).
    """
    # No nodes are currently excluded; the same list object is handed to every HPC runner built below.
    excluded_nodes = []
    remote_python_parent = r"\\GCR\Scratch\RR1\escience\carlk\data\carlk\pythonpath10262016"
    clean_up = False

    def gcr_hpc(task_total, **options):
        # HPC runner on the 'GCR' cluster with the RR1 share; 'options' carries the per-configuration settings.
        return HPC(
            task_total,
            'GCR',
            r"\\GCR\Scratch\RR1\escience",
            remote_python_parent=remote_python_parent,
            update_remote_python_parent=True,
            clean_up=clean_up,
            **options)

    def hadoop(taskcount, mapmemory, mkl_num_threads):
        # Hadoop2 runner; the two Hadoop configurations differ only in map memory and MKL thread count.
        return Hadoop2(min(taskcount, 100000),
                       mapmemory=mapmemory,
                       reducememory=8 * 1024,
                       min_alloc=2048,
                       xmx=3072,
                       mkl_num_threads=mkl_num_threads,
                       queue="shared",
                       skipdatacheck=True,
                       skipsourcecheck=True)

    # Nothing is constructed here: each entry only builds its runner when the returned function is called.
    # 'unit' is one of core, socket, node; 'runtime' is formatted day:hour:min.
    runner_functions = {
        "debug": lambda ignore: LocalInParts(
            215,
            215,
            mkl_num_threads=20,
            result_file="result.p",
            run_dir=r"C:\deldir\test\outputx"),
        "local": lambda ignore: Local(),
        "local1": lambda ignore: Local(1),
        "lmp": lambda ignore: LocalMultiProc(22, 5),
        "lmt": lambda ignore: LocalMultiThread(22, 5),
        "lmtl": lambda ignore: LocalMultiThread(
            22, 5, just_one_process=True),
        "lmp4": lambda ignore: LocalMultiProc(4, 5),
        "lmpl": lambda taskcount: LocalMultiProc(
            taskcount, taskcount, just_one_process=True),
        "nodeP": lambda taskcount: gcr_hpc(
            min(taskcount, 30100),
            unit='node',
            template="Preemptable",
            priority="Lowest",
            excluded_nodes=excluded_nodes,
            nodegroups="Preemptable",
            runtime="0:11:0"),
        "nodeP99": lambda taskcount: gcr_hpc(
            min(taskcount, 30100),
            unit='node',
            template="Preemptable",
            priority="Lowest",
            excluded_nodes=excluded_nodes,
            nodegroups="Preemptable,B99",
            runtime="0:11:0"),
        "nodeL99": lambda taskcount: gcr_hpc(
            min(taskcount, 30100),
            unit='node',
            template="LongRunQ",
            priority="Lowest",
            excluded_nodes=excluded_nodes,
            nodegroups="LongRunQ,B99",
            runtime="11:0:0"),
        "socketP": lambda taskcount: gcr_hpc(
            min(taskcount, 30100),
            unit='socket',
            template="Preemptable",
            priority="Lowest",
            excluded_nodes=excluded_nodes,
            mkl_num_threads=10,
            nodegroups="Preemptable",
            runtime="0:11:0"),
        "coreP": lambda taskcount: gcr_hpc(
            min(taskcount, 1000),
            unit='core',
            template="Preemptable",
            priority="Lowest",
            excluded_nodes=excluded_nodes,
            mkl_num_threads=1,
            runtime="0:11:0",
            nodegroups="Preemptable",
            min=1,
            max=200 * 20),
        "coreP99": lambda taskcount: gcr_hpc(
            min(taskcount, 1000),
            unit='core',
            template="Preemptable",
            priority="Lowest",
            excluded_nodes=excluded_nodes,
            mkl_num_threads=1,
            runtime="0:11:0",
            nodegroups="Preemptable,B99",
            min=1,
            max=200 * 20),
        # The Azure configuration uses its own share and python path, so it does not go through gcr_hpc.
        "coreAz": lambda taskcount: HPC(
            min(taskcount, 1000),
            'GCR',
            r"\\GCR\Scratch\AZ-USCentral\escience",
            remote_python_parent=
            r"\\GCR\Scratch\AZ-USCentral\escience\carlk\data\carlk\pythonpath",
            unit='core',
            update_remote_python_parent=True,
            template="Azure IaaS USCentral",
            mkl_num_threads=1,
            runtime="0:8:0",
            clean_up=clean_up),
        "nodeE": lambda taskcount: gcr_hpc(
            min(taskcount, 10100),
            unit='node',
            template="ExpressQ",
            priority="Normal",
            runtime="0:4:0"),
        # Always 50 tasks, whatever task count is passed in.
        "50tasks": lambda taskcount: gcr_hpc(
            50,
            unit='node',
            template="ExpressQ",
            priority="Normal",
            runtime="0:4:0"),
        "coreE": lambda taskcount: gcr_hpc(
            min(taskcount, 10100),
            unit='core',
            template="ExpressQ",
            priority="Normal",
            mkl_num_threads=1,
            runtime="0:4:0"),
        "nodeA": lambda taskcount: gcr_hpc(
            min(taskcount, 30100),
            unit='node',
            template="Admin Template"),
        "socketA": lambda taskcount: gcr_hpc(
            min(taskcount, 30100),
            unit='socket',
            template="Admin Template"),
        "coreA": lambda taskcount: gcr_hpc(
            min(taskcount, 30100),
            unit='core',
            template="Admin Template"),
        "nodeH": lambda taskcount: hadoop(
            taskcount, mapmemory=58 * 1024, mkl_num_threads=14),
        "coreH": lambda taskcount: hadoop(
            taskcount, mapmemory=8 * 1024, mkl_num_threads=1),
    }

    try:
        return runner_functions[mf]
    except (KeyError, TypeError):
        # KeyError: unknown name. TypeError: unhashable mf (e.g. a list).
        # str(mf) keeps the message identical for strings and avoids a
        # str-concatenation TypeError for anything else (e.g. None).
        raise Exception("don't find mf=" + str(mf))
Esempio n. 5
0
def getTestSuite():
    """Return a unittest.TestSuite wrapping the tests loaded from TestSingleSnpAllPlusSelect."""
    loader = unittest.TestLoader()
    case_tests = loader.loadTestsFromTestCase(TestSingleSnpAllPlusSelect)
    return unittest.TestSuite([case_tests])


if __name__ == '__main__':

    # this import is needed for the runner
    from fastlmm.association.tests.test_single_snp_all_plus_select import TestSingleSnpAllPlusSelect
    suites = unittest.TestSuite([getTestSuite()])

    # Developer toggle: leave False for the standard unittest run; set True to
    # push the same suite through a pysnptools runner instead.
    use_runner = False

    if use_runner:
        from pysnptools.util.mapreduce1.runner import Local, LocalMultiProc
        logging.basicConfig(level=logging.INFO)

        from pysnptools.util.mapreduce1.distributabletest import DistributableTest

        runner = Local()
        # Alternative runners, kept for reference:
        #runner = LocalMultiProc(taskcount=20,mkl_num_threads=5)
        #runner = LocalInParts(1,2,mkl_num_threads=1) # For debugging the cluster runs
        distributable_test = DistributableTest(suites, "temp_test")
        print(runner.run(distributable_test))
    else:
        # Standard test run: stop at the first failure.
        unittest.TextTestRunner(failfast=True).run(suites)

    logging.info("done with testing")
Esempio n. 6
0
 def _get_runner(self):
     """Pick the runner for this object's process_count.

     Returns a Local runner when process_count is exactly 1, otherwise a
     LocalMultiThread runner built with process_count and just_one_process=False.
     """
     if self.process_count != 1:
         return LocalMultiThread(self.process_count, just_one_process=False)
     return Local()
Esempio n. 7
0
        blob_service.create_blob_from_bytes(
            'my_container_name',
            'my_blob_name',
            b'<center><h1>Hello World!</h1></center>',
            content_settings=ContentSettings('text/html'))
        print(blob_service.make_blob_url('my_container_name', 'my_blob_name'))


# Script entry point: loads the TestAzureShardContainer tests and runs them,
# either with the plain unittest text runner or (disabled branch) through a
# pysnptools runner.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    # Having this import here lets us run on Azure, but stops us from using breakpoints,
    # so it stays commented out.
    #from onemil.azure_copy import TestAzureShardContainer

    suites = unittest.TestLoader().loadTestsFromTestCase(
        TestAzureShardContainer)

    # Developer toggle: the condition is hard-coded, so only the first branch runs.
    if True:  # Standard test run
        # Stops at the first failure; the original author notes the default should be False.
        r = unittest.TextTestRunner(failfast=True)
        r.run(suites)
    else:  # Runner test run
        # Logging is already configured at the top of this block, so it is not configured again here.
        from pysnptools.util.mapreduce1.distributabletest import DistributableTest
        # Alternative: LocalMultiProc(taskcount=22,mkl_num_threads=5,just_one_process=True)
        runner = Local()
        distributable_test = DistributableTest(suites, "temp_test")
        # print as a function call: valid on Python 3, and prints the same single value on Python 2.
        print(runner.run(distributable_test))

    logging.info("done")