Example #1
# Fragment from SCOOP's test suite: func4 and SCOOP's internal _control
# module are provided by the surrounding test harness.
def funcDoubleMapReduce(l):
    resultat = futures.mapReduce(func4,
                                 operator.add,
                                 l)
    resultat2 = futures.mapReduce(func4,
                                  operator.add,
                                  l)
    _control.execQueue.socket.pumpInfoSocket()
    return resultat == resultat2
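
Since the fragment above leans on test-harness internals, here is a minimal self-contained sketch of the same futures.mapReduce call pattern; the square workload is illustrative, and the script is assumed to be launched with python -m scoop so workers are available:

import operator

from scoop import futures


def square(x):
    return x * x


if __name__ == '__main__':
    # Parallel equivalent of sum(map(square, range(10)))
    total = futures.mapReduce(square, operator.add, range(10))
    print(total)  # 285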
Example #2
import time
import operator

from scoop import futures


# simulateWorkload is defined elsewhere in the benchmark
# (see the sketch after this example).
def CompareMapReduce():
    mapScoopTime = time.time()
    res = futures.mapReduce(
        simulateWorkload,
        operator.add,
        list([a] * a for a in range(1000)),
    )
    mapScoopTime = time.time() - mapScoopTime
    print("futures.mapReduce in SCOOP executed in {0:.3f}s "
          "with result: {1}".format(mapScoopTime, res))

    mapPythonTime = time.time()
    res = sum(
        map(
            simulateWorkload,
            list([a] * a for a in range(1000))
        )
    )
    mapPythonTime = time.time() - mapPythonTime
    print("map in Python executed in {0:.3f}s "
          "with result: {1}".format(mapPythonTime, res))
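
As noted above, simulateWorkload is defined elsewhere in the benchmark; a plausible stand-in is sketched below (hypothetical, mirroring manipulateData from Example #8 further down):

import time


def simulateWorkload(inData, chose=None):
    # Simulate a 10 ms workload on every task, then aggregate the chunk
    time.sleep(0.01)
    return sum(inData)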
Example #3
import random
import time
import operator

from scoop import futures


def test_sum():
    bound = 100000
    data = [random.randint(-1000, 1000) for r in range(bound)]
    # Python's standard serial function
    start = time.time()
    serialSum = sum(map(abs, data))
    print(time.time() - start)

    # SCOOP's parallel function
    start = time.time()
    parallelSum = futures.mapReduce(abs, operator.add, data)
    print(time.time() - start)
    assert serialSum == parallelSum
Example #4
#    GNU Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public
#    License along with SCOOP. If not, see <http://www.gnu.org/licenses/>.
#
"""
Sums the multiples of 3 and 5 below 1000000
"""
from time import time
from scoop import futures
from operator import add

def multiples(n):
    return set(range(0, 1000000, n))


if __name__ == '__main__':
    bt = time()
    serial_result = sum(set.union(*map(multiples, [3, 5])))
    serial_time = time() - bt

    bt = time()
    parallel_result = sum(futures.mapReduce(multiples, set.union, [3, 5]))
    parallel_reduce_time = time() - bt

    assert serial_result == parallel_result

    print("Serial time: {0:.4f} s\nParallel time: {1:.4f} s"
          "".format(serial_time, parallel_reduce_time)
    )
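
The result can also be verified in closed form: by inclusion-exclusion it equals S(3) + S(5) - S(15), where S(n) is the sum of the multiples of n below the bound. A quick arithmetic-series check (sum_multiples is an illustrative helper, not part of the original example):

def sum_multiples(n, bound=1000000):
    # n + 2n + ... + kn with k = (bound - 1) // n
    k = (bound - 1) // n
    return n * k * (k + 1) // 2


print(sum_multiples(3) + sum_multiples(5) - sum_multiples(15))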
Example #5
import sys

from scoop import futures


# count_words (mapping a file name to a {word: count} dict) is defined
# earlier in the original countwords.py; only the reducer is shown here.
def reduce_dicts(dict1, dict2):
    """Combine two dictionaries so that the combined
       values are equal to the sum of values for each key"""

    # explicitly copy the dictionary, as otherwise
    # we risk modifying 'dict1'
    combined = {}

    for key in dict1.keys():
        combined[key] = dict1[key]

    for key in dict2.keys():
        if key in combined:
            combined[key] += dict2[key]
        else:
            combined[key] = dict2[key]

    return combined


if __name__ == "__main__":
    # need to call it using python countwords.py ../shakespeare/*
    files = sys.argv[1:]

    total_dict = futures.mapReduce(count_words, reduce_dicts, files)
    words = sorted([k for k, v in total_dict.items() if v > 2000])

    def print_line(word, count):
        print(word, '=', count)

    # list() forces evaluation; map() is lazy in Python 3
    list(map(print_line, words, [total_dict[x] for x in words]))

    # use Counter here?
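
The trailing comment asks whether collections.Counter could replace the hand-rolled merge; a sketch of that alternative (a hypothetical rewrite, not in the original):

from collections import Counter


def reduce_dicts(dict1, dict2):
    # Counter addition sums values key-by-key; it drops keys whose
    # combined count is not positive, which is safe for word counts
    return dict(Counter(dict1) + Counter(dict2))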
Example #6
from scoop import futures
from functools import reduce
import numpy as np


def prod_modulo(a, b, c, d):
    """Computes (a % 7) * (b % 7) * (c % 7) * (d % 7)"""
    return (a % 7) * (b % 7) * (c % 7) * (d % 7)


def subtract(x, y):
    """Computes x - y"""
    return x - y


if __name__ == "__main__":

    a = np.random.randint(-10, 10, size=(100, ))
    b = np.random.randint(-10, 10, size=(100, ))
    c = np.random.randint(-10, 10, size=(100, ))
    d = np.random.randint(-10, 10, size=(100, ))

    out = futures.mapReduce(prod_modulo, subtract, a, b, c, d)

    print("Result: ", out)
Example #7
import operator
import time

from scoop import futures

# leftSignal, rightSignal (two equal-length signals) and PARALLEL_SIZE
# (the per-Future chunk size) are defined earlier in the original file.
def RSS(index):
    # Get the data interval to compute on a given Future
    data = zip(leftSignal[index:index+PARALLEL_SIZE],
               rightSignal[index:index+PARALLEL_SIZE])
    return sum(abs(y - x)**2 for y, x in data)


if __name__ == "__main__":
    # Parallel with reduction call
    # Take a beginning timestamp
    ts = time.time()
    # Generate indexes to pass to futures
    indexes = range(0,
                    len(leftSignal),
                    PARALLEL_SIZE,
                    )
    # Execute the RSS computation in parallel
    presult = futures.mapReduce(RSS,
                                operator.add,
                                indexes,
                                )
    ptime = time.time() - ts
    print("mapReduce result obtained in {0:03f}s".format(ptime))

    # Serial
    ts = time.time()
    sresult = sum(abs(a - b)**2 for a, b in zip(leftSignal, rightSignal))
    stime = time.time() - ts
    print("Serial result obtained in {0:03f}s".format(stime))
    assert presult == sresult
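
When the signals are NumPy arrays, the same residual sum of squares can be cross-checked with one vectorized expression; a self-contained sketch with synthetic signals (names mirror the example, values are illustrative):

import numpy as np

rng = np.random.default_rng(0)
leftSignal = rng.standard_normal(1000)
rightSignal = rng.standard_normal(1000)

# Vectorized residual sum of squares between the two signals
print(np.sum(np.abs(leftSignal - rightSignal) ** 2))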
Example #8
import operator
import time

from scoop import futures


def manipulateData(inData, chose=None):
    # Simulate a 10 ms workload for every task
    time.sleep(0.01)
    return sum(inData)


if __name__ == '__main__':
    scoopTime = time.time()
    res = futures.mapReduce(
        manipulateData,
        operator.add,
        list([a] * a for a in range(1000)),
    )
    scoopTime = time.time() - scoopTime
    print("Executed parallely in: {0:.3f}s with result: {1}".format(
        scoopTime,
        res
        )
    )

    serialTime = time.time()
    res = sum(
        map(
            manipulateData,
            list([a] * a for a in range(1000))
        )
    )
    serialTime = time.time() - serialTime
    print("Executed serially in: {0:.3f}s with result: {1}".format(
        serialTime,
        res
        )
    )
Example #9
File: rssDoc.py Project: wvangeit/scoop
def RSS(index):
    # Get the data interval to compute on a given Future
    data = zip(leftSignal[index:index + PARALLEL_SIZE],
               rightSignal[index:index + PARALLEL_SIZE])
    return sum(abs(y - x)**2 for y, x in data)


if __name__ == "__main__":
    # Parallel with reduction call
    # Take a beginning timestamp
    ts = time.time()
    # Generate indexes to pass to futures
    indexes = range(
        0,
        len(leftSignal),
        PARALLEL_SIZE,
    )
    # Execute the RSS computation in parallel
    presult = futures.mapReduce(
        RSS,
        operator.add,
        indexes,
    )
    ptime = time.time() - ts
    print("mapReduce result obtained in {0:03f}s".format(ptime))

    # Serial
    ts = time.time()
    sresult = sum(abs(a - b)**2 for a, b in zip(leftSignal, rightSignal))
    stime = time.time() - ts
    print("Serial result obtained in {0:03f}s".format(stime))
    assert presult == sresult
Example #10
from scoop import futures


def product(x, y):
    """Return the product of the arguments"""
    return x * y


def sum(x, y):
    """Return the sum of the arguments (note: shadows the built-in sum)"""
    return x + y


if __name__ == "__main__":

    a = range(1, 101)
    b = range(101, 201)

    total = futures.mapReduce(product, sum, a, b)

    print("Sum of the products equals %d" % total)
Example #11
        spike_time_dict = ep.loadSpikeTimeDict(mouse_name, cell_ids, cell_info, mat_dir)
        pairs = np.array(list(combinations(cell_ids, 2)))
        chunked_pairs = np.array_split(pairs, args.num_chunks)
        bin_widths = ep.selected_bin_widths if args.bin_width_type == 'selected' else ep.bin_widths
        for bin_width in bin_widths:
            print(dt.datetime.now().isoformat() + ' INFO: ' + 'Processing bin width ' + str(bin_width) + '...')
            spike_count_dict = ep.getSpikeCountDict(spike_time_dict, bin_width, spon_start_time)
            if args.save_firing_rate_frame:
                firing_rate_frame = ep.getFiringRateFrameFromSpikeCountDict(spike_count_dict, bin_width)
                save_file = os.path.join(npy_dir, 'firing_rate_frames', mouse_name + '_' + str(bin_width).replace('.', 'p') + '_' + 'firing.npy')
                firing_rate_frame.to_pickle(save_file)
                print(dt.datetime.now().isoformat() + ' INFO: ' + save_file + ' saved.')
            if args.save_analysis_frame:
                save_file = os.path.join(csv_dir, 'analysis_frames', mouse_name + '_' + str(bin_width).replace('.', 'p') + '_' + 'analysis.csv')
                removed = os.remove(save_file) if os.path.exists(save_file) else None
                for i, pair_chunk in enumerate(chunked_pairs):
                    print(dt.datetime.now().isoformat() + ' INFO: ' + 'Processing chunk number ' + str(i) + '...')
                    analysis_frame = pd.DataFrame.from_dict(futures.mapReduce(getAnalysisDictForPair, reduceAnalysisDicts, constructMapFuncArgs(pair_chunk, spike_count_dict)))
                    analysis_frame['bin_width'] = bin_width
                    saveAnalysisFrame(analysis_frame, i, save_file)
                print(dt.datetime.now().isoformat() + ' INFO: ' + save_file + ' saved.')
            if args.save_conditional_correlations:
                print(dt.datetime.now().isoformat() + ' INFO: ' + 'Processing conditional correlations...')
                cond_analysis_frame, linear_model_frame, exp_cond_cov, cov_of_cond_expectations = getConditionalAnalysisFrame(mouse_face, spike_count_dict, ep.getBinsForSpikeCounts(spike_time_dict, bin_width, spon_start_time))
                saveCondFramesMatrices(cond_analysis_frame, linear_model_frame, exp_cond_cov, cov_of_cond_expectations, mouse_name, bin_width)
                print(dt.datetime.now().isoformat() + ' INFO: Conditional analysis frames and matrices saved.')
            if args.save_spike_count_frame:
                save_file, spike_count_frame = saveSpikeCountFrame(cell_ids, bin_width, spike_time_dict, spon_start_time, mouse_name)
                print(dt.datetime.now().isoformat() + ' INFO: ' + save_file + ' saved.')
    print(dt.datetime.now().isoformat() + ' INFO: ' + 'Done.')
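
The chunked pattern above (split the pair list, map a per-chunk analysis, reduce by merging the partial dicts) reduces to a short self-contained sketch; every name below is illustrative rather than taken from the original pipeline, and the script is assumed to run under python -m scoop:

import numpy as np
from scoop import futures


def analyse_chunk(chunk):
    # Map step: partial results for one chunk of index pairs
    return {tuple(pair): int(pair[0] * pair[1]) for pair in chunk}


def merge_dicts(d1, d2):
    # Reduce step: chunks cover disjoint pairs, so a plain merge suffices
    d1.update(d2)
    return d1


if __name__ == '__main__':
    pairs = np.array([(i, j) for i in range(4) for j in range(i + 1, 4)])
    chunks = np.array_split(pairs, 3)
    print(futures.mapReduce(analyse_chunk, merge_dicts, chunks))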