Python map_reduce Beispiele, map_reduce.map_reduce Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: pagerank_mr.py Projekt: ssierral/webcrawlerIR

def pagerank(i,s=0.85,tolerance=0.00001):
  """Repeat two MapReduce passes over i until the l1 change is small.

  Every round first runs the ip_mapper/ip_reducer job; its first result
  (or 0 when the job returns an empty list) becomes ip.  A second job
  with pr_mapper and a reducer bound to the current s, ip and n then
  produces new_i.  The change is the sum over j of
  abs(new_i[j][1] - i[j][0]); the loop stops once it is no longer above
  tolerance.  i[j][0] is overwritten in place and i is returned.
  """
  n = len(i)
  change = 2
  iteration = 1
  while change > tolerance:
    print("Iteration: "+str(iteration))
    ip_list = map_reduce.map_reduce(i,ip_mapper,ip_reducer)
    ip = 0 if ip_list == [] else ip_list[0]

    # Reducer for this round, closed over the current s, ip and n.
    def pr_reducer(x,y):
      return pr_reducer_inter(x,y,s,ip,n)

    new_i = map_reduce.map_reduce(i,pr_mapper,pr_reducer)
    change = sum(abs(new_i[j][1]-i[j][0]) for j in xrange(n))
    print("Change in l1 norm: "+str(change))
    # Copy the new values back only after the change has been measured.
    for j in xrange(n):
      i[j][0] = new_i[j][1]
    iteration += 1
  return i

Beispiel #2

0

Datei anzeigen

Datei: scattering_fit.py Projekt: kwmsmith/field-trace

def dens_diff_perp(den, idxs, outer_len):
    """Average summed row differences of den, grouped by row distance.

    For each row index in idxs, the difference between that row and every
    one of the nx rows is summed along the row and scaled by
    dz = outer_len / ny.  Each sum is keyed by the value the callable
    returned by wraparound_dist_1d(nx) gives for the row pair, and
    map_reduce averages the sums that share a key.

    Returns an array built from the sorted (key, mean) items, with the
    first column multiplied by dz.
    """
    outer_len = float(outer_len)
    nx, ny = den.shape
    wdist = wraparound_dist_1d(nx)
    dz = outer_len / ny

    samples = []
    for idx in idxs:
        base_row = den[idx, :]
        samples.extend(
            (wdist(idx, j), np.sum(base_row - den[j, :]) * dz)
            for j in range(nx))

    def key_value(pair):
        # Each sample is already a (key, value) pair.
        dist, diff = pair
        return dist, diff

    def mean(values):
        return sum(values) / len(values)

    grouped = map_reduce(samples, key_value, mean)
    pairs = sorted(grouped.items())
    table = np.array(pairs)
    table[:, 0] *= dz
    return table

Beispiel #3

0

Datei anzeigen

Datei: mag_shear.py Projekt: kwmsmith/field-trace

def integrate_theta(arr, x0, y0, nr=None):
    """Bin the values of arr by scaled radius and return per-bin statistics.

    Parameters:
        arr: 2D array; its shape is passed to cartesian_to_polar_coords.
        x0, y0: origin handed to cartesian_to_polar_coords.
        nr: number of radial bins.  When falsy (None or 0) it defaults to
            int(rmax + 1), where rmax is the largest radius.

    Returns:
        A structured array with fields 'r', 'rstd', 'val' and 'valstd'
        (mean and standard deviation of the radius and of the value in
        each bin), sorted by 'r'.
    """
    nx, ny = arr.shape
    # NOTE(review): assumes R has the same shape as arr -- confirm
    # against cartesian_to_polar_coords.
    X, Y, R, theta = cartesian_to_polar_coords(x0, y0, nx, ny)
    rmax = R.max()
    # Resolve the default bin count before it is used below; previously
    # the default was applied only after rbins had been computed, so
    # nr=None raised a TypeError.
    nr = nr or int(rmax + 1)
    rbins = (R / rmax * (nr - 1)).astype(np.int32).flatten()
    radii = R.flatten()
    vals = arr.flatten()
    dta = zip(rbins, radii, vals)

    def mapper(elm):
        rbin, r, v = elm
        return (rbin), (r, v)

    def reducer(gp):
        # Builtin float is what the removed np.float alias referred to.
        samples = np.array(gp, dtype=[('r', float), ('val', float)])
        rs = samples['r']
        vs = samples['val']
        return (rs.mean(), rs.std(), vs.mean(), vs.std())

    from map_reduce import map_reduce
    mr_res = map_reduce(dta, mapper, reducer)
    # list() keeps this working when values() returns a view.
    stats = np.array(list(mr_res.values()),
                     dtype=[('r', float), ('rstd', float),
                            ('val', float), ('valstd', float)])
    stats.sort(order=['r'])
    return stats

Beispiel #4

0

Datei anzeigen

Datei: pagerank_mr.py Projekt: SagarCC/BigDataProject

def pagerank(i,s=0.85,tolerance=0.00001):
  """Iterate the MapReduce PageRank update on i, printing every round.

  i describes the web and is updated in place; s is the parameter
  forwarded to pr_reducer_inter.  Iteration stops once the sum over all
  pages of abs(new value - old value) is no longer above tolerance.
  After each round the current value i[k][0] of every page is printed.
  Returns i.
  """
  n = len(i)
  change = 2  # initial error estimate, chosen to enter the loop
  iteration = 1
  while change > tolerance:
    print("Iteration: "+str(iteration))

    # First job: the inner product between the dangling-page vector and
    # the current estimate.  An empty result list means there are no
    # dangling pages, so the inner product is taken as 0.
    ip_list = map_reduce.map_reduce(i,ip_mapper,ip_reducer)
    ip = 0 if ip_list == [] else ip_list[0]

    # Second job: update the vector with a reducer bound to the current
    # s, ip and n.
    def pr_reducer(x,y):
      return pr_reducer_inter(x,y,s,ip,n)

    new_i = map_reduce.map_reduce(i,pr_mapper,pr_reducer)

    # New error estimate, measured before the old values are replaced.
    change = sum(abs(new_i[j][1]-i[j][0]) for j in xrange(n))

    for j in xrange(n):
      i[j][0] = new_i[j][1]

    print("Page rank values of web pages\n")
    for k in xrange(n):
      print("Page rank of page "+str(k)+" = "+str(i[k][0]))
    print("\n")

    iteration += 1
  return i

Beispiel #5

0

Datei anzeigen

Datei: sample.py Projekt: fboemer/Rankmaniac

def pagerank(i,s=0.85,tolerance=0.00001):
  """Return the PageRank vector for the web described by i.

  s is the parameter forwarded to pr_reducer_inter.  Convergence is
  declared when the l1 distance between two successive estimates is no
  longer above tolerance.  i is modified in place and returned.
  """
  n = len(i)
  iteration = 1
  change = 2  # initial estimate of the error
  while change > tolerance:
    print("Iteration: "+str(iteration))

    # MapReduce job for the inner product between the vector of dangling
    # pages and the estimated PageRank.  With no dangling pages the job
    # returns an empty list; otherwise its first (and only) member is
    # the inner product.
    ip_list = map_reduce.map_reduce(i,ip_mapper,ip_reducer)
    if ip_list == []:
      ip = 0
    else:
      ip = ip_list[0]

    # Reducer for the update job, using the current s, ip and n.
    def pr_reducer(x,y):
      return pr_reducer_inter(x,y,s,ip,n)

    # MapReduce job that updates the PageRank vector.
    new_i = map_reduce.map_reduce(i,pr_mapper,pr_reducer)

    # New estimate of the error.
    differences = (abs(new_i[j][1]-i[j][0]) for j in xrange(n))
    change = sum(differences)
    print("Change in l1 norm: "+str(change))

    # Store the new estimate.
    for j in xrange(n):
      i[j][0] = new_i[j][1]
    iteration += 1
  return i

Beispiel #6

0

Datei anzeigen

Datei: mag_shear.py Projekt: kwmsmith/field-trace

def theta_sectors(arr, x0, y0, region=None, nr=None, ntheta=None):
    """Bin values of arr into (radius, angle) sectors and summarize them.

    Parameters:
        arr: 2D array; its shape is passed to cartesian_to_polar_coords.
        x0, y0: origin handed to cartesian_to_polar_coords.
        region: iterable of (index0, index1) pairs selecting the points
            to use; if None, the entire array is used.
        nr: number of radial bins; when falsy defaults to int(rmax + 1),
            with rmax the largest selected radius.
        ntheta: number of angular bins; when falsy defaults to 1.

    Returns:
        A list with one structured array per angular bin.  Each array has
        fields 'r', 'rstd', 'theta', 'thetastd', 'val' and 'valstd' (mean
        and standard deviation per radial bin) and is sorted by 'r'.
    """
    nx, ny = arr.shape
    X, Y, R, theta = cartesian_to_polar_coords(x0, y0, nx, ny)
    if region is None:
        Rs = R.flatten()
        thetas = theta.flatten()
        vals = arr.flatten()
    else:
        idx0, idx1 = zip(*region)
        Rs = R[idx0, idx1]
        thetas = theta[idx0, idx1]
        vals = arr[idx0, idx1]
    rmax = Rs.max()
    nr = nr or int(rmax + 1)
    ntheta = ntheta or 1
    rbins = (Rs / rmax * (nr - 1)).astype(np.int32)
    thetabins = (thetas / (2 * pi) * (ntheta - 1)).astype(np.int32)
    dta = zip(rbins, thetabins, Rs, thetas, vals)

    # Builtin float is what the removed np.float alias referred to.
    sample_dtype = [('rbin', np.int32), ('thetabin', np.int32),
                    ('r', float), ('theta', float), ('val', float)]
    stats_dtype = [('r', float), ('rstd', float),
                   ('theta', float), ('thetastd', float),
                   ('val', float), ('valstd', float)]

    def mapper(elm):
        rbin, thetabin, _r, _theta, _v = elm
        return (rbin, thetabin), elm

    def reducer(gp):
        samples = np.array(gp, dtype=sample_dtype)
        rs = samples['r']
        ths = samples['theta']
        vs = samples['val']
        return (rs.mean(), rs.std(), ths.mean(), ths.std(),
                vs.mean(), vs.std())

    from map_reduce import map_reduce
    sectors = map_reduce(dta, mapper, reducer)

    # Regroup the per-(rbin, thetabin) statistics by angular bin.
    by_theta = {}
    for (rbin, tbin), stats in sectors.items():
        by_theta.setdefault(tbin, []).append(stats)

    result = []
    for gp in by_theta.values():
        records = np.array(gp, dtype=stats_dtype)
        records.sort(order=['r'])
        result.append(records)
    return result

Beispiel #7

0

Datei anzeigen

Datei: test.py Projekt: m2o/python-generic

import map_reduce
import string

def mapper(input_key,input_value):
  """Emit a (word, 1) pair for every word in input_value.

  The text is lowercased, passed through remove_punctuation and split on
  whitespace.  input_key is not used.
  """
  cleaned = remove_punctuation(input_value.lower())
  pairs = []
  for word in cleaned.split():
    pairs.append((word,1))
  return pairs

def remove_punctuation(s):
  """Return s with every character of string.punctuation removed.

  A character filter is used instead of the two-argument str.translate
  call, which exists only for Python 2 byte strings and fails for
  unicode strings and on Python 3.
  """
  return "".join(ch for ch in s if ch not in string.punctuation)
  
def reducer(intermediate_key,intermediate_value_list):
  """Pair the intermediate key with the sum of its values."""
  total = sum(intermediate_value_list)
  return (intermediate_key,total)
  
# Read every input file into a dict keyed by filename, then run the job.
filenames = ["text\\a.txt","text\\b.txt","text\\c.txt"]
i = {}
for filename in filenames:
  # The with-block closes the file even if read() raises.
  with open(filename) as f:
    i[filename] = f.read()

print(map_reduce.map_reduce(i,mapper,reducer))

Beispiel #8

0

Datei anzeigen

Datei: main.py Projekt: jasonvila/map_reduce_py

        new_content = new_content.replace(contents, "")
        print (contents)
    return new_content
    #return s.replace(string.punctuation, "")

def reducer(intermediate_key, intermediate_value_list):
    """Trace the key and its value list, then return (key, sum of values)."""
    for traced in (intermediate_key, intermediate_value_list):
        print ("reducer - ", traced)
    total = sum(intermediate_value_list)
    return (intermediate_key, total)


"""
# i.items() - copy of dictionary
# intermediate.extend() - extends the list
def map_reduce(i, mapper, reducer):
    intermediate = []
    for (key,value) in i.items():
        intermediate.extend(mapper(key,value))
        print(intermediate)

    # itertools - iterator tools - group the sorted list to lambda x:x[0]
    groups = {}
    for key, group in itertools.groupby(sorted(intermediate), lambda x: x[0]):
        groups[key] = list([y for x, y in group])

    return [reducer(intermediate_key, groups[intermediate_key]) for intermediate_key in groups]
"""


# Run the job through the imported map_reduce module (not the disabled copy
# kept in the string literal above) and print whatever it returns.
print(map_reduce.map_reduce(i, mapper, reducer))

Beispiel #9

0

Datei anzeigen

Datei: MapReduce.py Projekt: zhangming870/Python-Study

 'to': [1], 'leap': [1], 'white': [1], 'was': [1, 1],
 'mary': [1, 1], 'brown': [1], 'lazy': [1], 'sure': [1],
 'that': [1], 'little': [1], 'small': [1], 'step': [1],
 'everywhere': [1], 'mankind': [1], 'went': [1], 'man': [1],
 'a': [1, 1], 'fleece': [1], 'grey': [1], 'dogs': [1],
 'quick': [1], 'the': [1, 1, 1], 'thats': [1]}
"""

# Read every input file into a dict keyed by filename, then run the job.
filenames = ["text\\a.txt","text\\b.txt","text\\c.txt"]
i = {}
for filename in filenames:
    # The with-block closes the file even if read() raises.
    with open(filename) as f:
        i[filename] = f.read()

print(map_reduce.map_reduce(i,mapper,reducer))
"""
The map_reduce module imported by this program implements MapReduce in pretty much the simplest possible way, using some useful functions from the itertools library:
"""

# map_reduce.py
"""Defines a single function, map_reduce, which takes an input
dictionary i and applies the user-defined function mapper to each
(input_key,input_value) pair, producing a list of intermediate
keys and intermediate values.  Repeated intermediate keys then
have their values grouped into a list, and the user-defined
function reducer is applied to the intermediate key and list of
intermediate values.  The results are returned as a list."""

import itertools

Beispiel #10

0

Datei anzeigen

    return s.translate(string.maketrans("", ""), string.punctuation)


def reducer(intermediate_key, intermediate_value_list):
    """Reduce one key: return it together with the sum of its values."""
    summed = sum(intermediate_value_list)
    result = (intermediate_key, summed)
    return result


# Read every input file into a dict keyed by filename.
filenames = ["file3.txt"]
i = {}
for filename in filenames:
    # The with-block closes each input file even if read() raises.
    with open(filename) as f:
        i[filename] = f.read()
# NOTE(review): this output handle is not closed in the visible part of the
# script -- confirm that it is closed further down.
f = open("output.txt", "w")
l = map_reduce.map_reduce(i, mapper, reducer)
l3 = []
c = []
for (a, b) in l:
    s = a.split('\n')
    i = 0
    l1 = []
    for a in s:
        l1.append(a.strip())
    l1 = l1[2:6]
    # print(l1)
    l2 = []
    for a in l1:
        s = list(a.split('      '))
        l2.append(s[0])
    print(l2)