def pagerank(i, s=0.85, tolerance=0.00001):
    """Iteratively update the rank estimates stored in ``i`` until they settle.

    Each pass runs two MapReduce jobs over ``i``: one with
    ``ip_mapper``/``ip_reducer`` (its first result, or 0 when the result
    list is empty, is kept as ``ip``) and one with ``pr_mapper`` and a
    reducer that forwards to ``pr_reducer_inter`` together with ``s``,
    ``ip`` and ``n``.

    Args:
        i: Indexable collection of length ``n`` whose entries are mutable
            sequences; slot 0 of each entry holds the current rank estimate
            and is overwritten in place.
        s: Forwarded unchanged to ``pr_reducer_inter`` (presumably the
            damping factor -- confirm against that helper).
        tolerance: Iteration stops once the summed absolute change between
            two consecutive estimates is no longer greater than this value.

    Returns:
        The same object ``i``, updated in place.
    """
    n = len(i)
    iteration = 1
    change = 2  # Any value above ``tolerance`` so the loop body runs once.
    while change > tolerance:
        print("Iteration: " + str(iteration))

        ip_list = map_reduce.map_reduce(i, ip_mapper, ip_reducer)
        # An empty result list means there is nothing to take an entry from.
        ip = 0 if ip_list == [] else ip_list[0]

        # Rebuilt every pass so the reducer sees the current ``ip``.
        def pr_reducer(key, values):
            return pr_reducer_inter(key, values, s, ip, n)

        new_i = map_reduce.map_reduce(i, pr_mapper, pr_reducer)

        # NOTE(review): assumes new_i[j] is the result for entry j of ``i``
        # and that its element 1 is the new rank -- confirm map_reduce
        # guarantees that ordering.
        change = sum(abs(new_i[j][1] - i[j][0]) for j in range(n))
        print("Change in l1 norm: " + str(change))

        for j in range(n):
            i[j][0] = new_i[j][1]
        iteration += 1
    return i
def dens_diff_perp(den, idxs, outer_len):
    '''
    Average the integrated row-to-row differences of a 2D density field.

    For every row index in `idxs`, the difference between that row and
    each row of `den` is summed along the row and multiplied by
    dz = outer_len / ny.  The results are grouped by the value returned
    from wdist(idx, j) and averaged per group.

    Returns a 2-column array sorted by the first column: column 0 is the
    group key multiplied by dz, column 1 is the group mean.
    '''
    outer_len = float(outer_len)
    nx, ny = den.shape
    wdist = wraparound_dist_1d(nx)
    # NOTE(review): dz comes from ny but is also used to scale the row
    # separation below, which presumes nx == ny -- confirm with callers.
    dz = outer_len / ny

    def key_and_value(sample):
        return sample[0], sample[1]

    def group_mean(values):
        return sum(values) / len(values)

    samples = []
    for idx in idxs:
        reference_row = den[idx, :]
        samples.extend(
            (wdist(idx, j), np.sum(reference_row - den[j, :]) * dz)
            for j in range(nx)
        )

    averaged = map_reduce(samples, key_and_value, group_mean)
    dd_arr = np.array(sorted(averaged.items()))
    # Convert the separation from index units to length units.
    dd_arr[:, 0] *= dz
    return dd_arr
def integrate_theta(arr, x0, y0, nr=None):
    """Bin the values of a 2D array by radius and summarise each bin.

    The radius grid comes from ``cartesian_to_polar_coords(x0, y0, nx, ny)``
    (presumably the distance of every cell from ``(x0, y0)`` -- confirm
    against that helper).  Each cell is assigned to the radial bin
    ``int(R / R.max() * (nr - 1))``.

    Args:
        arr: 2D array of values.
        x0, y0: Forwarded to ``cartesian_to_polar_coords``.
        nr: Number of radial bins.  A falsy value (``None`` or 0) selects
            ``int(R.max() + 1)``.

    Returns:
        A structured array with fields ``r``, ``rstd``, ``val`` and
        ``valstd`` (mean and standard deviation of radius and of value for
        each non-empty bin), sorted by ``r``.
    """
    from map_reduce import map_reduce

    nx, ny = arr.shape
    _, _, R, _ = cartesian_to_polar_coords(x0, y0, nx, ny)
    rmax = R.max()
    # Resolve the default *before* nr is used to compute the bins; applying
    # it afterwards meant a missing nr failed on ``nr - 1`` instead.
    nr = nr or int(rmax + 1)
    # NOTE(review): rmax == 0 (e.g. a single-cell array) divides by zero here.
    rbins = (R / rmax * (nr - 1)).astype(np.int32).flatten()
    radii = R.flatten()
    vals = arr.flatten()
    # Materialise so the data can be traversed regardless of how map_reduce
    # consumes it (zip is a one-shot iterator on Python 3).
    samples = list(zip(rbins, radii, vals))

    def mapper(elm):
        rbin, r, v = elm
        return rbin, (r, v)

    def reducer(gp):
        # The builtin float is what the removed ``np.float`` alias pointed to.
        group = np.array(gp, dtype=[('r', float), ('val', float)])
        rs = group['r']
        group_vals = group['val']
        return (rs.mean(), rs.std(), group_vals.mean(), group_vals.std())

    mr_res = map_reduce(samples, mapper, reducer)
    # list(...) is required on Python 3, where values() is a view that
    # np.array cannot turn into a structured array.
    summary = np.array(
        list(mr_res.values()),
        dtype=[('r', float), ('rstd', float),
               ('val', float), ('valstd', float)],
    )
    summary.sort(order=['r'])
    return summary
def pagerank(i, s=0.85, tolerance=0.00001):
    """Return the PageRank vector for the web described by ``i``.

    Uses parameter ``s``.  Iteration stops once the sum of the absolute
    differences between the new and the old page rank values (the l1 norm
    of the change) is no longer greater than ``tolerance``.  After every
    iteration the current rank of each page is printed.

    Args:
        i: Indexable collection of length ``n`` whose entries are mutable
            sequences; slot 0 of each entry holds the page's current rank
            and is overwritten in place.
        s: Forwarded unchanged to ``pr_reducer_inter``.
        tolerance: Convergence threshold on the l1 change.

    Returns:
        The same object ``i``, updated in place.
    """
    n = len(i)
    iteration = 1
    change = 2  # initial estimate of error
    while change > tolerance:
        print("Iteration: " + str(iteration))

        # Run the MapReduce job used to compute the inner product between
        # the vector of dangling pages and the estimated PageRank.
        ip_list = map_reduce.map_reduce(i, ip_mapper, ip_reducer)
        # With no dangling pages MapReduce returns the empty list;
        # otherwise the inner product is its first (and only) member.
        ip = 0 if ip_list == [] else ip_list[0]

        # Reducer used to update the PageRank vector, bound to the current
        # values of s, ip and n.
        def pr_reducer(key, values):
            return pr_reducer_inter(key, values, s, ip, n)

        # Run the MapReduce job used to update the PageRank vector.
        new_i = map_reduce.map_reduce(i, pr_mapper, pr_reducer)

        # Compute the new estimate of error.
        # NOTE(review): assumes new_i[j] is the result for page j -- confirm
        # map_reduce guarantees that ordering.
        change = sum(abs(new_i[j][1] - i[j][0]) for j in range(n))

        # Update the estimated PageRank vector.
        for j in range(n):
            i[j][0] = new_i[j][1]

        print("Page rank values of web pages\n")
        for k in range(n):
            print("Page rank of page " + str(k) + " = " + str(i[k][0]))
        # NOTE(review): emitted once per iteration, after the per-page
        # lines -- confirm this placement matches the intended output.
        print("\n")

        iteration += 1
    return i
def pagerank(i, s=0.85, tolerance=0.00001):
    """Return the PageRank vector for the web described by ``i``.

    Uses parameter ``s``.  The criterion for convergence is that we stop
    when M^(j+1)P - M^jP has length no greater than ``tolerance`` in the
    l1 norm.

    Args:
        i: Indexable collection of length ``n`` whose entries are mutable
            sequences; slot 0 of each entry holds the page's current rank
            and is overwritten in place.
        s: Forwarded unchanged to ``pr_reducer_inter``.
        tolerance: Convergence threshold on the l1 change.

    Returns:
        The same object ``i``, updated in place.
    """
    n = len(i)
    iteration = 1
    change = 2  # initial estimate of error
    while change > tolerance:
        print("Iteration: " + str(iteration))

        # Run the MapReduce job used to compute the inner product between
        # the vector of dangling pages and the estimated PageRank.
        ip_list = map_reduce.map_reduce(i, ip_mapper, ip_reducer)
        # With no dangling pages MapReduce returns the empty list;
        # otherwise the inner product is its first (and only) member.
        ip = 0 if ip_list == [] else ip_list[0]

        # Reducer used to update the PageRank vector, bound to the current
        # values of s, ip and n.
        def pr_reducer(key, values):
            return pr_reducer_inter(key, values, s, ip, n)

        # Run the MapReduce job used to update the PageRank vector.
        new_i = map_reduce.map_reduce(i, pr_mapper, pr_reducer)

        # Compute the new estimate of error.
        # NOTE(review): assumes new_i[j] is the result for page j -- confirm
        # map_reduce guarantees that ordering.
        change = sum(abs(new_i[j][1] - i[j][0]) for j in range(n))
        print("Change in l1 norm: " + str(change))

        # Update the estimated PageRank vector.
        for j in range(n):
            i[j][0] = new_i[j][1]
        iteration += 1
    return i
def theta_sectors(arr, x0, y0, region=None, nr=None, ntheta=None):
    '''
    Summarise the values of a 2D array per (radius bin, angle bin) cell
    and return one radial profile per angle bin.

    arr     -- 2D array of values.
    x0, y0  -- forwarded to cartesian_to_polar_coords.
    region  -- iterable of (row, col) index pairs to restrict the
               analysis to; if None, use the entire array.
    nr      -- number of radial bins; a falsy value selects
               int(max radius + 1).
    ntheta  -- number of angle bins; a falsy value selects 1, in which
               case every sample lands in angle bin 0.

    Returns a list with one structured array per angle bin.  Each array
    has the fields r, rstd, theta, thetastd, val and valstd (mean and
    standard deviation of radius, angle and value for each non-empty
    radial bin) and is sorted by r.
    '''
    from map_reduce import map_reduce

    nx, ny = arr.shape
    _, _, R, theta = cartesian_to_polar_coords(x0, y0, nx, ny)
    if region is None:
        Rs = R.flatten()
        thetas = theta.flatten()
        vals = arr.flatten()
    else:
        idx0, idx1 = zip(*region)
        Rs = R[idx0, idx1]
        thetas = theta[idx0, idx1]
        vals = arr[idx0, idx1]

    rmax = Rs.max()
    nr = nr or int(rmax + 1)
    ntheta = ntheta or 1
    rbins = (Rs / rmax * (nr - 1)).astype(np.int32)
    thetabins = (thetas / (2 * pi) * (ntheta - 1)).astype(np.int32)
    # Materialise so the data can be traversed regardless of how map_reduce
    # consumes it (zip is a one-shot iterator on Python 3).
    samples = list(zip(rbins, thetabins, Rs, thetas, vals))

    # The builtin float is what the removed ``np.float`` alias pointed to.
    sample_dtype = [('rbin', np.int32), ('thetabin', np.int32),
                    ('r', float), ('theta', float), ('val', float)]
    profile_dtype = [('r', float), ('rstd', float),
                     ('theta', float), ('thetastd', float),
                     ('val', float), ('valstd', float)]

    def mapper(elm):
        rbin, thetabin, r, theta, v = elm
        return (rbin, thetabin), elm

    def reducer(gp):
        group = np.array(gp, dtype=sample_dtype)
        rs = group['r']
        group_thetas = group['theta']
        group_vals = group['val']
        return (rs.mean(), rs.std(),
                group_thetas.mean(), group_thetas.std(),
                group_vals.mean(), group_vals.std())

    sectors = map_reduce(samples, mapper, reducer)

    # Regroup the per-cell summaries by angle bin.  The dict is named
    # ``by_theta`` so it does not shadow this function's own name.
    by_theta = {}
    for rbin, tbin in sectors:
        by_theta.setdefault(tbin, []).append(sectors[(rbin, tbin)])

    for tbin in by_theta:
        profile = np.array(by_theta[tbin], dtype=profile_dtype)
        profile.sort(order=['r'])
        by_theta[tbin] = profile
    # Return a real list on every Python version (values() is a view on 3).
    return list(by_theta.values())
import string

import map_reduce


def mapper(input_key, input_value):
    """Emit a ``(word, 1)`` pair for every word of one document.

    ``input_key`` is unused; ``input_value`` is the document text, which
    is lower-cased and stripped of punctuation before being split on
    whitespace.
    """
    return [(word, 1)
            for word in remove_punctuation(input_value.lower()).split()]


def remove_punctuation(s):
    """Return ``s`` with every character of ``string.punctuation`` removed."""
    # A join over the kept characters works on Python 2 and 3 alike;
    # ``string.maketrans`` and two-argument ``translate`` are Python 2 only.
    return "".join(ch for ch in s if ch not in string.punctuation)


def reducer(intermediate_key, intermediate_value_list):
    """Return ``(word, total)`` where total is the sum of the word's counts."""
    return (intermediate_key, sum(intermediate_value_list))


# NOTE(review): the backslash separators only act as directory separators
# on Windows -- confirm that is the sole target platform.
filenames = ["text\\a.txt", "text\\b.txt", "text\\c.txt"]

# Map each file name to the full text of that file.
i = {}
for filename in filenames:
    # ``with`` closes the file even if reading it raises.
    with open(filename) as f:
        i[filename] = f.read()

print(map_reduce.map_reduce(i, mapper, reducer))
new_content = new_content.replace(contents, "") print (contents) return new_content #return s.replace(string.punctuation, "") def reducer(intermediate_key, intermediate_value_list): print ("reducer - ", intermediate_key) print ("reducer - ", intermediate_value_list) return (intermediate_key, sum(intermediate_value_list)) """ # i.items() - copy of dictionary # intermediate.extend() - extends the list def map_reduce(i, mapper, reducer): intermediate = [] for (key,value) in i.items(): intermediate.extend(mapper(key,value)) print(intermediate) # itertools - iterator tools - group the sorted list to lambda x:x[0] groups = {} for key, group in itertools.groupby(sorted(intermediate), lambda x: x[0]): groups[key] = list([y for x, y in group]) return [reducer(intermediate_key, groups[intermediate_key]) for intermediate_key in groups] """ print(map_reduce.map_reduce(i, mapper, reducer))
'to': [1], 'leap': [1], 'white': [1], 'was': [1, 1], 'mary': [1, 1], 'brown': [1], 'lazy': [1], 'sure': [1], 'that': [1], 'little': [1], 'small': [1], 'step': [1], 'everywhere': [1], 'mankind': [1], 'went': [1], 'man': [1], 'a': [1, 1], 'fleece': [1], 'grey': [1], 'dogs': [1], 'quick': [1], 'the': [1, 1, 1], 'thats': [1]} """ filenames = ["text\\a.txt","text\\b.txt","text\\c.txt"] i = {} for filename in filenames: f = open(filename) i[filename] = f.read() f.close() print map_reduce.map_reduce(i,mapper,reducer) """ The map_reduce module imported by this program implements MapReduce in pretty much the simplest possible way, using some useful functions from the itertools library: """ # map_reduce.py """Defines a single function, map_reduce, which takes an input dictionary i and applies the user-defined function mapper to each (input_key,input_value) pair, producing a list of intermediate keys and intermediate values. Repeated intermediate keys then have their values grouped into a list, and the user-defined function reducer is applied to the intermediate key and list of intermediate values. The results are returned as a list.""" import itertools
return s.translate(string.maketrans("", ""), string.punctuation) def reducer(intermediate_key, intermediate_value_list): return (intermediate_key, sum(intermediate_value_list)) filenames = ["file3.txt"] i = {} for filename in filenames: f = open(filename) i[filename] = f.read() f.close() f = open("output.txt", "w") #print(map_reduce.map_reduce(i,mapper,reducer)) l = map_reduce.map_reduce(i, mapper, reducer) l3 = [] c = [] for (a, b) in l: s = a.split('\n') i = 0 l1 = [] for a in s: l1.append(a.strip()) l1 = l1[2:6] # print(l1) l2 = [] for a in l1: s = list(a.split(' ')) l2.append(s[0]) print(l2)