def main():
    """Run the MapReduce job over the friendships file named by argv[1].

    The file is opened with a context manager so that its handle is
    released as soon as ``MapReduce.execute`` returns or raises.
    """
    with open(sys.argv[1]) as friendships:
        MapReduce.execute(friendships, mapper, reducer)
def main():
    """Run ``mapper2``/``reducer2`` over the file named by the first argument.

    Assumes the first argument is a file of json objects formatted as
    strings, one per line.
    """
    # ``with`` guarantees the input handle is closed after the job.
    with open(sys.argv[1]) as records:
        MapReduce.execute(records, mapper2, reducer2)
import sys
from MapReduce import *

fmr = MapReduce()


def reducer(person, count_list):
    """Emit ``(person, total)``, where total is the sum of ``count_list``."""
    fmr.emit((person, sum(count_list)))


def mapper(record):
    """Emit a count of 1 keyed by the first field of ``record``."""
    fmr.emit_intermediate(record[0], 1)


if __name__ == '__main__':
    # Close the input file deterministically once the job has run.
    with open(sys.argv[1]) as fdata:
        fmr.execute(fdata, mapper, reducer)
import sys
from MapReduce import *

indexmr = MapReduce()


def mapper(record):
    """Emit ``(word, book)`` for every whitespace-separated word.

    ``record[0]`` is used as the book identifier and ``record[1]`` as
    the text that gets split into words.
    """
    book = record[0]
    for word in record[1].split():
        indexmr.emit_intermediate(word, book)


def reducer(word, books):
    """Emit ``(word, books)`` unchanged; ``books`` is not de-duplicated."""
    indexmr.emit((word, books))


if __name__ == '__main__':
    # Context manager releases the file handle after the job finishes.
    with open(sys.argv[1]) as data:
        indexmr.execute(data, mapper, reducer)
from MapReduce import *
import sys

mr = MapReduce()


def mapper(record):
    """Emit the pair ``(record[0], record[1])`` in both orders, each with 1."""
    mr.emit_intermediate((record[0], record[1]), 1)
    mr.emit_intermediate((record[1], record[0]), 1)


def reducer(key, list_of_values):
    """Emit ``key`` only when it was seen exactly once.

    A pair that received more than one value is skipped.
    """
    # NOTE(review): the single-line source reads
    # "if ...: return mr.emit(key)"; this layout (bare early return, then
    # emit) is the interpretation that emits once-seen pairs -- confirm.
    if len(list_of_values) > 1:
        return
    mr.emit(key)


# Runs at import time, as in the original script; ``with`` closes the
# input file once the job has finished.
with open(sys.argv[1]) as inputdata:
    mr.execute(inputdata, mapper, reducer)
import sys
from MapReduce import *

dnarm = MapReduce()


def mapper(record):
    """Emit ``record[0]`` keyed by ``record[1]`` minus its last 10 items."""
    kind = record[0]
    trimseq = record[1][0:-10]
    dnarm.emit_intermediate(trimseq, kind)


def reducer(trimseq, kinds):
    """Emit ``(trimseq, kinds)`` for each distinct trimmed sequence."""
    dnarm.emit((trimseq, kinds))


if __name__ == '__main__':
    # Close the input file as soon as the job is done.
    with open(sys.argv[1]) as DNAseq:
        dnarm.execute(DNAseq, mapper, reducer)
    # Single-argument print(...) behaves the same under the Python 2 print
    # statement and the Python 3 print function.
    print([i[1] for i in dnarm.result])
def main():
    """Execute the MapReduce job on the persons file passed as argv[1]."""
    # The context manager closes the file even if execute() raises.
    with open(sys.argv[1]) as persons:
        MapReduce.execute(persons, mapper, reducer)
def main():
    """Feed the file named by argv[1] through ``mapper`` and ``reducer``."""
    # Previously the file object was created inline and never closed.
    with open(sys.argv[1]) as source:
        MapReduce.execute(source, mapper, reducer)
import sys
from MapReduce import *

joinmr = MapReduce()


def mapper(record):
    """Emit the whole ``record`` keyed by its second field."""
    joinmr.emit_intermediate(record[1], record)


def reducer(orderid, ord_list):
    """Emit one joined tuple per (order, line_item) pair sharing ``orderid``.

    Records are classified by their first field ('order' or 'line_item').
    The first two fields of each record are dropped and ``orderid`` is
    prepended once to the concatenated remainder.
    """
    order = [record[2:] for record in ord_list if record[0] == 'order']
    item = [record[2:] for record in ord_list if record[0] == 'line_item']
    for orec in order:
        for irec in item:
            joinmr.emit(tuple([orderid] + orec + irec))


if __name__ == '__main__':
    # Close the input file once the job has consumed it.
    with open(sys.argv[1]) as jsdata:
        joinmr.execute(jsdata, mapper, reducer)
    # enumerate(..., 1) replaces the hand-maintained counter; the single
    # pre-formatted string prints identically under the Python 2 print
    # statement and the Python 3 print function.
    for count, i in enumerate(joinmr.result, 1):
        print('%s %s %s' % ('record N0.', count, i))
import sys
from MapReduce import *

dnarm = MapReduce()


def mapper(record):
    """Key ``record[0]`` by ``record[1]`` with its final 10 items removed."""
    dnarm.emit_intermediate(record[1][0:-10], record[0])


def reducer(trimseq, kinds):
    """Emit the trimmed sequence together with every value grouped under it."""
    dnarm.emit((trimseq, kinds))


if __name__ == '__main__':
    # ``with`` releases the file handle after execute() returns.
    with open(sys.argv[1]) as DNAseq:
        dnarm.execute(DNAseq, mapper, reducer)
    # Parenthesised single argument: same output on Python 2 and 3.
    print([i[1] for i in dnarm.result])
def main():
    """Run ``mapper`` and ``reducer`` over the records file given as argv[1]."""
    # Scope the handle to the job so it is always closed afterwards.
    with open(sys.argv[1]) as records_file:
        MapReduce.execute(records_file, mapper, reducer)
import sys
from MapReduce import *

fmr = MapReduce()


def reducer(person, count_list):
    """Sum the counts collected for ``person`` and emit ``(person, sum)``."""
    total = sum(count_list)
    fmr.emit((person, total))


def mapper(record):
    """Emit the value 1 under the key ``record[0]``."""
    fmr.emit_intermediate(record[0], 1)


if __name__ == '__main__':
    # The input file is closed automatically when the block exits.
    with open(sys.argv[1]) as fdata:
        fmr.execute(fdata, mapper, reducer)
# NOTE(review): this snippet starts mid-stream; the two statements below
# look like the tail of a loop over records whose header is not visible
# here -- confirm their indentation against the full script.
docid.add(record[0].encode('utf-8'))
term.add(record[1].encode('utf-8'))
docid = list(docid)
term = list(term)
conn.close()

# Pre-define the sizes of a and b, which are L*M and M*N.
M = len(docid)
matmr = MapReduce()


def mapper(record):
    """Emit ``record`` as an 'a' entry and a 'b' entry for each index m.

    For every m in ``range(M + 1)`` the record is emitted under key
    ``(record[0], m)`` tagged 'a' and under key ``(m, record[0])`` tagged
    'b'; the payload carries ``record[1]`` and ``record[2]``.
    """
    # NOTE(review): range(M + 1) yields M + 1 indices (0..M) -- confirm
    # that the extra index is intended.
    for m in range(M + 1):
        matmr.emit_intermediate((record[0], m), ('a', record[1], record[2]))
        matmr.emit_intermediate((m, record[0]), ('b', record[1], record[2]))


def reducer(key, klist):
    """Emit ``(key[0], key[1], dot)`` for the 'a' and 'b' entries of ``key``.

    ``klist`` holds ``(tag, index, value)`` tuples as produced by
    ``mapper``. Values are grouped by tag and index (a later duplicate
    overwrites an earlier one), then multiplied pairwise over every entry
    of ``term``; missing entries count as 0.
    """
    d = {'a': {}, 'b': {}}
    for t in klist:
        d[t[0]][t[1]] = t[2]
    mult = sum(d['a'].get(i, 0) * d['b'].get(i, 0) for i in term)
    matmr.emit((key[0], key[1], mult))


if __name__ == '__main__':
    matmr.execute(matrix, mapper, reducer)
import sys
from MapReduce import *

indexmr = MapReduce()


def mapper(record):
    """Split ``record[1]`` on whitespace and emit ``record[0]`` per word."""
    book = record[0]
    words = record[1].split()
    for word in words:
        indexmr.emit_intermediate(word, book)


def reducer(word, books):
    """Emit the word with the list of values collected for it, as is."""
    indexmr.emit((word, books))


if __name__ == '__main__':
    # Ensure the input file is closed after the job.
    with open(sys.argv[1]) as data:
        indexmr.execute(data, mapper, reducer)
import sys
from MapReduce import *

joinmr = MapReduce()


def mapper(record):
    """Group records by their second field, passing the record through."""
    key = record[1]
    joinmr.emit_intermediate(key, record)


def reducer(orderid, ord_list):
    """Join every 'order' record with every 'line_item' record of a key.

    Each emitted tuple is ``orderid`` followed by the order's fields from
    index 2 onward and then the line item's fields from index 2 onward.
    """
    order = [record[2:] for record in ord_list if record[0] == 'order']
    item = [record[2:] for record in ord_list if record[0] == 'line_item']
    for orec in order:
        for irec in item:
            joinmr.emit(tuple([orderid] + orec + irec))


if __name__ == '__main__':
    # ``with`` closes the input file when the job is done.
    with open(sys.argv[1]) as jsdata:
        joinmr.execute(jsdata, mapper, reducer)
    # Number results from 1; formatting into one string keeps the output
    # identical on Python 2 and Python 3.
    for count, i in enumerate(joinmr.result, 1):
        print('%s %s %s' % ('record N0.', count, i))
def main():
    """Run the MapReduce job on the DNA input file named by argv[1]."""
    # Managed handle: closed on normal exit and on error.
    with open(sys.argv[1]) as dna:
        MapReduce.execute(dna, mapper, reducer)
def main():
    """Execute the job defined by ``mapper2`` and ``reducer2``.

    Assumes first argument is a file of json objects formatted as
    strings, one per line.
    """
    # The inline open() used to leak its handle; scope it to the job.
    with open(sys.argv[1]) as json_lines:
        MapReduce.execute(json_lines, mapper2, reducer2)
# Collect every row plus the distinct values of its first two columns.
# ``cursor``, ``matrix``, ``docid``, ``term`` and ``conn`` are defined
# outside this snippet.
for record in cursor:
    matrix.append(record)
    docid.add(record[0].encode('utf-8'))
    term.add(record[1].encode('utf-8'))
docid = list(docid)
term = list(term)
conn.close()

# Pre-define the sizes of a and b, which are L*M and M*N.
M = len(docid)
matmr = MapReduce()


def mapper(record):
    """Fan ``record`` out as 'a' and 'b' entries for each m in range(M + 1).

    Emits ``('a', record[1], record[2])`` under ``(record[0], m)`` and
    ``('b', record[1], record[2])`` under ``(m, record[0])``.
    """
    # NOTE(review): range(M + 1) covers 0..M inclusive, one more than
    # len(docid) -- confirm that this is intended.
    for m in range(M + 1):
        matmr.emit_intermediate((record[0], m), ('a', record[1], record[2]))
        matmr.emit_intermediate((m, record[0]), ('b', record[1], record[2]))


def reducer(key, klist):
    """Emit ``(key[0], key[1], mult)`` for one output cell.

    ``klist`` contains ``(tag, index, value)`` tuples with tag 'a' or
    'b'. ``mult`` is the sum, over each entry ``i`` of ``term``, of the
    'a' value at ``i`` times the 'b' value at ``i`` (0 when absent).
    """
    d = {'a': {}, 'b': {}}
    for t in klist:
        d[t[0]][t[1]] = t[2]
    mult = sum(d['a'].get(i, 0) * d['b'].get(i, 0) for i in term)
    matmr.emit((key[0], key[1], mult))


if __name__ == '__main__':
    matmr.execute(matrix, mapper, reducer)