"""Inverted index job: group the first field of each record under every
whitespace-separated token found in the second field."""
import sys

from MapReduce import MapReduce

indexmr = MapReduce()


def mapper(record):
    """Emit one ``(word, book)`` intermediate pair per token.

    record: indexable; ``record[0]`` is forwarded unchanged as the value and
        ``record[1]`` must provide ``split()`` (it is tokenised on whitespace).
    """
    book = record[0]
    for word in record[1].split():
        indexmr.emit_intermediate(word, book)


def reducer(word, books):
    """Emit the pair ``(word, books)`` exactly as handed over by the framework."""
    indexmr.emit((word, books))


if __name__ == '__main__':
    # The context manager closes the input even if the job raises; the
    # original left the handle open until interpreter exit.
    with open(sys.argv[1]) as data:
        indexmr.execute(data, mapper, reducer)
"""Group records by their sequence with the trailing 10 items removed."""
import sys

from MapReduce import MapReduce

dnarm = MapReduce()


def mapper(record):
    """Emit ``(trimmed_sequence, kind)``.

    record: indexable; ``record[0]`` is forwarded as the value and
        ``record[1]`` is sliced with ``[0:-10]`` to form the key.
    """
    kind = record[0]
    trimseq = record[1][0:-10]
    dnarm.emit_intermediate(trimseq, kind)


def reducer(trimseq, kinds):
    """Emit ``(trimseq, kinds)`` unchanged."""
    dnarm.emit((trimseq, kinds))


if __name__ == '__main__':
    # Close the input deterministically; the original never closed it.
    with open(sys.argv[1]) as DNAseq:
        dnarm.execute(DNAseq, mapper, reducer)
    # Single-argument call form: prints the same text as the Python 2
    # ``print`` statement and is also valid on Python 3.
    print([i[1] for i in dnarm.result])
def main():
    """Run the MapReduce job over the file named by ``sys.argv[1]``.

    Raises:
        IndexError: if no path was supplied on the command line.
        IOError / OSError: if the file cannot be opened.
    """
    # The handle is closed on exit from the ``with`` block, including on
    # error; the original leaked it.
    # NOTE(review): assumes MapReduce.execute finishes reading the input
    # before it returns -- confirm against the MapReduce module.
    with open(sys.argv[1]) as persons:
        MapReduce.execute(persons, mapper, reducer)
import MapReduce
import sys

mr = MapReduce.MapReduce()
mr2 = MapReduce.MapReduce()


def mapper(record):
    """Send every record to the single intermediate key ``'a'``."""
    mr.emit_intermediate('a', record)


def reducer(pos, list_of_values):
    """Rebuild two 5x5 matrices from the records and multiply them into ``C``.

    Each record is read as ``(matrix, row, col, value)``; records tagged
    ``'a'`` fill ``A`` and all others fill ``B``.

    NOTE(review): the visible code neither emits nor returns ``C`` and never
    reads ``result`` -- the original reducer may continue beyond this view.
    """
    size = 5
    A = [[0] * size for _ in range(size)]
    B = [[0] * size for _ in range(size)]
    C = [[0] * size for _ in range(size)]

    for entry in list_of_values:
        matrix, row, col, value = entry[0], entry[1], entry[2], entry[3]
        target = A if matrix == 'a' else B
        target[row][col] = value

    result = 1
    for i in range(size):
        for j in range(size):
            for k in range(size):
                C[i][j] = C[i][j] + A[i][k] * B[k][j]
"""Relational join of ``order`` and ``line_item`` records on ``record[1]``."""
import sys

from MapReduce import MapReduce

joinmr = MapReduce()


def mapper(record):
    """Emit the whole record under its join key ``record[1]``."""
    joinmr.emit_intermediate(record[1], record)


def reducer(orderid, ord_list):
    """Emit one joined tuple per (order, line_item) pair sharing ``orderid``.

    Each output is ``orderid`` followed by the order fields from index 2
    onward, then the line-item fields from index 2 onward.  Records whose
    first field is neither ``'order'`` nor ``'line_item'`` are ignored.
    """
    orders = [record[2:] for record in ord_list if record[0] == 'order']
    items = [record[2:] for record in ord_list if record[0] == 'line_item']
    for orec in orders:
        for irec in items:
            joinmr.emit(tuple([orderid] + orec + irec))


if __name__ == '__main__':
    # Close the input deterministically; the original never closed it.
    with open(sys.argv[1]) as jsdata:
        joinmr.execute(jsdata, mapper, reducer)
    for count, joined in enumerate(joinmr.result, 1):
        # Formatted single-argument call: same text as the Python 2 statement
        # ``print 'record N0.',count,i`` and also valid on Python 3.
        print('record N0. %s %s' % (count, joined))
def main():
    """Run the MapReduce job over the file named by ``sys.argv[1]``.

    Raises:
        IndexError: if no path was supplied on the command line.
        IOError / OSError: if the file cannot be opened.
    """
    # The original opened the file inline and never closed it; the context
    # manager releases the handle even if the job raises.
    # NOTE(review): assumes MapReduce.execute finishes reading the input
    # before it returns -- confirm against the MapReduce module.
    with open(sys.argv[1]) as source:
        MapReduce.execute(source, mapper, reducer)
"""Report ordered pairs that were emitted only once across both directions."""
from MapReduce import MapReduce
import sys

mr = MapReduce()


def mapper(record):
    """Emit the pair ``(record[0], record[1])`` and its reverse, each with 1."""
    mr.emit_intermediate((record[0], record[1]), 1)
    mr.emit_intermediate((record[1], record[0]), 1)


def reducer(key, list_of_values):
    """Emit ``key`` only when it collected at most one value.

    A key with more than one value was produced by more than one input
    record (for example the relationship appeared in both directions), so
    it is skipped.
    """
    if len(list_of_values) > 1:
        return
    mr.emit(key)


# Runs at import time, as in the original.  The context manager closes the
# input file, which the original left open.
with open(sys.argv[1]) as inputdata:
    mr.execute(inputdata, mapper, reducer)
def __init__(self, root):
    """Load the trip/event data and build the main window's widgets.

    Args:
        root: the Tk window that hosts every widget created here; it is
            stored as ``self.root``.
    """
    # ---- data ---------------------------------------------------------
    path = "C:/Users/Ashutosh/Bigdata/Uber.json"
    self.l2 = mr.fun1(path)
    self.l1 = mr.fun3()
    self.el = mr.event()
    self.deve = pd.DataFrame(self.el,
                             columns=['Ename', 'Edate', 'Base', 'Trips'])

    # Convert 'd/m/yy' event dates to ISO 'yyyy-mm-dd' strings.  The column
    # is replaced in one assignment: the original wrote through
    # ``self.deve['Edate'][i]``, a chained assignment that pandas warns
    # about and that does not update the frame under copy-on-write.
    iso_dates = []
    for raw_date in self.deve['Edate']:
        day, month, year = (int(part) for part in raw_date.split('/'))
        # Zero-pad so that year 5 becomes 2005 (plain concatenation gave 205).
        year = int('20%02d' % year)
        iso_dates.append(str(datetime.date(year, month, day)))
    self.deve['Edate'] = iso_dates

    self.ds1 = pd.DataFrame(self.l2,
                            columns=['Dispatch_Base', 'Day', 'Trips'])
    self.ds2 = pd.DataFrame(
        self.l1, columns=['Dispatch_Base', 'Day', 'Active_Vehicles'])
    self.ds3 = pd.merge(self.ds1, self.ds2)
    self.ds4 = self.ds3.set_index("Dispatch_Base")
    self.ds5 = self.ds3.set_index("Day")

    # ---- window -------------------------------------------------------
    self.root = root
    self.root.title("Taxi Management System")
    self.root.geometry("1350x700+0+0")

    # ---- animated banner ----------------------------------------------
    self.canvas2 = Canvas(self.root, width=600, height=280, bd=0,
                          bg="dimgray", highlightthickness=0, relief='ridge')
    self.canvas2.place(x=70, y=-20)
    # The images are kept on ``self`` so they stay referenced while the
    # canvas displays them.
    self.gif2 = ImageTk.PhotoImage(file=r'highway.jpg')
    self.canvas2.create_image(0, 250, image=self.gif2)
    self.gif1 = ImageTk.PhotoImage(file=r'taxi2.png')
    self.ball = self.canvas2.create_image(0, 0, image=self.gif1)
    graph.animate(self, self.ball)

    # ---- title --------------------------------------------------------
    title = Text(self.root, height=1, width=32, font=("bold", 24),
                 bg="dimgray", bd=0, fg="mistyrose2")
    title.pack()
    title.insert(END, 'Welcome to Taxi Management System')
    self.root.configure(background="dimgray")

    # Styling shared by every caption label below.
    caption = dict(font=('bold', 16), fg="white", bg="dimgray")

    # ---- database path ------------------------------------------------
    Label(self.root, text='Enter the database path:-', **caption).place(
        x=10, y=310, width=300, height=25)
    self.varr = StringVar(self.root)
    self.e1 = Entry(self.root, textvariable=self.varr)
    self.e1.place(x=290, y=310, width=400, height=25)
    self.varr.set("C:/Users/Ashutosh/Bigdata/Cab.json")

    # ---- location selector --------------------------------------------
    Label(self.root, text='Location:-', **caption).place(
        x=130, y=350, width=100, height=25)
    self.loca = list(set(self.ds1['Dispatch_Base']))
    self.loca.append('All')
    self.locavar = StringVar(self.root)
    self.locavar.set(self.loca[0])
    OptionMenu(self.root, self.locavar, *self.loca).place(
        x=250, y=350, width=110, height=25)

    # ---- week-day selector --------------------------------------------
    Label(self.root, text='Week Day:-', **caption).place(
        x=440, y=350, width=115, height=25)
    self.day = list(set(self.ds1['Day']))
    self.day.append('All')
    self.dayvar = StringVar(self.root)
    self.dayvar.set(self.day[0])
    OptionMenu(self.root, self.dayvar, *self.day).place(
        x=580, y=350, width=110, height=25)

    # ---- analysis selector --------------------------------------------
    Label(self.root, text='Enter the type of Analysis:-', **caption).place(
        x=10, y=390, width=300, height=25)
    self.toa = [
        'Trips on all week days of a location',
        'Active Vehicles on all week days of a location',
        'Trips per Vehicle on all week days of a location',
        'Trips per Vehicle on a week day of all locations',
        'Trips per Vehicle on all week days of all locations',
    ]
    self.toavar = StringVar(self.root)
    self.toavar.set(self.toa[0])
    OptionMenu(self.root, self.toavar, *self.toa).place(
        x=320, y=390, width=370, height=25)

    analyze_button = Button(self.root, command=self.analyze, text="Analyze",
                            compound=LEFT,
                            font=("Industry Inc Detail Fill", 20, "bold"),
                            bg="white", fg="dimgray")
    analyze_button.place(x=320, y=440, height=50, width=120)

    # ---- upcoming event -----------------------------------------------
    Label(self.root, text='Upcoming Event:-', **caption).place(
        x=40, y=500, width=180, height=25)
    self.e_icon = ImageTk.PhotoImage(file=r"events.jpg")
    event_banner = Label(self.root, image=self.e_icon, bg="dimgray")
    event_banner.place(x=60, y=540, width=600, height=150)

    self.eda = self.edate()
    # Reverse the '-'-separated parts of the date for display.
    dst = '-'.join(reversed(self.eda.split('-')))
    a, self.ba, self.c = self.event(self.eda)
    Label(self.root, text=f"{a} at {self.ba} on {dst}", **caption).place(
        x=160, y=600, width=400, height=25)
def main():
    """Run the ``mapper2``/``reducer2`` job over the file in ``sys.argv[1]``.

    Assumes the first argument is a file of json objects formatted as
    strings, one per line.

    Raises:
        IndexError: if no path was supplied on the command line.
        IOError / OSError: if the file cannot be opened.
    """
    # The original opened the file inline and never closed it.
    # NOTE(review): assumes MapReduce.execute finishes reading the input
    # before it returns -- confirm against the MapReduce module.
    with open(sys.argv[1]) as json_lines:
        MapReduce.execute(json_lines, mapper2, reducer2)
def main():
    """Run the MapReduce job over the file named by ``sys.argv[1]``.

    Raises:
        IndexError: if no path was supplied on the command line.
        IOError / OSError: if the file cannot be opened.
    """
    # Close the handle deterministically; the original never closed it.
    # NOTE(review): assumes MapReduce.execute finishes reading the input
    # before it returns -- confirm against the MapReduce module.
    with open(sys.argv[1]) as friendships:
        MapReduce.execute(friendships, mapper, reducer)
"""Count how many input records start with each distinct first field."""
import sys

from MapReduce import MapReduce

fmr = MapReduce()


def reducer(person, count_list):
    """Emit ``(person, total)`` where ``total`` is the sum of ``count_list``."""
    fmr.emit((person, sum(count_list)))


def mapper(record):
    """Emit ``(record[0], 1)`` so each record contributes one to its key."""
    fmr.emit_intermediate(record[0], 1)


if __name__ == '__main__':
    # Close the input deterministically; the original never closed it.
    with open(sys.argv[1]) as fdata:
        fmr.execute(fdata, mapper, reducer)
def main():
    """Run the MapReduce job over the file named by ``sys.argv[1]``.

    Raises:
        IndexError: if no path was supplied on the command line.
        IOError / OSError: if the file cannot be opened.
    """
    # The ``with`` block guarantees the file is closed, even on error.
    # NOTE(review): assumes MapReduce.execute finishes reading the input
    # before it returns -- confirm against the MapReduce module.
    with open(sys.argv[1]) as data:
        MapReduce.execute(data, mapper, reducer)
"""Group records by their sequence with the trailing 10 items removed."""
import sys

from MapReduce import MapReduce

dnarm = MapReduce()


def mapper(record):
    """Emit ``(record[1][0:-10], record[0])`` as an intermediate pair."""
    kind = record[0]
    trimseq = record[1][0:-10]
    dnarm.emit_intermediate(trimseq, kind)


def reducer(trimseq, kinds):
    """Emit ``(trimseq, kinds)`` unchanged."""
    dnarm.emit((trimseq, kinds))


if __name__ == '__main__':
    # Close the input deterministically; the original never closed it.
    with open(sys.argv[1]) as DNAseq:
        dnarm.execute(DNAseq, mapper, reducer)
    # ``print[...]`` was a Python 2 print statement; on Python 3 it parses
    # as subscripting the ``print`` builtin and raises TypeError.  The call
    # form prints the same text on both versions.
    print([i[1] for i in dnarm.result])
def main():
    """Run the MapReduce job over the file named by ``sys.argv[1]``.

    Raises:
        IndexError: if no path was supplied on the command line.
        IOError / OSError: if the file cannot be opened.
    """
    # Release the file handle when the job ends; the original leaked it.
    # NOTE(review): assumes MapReduce.execute finishes reading the input
    # before it returns -- confirm against the MapReduce module.
    with open(sys.argv[1]) as dna:
        MapReduce.execute(dna, mapper, reducer)
'''
Created on May 22, 2013

@author: Josyula

First stage of a two-matrix job: records are keyed by the index the two
matrices share (column for 'a', row for everything else).
'''
from collections import defaultdict
import MapReduce
import sys

#Part 1
mr = MapReduce.MapReduce()
mr_final = MapReduce.MapReduce()
# Opened for writing at import time; not used in the visible part of the file.
data = open('matrix_final.json', 'w')
p_dict = defaultdict(list)


#Part2
def mapper(record):
    """Emit the record under its shared index.

    ``record`` is read as ``(matrix, row, col, value)``.  An ``'a'`` record
    is keyed by its column and carries ``(matrix, row, value)``; any other
    record is keyed by its row and carries ``(matrix, col, value)``.
    """
    matrix, row, col, value = record[0], record[1], record[2], record[3]
    if matrix == 'a':
        key, payload = col, (matrix, row, value)
    else:
        key, payload = row, (matrix, col, value)
    mr.emit_intermediate(key, payload)

#Part 3
"""Tally records per distinct value of their first field."""
import sys

from MapReduce import MapReduce

fmr = MapReduce()


def reducer(person, count_list):
    """Emit ``(person, sum(count_list))``."""
    fmr.emit((person, sum(count_list)))


def mapper(record):
    """Emit ``(record[0], 1)`` for every input record."""
    fmr.emit_intermediate(record[0], 1)


if __name__ == '__main__':
    # The context manager closes the input even if the job raises; the
    # original left the handle open.
    with open(sys.argv[1]) as fdata:
        fmr.execute(fdata, mapper, reducer)
"""Count how many input records start with each distinct first field."""
import MapReduce
import sys

mr = MapReduce.MapReduce()


def mapper(record):
    """Emit ``(record[0], 1)`` for every input record."""
    personA = record[0]
    mr.emit_intermediate(personA, 1)


def reducer(personA, list_of_friends):
    """Emit ``(personA, n)`` where ``n`` is the number of collected values."""
    mr.emit((personA, len(list_of_friends)))


# Runs at import time, as in the original.  The context manager closes the
# input file, which the original left open.
with open(sys.argv[1]) as inputdata:
    mr.execute(inputdata, mapper, reducer)
def main():
    """Run the ``mapper2``/``reducer2`` job over the file in ``sys.argv[1]``.

    Assumes the first argument is a file of json objects formatted as
    strings, one per line.

    Raises:
        IndexError: if no path was supplied on the command line.
        IOError / OSError: if the file cannot be opened.
    """
    # The inline ``open`` of the original was never closed; the context
    # manager releases the handle even when the job fails.
    # NOTE(review): assumes MapReduce.execute finishes reading the input
    # before it returns -- confirm against the MapReduce module.
    with open(sys.argv[1]) as json_lines:
        MapReduce.execute(json_lines, mapper2, reducer2)
def main():
    """Run the MapReduce job over the file named by ``sys.argv[1]``.

    Raises:
        IndexError: if no path was supplied on the command line.
        IOError / OSError: if the file cannot be opened.
    """
    # Close the handle when the job is done; the original left it open.
    # NOTE(review): assumes MapReduce.execute finishes reading the input
    # before it returns -- confirm against the MapReduce module.
    with open(sys.argv[1]) as records_file:
        MapReduce.execute(records_file, mapper, reducer)
# Single-argument call form: same output as the Python 2 ``print`` statement
# and also valid syntax on Python 3.
print('selected successfully\n')

# Collect every row from the cursor plus the distinct values of its first
# two fields.
# NOTE(review): on Python 3 ``.encode('utf-8')`` yields ``bytes``; if the
# MapReduce records carry ``str`` terms, the lookups over ``term`` in
# ``reducer`` would never match -- confirm the target interpreter.
matrix = []
docid = set()
term = set()
for record in cursor:
    matrix.append(record)
    docid.add(record[0].encode('utf-8'))
    term.add(record[1].encode('utf-8'))
docid = list(docid)
term = list(term)
conn.close()

#pre-define the sizes of a&b, which are L*M&M*N
M = len(docid)
matmr = MapReduce()


def mapper(record):
    """Emit ``record`` once as an 'a' entry and once as a 'b' entry per index.

    For every ``m`` in ``0..M`` inclusive, the record is sent to key
    ``(record[0], m)`` tagged ``'a'`` and to key ``(m, record[0])`` tagged
    ``'b'``; both carry ``record[1]`` and ``record[2]``.
    """
    for m in range(M + 1):
        matmr.emit_intermediate((record[0], m), ('a', record[1], record[2]))
        matmr.emit_intermediate((m, record[0]), ('b', record[1], record[2]))


def reducer(key, klist):
    """Emit ``(key[0], key[1], dot)`` for the 'a' and 'b' entries of ``key``.

    ``dot`` is the sum over every value in the module-level ``term`` list of
    the 'a' entry times the 'b' entry, with missing entries counted as 0.
    """
    d = {'a': {}, 'b': {}}
    for t in klist:
        d[t[0]][t[1]] = t[2]
    mult = sum(d['a'].get(i, 0) * d['b'].get(i, 0) for i in term)
    matmr.emit((key[0], key[1], mult))
"""Matrix multiplication a x b over sparse ``(matrix, row, col, value)`` records."""
import sys

from MapReduce import MapReduce

#pre-define the sizes of a&b, which are L*M&M*N
# N and L are used as the largest column / row index: the mapper iterates
# ``range(N + 1)`` and ``range(L + 1)``, i.e. indices 0..4.
N = 4
L = 4
matmr = MapReduce()


def mapper(record):
    """Replicate each cell to every output cell it contributes to.

    An ``'a'`` cell ``(a, i, j, v)`` goes to keys ``(i, n)`` for n in 0..N;
    a ``'b'`` cell ``(b, j, k, v)`` goes to keys ``(l, k)`` for l in 0..L.
    The value sent is ``(matrix, j, v)``, with ``j`` the shared index.
    Records tagged with anything else are ignored.
    """
    if record[0] == 'a':
        for n in range(N + 1):
            matmr.emit_intermediate((record[1], n),
                                    (record[0], record[2], record[3]))
    elif record[0] == 'b':
        for l in range(L + 1):
            matmr.emit_intermediate((l, record[2]),
                                    (record[0], record[1], record[3]))


def reducer(key, klist):
    """Emit ``(row, col, value)`` for the output cell identified by ``key``.

    The value is the sum of a[j] * b[j] over the shared index ``j`` from 0
    to the largest index seen, with missing entries counted as 0.
    """
    d = {'a': {}, 'b': {}}
    maxm = 0
    for t in klist:
        maxm = max(maxm, t[1])
        d[t[0]][t[1]] = t[2]
    mult = sum(d['a'].get(i, 0) * d['b'].get(i, 0) for i in range(maxm + 1))
    matmr.emit((key[0], key[1], mult))


if __name__ == '__main__':
    # Close the input deterministically; the original never closed it.
    with open(sys.argv[1]) as matr:
        matmr.execute(matr, mapper, reducer)