コード例 #1
0
ファイル: inverted_index.py プロジェクト: 1nfo/uw_datasci
import sys
from MapReduce import *

# Shared MapReduce job object: mapper() and reducer() below emit through it,
# and the __main__ block runs it.
indexmr=MapReduce()
def mapper(record):
    """Emit a (word, document) pair for every token of a document record.

    record[0] is used as the document identifier and record[1] as its text.
    The text is split on whitespace and every token is emitted as a key, so
    a word that occurs several times is emitted several times.
    """
    doc_id, text = record[0], record[1]
    for token in text.split():
        indexmr.emit_intermediate(token, doc_id)

def reducer(word,books):
    """Emit one (word, books) tuple for the grouped key.

    `books` is forwarded unchanged, so it contains every value collected for
    `word`, duplicates included.
    """
    posting = (word, books)
    indexmr.emit(posting)

if __name__=='__main__':
    # The input path is the first command-line argument. The context manager
    # closes the file once the job returns (the original never closed it).
    # Assumes execute() has finished reading the file when it returns.
    with open(sys.argv[1]) as data:
        indexmr.execute(data,mapper,reducer)
コード例 #2
0
ファイル: unique_trims.py プロジェクト: 1nfo/uw_datasci
import sys
from MapReduce import *

# Shared MapReduce job object used by mapper(), reducer() and the __main__
# block below.
dnarm=MapReduce()

def mapper(record):
    """Key each record by its sequence with the last 10 characters removed.

    record[0] is emitted as the value and record[1] (minus its final 10
    items) as the intermediate key, so records sharing a trimmed sequence
    are grouped together.
    """
    label, sequence = record[0], record[1]
    dnarm.emit_intermediate(sequence[:-10], label)

def reducer(trimseq,kinds):
    """Emit one (trimseq, kinds) tuple per distinct trimmed sequence."""
    grouped = (trimseq, kinds)
    dnarm.emit(grouped)

if __name__=='__main__':
    # The input path is the first command-line argument. The context manager
    # closes the file once the job returns (the original never closed it).
    # Assumes execute() has finished reading the file when it returns.
    with open(sys.argv[1]) as DNAseq:
        dnarm.execute(DNAseq,mapper,reducer)
    # Print the second element of every result entry. A parenthesised
    # single-argument print produces the same output on Python 2 and 3.
    print([i[1] for i in dnarm.result])
コード例 #3
0
ファイル: sg2.py プロジェクト: bketkar/courses
def main():
    """Run the MapReduce job over the file named by the first CLI argument.

    The file is opened in a context manager so it is closed when the job
    returns, including when the job raises.
    """
    with open(sys.argv[1]) as persons:
        MapReduce.execute(persons, mapper, reducer)
コード例 #4
0
import MapReduce
import sys

# Job object used by mapper() below.
mr = MapReduce.MapReduce()
# NOTE(review): mr2 is not referenced in the visible part of this snippet;
# presumably intended for a second pass -- confirm.
mr2 = MapReduce.MapReduce()


def mapper(record):
    """Emit every record under the single intermediate key 'a'.

    Using one constant key groups all records together.
    """
    shared_key = 'a'
    mr.emit_intermediate(shared_key, record)


def reducer(pos, list_of_values):
    """Rebuild two 5x5 matrices from the records and multiply them.

    Each record is read as (matrix, row, col, value). Records tagged 'a'
    fill A; every other record fills B. Cells with no record stay 0.

    NOTE(review): in the visible code the product C is computed but never
    emitted or returned, and `result` is never used -- the snippet may be
    truncated. `pos` is not used either.
    """
    # Zero-initialised 5x5 operands and product.
    A = [[0, 0, 0, 0, 0] for i in range(5)]
    B = [[0, 0, 0, 0, 0] for i in range(5)]
    C = [[0, 0, 0, 0, 0] for i in range(5)]
    for record in list_of_values:
        matrix = record[0]
        row = record[1]
        col = record[2]
        value = record[3]
        if matrix == 'a':
            A[row][col] = value
        else:
            B[row][col] = value

    result = 1
    # Standard triple loop: C[i][j] = sum over k of A[i][k] * B[k][j].
    for i in range(5):
        for j in range(5):
            for k in range(5):
                C[i][j] += A[i][k] * B[k][j]
コード例 #5
0
ファイル: join.py プロジェクト: 1nfo/uw_datasci
import sys
from MapReduce import *

# Shared MapReduce job object used by mapper(), reducer() and the __main__
# block below.
joinmr=MapReduce()

def mapper(record):
    """Key each record by its second field (record[1]) and emit it whole."""
    join_key = record[1]
    joinmr.emit_intermediate(join_key, record)

def reducer(orderid,ord_list):
    """Emit the cross product of 'order' and 'line_item' rows for one key.

    Records are classified by record[0]; the first two fields are dropped
    (record[2:]) and each emitted tuple is orderid followed by the order
    fields and then the line-item fields. Records carrying any other tag
    are ignored.
    """
    order_rows = []
    item_rows = []
    for rec in ord_list:
        tag = rec[0]
        if tag == 'order':
            order_rows.append(rec[2:])
        elif tag == 'line_item':
            item_rows.append(rec[2:])

    for order_fields in order_rows:
        for item_fields in item_rows:
            joined = [orderid] + order_fields + item_fields
            joinmr.emit(tuple(joined))

if __name__=='__main__':
    # The input path is the first command-line argument. The context manager
    # closes the file once the job returns (the original never closed it).
    # Assumes execute() has finished reading the file when it returns.
    with open(sys.argv[1]) as jsdata:
        joinmr.execute(jsdata,mapper,reducer)
    # Number the joined records from 1. `count` keeps its 0 default when
    # there are no results, as before.
    count=0
    for count, i in enumerate(joinmr.result, 1):
        # One pre-formatted argument prints the same text on Python 2 and 3
        # as the original comma-separated print statement.
        print('record N0. %s %s' % (count, i))
コード例 #6
0
ファイル: InvertedIndex.py プロジェクト: bketkar/courses
def main():
    """Run the MapReduce job over the file named by the first CLI argument.

    The file is opened in a context manager so it is closed when the job
    returns, including when the job raises.
    """
    with open(sys.argv[1]) as source:
        MapReduce.execute(source, mapper, reducer)
コード例 #7
0
from MapReduce import *
import sys

# Shared MapReduce job object used by mapper(), reducer() and the run
# statements at the bottom of this snippet.
mr = MapReduce()

def mapper(record):
    """Emit the pair (record[0], record[1]) in both orientations.

    Each orientation is emitted as a key with the value 1, so a pair and
    its reverse end up contributing to the same two keys.
    """
    first, second = record[0], record[1]
    for pair in ((first, second), (second, first)):
        mr.emit_intermediate(pair, 1)

def reducer(key, list_of_values):
    """Emit `key` only when at most one value was collected for it.

    The mapper emits every pair in both orientations, so a key with more
    than one value was produced by more than one input record (typically
    the pair and its reverse); such keys are dropped.
    """
    if len(list_of_values) <= 1:
        mr.emit(key)

# The input path is the first command-line argument. The context manager
# closes the file once the job returns (the original never closed it).
# Assumes execute() has finished reading the file when it returns.
with open(sys.argv[1]) as inputdata:
    mr.execute(inputdata, mapper, reducer)
コード例 #8
0
    def __init__(self, root):
        """Load the trip data and build the main window's widgets on ``root``.

        Args:
            root: Top-level window object. The code calls ``title``,
                ``geometry`` and ``configure`` on it and uses it as the
                parent of every widget (presumably a ``tkinter.Tk`` --
                TODO confirm against the caller).
        """

        # NOTE(review): hard-coded, machine-specific input path.
        path = "C:/Users/Ashutosh/Bigdata/Uber.json"
        # Raw rows come from the project's `mr` helper; their exact contents
        # are not visible here. They are given column names below.
        self.l2 = mr.fun1(path)
        self.l1 = mr.fun3()
        self.el = mr.event()
        self.deve = pd.DataFrame(self.el,
                                 columns=['Ename', 'Edate', 'Base', 'Trips'])
        # Rewrite every 'Edate' from 'day/month/year' text to str() of a
        # date object. '20' is prefixed to the year, so a two-digit year is
        # assumed.
        # NOTE(review): this is chained indexing assignment
        # (frame[col][i] = ...); if `pd` is pandas it may warn or fail to
        # write through depending on the version -- confirm.
        for i in range(len(self.deve['Edate'])):
            day, month, year = (int(j)
                                for j in self.deve['Edate'][i].split('/'))
            year = int('20' + str(year))
            born = datetime.date(year, month, day)
            self.deve['Edate'][i] = str(born)
        # Trips and active vehicles per (Dispatch_Base, Day), then merged.
        # No join keys are passed to merge(), so the library default applies.
        self.ds1 = pd.DataFrame(self.l2,
                                columns=['Dispatch_Base', 'Day', 'Trips'])
        self.ds2 = pd.DataFrame(
            self.l1, columns=['Dispatch_Base', 'Day', 'Active_Vehicles'])
        self.ds3 = pd.merge(self.ds1, self.ds2)
        # Two views of the merged data, indexed by base and by day.
        self.ds4 = self.ds3.set_index("Dispatch_Base")
        self.ds5 = self.ds3.set_index("Day")
        self.root = root
        self.root.title("Taxi Management System")
        self.root.geometry("1350x700+0+0")

        #..............................................................
        # Banner canvas: a background image plus a taxi image that is handed
        # to graph.animate(). The image objects are kept on self (presumably
        # so they are not garbage-collected while displayed).
        self.canvas2 = Canvas(self.root,
                              width=600,
                              height=280,
                              bd=0,
                              bg="dimgray",
                              highlightthickness=0,
                              relief='ridge')
        self.canvas2.place(x=70, y=-20)
        self.gif2 = ImageTk.PhotoImage(file=r'highway.jpg')
        self.canvas2.create_image(0, 250, image=self.gif2)
        self.gif1 = ImageTk.PhotoImage(file=r'taxi2.png')
        self.ball = self.canvas2.create_image(0, 0, image=self.gif1)
        graph.animate(self, self.ball)
        #...........................................................................

        # Title text at the top of the window.
        T = Text(self.root,
                 height=1,
                 width=32,
                 font=("bold", 24),
                 bg="dimgray",
                 bd=0,
                 fg="mistyrose2")
        T.pack()
        T.insert(END, 'Welcome to Taxi Management System')
        self.root.configure(background="dimgray")
        # Database path entry, pre-filled with a default path.
        l = Label(self.root,
                  text='Enter the database path:-',
                  font=('bold', 16),
                  fg="white",
                  bg="dimgray")
        l.place(x=10, y=310, width=300, height=25)
        self.varr = StringVar(self.root)
        self.e1 = Entry(self.root, textvariable=self.varr)
        self.e1.place(x=290, y=310, width=400, height=25)
        self.varr.set("C:/Users/Ashutosh/Bigdata/Cab.json")
        # Location drop-down: distinct 'Dispatch_Base' values plus 'All'.
        # The values pass through a set, so their order (and therefore the
        # default selection, the first entry) is not fixed.
        l = Label(self.root,
                  text='Location:-',
                  font=('bold', 16),
                  fg="white",
                  bg="dimgray")
        l.place(x=130, y=350, width=100, height=25)
        self.loca = list(set(self.ds1['Dispatch_Base']))
        self.loca.append('All')
        self.locavar = StringVar(root)
        self.locavar.set(self.loca[0])
        w = OptionMenu(self.root, self.locavar, *self.loca)
        w.place(x=250, y=350, width=110, height=25)
        # Week-day drop-down, built the same way from the 'Day' column.
        l = Label(self.root,
                  text='Week Day:-',
                  font=('bold', 16),
                  fg="white",
                  bg="dimgray")
        l.place(x=440, y=350, width=115, height=25)
        self.day = list(set(self.ds1['Day']))
        self.day.append('All')
        self.dayvar = StringVar(self.root)
        self.dayvar.set(self.day[0])
        w = OptionMenu(self.root, self.dayvar, *self.day)
        w.place(x=580, y=350, width=110, height=25)
        # Analysis-type drop-down; the first option is selected by default.
        l = Label(self.root,
                  text='Enter the type of Analysis:-',
                  font=('bold', 16),
                  fg="white",
                  bg="dimgray")
        l.place(x=10, y=390, width=300, height=25)
        self.toa = [
            'Trips on all week days of a location',
            'Active Vehicles on all week days of a location',
            'Trips per Vehicle on all week days of a location',
            'Trips per Vehicle on a week day of all locations',
            'Trips per Vehicle on all week days of all locations'
        ]
        self.toavar = StringVar(self.root)
        self.toavar.set(self.toa[0])
        w = OptionMenu(self.root, self.toavar, *self.toa)
        w.place(x=320, y=390, width=370, height=25)
        # Button that invokes self.analyze when clicked.
        btn3 = Button(self.root,
                      command=self.analyze,
                      text="Analyze",
                      compound=LEFT,
                      font=("Industry Inc Detail Fill", 20, "bold"),
                      bg="white",
                      fg="dimgray")
        btn3.place(x=320, y=440, height=50, width=120)
        # "Upcoming Event" panel: heading, background image, and a caption
        # built from self.edate() and self.event().
        l = Label(self.root,
                  text='Upcoming Event:-',
                  font=('bold', 16),
                  fg="white",
                  bg="dimgray")
        l.place(x=40, y=500, width=180, height=25)
        self.e_icon = ImageTk.PhotoImage(file=r"events.jpg")
        e_lb1 = Label(self.root, image=self.e_icon, bg="dimgray")
        e_lb1.place(x=60, y=540, width=600, height=150)
        self.eda = self.edate()
        # Reverse the '-'-separated parts of the date string for display.
        dst = '-'.join(list(reversed(self.eda.split('-'))))
        a, self.ba, self.c = self.event(self.eda)
        l = Label(self.root,
                  text=f"{a} at {self.ba} on {dst}",
                  font=('bold', 16),
                  fg="white",
                  bg="dimgray")
        l.place(x=160, y=600, width=400, height=25)
コード例 #9
0
def main():
    """Run the MapReduce job over the file named by the first CLI argument.

    Assumes that file contains JSON objects formatted as strings, one per
    line. It is opened in a context manager so it is closed when the job
    returns, including when the job raises.
    """
    with open(sys.argv[1]) as source:
        MapReduce.execute(source, mapper2, reducer2)
コード例 #10
0
ファイル: sg1.py プロジェクト: bketkar/courses
def main():
    """Run the MapReduce job over the file named by the first CLI argument.

    The file is opened in a context manager so it is closed when the job
    returns, including when the job raises.
    """
    with open(sys.argv[1]) as friendships:
        MapReduce.execute(friendships, mapper, reducer)
コード例 #11
0
import sys
from MapReduce import *

# Shared MapReduce job object used by reducer(), mapper() and the __main__
# block below.
fmr = MapReduce()


def reducer(person, count_list):
    """Emit (person, total), where total is the sum of the collected counts."""
    total = sum(count_list)
    fmr.emit((person, total))


def mapper(record):
    """Emit a count of 1 keyed by the record's first field."""
    person = record[0]
    fmr.emit_intermediate(person, 1)


if __name__ == '__main__':
    # The input path is the first command-line argument. The context manager
    # closes the file once the job returns (the original never closed it).
    # Assumes execute() has finished reading the file when it returns.
    with open(sys.argv[1]) as fdata:
        fmr.execute(fdata, mapper, reducer)
コード例 #12
0
ファイル: join.py プロジェクト: wgwz/courses
def main():
    """Run the MapReduce job over the file named by the first CLI argument.

    The file is opened in a context manager so it is closed when the job
    returns, including when the job raises.
    """
    with open(sys.argv[1]) as data:
        MapReduce.execute(data, mapper, reducer)
コード例 #13
0
import sys
from MapReduce import *

# Shared MapReduce job object used by mapper(), reducer() and the __main__
# block below.
dnarm = MapReduce()


def mapper(record):
    """Key each record by its sequence with the last 10 characters removed.

    record[0] is emitted as the value and record[1] (minus its final 10
    items) as the intermediate key, so records sharing a trimmed sequence
    are grouped together.
    """
    label, sequence = record[0], record[1]
    dnarm.emit_intermediate(sequence[:-10], label)


def reducer(trimseq, kinds):
    """Emit one (trimseq, kinds) tuple per distinct trimmed sequence."""
    grouped = (trimseq, kinds)
    dnarm.emit(grouped)


if __name__ == '__main__':
    # The input path is the first command-line argument. The context manager
    # closes the file once the job returns (the original never closed it).
    # Assumes execute() has finished reading the file when it returns.
    with open(sys.argv[1]) as DNAseq:
        dnarm.execute(DNAseq, mapper, reducer)
    # Print the second element of every result entry. A parenthesised
    # single-argument print produces the same output on Python 2 and 3.
    print([i[1] for i in dnarm.result])
コード例 #14
0
def main():
    """Run the MapReduce job over the file named by the first CLI argument.

    The file is opened in a context manager so it is closed when the job
    returns, including when the job raises.
    """
    with open(sys.argv[1]) as persons:
        MapReduce.execute(persons, mapper, reducer)
コード例 #15
0
ファイル: dna.py プロジェクト: bketkar/courses
def main():
    """Run the MapReduce job over the file named by the first CLI argument.

    The file is opened in a context manager so it is closed when the job
    returns, including when the job raises.
    """
    with open(sys.argv[1]) as dna:
        MapReduce.execute(dna, mapper, reducer)
コード例 #16
0
'''
Created on May 22, 2013

@author: Josyula
'''
from collections import defaultdict
import MapReduce
import sys

#Part 1
# Two job objects. Only `mr` is used by the mapper visible in this snippet.
# NOTE(review): `mr_final` is presumably for a second pass -- confirm.
mr = MapReduce.MapReduce()
mr_final = MapReduce.MapReduce()

# Opened for writing at import time, which creates or truncates the file.
# NOTE(review): no matching close() is visible in this snippet.
data = open('matrix_final.json', 'w')

# Key -> list accumulator; its use is not visible in this snippet.
p_dict = defaultdict(list)
#Part2


def mapper(record):
    """Key each matrix cell by the index it shares with the other matrix.

    A record is read as (matrix, row, col, value). Cells of matrix 'a' are
    keyed by their column and carry their row; cells of any other matrix
    are keyed by their row and carry their column.
    """
    matrix, row, col, value = record[0], record[1], record[2], record[3]
    if matrix == 'a':
        shared_index, other_index = col, row
    else:
        shared_index, other_index = row, col
    mr.emit_intermediate(shared_index, (matrix, other_index, value))


#Part 3
コード例 #17
0
ファイル: friend_count.py プロジェクト: 1nfo/uw_datasci
import sys
from MapReduce import *

# Shared MapReduce job object used by reducer(), mapper() and the __main__
# block below.
fmr=MapReduce()

def reducer(person,count_list):
    """Emit (person, total), where total is the sum of the collected counts."""
    total = sum(count_list)
    fmr.emit((person, total))

def mapper(record):
    """Emit a count of 1 keyed by the record's first field."""
    person = record[0]
    fmr.emit_intermediate(person, 1)

if __name__=='__main__':
    # The input path is the first command-line argument. The context manager
    # closes the file once the job returns (the original never closed it).
    # Assumes execute() has finished reading the file when it returns.
    with open(sys.argv[1]) as fdata:
        fmr.execute(fdata,mapper,reducer)
コード例 #18
0
import MapReduce
import sys

# Shared MapReduce job object used by mapper(), reducer() and the run
# statements at the bottom of this snippet.
mr = MapReduce.MapReduce()


def mapper(record):
    """Emit a 1 keyed by the record's first field."""
    mr.emit_intermediate(record[0], 1)


def reducer(personA, list_of_friends):
    """Emit (personA, n), where n is the number of values collected."""
    friend_total = len(list_of_friends)
    mr.emit((personA, friend_total))


# The input path is the first command-line argument. The context manager
# closes the file once the job returns (the original never closed it).
# Assumes execute() has finished reading the file when it returns.
with open(sys.argv[1]) as inputdata:
    mr.execute(inputdata, mapper, reducer)
コード例 #19
0
ファイル: matrix_multiply.py プロジェクト: kayc1912/Coursera
def main():
    """Run the MapReduce job over the file named by the first CLI argument.

    Assumes that file contains JSON objects formatted as strings, one per
    line. It is opened in a context manager so it is closed when the job
    returns, including when the job raises.
    """
    with open(sys.argv[1]) as source:
        MapReduce.execute(source, mapper2, reducer2)
コード例 #20
0
ファイル: mult.py プロジェクト: bketkar/courses
def main():
    """Run the MapReduce job over the file named by the first CLI argument.

    The file is opened in a context manager so it is closed when the job
    returns, including when the job raises.
    """
    with open(sys.argv[1]) as records_file:
        MapReduce.execute(records_file, mapper, reducer)
コード例 #21
0
ファイル: matest.py プロジェクト: 1nfo/uw_datasci
# NOTE(review): `cursor` and `conn` are created earlier in the script,
# outside this snippet.
print 'selected successfully\n'

# Keep every row in `matrix`, and collect the distinct first and second
# fields of the rows (UTF-8 encoded) as document ids and terms.
matrix=[]
docid=set()
term=set()
for record in cursor:
    matrix.append(record)
    docid.add(record[0].encode('utf-8'))
    term.add(record[1].encode('utf-8'))
# Convert the sets to lists; the resulting order is not fixed.
docid=list(docid)
term=list(term)
conn.close()

#pre-define the sizes of a&b, which are L*M&M*N
# M is the number of distinct document ids; mapper() iterates range(M+1).
M=len(docid)
# Shared MapReduce job object used by mapper() and reducer() below.
matmr=MapReduce()

def mapper(record):
    """Replicate one record under every key pair it can contribute to.

    For each m in range(M + 1) the record is emitted twice: tagged 'a'
    under the key (record[0], m) and tagged 'b' under the key
    (m, record[0]). Both values carry record[1] and record[2].
    """
    first, inner, value = record[0], record[1], record[2]
    for m in range(M + 1):
        matmr.emit_intermediate((first, m), ('a', inner, value))
        matmr.emit_intermediate((m, first), ('b', inner, value))

def reducer(key,klist):
    """Emit (key[0], key[1], dot product) for one output cell.

    Each value in `klist` is read as (tag, index, value) with tag 'a' or
    'b'. The values are grouped per tag into index -> value maps, and the
    products of matching entries are summed over the module-level `term`
    list. An index missing from either side contributes 0.
    """
    # The original also set an unused local `maxm`; it is dropped here.
    d = {'a': {}, 'b': {}}
    for t in klist:
        d[t[0]][t[1]] = t[2]
    # A generator avoids building a throwaway list just to sum it.
    mult = sum(d['a'].get(i, 0) * d['b'].get(i, 0) for i in term)
    matmr.emit((key[0], key[1], mult))
コード例 #22
0
import sys
from MapReduce import *

# Pre-defined dimensions for the two matrices: a is L x M and b is M x N.
# mapper() iterates range(N+1) and range(L+1), so N and L are used as the
# largest index (inclusive) rather than as a count.
N=4
L=4
# Shared MapReduce job object used by mapper(), reducer() and the __main__
# block below.
matmr=MapReduce()

def mapper(record):
    """Replicate a matrix cell to every output cell it contributes to.

    A record is read as (matrix, row, col, value). A cell of 'a' is emitted
    under (row, n) for every n in range(N + 1), carrying its column; a cell
    of 'b' is emitted under (l, col) for every l in range(L + 1), carrying
    its row. Records with any other tag are ignored.
    """
    tag = record[0]
    if tag == 'a':
        for n in range(N + 1):
            matmr.emit_intermediate((record[1], n), (tag, record[2], record[3]))
    elif tag == 'b':
        for l in range(L + 1):
            matmr.emit_intermediate((l, record[2]), (tag, record[1], record[3]))

def reducer(key,klist):
    """Emit (key[0], key[1], dot product) for one output cell.

    Each value in `klist` is read as (tag, index, value) with tag 'a' or
    'b'. The products of entries sharing an index are summed over
    0..largest index seen; an index missing from either side contributes 0.
    """
    cells = {'a': {}, 'b': {}}
    highest = 0
    for entry in klist:
        tag, idx = entry[0], entry[1]
        if idx > highest:
            highest = idx
        cells[tag][idx] = entry[2]

    total = 0
    for i in range(highest + 1):
        total += cells['a'].get(i, 0) * cells['b'].get(i, 0)
    matmr.emit((key[0], key[1], total))

if __name__=='__main__':
    # The input path is the first command-line argument. The context manager
    # closes the file once the job returns (the original never closed it).
    # Assumes execute() has finished reading the file when it returns.
    with open(sys.argv[1]) as matr:
        matmr.execute(matr,mapper,reducer)