Example #1
import threading


def main():
    """Main function: interacts with the other classes, passing them the
       required files and instantiating the MapReduce classes."""

    num_cores = 4
    files = getArgs()

    file_manager = FileManager(files)
    lines_files = file_manager.split_in_lines()

    num_lines = len(lines_files)
    # Integer chunk size per core; the remainder goes to the last thread.
    partialPart = num_lines // num_cores
    difference = num_lines - (partialPart * num_cores)

    mapper = Mapper("")
    threads = []
    for i in range(partialPart, (num_lines - partialPart) + 1, partialPart):
        # One mapper thread per chunk of partialPart lines.
        t = threading.Thread(target=mapper.mapping,
                             args=(lines_files[i - partialPart:i],))
        threads.append(t)
        t.start()

    # The last thread takes the final chunk plus the remainder.
    t = threading.Thread(target=mapper.mapping,
                         args=(lines_files[num_lines - (partialPart + difference):num_lines],))
    threads.append(t)
    t.start()

    # Wait for every mapper thread before shuffling.
    for t in threads:
        t.join()

    shuffleDict = mapper.shuffle(mapper.wordsMap)

    reducer = Reducer()

    result = reducer.reduce(shuffleDict)

    dirToTxt(result)
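The helper classes this snippet relies on (getArgs, FileManager, Mapper, Reducer, dirToTxt) are not shown. A minimal word-count style sketch of the interfaces it assumes, with class and method names taken from the calls above and everything else a guess rather than the project's code:

import collections
import sys


def getArgs():
    # Input files taken from the command line.
    return sys.argv[1:]


class FileManager:
    def __init__(self, files):
        self.files = files

    def split_in_lines(self):
        # All lines of all input files as one flat list.
        lines = []
        for path in self.files:
            with open(path) as f:
                lines.extend(f.readlines())
        return lines


class Mapper:
    def __init__(self, text=""):
        self.text = text
        self.wordsMap = []

    def mapping(self, lines):
        # Emit a (word, 1) pair for every word in the chunk.
        for line in lines:
            self.wordsMap.extend((word, 1) for word in line.split())

    def shuffle(self, pairs):
        # Group the emitted counts by word.
        grouped = collections.defaultdict(list)
        for word, count in pairs:
            grouped[word].append(count)
        return grouped


class Reducer:
    def reduce(self, grouped):
        # Sum the counts per word.
        return {word: sum(counts) for word, counts in grouped.items()}


def dirToTxt(result):
    # Write the reduced counts to a text file.
    with open("result.txt", "w") as f:
        for word, count in sorted(result.items()):
            f.write("%s %d\n" % (word, count))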
Example #3
    def test_reduce(self):
        # TODO: Remove BOM for csv file to display MSISDN
        data_frame = pd.DataFrame(self.test_dict)
        test_reducer = Reducer.Reducer(data_frame)
        generated_reduced_data_frame = test_reducer.reduce()

        reduced_dict = [{
            'Date': datetime.datetime(2016, 3, 1),
            'Network': 'Network 1',
            'Product': 'Loan Product 1',
            'Aggregate': 1000,
            'Count': 1
        }, {
            'Date': datetime.datetime(2016, 3, 1),
            'Network': 'Network 2',
            'Product': 'Loan Product 1',
            'Aggregate': 1122,
            'Count': 1
        }, {
            'Date': datetime.datetime(2016, 3, 1),
            'Network': 'Network 3',
            'Product': 'Loan Product 2',
            'Aggregate': 2084,
            'Count': 1
        }]

        reduced_data_frame = pd.DataFrame(reduced_dict)
        grouped_by_month = reduced_data_frame.Date.dt.to_period("M")
        group_by_tuple = reduced_data_frame.groupby(
            [grouped_by_month, 'Network', 'Product'])
        expected_reduced_data_frame = group_by_tuple.sum()

        assert generated_reduced_data_frame.equals(expected_reduced_data_frame)
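The test never shows Reducer.reduce() itself; a minimal sketch of an implementation in the spirit of the grouping the test builds could look like the class below. The column names come from the test data, numeric_only=True is added only so that recent pandas versions do not try to sum the Date column, and none of this is the project's actual Reducer.

import pandas as pd


class Reducer:
    """Hypothetical reducer: aggregates loans per month, network and product."""

    def __init__(self, data_frame):
        self.data_frame = data_frame

    def reduce(self):
        # Group by calendar month, network and product, then sum the
        # numeric columns (Aggregate and Count).
        month = self.data_frame.Date.dt.to_period("M")
        groups = self.data_frame.groupby([month, 'Network', 'Product'])
        return groups.sum(numeric_only=True)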
Example #4
    def calculate_aggregate_loans(self):
        # Mapper object
        aggregator_map = Mapper.Mapper(self.input_path)
        data = aggregator_map.create_data_frame()

        # Reducer object
        reducer = Reducer.Reducer(data)
        reducer.reduce()
        reducer.print_to_csv(self.output_path)
Example #5
def main():
    cates_feature = collect_cates()
    all_features = loadFeatures()
    rocchioIndex = loadIndex()
    totalentroy = loadEntroy()
    myutil.makedirectory('cache/subkNNs/')
    while True:
        test_entry = (yield)
        vectorOfme = createVector(test_entry, all_features)
        entroy_of_me = {
            k: v
            for k, v in totalentroy.items() if k in vectorOfme
        }
        # Entries whose feature vector is empty cannot be classified.
        if isEmptyVector(vectorOfme):
            yield (test_entry.url, -1)
            continue
        first_candidate = choose_candidate_cate(vectorOfme, rocchioIndex,
                                                entroy_of_me)
        if not first_candidate:
            yield (test_entry.url, -1)
            continue
        candidate_cates, cates_tfidf = further_choose_candidate_cate(
            test_entry, cates_feature, first_candidate, all_features)

        threads = []
        for cate in candidate_cates:
            t = threading.Thread(target=exec_Maper,
                                 args=(
                                     cate,
                                     vectorOfme,
                                     entroy_of_me,
                                 ))
            threads.append(t)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        result = Reducer.main(tfidf=cates_tfidf, cateweight=candidate_cates)
        test_entry.thinkbe = result

        yield (test_entry.url, test_entry.thinkbe)
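main() here is a pre-asyncio coroutine: it receives one test entry through the bare (yield) and hands the classification back through the trailing yield. A sketch of how a caller might drive it, where test_entries is an assumed iterable of entry objects rather than anything defined in the snippet:

classifier = main()
next(classifier)                         # prime: run up to the first (yield)
for entry in test_entries:               # assumed iterable of test entries
    url, label = classifier.send(entry)  # push one entry in, get (url, label) out
    next(classifier)                     # re-arm the coroutine for the next entry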
Example #6
output = Utils.run(info["failing_js"], info["reduced_test"])
if output == -1:
    exit("Reduced testcase gives an error when run.")
output2 = Utils.run(info["js"], info["reduced_test"])
if output2 == -1:
    exit("Reduced testcase gives an error when run.")
if not present(output, output2):
    exit("Bug isn't present anymore in reduced testcase")

# Reduce
f = open(info["reduced_test"], 'r')
lines = len(f.readlines())
f.close()

script = Script(info)
reducer = Reducer(script)
#reducer.start()

#reducer.line(0, lines)

import random
while 1:
    length = random.randint(0, 100) + 1
    line = random.randint(0, len(reducer.script.lines) - 10)
    print(line, line + length)
    lines = reducer.script.lines
    reducer.line(line, line + length)


print(lines, end=' ')
f = open(info["reduced_test"], 'r')
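The Reducer class driven above is not shown; the loop simply keeps asking reducer.line() to drop random line ranges from the failing test case. A self-contained sketch of the same idea, with the "does the bug still reproduce?" check abstracted into a still_fails callback (the callback and the function name are assumptions, not the project's API):

import random


def reduce_lines(lines, still_fails, attempts=1000):
    """Randomised reduction: try dropping a random slice of lines and keep the
    smaller test case whenever the failure still reproduces."""
    for _ in range(attempts):
        if len(lines) <= 1:
            break
        start = random.randrange(len(lines))
        end = min(len(lines), start + random.randint(1, 100))
        candidate = lines[:start] + lines[end:]
        if still_fails(candidate):  # caller re-runs the js engine here
            lines = candidate
    return lines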
Example #7
                print("Each row requires " + str(columns) + " arguments. Try again!")
    
    return matrix

if __name__ == '__main__':
    matrix = takeInput()

    print()
    
    print('Given matrix:')
    for row in matrix:
        print(row)

    print()
    
    rd = ReducerModule.Reducer(matrix)
    print('Echelon form:')
    matrix = rd.EchelonForm()
    for row in matrix:
        print(["{0:0.3f}".format(element) for element in row], end='')
        print('\t', end='')
        print([str(Fraction(element).limit_denominator()) for element in row])

    print()

    print('Reduced Echelon form:')
    matrix = rd.ReducedEchelonForm()
    for row in matrix:
        print(["{0:0.3f}".format(element) for element in row], end='')
        print('\t', end='')
        print([str(Fraction(element).limit_denominator()) for element in row])
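ReducerModule is not shown here; a minimal, library-free sketch of forward elimination over a list-of-lists matrix, purely to illustrate what EchelonForm() computes (this is not the module's implementation):

def echelon_form(matrix):
    m = [row[:] for row in matrix]  # work on a copy
    rows, cols = len(m), len(m[0])
    pivot_row = 0
    for col in range(cols):
        if pivot_row >= rows:
            break
        # Find a row with a non-zero entry in this column to act as the pivot.
        pivot = next((r for r in range(pivot_row, rows) if m[r][col] != 0), None)
        if pivot is None:
            continue
        m[pivot_row], m[pivot] = m[pivot], m[pivot_row]
        # Eliminate the entries below the pivot.
        for r in range(pivot_row + 1, rows):
            factor = m[r][col] / m[pivot_row][col]
            m[r] = [a - factor * b for a, b in zip(m[r], m[pivot_row])]
        pivot_row += 1
    return m

A ReducedEchelonForm() would additionally scale each pivot to 1 and eliminate the entries above it.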
Example #8
if not Utils.verify(output, info["verification"]):
    exit("Reduced testcase doesn't trigger bug on buggy js parser.")

output = Utils.run(info["js"], info["reduced_test"])
if output == -1:
    exit("Reduced testcase gives an error when run.")
if Utils.verify(output, info["verification"]):
    exit("Reduced testcase triggers bug on normal js parser")

# Reduce
f = open(info["reduced_test"], 'r')
lines = len(f.readlines())
f.close()

script = Script(info)
reducer = Reducer(script)
#reducer.start()

import random
while 1:
    length = random.randint(0, 5) + 1
    line = random.randint(0, len(reducer.script.lines) - 5)
    print(line, line + length)
    lines = reducer.script.lines
    reducer.line1(line, line + length)
"""

for i in range(100, 0, -1):
  #for j in range(lines/i):
  for j in range(lines/i, 0, -1):
      print j*i, (j+1)*i
Example #9
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 23 19:04:52 2021

@author: Aravindh
"""

import pandas
import Reducer as red

MyList = []


def Mapper():
    ds = pandas.read_csv("Project.csv")
    for line in ds.index:
        price = (ds['Price'][line]).replace(",", "")
        country = ds['Country'][line]
        product = ds['Product'][line]
        MyList.append(list((country, product, int(price))))


Mapper()
red.Reducer(MyList)
red.printDictionary()
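The Reducer module is imported but never shown; a minimal sketch of what red.Reducer and red.printDictionary might do with the (country, product, price) records produced above, where the aggregation key and the module-level dictionary are assumptions:

# Hypothetical Reducer module: sums prices per (country, product) pair.
totals = {}


def Reducer(mapped_list):
    for country, product, price in mapped_list:
        key = (country, product)
        totals[key] = totals.get(key, 0) + price


def printDictionary():
    for (country, product), total in sorted(totals.items()):
        print(country, product, total)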
Example #11
"""
Created on Sat Jan 23 18:40:48 2021

@author: Aravindh
"""
import Reducer as red

MyDictionary = {}
#Item list and all prices
MyList = []
#Item list and final price
itemList = []


def Mapper():
    # Open the price list file
    file = open("pricelist.txt", "r")
    for line in file:
        data = line.strip().split("\t")

        if len(data) != 6:
            continue
        # Store the columns from the text file into variables
        date, time, store, item, cost, payment = data
        # Add the item and its price to the list
        MyList.append(list((item, float(cost))))
    file.close()


Mapper()
red.Reducer(MyList)