def main():
    """Entry point: wire the MapReduce pipeline together.

    Reads the input files named on the command line, splits them into
    per-core chunks of lines, maps each chunk on its own thread, then
    shuffles and reduces the mapped words and writes the result out.
    """
    num_cores = 4
    files = getArgs()
    file_manager = FileManager(files)
    lines_files = file_manager.split_in_lines()
    num_lines = len(lines_files)
    # '//' keeps the chunk size an int; '/' yields a float under Python 3
    # and range() rejects float arguments.
    partialPart = num_lines // num_cores
    difference = num_lines - (partialPart * num_cores)
    mapper = Mapper("")
    threads = []
    # BUG FIX: the original passed mapper.mapping(...) *already called* as
    # the Thread's positional argument, so all mapping ran synchronously on
    # the main thread. Pass the callable via target= and the chunk via args=.
    if partialPart > 0:  # guard: range() raises on a zero step
        for i in range(partialPart, (num_lines - partialPart) + 1, partialPart):
            t = threading.Thread(target=mapper.mapping,
                                 args=(lines_files[i - partialPart:i],))
            t.start()
            threads.append(t)
    # Last chunk also absorbs the remainder lines that did not divide evenly.
    t = threading.Thread(
        target=mapper.mapping,
        args=(lines_files[num_lines - (partialPart + difference):num_lines],))
    t.start()
    threads.append(t)
    # Wait for every mapper before shuffling; otherwise wordsMap may still
    # be incomplete when shuffle() reads it.
    for t in threads:
        t.join()
    shuffleDict = mapper.shuffle(mapper.wordsMap)
    reducer = Reducer()
    result = reducer.reduce(shuffleDict)
    dirToTxt(result)
def main():
    """Category-classification coroutine.

    Prime with next(); then alternate send(test_entry) with consuming the
    yielded (url, category) tuple. A category of -1 means "unclassified".
    """
    cates_feature=collect_cates()
    all_features=loadFeatures()
    rocchioIndex=loadIndex()
    totalentroy=loadEntroy()
    myutil.makedirectory('cache/subkNNs/')
    while True:
        # Receive the next entry pushed in by the caller via send().
        test_entry=(yield)
        vectorOfme=createVector(test_entry,all_features)
        # Restrict the global entropy table to features present in this vector.
        entroy_of_me={k:v for k,v in totalentroy.items() if k in vectorOfme}
        # NOTE(review): "not isEmptyVector(...)" reads as "vector is NOT
        # empty -> give up", which looks inverted — confirm the helper's
        # return convention before changing anything.
        if not isEmptyVector(vectorOfme):
            yield (test_entry.url,-1)
            continue
        first_candidate=choose_candidate_cate(vectorOfme,rocchioIndex,entroy_of_me)
        if not first_candidate:
            # No Rocchio candidate at all: report "unclassified".
            yield (test_entry.url,-1)
            continue
        candidate_cates,cates_tfidf=\
            further_choose_candidate_cate(test_entry,cates_feature,first_candidate,all_features)
        threads=[]
        # One mapper thread per surviving candidate category.
        for cate in candidate_cates:
            t=threading.Thread(target=exec_Maper,args=(cate,vectorOfme,entroy_of_me,))
            threads.append(t)
        for t in threads:t.start()
        for t in threads:t.join()
        # Fold the per-category mapper output into the final prediction.
        result=Reducer.main(tfidf=cates_tfidf,cateweight=candidate_cates)
        test_entry.thinkbe=result
        yield (test_entry.url,test_entry.thinkbe)
def test_reduce(self):
    """reduce() must aggregate loans by (month, network, product)."""
    # TODO: Remove BOM for csv file to display MSISDN
    input_frame = pd.DataFrame(self.test_dict)
    actual_frame = Reducer.Reducer(input_frame).reduce()

    # Hand-built expectation: one row per (network, product) pairing with
    # the summed aggregate amount and row count.
    expected_rows = [{
        'Date': datetime.datetime(2016, 3, 1),
        'Network': 'Network 1',
        'Product': 'Loan Product 1',
        'Aggregate': 1000,
        'Count': 1
    }, {
        'Date': datetime.datetime(2016, 3, 1),
        'Network': 'Network 2',
        'Product': 'Loan Product 1',
        'Aggregate': 1122,
        'Count': 1
    }, {
        'Date': datetime.datetime(2016, 3, 1),
        'Network': 'Network 3',
        'Product': 'Loan Product 2',
        'Aggregate': 2084,
        'Count': 1
    }]
    expected_source = pd.DataFrame(expected_rows)
    month_period = expected_source.Date.dt.to_period("M")
    expected_frame = expected_source.groupby(
        [month_period, 'Network', 'Product']).sum()
    assert actual_frame.equals(expected_frame)
def calculate_aggregate_loans(self):
    """Run the map/reduce pipeline and write the aggregates as CSV."""
    # Map: load the input file into a data frame.
    frame = Mapper.Mapper(self.input_path).create_data_frame()
    # Reduce: aggregate, then persist to the configured output path.
    loan_reducer = Reducer.Reducer(frame)
    loan_reducer.reduce()
    loan_reducer.print_to_csv(self.output_path)
def main():
    """Category-classification coroutine.

    Prime with next(); then alternate send(test_entry) with consuming the
    yielded (url, category) tuple. A category of -1 means "unclassified".
    """
    cates_feature = collect_cates()
    all_features = loadFeatures()
    rocchioIndex = loadIndex()
    totalentroy = loadEntroy()
    myutil.makedirectory('cache/subkNNs/')
    while True:
        # Receive the next entry pushed in by the caller via send().
        test_entry = (yield)
        vectorOfme = createVector(test_entry, all_features)
        # Restrict the global entropy table to features present in this vector.
        entroy_of_me = {
            k: v
            for k, v in totalentroy.items() if k in vectorOfme
        }
        # NOTE(review): "not isEmptyVector(...)" reads as "vector is NOT
        # empty -> give up", which looks inverted — confirm the helper's
        # return convention before changing anything.
        if not isEmptyVector(vectorOfme):
            yield (test_entry.url, -1)
            continue
        first_candidate = choose_candidate_cate(vectorOfme, rocchioIndex,
                                                entroy_of_me)
        if not first_candidate:
            # No Rocchio candidate at all: report "unclassified".
            yield (test_entry.url, -1)
            continue
        candidate_cates,cates_tfidf=\
            further_choose_candidate_cate(test_entry,cates_feature,first_candidate,all_features)
        threads = []
        # One mapper thread per surviving candidate category.
        for cate in candidate_cates:
            t = threading.Thread(target=exec_Maper, args=(
                cate,
                vectorOfme,
                entroy_of_me,
            ))
            threads.append(t)
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        # Fold the per-category mapper output into the final prediction.
        result = Reducer.main(tfidf=cates_tfidf, cateweight=candidate_cates)
        test_entry.thinkbe = result
        yield (test_entry.url, test_entry.thinkbe)
# Sanity checks (Python 2 script): run the reduced testcase through both JS
# engines; abort unless it still runs cleanly AND still reproduces the bug.
output = Utils.run(info["failing_js"], info["reduced_test"])
if output == -1:
    exit("Reduced testcase gives an error when run.")
output2 = Utils.run(info["js"], info["reduced_test"])
if output2 == -1:
    exit("Reduced testcase gives an error when run.")
if not present(output, output2):
    exit("Bug isn't present anymore in reduced testcase")

# Reduce
f = open(info["reduced_test"], 'r')
lines = len(f.readlines());
f.close()
script = Script(info)
reducer = Reducer(script)
#reducer.start()
#reducer.line(0, lines)
import random
# Randomized reduction: forever pick a random line range and ask the
# reducer to try removing it. NOTE(review): intentionally endless — the
# operator stops the script by hand.
while 1:
    length = random.randint(0, 100)+1
    line = random.randint(0, len(reducer.script.lines)-10)
    print line, line+length
    lines = reducer.script.lines
    reducer.line(line, line+length)
    print lines,
    # NOTE(review): view is truncated here; whether this re-open belongs
    # inside the loop is reconstructed — confirm against the full file.
    f = open(info["reduced_test"], 'r')
# NOTE(review): this fragment starts mid-function — the enclosing input
# routine (which builds and returns `matrix`) opens outside this view, so
# the leading indentation below is reconstructed.
        print("Each row requires " + str(columns) + " arguments. Try again!")
    return matrix


if __name__ == '__main__':
    # Read the matrix from the user and echo it back.
    matrix = takeInput()
    print()
    print('Given matrix:')
    for row in matrix:
        print(row)
    print()
    rd = ReducerModule.Reducer(matrix)
    # Echelon form: print each row twice — as 3-decimal floats and as
    # exact fractions via Fraction.limit_denominator().
    print('Echelon form:')
    matrix = rd.EchelonForm()
    for row in matrix:
        print(["{0:0.3f}".format(element) for element in row], end='')
        print('\t', end='')
        print([str(Fraction(element).limit_denominator()) for element in row])
    print()
    # Reduced echelon form, same dual decimal/fraction display.
    print('Reduced Echelon form:')
    matrix = rd.ReducedEchelonForm()
    for row in matrix:
        print(["{0:0.3f}".format(element) for element in row], end='')
        print('\t', end='')
        print([str(Fraction(element).limit_denominator()) for element in row])
# Sanity checks (Python 2 script): the reduced testcase must still trigger
# the bug on the buggy JS parser but NOT on the normal one.
if not Utils.verify(output, info["verification"]):
    exit("Reduced testcase doesn't trigger bug on buggy js parser.")
output = Utils.run(info["js"], info["reduced_test"])
if output == -1:
    exit("Reduced testcase gives an error when run.")
if Utils.verify(output, info["verification"]):
    exit("Reduced testcase triggers bug on normal js parser")

# Reduce
f = open(info["reduced_test"], 'r')
lines = len(f.readlines())
f.close()
script = Script(info)
reducer = Reducer(script)
#reducer.start()
import random
# Randomized reduction: forever try to remove a small random line range.
# NOTE(review): intentionally endless — the operator stops it by hand.
while 1:
    length = random.randint(0, 5) + 1
    line = random.randint(0, len(reducer.script.lines) - 5)
    print line, line + length
    lines = reducer.script.lines
    reducer.line1(line, line + length)

# The triple-quoted string below comments out an older deterministic
# reduction loop; its closing quotes lie beyond this view.
"""
for i in range(100, 0, -1):
    #for j in range(lines/i):
    for j in range(lines/i, 0, -1):
        print j*i, (j+1)*i
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 23 19:04:52 2021

@author: Aravindh
"""
import pandas
import Reducer as red

# One [country, product, price] record per CSV row, filled by Mapper().
MyList = []


def Mapper():
    """Map step: read Project.csv and collect [country, product, price] rows."""
    ds = pandas.read_csv("Project.csv")
    for row in ds.index:
        # Prices carry thousands separators, e.g. "1,200" -> 1200.
        cleaned_price = (ds['Price'][row]).replace(",", "")
        MyList.append(
            list((ds['Country'][row], ds['Product'][row], int(cleaned_price))))


Mapper()
red.Reducer(MyList)
red.printDictionary()
# Sanity checks (Python 2 script): the reduced testcase must still trigger
# the bug on the buggy JS parser but NOT on the normal one.
if not Utils.verify(output, info["verification"]):
    exit("Reduced testcase doesn't trigger bug on buggy js parser.")
output = Utils.run(info["js"], info["reduced_test"])
if output == -1:
    exit("Reduced testcase gives an error when run.")
if Utils.verify(output, info["verification"]):
    exit("Reduced testcase triggers bug on normal js parser")

# Reduce
f = open(info["reduced_test"], 'r')
lines = len(f.readlines());
f.close()
script = Script(info)
reducer = Reducer(script)
#reducer.start()
import random
# Randomized reduction: forever try to remove a small random line range.
# NOTE(review): intentionally endless — the operator stops it by hand.
while 1:
    length = random.randint(0, 5)+1
    line = random.randint(0, len(reducer.script.lines)-5)
    print line, line+length
    lines = reducer.script.lines
    reducer.line1(line, line+length)

# The triple-quoted string below comments out an older deterministic
# reduction loop; its closing quotes lie beyond this view.
"""
for i in range(100, 0, -1):
    #for j in range(lines/i):
    for j in range(lines/i, 0, -1):
"""
Created on Sat Jan 23 18:40:48 2021

@author: Aravindh
"""
import Reducer as red

MyDictionary = {}
#Item list and all prices
MyList = []
#Item list and final price
itemList = []


def Mapper():
    """Map step: parse pricelist.txt into (item, cost) pairs in MyList.

    Each line is expected to hold six tab-separated fields:
    date, time, store, item, cost, payment. Malformed lines are skipped.
    """
    # FIX: use a context manager so the file handle is always closed —
    # the original called open() and never closed it (resource leak).
    with open("pricelist.txt", "r") as file:
        for line in file:
            data = line.strip().split("\t")
            # Skip rows that do not have exactly six columns.
            if len(data) != 6:
                continue
            # Store columns from the text file into variables.
            date, time, store, item, cost, payment = data
            # Append the item together with its price as a float.
            MyList.append(list((item, float(cost))))


Mapper()
red.Reducer(MyList)