def mapreduce(file, maxx):
    """
    Using map-reduce, return the most common words from the whole file.

    return: the list of most common words
    """
    # Count how many lines are in the file
    count = 0
    for line in open(file):
        count += 1
    line_split = int(count / 4)

    # Read in the file once and build a list of line offsets
    # [0, 1st break point, 2nd break point, 3rd break point, end]
    line_offset = [0, line_split, 2 * line_split, 3 * line_split, count]

    # Multithreading to map and reduce the file chunks
    files_lst = ["mapper0.txt",
                 "mapper" + str(line_split) + ".txt",
                 "mapper" + str(2 * line_split) + ".txt",
                 "mapper" + str(3 * line_split) + ".txt"]
    cpus = multiprocessing.cpu_count()  # cpus = 4
    with ThreadPoolExecutor(max_workers=cpus) as executor:
        for i in range(len(line_offset) - 1):
            executor.submit(mapper, file, line_offset[i], line_offset[i + 1])
            time.sleep(1)
            executor.submit(reducer, [files_lst[i]])
            time.sleep(1)

    reducer(["reducer.txt"])
    mstCom_lst = mostCommon("reducer1.txt", maxx)
    return mstCom_lst
def test_end_turn():
    # Ending the turn as p2 advances the turn counter.
    old_state = deepcopy(DEFAULT_STATE)
    action = {'type': 'end_turn', 'player': 'p2'}
    old_state.update({'turn': 2})
    new_state = reducer(old_state, action)
    assert new_state['turn'] == 3

    # Ending the turn as p1 leaves the turn counter unchanged.
    old_state = deepcopy(DEFAULT_STATE)
    action = {'type': 'end_turn', 'player': 'p1'}
    old_state.update({'turn': 2})
    new_state = reducer(old_state, action)
    assert new_state['turn'] == 2
def test_reducer(self):
    ''' test data set 1 '''
    fn_red_input = os.path.join(self.data_dir, 'red_input1.txt')
    with open(fn_red_input, 'r') as f_red_input:
        fn_red_output = None
        with tempfile.NamedTemporaryFile(mode='w', suffix='.tmp', prefix='red',
                                         dir=self.data_dir, delete=False) as f_red_out:
            fn_red_output = f_red_out.name
            with StdioSwitcher(f_red_input, f_red_out):
                target.reducer()

    # check result
    f_red_out.close()
    fn_expected = os.path.join(self.data_dir, 'red_expected1.txt')
    with open(fn_expected, 'r') as f_exp, open(fn_red_output, 'r') as f_result:
        for lcnt, l_exp in enumerate(f_exp):
            l_result = f_result.readline()
            # check format of the line
            items_exp = l_exp.split('\t')
            items_result = l_result.split('\t')
            self.assertEqual(
                len(items_exp), len(items_result),
                'num items: at line {} in {}'.format(lcnt, fn_red_output))
            # check data type
            self.assertEqual(
                items_exp[0], items_result[0],
                'Item type: at line {} in {}'.format(lcnt, fn_red_output))
            # check # of prices
            prices_exp = [float(v) for v in items_exp[1].split(',')]
            prices_result = [float(v) for v in items_result[1].split(',')]
            self.assertEqual(
                len(prices_exp), len(prices_result),
                'num price at line {} in {}'.format(lcnt, fn_red_output))
            # compare prices
            for i, price in enumerate(prices_exp):
                self.assertAlmostEqual(
                    price, prices_result[i], 7,
                    'price at line {} in {}'.format(lcnt, fn_red_output))

    # delete output file
    if fn_red_output is not None:
        os.remove(fn_red_output)
def test_attack_creature():
    mudcrab_merchant = {
        'power': 5,
        'health': 1,
        'can_attack': True,
        'name': 'Mudcrab Merchant'
    }
    creeper = {'power': 1, 'health': 1, 'can_attack': False, 'name': 'Creeper'}
    old_state = deepcopy(DEFAULT_STATE)
    action = {
        'type': 'attack',
        'player': 'p1',
        'target': {
            'lane_index': 0
        },
        'lane': 'field_lane',
        'lane_index': 0
    }
    p1 = deepcopy(DEFAULT_PLAYER_STATE)
    p1.update({'field_lane': [mudcrab_merchant]})
    p2 = deepcopy(DEFAULT_PLAYER_STATE)
    p2.update({'field_lane': [creeper]})
    old_state.update({'p1': p1, 'p2': p2})
    new_state = reducer(old_state, action)
    assert new_state != old_state
    # Both creatures die in the exchange and move to their owners' discard piles.
    assert new_state['p1']['field_lane'] == []
    assert new_state['p2']['field_lane'] == []
    assert new_state['p1']['discard_pile'][0]['name'] == 'Mudcrab Merchant'
    assert new_state['p2']['discard_pile'][0]['name'] == 'Creeper'
def __init__(self, n, r, sample, fix, na, smart, saturate, ne, nrr, nerr, nisr, out, dump):
    self.n = n
    self.r = r
    self.sample = sample
    self.graph = GNR(n, r)
    self.fix = fix
    self.na = na
    self.smart = smart
    self.saturate = saturate
    self.ne = ne
    self.nrr = nrr
    self.nerr = nerr
    self.nisr = nisr
    self.out = out
    self.dump = dump

    # Preliminary error checking
    if self.graph.getGraph().edges() < self.na:
        raise Exception("Cannot remove more edges than the graph contains.")
    if n < (nrr + nisr):
        raise Exception("Cannot remove more vertices than the graph contains.")

    # Generate the CNF formula
    # r = reducer()
    # print >> sys.stderr, 'Creating the original CNF formula'
    # numVars, cnf = r.reduce(self.graph.getGraph())
    # self.write("reduced", numVars, cnf, 0)

    # Strip down to the induced subgraph using the structural/random
    # properties, as specified by the command line arguments
    print >>sys.stderr, "Stripping the graph"
    self.strip()

    print >>sys.stderr, "Filling out edges"
    # edgesAdded = self.fill()  # Let exceptions carry up to -main-

    # Saturate by default!
    if saturate:
        print >>sys.stderr, "Saturating with edges..."
        self.graph.saturateAvoidK4()

    r = reducer()
    print >>sys.stderr, "Reducing to 3-SAT"
    numVars, cnf = r.reduce(self.graph.getGraph())
    self.write("reduced", numVars, cnf, 0)

    if self.dump:
        print >>sys.stderr, "Dumping the modified graph"
        self.dumpGraph()

    # Assign values and propagate, if fix was set to true
    print >>sys.stderr, "Assigning the random variables"
    if self.fix:
        self.assignAndWrite(numVars, cnf)
def test_reducer(self):
    # Two AMBIEN records should be aggregated into one line:
    # a prescriber count of 2 and a total cost of 300.
    input = ['AMBIEN\tSmith James\t100', 'AMBIEN\tGarcia Maria\t200']
    expected = ['AMBIEN\t2\t300']
    result = []
    for output in reducer.reducer(input):
        result.append(output)
    print('result: ', result)
    self.assertEqual(expected, result)
def test_play_creature():
    old_state = deepcopy(DEFAULT_STATE)
    action = {
        'type': 'play_creature',
        'card_index': 0,
        'player': 'p1',
        'lane': 'shadow_lane'
    }
    p1 = deepcopy(DEFAULT_PLAYER_STATE)
    p1.update({'hand': [{'name': 'marauder', 'cost': 0}]})
    old_state.update({'queued_action': action, 'p1': p1})
    new_state = reducer(old_state, action)
    assert new_state['p1']['hand'] == []
    assert new_state['p1']['shadow_lane'] == [old_state['p1']['hand'][0]]
def bowhandler(data, path, namer, qua):
    new = time.time()
    perm = np.load(path + namer + data + '.npy')
    feat = open(path + namer + data + 'bow.json', 'w')
    newname = 0
    for i in perm:
        val = namerdict[str(i)]
        qua.seek(val)
        line = qua.readline()
        qares = qareg.search(line)
        qares = mapper(qares.group(1))
        qares = reducer(qares, 1)
        feat.write(str(qares) + '\n')
    end = time.time()
    print('%1s%5s took: %6.3fs' % (namer, data, end - new))
    feat.close()
def apply_modifications(self, pcmap=None):
    # Preliminary error checking
    if (self.graph.getGraph().edges() < self.na):
        raise Exception("Cannot remove more edges than the graph contains.")
    if (self.n < (self.nrr + self.nisr)):
        raise Exception("Cannot remove more vertices than the graph contains.")

    # Strip down to the induced subgraph using the structural/random
    # properties, as specified by the command line arguments
    print >> sys.stderr, 'Stripping the graph...'
    print >> sys.stderr, "Random vertices to remove: " + str(self.nrr)
    print >> sys.stderr, "Random edges to remove: " + str(self.nerr)
    print >> sys.stderr, "Maximal independent sets to remove: " + str(self.nisr)
    self.strip()
    print >> sys.stderr, 'Done.'

    # Saturate by default!
    if (self.saturate):
        print >> sys.stderr, "Saturating with edges..."
        self.graph.saturateAvoidK4()
        print >> sys.stderr, "Done."

    # Now reduce the graph to SAT
    r = reducer()
    print >> sys.stderr, "Reducing to 3-SAT..."
    numVars, edgeMap, cnf = r.reduce(self.graph.getGraph())
    print >> sys.stderr, "Done."

    # Output or not...
    print >> sys.stderr, "Writing the original CNF formula..."
    self.write("reduce", numVars, cnf, 0)
    print >> sys.stderr, "Done."
    print >> sys.stderr, "Writing reduced graph edge list..."
    self.dumpGraph()
    print >> sys.stderr, "Done."

    return numVars, edgeMap, cnf
def __init__(self, n, r, na, ne, pl, nrr, nisr, out):
    self.n = n
    self.r = r
    self.graph = GNR(n, r)
    self.na = na
    self.ne = ne
    self.pl = pl
    self.nrr = nrr
    self.nisr = nisr
    self.out = out

    # Strip down to the induced subgraph
    self.strip()

    # Generate the CNF formula
    r = reducer()
    print >> sys.stderr, 'Creating the CNF formula'
    numVars, cnf = r.reduce(self.graph.getGraph())

    # Assign values and propagate
    print >> sys.stderr, 'Assigning the random variables'
    self.assignValues(numVars, cnf)
def test_attack_face():
    old_state = deepcopy(DEFAULT_STATE)
    action = {
        'type': 'attack',
        'player': 'p1',
        'lane': 'field_lane',
        'lane_index': 0
    }
    p1 = deepcopy(DEFAULT_PLAYER_STATE)
    p1.update({
        'field_lane': [{
            'power': 5,
            'health': 1,
            'can_attack': True,
            'name': 'Mudcrab Merchant'
        }]
    })
    old_state.update({'p1': p1})
    new_state = reducer(old_state, action)
    assert new_state != old_state
    assert new_state['p1']['field_lane'][0]['can_attack'] == False
    assert new_state['p2']['health'] == 25
def __init__(self, stdscreen):
    self.screen = stdscreen
    curses.curs_set(0)
    self.path = os.getcwd()
    self.allowtarget = False
    self.targetname = None
    self.obj_criteria = (None, None, None)
    self.datareducer = reducer.reducer()
    self.filemenu = getpath.FileMenu(self.screen, "", 1, self.path)
    self.conf_menu = Menu([], self.screen, "", 2)
    self.conf_menu.items.insert(0, ("Yes", self.conf_menu.end))
    self.note = Menu([], self.screen, "", 3)

    self.objects = []
    self.objectmenu = Menu(self.objects, self.screen, "Object Selection Menu", 1)

    self.cal_items = [
        ('Select Bias Directory & Generate Master Bias', self.bias),
        ('Select Dark Directory & Generate Master Darks', self.dark),
        ('Select Flat Directory & Generate Master Flats', self.flat),
        ('Save Calibration Images', self.save)
    ]
    calibration = Menu(self.cal_items, self.screen, "Calibration Menu", 1)

    self.light_items = [
        ('Select Data Directory', self.lightdir),
        ('Select Target', self.selecttarget)
    ]
    light = Menu(self.light_items, self.screen, "Data Menu", 1)

    self.main_menu_items = [
        ('Set Up Calibration Images', calibration.display),
        ('Set Up Data Images', light.display),
        ('Run Reducer', self.run)
    ]
    main_menu = Menu(self.main_menu_items, self.screen, "Data Reducer Main Menu")
    main_menu.display()
def questhandler(permate):
    n = 0
    start = time.time()
    for fname in sorted(files):
        new = time.time()
        resques = {}
        # print(fname)
        qua = open('../Questions/' + fname)
        namer = namerlist[n]
        respath = path + namer + '/'
        if not namer == 'Apli':
            continue
        n += 1
        if not os.path.exists(path + namer):
            os.makedirs(path + namer)
        permate = 1
        queslen = 0
        i = 0
        p = 0
        global namerdict
        if permate:
            namerdict = {}
            asindict = {}
            qdict = {}
            for l in qua:
                qares = qareg.search(l)
                asires = asireg.search(l)
                if qares:
                    if p % 10000 == 0:
                        end = time.time()
                        # print(p, end-start)
                    qares = mapper(qares.group(1))
                    qares = reducer(qares, 1)
                    namerdict[str(i)] = qares
                    if asires.group(1) in asindict:
                        asin = asindict[asires.group(1)]
                        asin.append(i)
                        asindict[asires.group(1)] = asin
                    else:
                        asindict[asires.group(1)] = [i]
                    qdict[i] = asires.group(1)
                    i += 1
                    p += 1
                    # queslen += len(l)

            m = len(list(namerdict))
            d = round(m / 10)
            perm = np.random.permutation(m)
            devl = perm[:d]
            perm = perm[d:]
            test = perm[:d]
            perm = perm[d:]
            np.save(respath + namer + '_devl', devl)
            np.save(respath + namer + '_test', test)
            np.save(respath + namer + '_data', perm)
            end = time.time()
            print(namer + ' permutation took:', end - new)

            # The with blocks close the files, so no explicit close() is needed.
            with open(respath + namer + '_dict.json', 'w') as fp:
                json.dump(namerdict, fp)
            with open(respath + namer + '_asin.json', 'w') as fp:
                json.dump(asindict, fp)
            with open(respath + namer + '_qdict.json', 'w') as fp:
                json.dump(qdict, fp)
        else:
            with open(respath + namer + '_dict.json', 'r') as fp:
                namerdict = json.load(fp)
            # perm = np.load(respath+namer+'_data.npy')
            # print(len(list(namerdict)))

        # nplist = ['_data', '_devl', '_test']
        # for i in nplist:
        #     bowhandler(i, respath, namer, qua)
        # qua.close()

    end = time.time()
    print('Total time took: %6.3fs' % (end - start))
    return g


def writeCNF(g, fname, numVars, cnf):
    filename = str(fname) + ".pickle"
    pickle.dump(g, open(filename, 'w'))  # save the pickled version
    filename = str(fname) + ".out.txt"
    print >> sys.stderr, filename
    f = open(filename, 'wb')
    header, clauses = makeDimacsCNF(numVars, cnf)
    for c in clauses:
        for l in c:
            f.write(str(l) + " ")
        f.write("0 \n")


# Main entry point
if __name__ == "__main__":
    nv = int(sys.argv[1])
    npendants = int(sys.argv[2])
    saturate = int(sys.argv[3])
    iterations = int(sys.argv[4])

    r = reducer()

    # No error checking... use wisely
    for i in range(iterations):
        g = buildRandomGraph(nv, npendants, saturate)
        print >> sys.stderr, "Reducing to 3-SAT"
        numVars, cnf = r.reduce(g)
        print(cnf)
        writeCNF(g, "random_nv" + str(nv) + "_" + str(npendants) + "_" +
                 str(saturate) + "_" + str(i), numVars, cnf)
import os, sys
import subprocess

from mapper import mapper
from reducer import reducer

# subprocess = subprocess.Popen("sudo find /", shell=True, stdout=subprocess.PIPE)
# subprocess_return = subprocess.stdout.read()
# print(subprocess_return)

file_name = sys.argv[1]
f = open(file_name, "r")

combined_occurrences = {}
for line in f.readlines():
    occurrence = mapper(line)
    combined_occurrence = reducer(occurrence)
    combined_occurrences.update(combined_occurrence)

max_len = -1
for key, value in combined_occurrences.items():
    if value > max_len:
        max_len = value
        longest_path = key

print("Longest Path:", longest_path)
print("Length:", max_len)
def animationFrame(self):
    if model['renders'] == 0:
        reducer('ADD_GALOOMBA')

    for e in pygame.event.get():
        if e.type == pygame.QUIT:
            self.exit = True

        # Key Down
        elif e.type == pygame.KEYDOWN:
            if e.key == pygame.K_ESCAPE:
                self.exit = True
            elif e.key == pygame.K_LEFT:
                reducer('DECREASE_SCROLL_X')
                reducer('WALK_LEFT')
            elif e.key == pygame.K_RIGHT:
                reducer('INCREASE_SCROLL_X')
                reducer('WALK_RIGHT')
            elif e.key == pygame.K_SPACE:
                reducer('JUMP')

        # Key Up
        elif e.type == pygame.KEYUP:
            if e.key == pygame.K_LCTRL or e.key == pygame.K_RCTRL:
                reducer('SHOOT_FIREBALL')
            elif e.key == pygame.K_LEFT:
                reducer('BRAKE_LEFT')
            elif e.key == pygame.K_RIGHT:
                reducer('BRAKE_RIGHT')

        # Mouse Up
        elif e.type == pygame.MOUSEBUTTONUP:
            x, y = pygame.mouse.get_pos()
            reducer('ADD_TUBE', {'x': x, 'y': y})
reader = csv.DictReader(csvfile, fieldnames)
for row in reader:
    d[row['ISO']] = row

csvfile2 = open('data/MPI_subnational.csv', 'r')
fieldnames2 = ("ISO country code", "Country", "Sub-national region",
               "World region", "MPI National", "MPI Regional",
               "Headcount Ratio Regional", "Intensity of deprivation Regional")
next(csvfile2)  # skip the header row
reader2 = csv.DictReader(csvfile2, fieldnames2)
for row in reader2:
    try:
        d[row['ISO country code']]['subnational'].append(row)
    except KeyError:
        d[row['ISO country code']]['subnational'] = [row]

t = []
for k in d:
    t.extend(mapper.mapper(k, d[k]))

res = defaultdict(list)
for x in t:
    res[x[0]].append(x[1])

for x in res:
    for y in reducer.reducer(x, res[x]):
        print(y)
import sys
sys.path.append("..")

from common import format_key_value
from mapper import mapper
from shuffler import shuffler
from reducer import reducer
from map import _map
from reduce import _reduce

# Map
with open("corpus.txt", "r") as stream_in:
    with open("map.tmp", "w") as stream_map:
        mapper(stream_in,
               _map,
               lambda key, value: stream_map.write(format_key_value(key, value) + "\n"))

# Shuffle
with open("map.tmp", "r") as stream_map:
    with open("suffle.tmp", "w") as stream_shuffle:
        shuffler(stream_map,
                 lambda key, value: stream_shuffle.write(format_key_value(key, value) + "\n"))

# Reduce
with open("suffle.tmp", "r") as stream_shuffle:
    with open("reduce.tmp", "w") as stream_reduce:
        reducer(stream_shuffle,
                _reduce,
                lambda key, value: stream_reduce.write(format_key_value(key, value) + "\n"))
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
sys.path.append("..")

from reducer import reducer


def _reduce(_, values):
    count = 0
    for _ in values:
        count += 1
    return count


if __name__ == '__main__':
    reducer(sys.stdin, _reduce)
import mapper as m
import reducer as r

# lines = ["this is python class", "hello this is shubham learning python",
#          "hi python is on", "this is a python program for mapper and reducer"]

f1 = open("textfile.txt")
content = []
for w in f1.readlines():
    words = m.mapper(w, ' ')
    content = content + words
f1.close()

data = r.reducer(content)

fl = open(r'keyValueFile.csv', 'w')
for k, v in data.items():
    print("%s : %d" % (k, v))
    row = str(k) + ":" + str(v) + "\n"
    fl.write(row)
fl.close()
def combiner(stream, combine_function):
    """ Combiner. """
    reducer(stream, combine_function)
res = set(Match) & set(r)
extmatch = extreg.search(str(res))
if extmatch:
    s += catmatch.group(1) + extmatch.group(1) + ","
else:
    s += '"categories": "",'
if titmatch:
    s += titmatch.group() + ","
else:
    s += '"title": "",'
if desmatch:
    s += '"description": '
    # s += desmatch.group()
    feature = desmatch.group(1)
    feature = mapper(feature)
    feature = reducer(feature, 1)
    s += str(feature)
else:
    s += '"description": ""'

# f = open(extmatch.group(1) + '.json', 'a')
f.write(s + '\n')
# f.close

if p % 100000 == 0:
    end = time.time()
    print(p, end - start)
    # break
p += 1
descdict[asimatch.group(1)] = linelen
linelen += len(line)

with open('Desc_dict.json', 'w') as fp:
    json.dump(descdict, fp)
import os
from os import path

import extractwordsfile as ewf
import reducer as rd
import mapper as m
from csv import writer
import replaceCharacter as rc

files_path = path.join(os.getcwd(), 'filescollection')  # str(os.getcwd()) + '/filescollection'
# print(files_path)
files = [
    f for f in os.listdir(files_path) if path.isfile(path.join(files_path, f))
]

content = []
for fl in files:
    pt = path.join(files_path, fl)
    # rc.replaceChar(pt, "'", '"')
    content.extend(ewf.extractwordsfromfile(pt))

dic = rd.reducer(content)

fl = path.join(os.getcwd(), 'filescollection/wordscount.csv')
with open(fl, 'w') as file:
    csv_write = writer(file)
    csv_write.writerow(['WORDS', 'COUNT'])
    for key, value in dic.items():
        if len(key) > 0:
            csv_write.writerow([key, value])

print('FILE CREATED SUCCESSFULLY')