def main():
    """Run the task plan through the map, shuffle and reduce stages.

    The plan returned by get_scheme_actions() is converted into jobs, every
    job is passed to mapper(), the mapped results are grouped by suffler(),
    and base_reducer() is called once per group.  Progress messages are
    printed along the way; nothing is returned and the reducer's return
    value is discarded.

    NOTE(review): another ``main`` is defined further down in this file; if
    both live in the same module the later definition replaces this one.
    """
    # Single-argument, parenthesized prints produce the same output under
    # the Python 2 print statement this file uses.
    print('Get task plan.')
    plan = get_scheme_actions()

    print('Split task to job.')
    job_list = plan_to_jobs_convertor(plan)

    # Map stage: one mapper() call per job, results kept in job order.
    print('Begin Map stage. Wait please...')
    mapped = [mapper(job) for job in job_list]

    # Shuffle stage: suffler() returns a container that is iterated by key
    # and indexed with that key below.
    print('Begin Suffle stage. Wait please...')
    grouped = suffler(mapped)

    # Reduce stage: the reducer is invoked for each group in turn.
    for group_key in grouped:
        base_reducer(grouped[group_key])
def _mean_sentence_length(node):
    """Return ``node[0][1][1] / node[0][1][0]`` as a float.

    Presumably ``node[0][1]`` is a ``(sentence_count, word_count)`` pair, so
    the quotient is the average number of words per sentence -- TODO confirm
    against the mapper output.

    Raises:
        ZeroDivisionError: if ``node[0][1][0]`` is zero.
    """
    stats = node[0][1]
    # Multiplying by 1.0 forces true division under Python 2.
    return stats[1] * 1.0 / stats[0]


def _ranked_terms(node_index):
    """Return ``[{N: key}, ...]`` for every key of *node_index*, highest N first.

    ``N`` is read from ``node_index[key]['N']``.  The sort is stable, so keys
    with equal ``N`` keep the iteration order of *node_index*.
    """
    ranked = sorted(
        ((node_index[key]['N'], key) for key in node_index),
        key=lambda pair: pair[0],
        reverse=True,
    )
    return [{count: key} for count, key in ranked]


def main():
    """Crawl documents, run map/shuffle/reduce and write the index as JSON.

    Steps, in order:
      1. ``docs_spider.get_docs()`` supplies the job list (the crawler output
         is used directly as the task plan).
      2. Every job is shown with ``printer`` and processed with ``mapper``.
      3. ``suffler`` groups the mapped results; the result is iterated by key.
      4. For each group the mean sentence length is printed, the group is
         reduced with ``base_reducer`` and ``result[0][0]`` of the reducer
         output is turned into a list of ``{N: key}`` dicts sorted by N,
         descending.
      5. The per-group lists are serialized with ``json.dumps`` and handed to
         ``write_result_file`` for ``indexes/first_index.json``.
      6. The collected mean sentence lengths are printed, one per line, each
         followed by `` ,``.

    Returns nothing.  All prints use a single argument in parentheses, which
    yields the same output under the Python 2 print statement.
    """
    # Launch the crawler.
    print('')
    print('Run crawler')

    print('')
    print('Get task plan.')
    jobs = docs_spider.get_docs()

    print('')
    print('Jobs')
    # Explicit loop: printer() is called purely for its side effect, so no
    # throwaway result list is built.
    for job in jobs:
        printer(job)

    print('')
    print('Begin Map stage. Wait please...')
    map_stage_results = [mapper(job) for job in jobs]

    # Shuffle stage.
    print('')
    print('Begin Suffle stage. Wait please...')
    suffle_stage_results = suffler(map_stage_results)

    # Reduce stage.
    print('')
    print('Begin reduce stage...')
    result_reduce = {}
    sentence_means = []
    for at in suffle_stage_results:
        one_node = suffle_stage_results[at]

        print(at)
        print('%s %s %s' % (
            ' Средняя длина предложения (Оценка Sent_Mean):',
            _mean_sentence_length(one_node),
            'слов',
        ))

        result = base_reducer(one_node)

        # Evaluated again after the reduce call, exactly where the original
        # code did it, in case base_reducer updates the node in place.
        # NOTE(review): if it does not, the value printed above can be reused.
        sentence_means.append(_mean_sentence_length(one_node))

        result_reduce[at] = _ranked_terms(result[0][0])

    # Result of the MapReduce run.  json.dumps turns non-string dict keys
    # (such as numeric N values) into strings.
    json_result = json.dumps(result_reduce)
    fname = 'indexes/first_index.json'
    write_result_file([json_result], fname)

    for mean in sentence_means:
        print('%s ,' % mean)