Ejemplo n.º 1
0
def parse_chunk(file_name, start, end):
    """Parse one chunk of a log file and report who parsed it and how long it took.

    The chunk is whatever ``seek_open(file_name, start, end)`` yields; each
    item it produces is fed to a fresh ``LogMapper`` via ``parse_line``.

    Returns a 3-tuple ``(mapper, pid, elapsed)`` where ``mapper`` is the
    populated ``LogMapper``, ``pid`` is ``os.getpid()`` of the process that
    did the work, and ``elapsed`` is the wall-clock duration in seconds.
    """
    # time.time() instead of time.clock(): clock() was removed in Python 3.8
    # and meant CPU time on Unix but wall time on Windows, so the reported
    # duration was not comparable across platforms.
    start_time = time.time()

    f = seek_open(file_name, start, end)

    mapper = LogMapper()
    for line in f:
        mapper.parse_line(line)

    # NOTE(review): f is never closed here; whether it needs closing depends
    # on what seek_open returns -- confirm against its definition.
    return (mapper, os.getpid(), time.time() - start_time)
Ejemplo n.º 2
0
def parse_chunk(file_name, temp_dir, start, end):
    """Parse one chunk of a log file and pickle the resulting mapper to disk.

    The chunk is whatever ``seek_open(file_name, start, end)`` yields; each
    item is fed to a fresh ``LogMapper`` via ``parse_line``. The populated
    mapper is pickled to ``<temp_dir>/<start>``.

    Returns a 3-tuple ``(out_file_name, pid, elapsed)``: the path of the
    pickle file, ``os.getpid()`` of the process that did the work, and the
    wall-clock duration in seconds.
    """
    start_time = time.time()

    f = seek_open(file_name, start, end)

    mapper = LogMapper()
    for line in f:
        mapper.parse_line(line)

    out_file_name = "%s/%d" % (temp_dir, start)
    # Close the output file before returning so the pickle is fully flushed
    # by the time a caller opens out_file_name.
    with open(out_file_name, 'wb') as out_file:
        pickle.dump(mapper, out_file)

    return (out_file_name, os.getpid(), time.time() - start_time)
Ejemplo n.º 3
0
def parse_chunk(file_name, temp_dir, start, end, job_id):
    """Parse one chunk of a log file and pickle each counter to its own file.

    The chunk is whatever ``seek_open(file_name, start, end)`` yields; each
    item is fed to a fresh ``LogMapper`` via ``parse_line``. Every entry of
    ``mapper.get_counters()`` is then pickled to
    ``<temp_dir>/map_<job_id>_<index>``, where ``index`` is the position of
    the counter's name in ``count_names``.

    ``count_names`` is not defined in this function; it is presumably a
    module-level sequence of counter names -- verify against the rest of
    the file.

    Returns ``job_id`` unchanged.
    """
    # Retained from the original although unused in the result, in case
    # time.time() is being monkeypatched/observed; cheap either way.
    start_time = time.time()

    f = seek_open(file_name, start, end)

    mapper = LogMapper()
    for line in f:
        mapper.parse_line(line)

    # items() works on both Python 2 and 3 dicts; iteritems() is 2-only.
    for (count_name, counter) in mapper.get_counters().items():
        out_file_name = "%s/map_%d_%d" % (
            temp_dir, job_id, count_names.index(count_name))
        # Close each file explicitly so the data is flushed before the
        # job id is reported back as finished.
        with open(out_file_name, 'wb') as out_file:
            pickle.dump(counter, out_file)

    return job_id
Ejemplo n.º 4
0
def parse_file(data_file):
    """Feed every line of ``data_file`` to a new ``LogMapper`` and return its counters.

    Returns whatever ``mapper.get_counters()`` produces after all lines
    have been passed to ``parse_line``.
    """
    mapper = LogMapper()
    # Use a context manager so the input file is closed even if
    # parse_line raises part-way through.
    with open(data_file) as log_file:
        for line in log_file:
            mapper.parse_line(line)
    return mapper.get_counters()