コード例 #1
0
def prepare_user_sessions(source_file, data_dir):
    """Split a compressed request log into per-user/host operation streams.

    Reads ``source_file`` (opened with ``lzma.open`` in text mode) line by
    line, splits each line on ``'|'`` and, for GET/PUT/DEL/RENAME requests,
    writes one stringified ``Operation`` to the pipe keyed by
    ``"<user_id>_<host_id>"``.

    Lines are skipped when they:
      * have too few columns (``len(elems) <= EXECUTION_TIME``),
      * have a non-numeric start time (these are also reported on stdout),
      * are GET/PUT requests with a non-numeric file size,
      * carry a request verb other than GET/PUT/DEL/RENAME.

    A non-numeric execution time is not an error; it is recorded as 0.

    Args:
        source_file: Path of the LZMA/XZ-compressed log to read.
        data_dir: Directory handed to ``Pipes``; presumably where the
            ``*.user_session.csv`` outputs are created (confirm against
            the ``Pipes`` implementation).
    """
    # Request verb -> (one-letter operation type, needs a numeric FILE_SIZE).
    request_kinds = {
        'GET': ('g', True),
        'PUT': ('p', True),
        'DEL': ('d', False),
        'RENAME': ('r', False),
    }

    pipes = Pipes(data_dir, suffix=".user_session.csv")

    # Close the pipes even if reading or parsing fails part-way through, so
    # already-opened output files are not leaked.
    try:
        with lzma.open(source_file, 'rt') as sf:
            t = time.time()
            for plines, line in enumerate(sf, start=1):
                # Periodic progress report: peak memory and throughput since
                # the previous report.
                if plines % MONITOR_LINES == 0:
                    # NOTE(review): ru_maxrss is in kilobytes on Linux, which
                    # makes this value MB; other platforms use other units.
                    print ("processed lines: %d mem: %rMB, lines/s: %r" %
                     (plines,
                      float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1024,
                      int(MONITOR_LINES / (time.time() - t))
                     )
                    )
                    t = time.time()

                elems = line.split('|')

                # Too few columns to contain every field read below.
                if len(elems) <= EXECUTION_TIME:
                    continue

                if not elems[START_TIME].isdigit():
                    print ("bad line: %s" % line)
                    continue

                ts = int(elems[START_TIME])
                user_id = elems[USER_ID]
                host_id = elems[HOST_ID]

                # Missing/garbled execution times fall back to 0.
                execution_time = 0
                if elems[EXECUTION_TIME].isdigit():
                    execution_time = int(elems[EXECUTION_TIME])

                kind = request_kinds.get(elems[REQUEST])
                if kind is None:
                    # Not one of the request types we track.
                    continue
                optype, needs_size = kind

                if needs_size and not elems[FILE_SIZE].isdigit():
                    continue

                path = elems[PARAMS].strip()

                op = Operation()
                op.ts = ts
                op.optype = optype
                # Objects and their parent directories are identified by the
                # MD5 hex digest of the path rather than the path itself.
                op.obj_id = get_md5(path, hexdigest=True)
                op.parent_dir_id = get_md5(os.path.dirname(path), hexdigest=True)
                if needs_size:
                    # DEL/RENAME keep whatever default size Operation() has.
                    op.size = int(elems[FILE_SIZE])
                op.execution_time = execution_time
                pipes.write_to(user_id + "_" + host_id, str(op))
    finally:
        pipes.close()
コード例 #2
0
    ]

    # Remove the old result file first, because out_pipe only appends.
    if os.path.exists(os.path.join(results_dir, target_file_name)):
        os.remove(os.path.join(results_dir, target_file_name))

    out_pipe = Pipes(results_dir)

    csv_header = ";".join([
        "user_id", "from_ts", "till_ts", "session_lifetime", "get_requests",
        "reget_requests", "put_requests", "get_bytes", "put_bytes",
        "rename_requests", "del_requests", "get_dirs", "put_dirs",
        "put_files_per_dir", "get_files_per_dir", "window_seconds"
    ])

    out_pipe.write_to(target_file_name, csv_header)

    cnt = 0
    for sf in users_session_files:
        cnt += 1
        print("working on %d/%d" % (cnt, len(users_session_files)))
        analyze_user_session(sf, out_pipe, target_file_name)

        # if cnt >=20:
        #     break

    out_pipe.close()

    print("wrote results to %s: " %
          (os.path.join(results_dir, target_file_name)))