sys.path.append('..') import lib.logbasic as logbasic from lib.myGraph import * from lib.myWeb import WebPage, WebObject from lib.utilities import Logger parser = argparse.ArgumentParser(description='Page reconstruction from weblog using type-based approach.') parser.add_argument('logfile', type=str, help= 'log file containing the request/response pair') args = parser.parse_args() input_file = args.logfile detected_pageurl = input_file+'.page.tmp' ###### logging this_log = './log/'+sys.argv[0].replace('.', '_')+'.log' log_h = Logger(this_log) print 'log file: %s' % this_log print 'Reading log...' all_lines = logbasic.read(input_file) print 'Processing rrp...' all_nodes = [] for line in all_lines: all_nodes.append(logbasic.NodeFromLog(line)) all_nodes.sort(lambda x,y: cmp(x,y), lambda x: x.start_time, False) all_pages = [] last_page = None for node in all_nodes: if node.is_root():
from lib.utilities import Logger

# Command line: one positional argument naming the HTTP log to analyse.
parser = argparse.ArgumentParser(
    description='Page reconstruction from weblog using time-based approach.')
parser.add_argument('logfile', type=str,
                    help='log file containing the request/response pair')
args = parser.parse_args()
input_file = args.logfile
# Companion path: the input path with '.page.tmp' appended.
detected_pageurl = input_file + '.page.tmp'
print('detected pages: %s' % detected_pageurl)

###### logging
# Log name is built from the script name, with every '.' replaced by '_'.
this_log = './log/' + sys.argv[0].replace('.', '_') + '.log'
log_h = Logger(this_log)
print('log file: %s' % this_log)

print('Reading log...')
all_lines = logbasic.read(input_file)
print('Processing rrp...')
# One node per log line, ordered by ascending start_time.  A plain key
# function gives the same order as the former cmp/key/reverse=False call.
all_nodes = [logbasic.NodeFromLog(line) for line in all_lines]
all_nodes.sort(key=lambda node: node.start_time)
print(len(all_nodes))

# T = 0.2, 0.4, ..., 20.0 (100 values).
# NOTE(review): presumably the candidate time thresholds for the
# time-based approach -- confirm against the code that consumes T.
T = [i / 10.0 for i in range(2, 202, 2)]
from lib.myGraph import * from lib.myWeb import WebPage, WebObject from lib.utilities import Logger parser = argparse.ArgumentParser( description='Page reconstruction from weblog using type-based approach.') parser.add_argument('logfile', type=str, help='log file containing the request/response pair') args = parser.parse_args() input_file = args.logfile detected_pageurl = input_file + '.page.tmp' ###### logging this_log = './log/' + sys.argv[0].replace('.', '_') + '.log' log_h = Logger(this_log) print 'log file: %s' % this_log print 'Reading log...' all_lines = logbasic.read(input_file) print 'Processing rrp...' all_nodes = [] for line in all_lines: all_nodes.append(logbasic.NodeFromLog(line)) all_nodes.sort(lambda x, y: cmp(x, y), lambda x: x.start_time, False) all_pages = [] last_page = None for node in all_nodes: if node.is_root():
# Make the parent directory importable so the `lib` package resolves.
sys.path.append('..')

import lib.logbasic as logbasic
from lib.myGraph import *
from lib.myWeb import WebPage, WebObject
from lib.utilities import Logger

# Command line: one positional argument naming the HTTP log to analyse.
parser = argparse.ArgumentParser(
    description='Page reconstruction from weblog using time-based approach.')
parser.add_argument('logfile', type=str,
                    help='log file containing the request/response pair')
args = parser.parse_args()
input_file = args.logfile
# Companion path: the input path with '.page.tmp' appended.
detected_pageurl = input_file + '.page.tmp'

###### logging
# Log name is built from the script name, with every '.' replaced by '_'.
this_log = './log/' + sys.argv[0].replace('.', '_') + '.log'
log_h = Logger(this_log)
print('log file: %s' % this_log)

###### read HTTP log
print('Reading log...')
all_lines = logbasic.read(input_file)
print('Processing rrp...')
# One node per log line, ordered by ascending start_time.  A plain key
# function gives the same order as the former cmp/key/reverse=False call.
all_nodes = [logbasic.NodeFromLog(line) for line in all_lines]
all_nodes.sort(key=lambda node: node.start_time)
print(len(all_nodes))

# T = 0.2, 0.4, ..., 19.8 (99 values; the range stops before 200).
# NOTE(review): presumably the candidate time thresholds for the
# time-based approach; confirm whether 20.0 was meant to be included.
T = [i / 10.0 for i in range(2, 200, 2)]
# coding: utf-8
# This program extracts features as input of LIBSVM from log file of web-logger:
# [email protected]:caesar0301/web-logger.git
# Author: chenxm, 2012-05-21
import json
import re
import sys
import os
import hashlib
import argparse

from lib.myWeb import PageFeature
import lib.logbasic as basic
import lib.svm as svm
from lib.utilities import Logger

###### logger
# Log name is built from the script name, with every '.' replaced by '_'.
this_log = './log/' + sys.argv[0].replace('.', '_') + '.log'
print('Log file: %s' % this_log)
log_h = Logger(this_log)


def process_log(logfile):
    """Read an HTTP log and return its nodes ordered by start time.

    Every entry returned by ``basic.read(logfile)`` is turned into a node
    with ``basic.NodeFromLog``; the resulting list is sorted in ascending
    order of each node's ``start_time`` attribute.

    Args:
        logfile: value handed unchanged to ``basic.read``.

    Returns:
        The sorted list of nodes.
    """
    ###### preprocess log
    print('Processing HTTP logs...')
    all_lines = basic.read(logfile)
    all_nodes = [basic.NodeFromLog(line) for line in all_lines]
    # A plain key function gives the same order as the former
    # cmp/key/reverse=False call and is also accepted by Python 3.
    all_nodes.sort(key=lambda node: node.start_time)
    return all_nodes


def gen_instances(all_nodes, valid_urls):
    global log_h