Code example #1
File: WPIType.py  Project: caesar0301/wpi-svm
import sys
import argparse

sys.path.append('..')
import lib.logbasic as logbasic
from lib.myGraph import *
from lib.myWeb import WebPage, WebObject
from lib.utilities import Logger

parser = argparse.ArgumentParser(description='Page reconstruction from weblog using type-based approach.')
parser.add_argument('logfile', type=str, help='log file containing the request/response pair')
args = parser.parse_args()
input_file = args.logfile
detected_pageurl = input_file+'.page.tmp'

###### logging
this_log = './log/'+sys.argv[0].replace('.', '_')+'.log'
log_h = Logger(this_log)
print 'log file: %s' % this_log

print 'Reading log...'
all_lines = logbasic.read(input_file)

print 'Processing rrp...'
all_nodes = []
for line in all_lines:
	all_nodes.append(logbasic.NodeFromLog(line))
all_nodes.sort(key=lambda x: x.start_time)

all_pages = []
last_page = None
for node in all_nodes:
	if node.is_root():
Code example #2
import sys
import argparse

sys.path.append('..')
import lib.logbasic as logbasic
from lib.utilities import Logger

parser = argparse.ArgumentParser(
    description='Page reconstruction from weblog using time-based approach.')
parser.add_argument('logfile',
                    type=str,
                    help='log file containing the request/response pair')
args = parser.parse_args()
input_file = args.logfile
detected_pageurl = input_file + '.page.tmp'

print 'detected pages: %s' % detected_pageurl

###### logging
this_log = './log/' + sys.argv[0].replace('.', '_') + '.log'
log_h = Logger(this_log)
print 'log file: %s' % this_log

print 'Reading log...'
all_lines = logbasic.read(input_file)

print 'Processing rrp...'
all_nodes = []
for line in all_lines:
    all_nodes.append(logbasic.NodeFromLog(line))
all_nodes.sort(key=lambda x: x.start_time)

print len(all_nodes)

T = [i / 10.0 for i in range(2, 202, 2)]
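
# Illustrative sketch only -- not taken from the original snippet. Given the
# candidate thresholds in T above, a time-based reconstruction typically starts
# a new page whenever the idle gap between consecutive (time-ordered) requests
# exceeds the threshold t. Assumes start_time is a numeric timestamp in
# seconds; the function name below is hypothetical.
def split_by_time_gap(nodes, t):
    pages = []
    current = []
    for node in nodes:
        if current and (node.start_time - current[-1].start_time) > t:
            # idle gap longer than t: close the current page
            pages.append(current)
            current = []
        current.append(node)
    if current:
        pages.append(current)
    return pages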
Code example #3
import sys
import argparse

sys.path.append('..')
import lib.logbasic as logbasic
from lib.myGraph import *
from lib.myWeb import WebPage, WebObject
from lib.utilities import Logger

parser = argparse.ArgumentParser(
    description='Page reconstruction from weblog using type-based approach.')
parser.add_argument('logfile',
                    type=str,
                    help='log file containing the request/response pair')
args = parser.parse_args()
input_file = args.logfile
detected_pageurl = input_file + '.page.tmp'

###### logging
this_log = './log/' + sys.argv[0].replace('.', '_') + '.log'
log_h = Logger(this_log)
print 'log file: %s' % this_log

print 'Reading log...'
all_lines = logbasic.read(input_file)

print 'Processing rrp...'
all_nodes = []
for line in all_lines:
    all_nodes.append(logbasic.NodeFromLog(line))
all_nodes.sort(key=lambda x: x.start_time)

all_pages = []
last_page = None
for node in all_nodes:
    if node.is_root():
Code example #4
File: WPITimeType.py  Project: caesar0301/wpi-svm
import sys
import argparse

sys.path.append('..')
import lib.logbasic as logbasic
from lib.myGraph import *
from lib.myWeb import WebPage, WebObject
from lib.utilities import Logger

parser = argparse.ArgumentParser(description='Page reconstruction from weblog using time-based approach.')
parser.add_argument('logfile', type=str, help='log file containing the request/response pair')
args = parser.parse_args()
input_file = args.logfile
detected_pageurl = input_file+'.page.tmp'

###### logging
this_log = './log/'+sys.argv[0].replace('.', '_')+'.log'
log_h = Logger(this_log)
print 'log file: %s' % this_log

###### read HTTP log
print 'Reading log...'
all_lines = logbasic.read(input_file)

print 'Processing rrp...'
all_nodes = []
for line in all_lines:
	all_nodes.append(logbasic.NodeFromLog(line))
all_nodes.sort(key=lambda x: x.start_time)

print len(all_nodes)

T = [i/10.0 for i in range(2, 200, 2)]
Code example #5
# coding: utf-8
# This program extracts features as input of LIBSVM from the log file of web-logger:
# 	git@github.com:caesar0301/web-logger.git
# Author: chenxm, 2012-05-21
import json, re, sys, os
import hashlib, argparse

from lib.myWeb import PageFeature
import lib.logbasic as basic
import lib.svm as svm
from lib.utilities import Logger

###### logger
this_log = './log/'+sys.argv[0].replace('.', '_')+'.log'
print 'Log file: %s' % this_log
log_h = Logger(this_log)

					
def process_log(logfile):
	###### preprocess log
	print 'Processing HTTP logs...'
	all_lines = basic.read(logfile)
	all_nodes = []
	for line in all_lines:
		all_nodes.append(basic.NodeFromLog(line))
	all_nodes.sort(key=lambda x: x.start_time)
	return all_nodes
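
# Illustrative usage sketch (not part of the original file): the helper above
# can be exercised on its own, assuming the log matches the web-logger format
# that lib.logbasic.read() expects, e.g.
#
# 	nodes = process_log('http_requests.log')
# 	print '%d request/response nodes parsed' % len(nodes)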

def gen_instances(all_nodes, valid_urls):
	global log_h