Exemple #1
0
import sys
sys.path.append('./')
import time_stamp

# Date windows as 'YYYYMMDD' strings; window i spans start_date[i] to
# end_date[i], both ends inclusive.  Consecutive windows overlap, so one
# record can fall into several windows and be emitted several times.
# NOTE(review): the last window (20150526-20150623) breaks the 14-day stride
# of the others (20150507-20150604 would continue it) -- confirm intentional.
start_date = [
    '20150101', '20150115', '20150129', '20150212', '20150226', '20150312',
    '20150326', '20150409', '20150423', '20150526',
]
end_date = [
    '20150129', '20150212', '20150226', '20150312', '20150326', '20150409',
    '20150423', '20150507', '20150521', '20150623',
]

# Window bounds converted by time_stamp.get_seconds so they can be compared
# with the integer timestamp carried by each record.  The lists are sized
# from the date lists instead of a hard-coded 10.
start_time = [time_stamp.get_seconds(day) for day in start_date]
end_time = [time_stamp.get_seconds(day) for day in end_date]

# Read comma-separated records from stdin and, for every window containing
# the record's timestamp, emit
#     <window index>#<user>#<browser>\t<stripped input line>
for line in sys.stdin:
    record = line.strip()
    fields = record.split(',')
    user = fields[0]
    browser = fields[4]
    event_time = int(fields[7])
    for i, (window_start, window_end) in enumerate(zip(start_time, end_time)):
        if window_start <= event_time <= window_end:
            # Single parenthesised argument: prints the same on Python 2 and 3.
            print(str(i) + '#' + user + '#' + browser + '\t' + record)
Exemple #2
0
def _load_feature_table(path):
    """Read a feature file into a dict.

    For each line (stripped of surrounding whitespace) the key is the text
    from the third character up to the first space, and the value is
    everything after that space.
    NOTE(review): the first two characters of every line are dropped --
    presumably a fixed two-character prefix; confirm against the file format.

    Raises ValueError if a line contains no space (same as the inline loops
    this helper replaces).
    """
    table = {}
    # 'with' closes the file handle; the previous bare open() never did.
    with open(path) as handle:
        for raw in handle:
            raw = raw.strip()
            sep = raw.index(' ')
            table[raw[2:sep]] = raw[sep + 1:]
    return table


caid_feature = _load_feature_table('feature_caid_9')

browser_feature = _load_feature_table('feature_browser_9')

# Time bounds taken from the first three command-line arguments and
# converted with time_stamp.get_seconds.
# NOTE(review): end_time is not used in the visible part of this script.
begin_time = time_stamp.get_seconds(sys.argv[1])
end_time = time_stamp.get_seconds(sys.argv[2])
test_end_time = time_stamp.get_seconds(sys.argv[3])
# Per-user tracking state.
# NOTE(review): last_user and user_list are never touched in the visible
# loop body -- presumably used by code that follows; this fragment looks
# truncated.
last_user = ''
cur_user = ''
user_list = []

# Each stdin line is "<key>\t<comma-separated record>".
for line in sys.stdin:
    line = line.strip()
    arr = line.split('\t')
    cur_user = arr[0]
    temp = arr[1]
    # arr is rebound here: from the tab-split parts to the record's fields.
    arr = temp.split(',')
    time = int(arr[7])
    # Skip records whose timestamp lies outside [begin_time, test_end_time].
    if time < begin_time or time > test_end_time:
        continue
Exemple #3
0
# Five-slot integer counters, each its own list, all starting at zero.
# NOTE(review): what the five slots represent is not visible in this
# fragment -- confirm against the code that fills them.
exposure = [0, 0, 0, 0, 0]
click_week = [0, 0, 0, 0, 0]
exposure_week = [0, 0, 0, 0, 0]
click_5days = [0, 0, 0, 0, 0]
exposure_5days = [0, 0, 0, 0, 0]

# Both timestamps start at the same large placeholder value.
# NOTE(review): presumably replaced once real event times are seen; the
# updating code is not shown here.
large_click_time = large_exposure_time = 999999999

# Empty containers filled in while records are processed.
click_user_dict = dict()
click_week_set = set()
exposure_week_set = set()

for line in sys.stdin:
    arr = line.strip().split('\t')
    cur_spid = arr[0]
    end_time = time_stamp.get_seconds(end_date[int(cur_spid[0])])
    arr = arr[1].split(',')
    user = arr[0]
    time = time_stamp.get_date(arr[7])
    day_diff = time_stamp.get_date_diff(arr[7], end_time)
    week = time_stamp.get_week(arr[7])
    if cur_spid != last_spid and last_spid != '':
        #print the last_spid
        spid_str = last_spid

        all_exposure_num = all_click_num + all_exposure_num
        spid_str = spid_str + ' 67:' + str(all_exposure_num)
        if all_click_num > 0:
            spid_str = spid_str + ' 68:' + str(
                all_click_num) + ' 69:' + my_format.string_cf(
                    all_click_num, all_exposure_num)