# Column names of one tab-separated impression record, listed in column
# order: the position of a name in this list is the index of that value
# after splitting a record on '\t'.
IMPRESSION_FIELDS = [
    'bid_id',
    'timestamp',
    'type',
    'ipinyou_id',
    'user-agent',
    'ip',
    'region',
    'city',
    'ad_exchange',
    'domain',
    'url',
    'anonymous_url_id',
    'ad_slot_id',
    'ad_slot_width',
    'ad_slot_height',
    'ad_slot_visibility',
    'ad_slot_format',
    'ad_slot_floor_price',
    'creative_id',
    'bidding_price',
    'paying_price',
    'key_page_url',
]



# Column position of every impression field, computed once so the per-line
# loop below does dictionary lookups instead of repeating a linear
# IMPRESSION_FIELDS.index() scan for each feature of each record.
_FIELD_COLUMN = {name: column for column, name in enumerate(IMPRESSION_FIELDS)}

# Fields emitted unchanged as '<name>=<column value>', in output order.
_RAW_CATEGORICAL_FIELDS = (
    'ip',
    'region',
    'city',
    'ad_exchange',
    'domain',
    'url',
    'ad_slot_id',
    'ad_slot_visibility',
    'ad_slot_format',
    'creative_id',
    'key_page_url',
)

# Build the feature lists for every impression record yielded by `read`.
# `output`, `read` and `parseUserAgent` are defined outside this block.
# NOTE(review): nothing is written to `out` in the visible part of the loop;
# presumably the write follows later in the loop body -- confirm.
with open(output, 'w') as out:
    # `t` is unused in the visible code; it is kept in case later code in
    # the loop relies on the record counter.
    for t, line in enumerate(read):
        # NOTE(review): the record is split without stripping it first, so
        # if `read` yields newline-terminated lines the last column keeps
        # its line terminator -- confirm that this is intended.
        data = line.split('\t')

        categorical_features = [
            # Characters 8-9 of the timestamp column; presumably the hour of
            # a YYYYMMDDHH... stamp -- TODO confirm the timestamp layout.
            'hour={0}'.format(data[_FIELD_COLUMN['timestamp']][8:10]),
            '{0}={1}'.format('ipinyou_id', data[_FIELD_COLUMN['ipinyou_id']]),
            # The raw user-agent string is reduced by parseUserAgent after
            # surrounding spaces are removed.
            '{0}={1}'.format(
                'ua',
                parseUserAgent(data[_FIELD_COLUMN['user-agent']].strip(' '))),
        ]
        categorical_features.extend(
            '{0}={1}'.format(name, data[_FIELD_COLUMN[name]])
            for name in _RAW_CATEGORICAL_FIELDS
        )

        # Width and height are concatenated with no separator into a single
        # combined slot-size feature.
        conjunctive_features = [
            'ad_slot_size={0}{1}'.format(
                data[_FIELD_COLUMN['ad_slot_width']],
                data[_FIELD_COLUMN['ad_slot_height']]),
        ]
# Record the bid_id of every row of the click log in `clk_bid_id`.
# The log is opened through a context manager so its file handle is closed
# as soon as the scan finishes instead of being left to the garbage
# collector. `clk`, `clk_bid_id` and `getVal` are defined outside this block.
with open(clk) as clk_file:
    for line in clk_file:
        data = line.split('\t')
        clk_bid_id.add(getVal(data, 'bid_id'))

# Fields whose getVal() result is appended unchanged, in output order.
_CATEGORICAL_VALUE_FIELDS = (
    'region',
    'city',
    'ad_exchange',
    'domain',
    'ad_slot_id',
    'ad_slot_visibility',
    'ad_slot_format',
    'creative_id',
    'key_page_url',
)

# NOTE(review): `valid` is not closed anywhere in the visible code; make
# sure it is closed (or wrapped in a `with`) once all rows are written.
valid = open(validation, 'w')
valid.write('Id,Label\n')

# Walk the impression log and build the feature lists for each record.
# Both files are managed by the same `with`, so the impression log handle is
# closed together with the output file (it was previously never closed).
# `output`, `imp`, `getTime`, `getVal`, `parseUserAgent` and `clk_bid_id`
# are defined outside this block.
with open(output, 'w') as out, open(imp) as imp_file:
    # `t` is unused in the visible code; it is kept in case later code in
    # the loop relies on the record counter.
    for t, line in enumerate(imp_file):
        data = line.split('\t')

        categorical_features = [
            getTime(data),
            # The user-agent value is reduced by parseUserAgent after
            # surrounding spaces are removed.
            '{0}={1}'.format(
                'ua', parseUserAgent(getVal(data, 'user-agent').strip(' '))),
        ]
        categorical_features.extend(
            getVal(data, field) for field in _CATEGORICAL_VALUE_FIELDS
        )

        # Width and height results are concatenated with no separator.
        conjunctive_features = [
            '{0}{1}'.format(
                getVal(data, 'ad_slot_width'), getVal(data, 'ad_slot_height')),
        ]

        # An impression whose bid_id appears in the click log is a positive.
        # NOTE(review): `label` is only assigned on this branch in the
        # visible code; unless it is reset elsewhere, a 1 from an earlier
        # record carries over to later non-clicked records -- confirm.
        if getVal(data, 'bid_id') in clk_bid_id:
            label = 1
# Example #3
# 0
# Record the bid_id of every row of the click log in `clk_bid_id`.
# Opening the log in a `with` block closes its file handle when the scan
# ends; the bare open() used before leaked it. `clk`, `clk_bid_id` and
# `getVal` are defined outside this block.
with open(clk) as click_log:
    for line in click_log:
        data = line.split('\t')
        clk_bid_id.add(getVal(data, 'bid_id'))

# Fields whose getVal() result is appended unchanged, in output order.
_CATEGORICAL_VALUE_FIELDS = (
    'region',
    'city',
    'ad_exchange',
    'domain',
    'ad_slot_id',
    'ad_slot_visibility',
    'ad_slot_format',
    'creative_id',
    'key_page_url',
)

# NOTE(review): `valid` is not written to or closed anywhere in the visible
# code; make sure it is closed (or wrapped in a `with`) when finished.
valid = open(validation, 'w')

# Walk the impression log and build the feature lists for each record.
# The impression log is opened in the same `with` as the output file so its
# handle is closed when the block exits (it was previously never closed).
# `output`, `imp`, `getTime`, `getVal`, `parseUserAgent` and `clk_bid_id`
# are defined outside this block.
with open(output, 'w') as out, open(imp) as imp_file:
    # `t` is unused in the visible code; it is kept in case later code in
    # the loop relies on the record counter.
    for t, line in enumerate(imp_file):
        data = line.split('\t')

        categorical_features = [
            getTime(data),
            # The user-agent value is reduced by parseUserAgent after
            # surrounding spaces are removed.
            '{0}={1}'.format(
                'ua', parseUserAgent(getVal(data, 'user-agent').strip(' '))),
        ]
        categorical_features.extend(
            getVal(data, field) for field in _CATEGORICAL_VALUE_FIELDS
        )

        # Width and height results are concatenated with no separator.
        conjunctive_features = [
            '{0}{1}'.format(
                getVal(data, 'ad_slot_width'), getVal(data, 'ad_slot_height')),
        ]

        # An impression whose bid_id appears in the click log is a positive.
        # NOTE(review): `label` is only assigned on this branch in the
        # visible code; unless it is reset elsewhere, a 1 from an earlier
        # record carries over to later non-clicked records -- confirm.
        if getVal(data, 'bid_id') in clk_bid_id:
            label = 1