Exemple #1
0
def Myprediction(df, features, clf, name, item_category_list_unique):
    """Fit ``clf`` on ``df``, score the test file and write a submission.

    Args:
        df: Training frame; must contain every column in ``features`` plus
            an ``is_trade`` label column.
        features: Column names used both for fitting and for scoring.
        clf: Classifier exposing ``fit(X, y, eval_set=..., eval_metric=...,
            verbose=...)`` and ``predict_proba``.
        name: Base name of the output file, written to
            ``data/output/<name>.txt``.
        item_category_list_unique: Category strings; each one found in the
            test frame's ``item_category_list`` column is replaced by its
            position in this list.

    Returns:
        The fitted ``clf``.
    """
    testdf = pd.read_csv('data/test/round1_ijcai_18_test_a_20180301.txt',
                         sep=' ')
    # NOTE(review): 8 * 60 * 60 looks like a UTC+8 shift on a seconds-based
    # timestamp -- confirm against convert_time (defined elsewhere).
    testdf['context_timestamp'] += 8 * 60 * 60
    testdf = convert_time(testdf)

    # Assign the encoded column back instead of calling
    # ``testdf.item_category_list.replace(..., inplace=True)``: an in-place
    # method on a column accessed from the frame acts on an intermediate
    # object and does not update the frame under pandas Copy-on-Write.
    category_codes = list(np.arange(len(item_category_list_unique)))
    testdf['item_category_list'] = testdf['item_category_list'].replace(
        item_category_list_unique, category_codes)

    # Template file that fixes which instance ids appear in the output and
    # in what order.
    prediction_format = pd.read_csv('data/output/0203.txt', sep=' ')

    # The training data doubles as the evaluation set, so the logged
    # logloss is a training-fit measure, not a validation score.
    clf.fit(df[features],
            df.is_trade,
            eval_set=[(df[features], df.is_trade)],
            eval_metric='logloss',
            verbose=True)

    # Column 1 of predict_proba is kept as the predicted score.
    testdf['predicted_score'] = clf.predict_proba(testdf[features])[:, 1]
    scored = testdf[['instance_id', 'predicted_score']]
    print(scored)

    # Left-merge onto the template so every template instance id is kept,
    # even when the test frame has no score for it.
    prediction_file = pd.merge(prediction_format[['instance_id']],
                               scored,
                               on='instance_id',
                               how='left')
    prediction_file.to_csv('data/output/{}.txt'.format(name),
                           sep=' ',
                           index=False)
    return clf
Exemple #2
0
def _count_tokens(value):
    """Return how many ';'-separated tokens ``str(value)`` contains."""
    return len(str(value).split(';'))


def _nth_token(value, position):
    """Return token ``position`` of ``str(value)`` split on ';', else ''."""
    tokens = str(value).split(';')
    return tokens[position] if len(tokens) > position else ''


def _encode_tokens(data, encoder, column, positions):
    """Add ``column + str(p)`` holding the label-encoded p-th token."""
    for position in positions:
        # Bind ``position`` as a default so the lambda does not depend on
        # the loop variable's later values.
        tokens = data[column].map(
            lambda value, position=position: _nth_token(value, position))
        data[column + str(position)] = encoder.fit_transform(tokens)


def _flag_members(series, members):
    """Map each value to 1 if it is one of ``members``, otherwise 2."""
    return series.apply(lambda value: 1 if value in members else 2)


def base_process(data):
    """Add length, token, label-encoded and bucket features to ``data``.

    New columns are assigned onto ``data`` itself, so the caller's frame is
    modified in place up to the ``convert_time`` call; the frame returned by
    ``convert_time`` (defined elsewhere) receives the remaining columns and
    is returned.

    The bucket columns (``age0``, ``occupation0``, ``star0``,
    ``context_page0``) use explicit membership tests. The previous form
    ``x == a | x == b`` did not mean "a or b": ``|`` binds tighter than
    ``==``, so it was evaluated as the chained comparison
    ``x == (a | x) == b`` and most listed values were never matched.

    Args:
        data: Frame containing the item, user, context and shop columns
            referenced below.

    Returns:
        The processed frame.
    """
    lbl = preprocessing.LabelEncoder()
    print(
        '--------------------------------------------------------------item--------------------------------------------------------------'
    )
    data['len_item_category'] = data['item_category_list'].map(_count_tokens)
    data['len_item_property'] = data['item_property_list'].map(_count_tokens)
    # Token 0 of item_category_list is identical for every row, so only
    # tokens 1 and 2 are encoded.
    _encode_tokens(data, lbl, 'item_category_list', range(1, 3))
    _encode_tokens(data, lbl, 'item_property_list', range(10))
    for col in ['item_id', 'item_brand_id', 'item_city_id']:
        data[col] = lbl.fit_transform(data[col])
    print(
        '--------------------------------------------------------------user--------------------------------------------------------------'
    )
    data['user_id'] = lbl.fit_transform(data['user_id'])
    print('user 0,1 feature')
    data['gender0'] = _flag_members(data['user_gender_id'], (-1,))
    data['age0'] = _flag_members(data['user_age_level'],
                                 (1004, 1005, 1006, 1007))
    data['occupation0'] = _flag_members(data['user_occupation_id'],
                                        (-1, 2003))
    data['star0'] = _flag_members(data['user_star_level'], (-1, 3000, 3001))
    print(
        '--------------------------------------------------------------context--------------------------------------------------------------'
    )
    # convert_time stands in for an earlier inline realtime/day/hour
    # derivation; presumably it yields equivalent time columns -- confirm.
    data = convert_time(data)
    data['len_predict_category_property'] = data[
        'predict_category_property'].map(_count_tokens)
    _encode_tokens(data, lbl, 'predict_category_property', range(5))
    print('context 0,1 feature')
    data['context_page0'] = _flag_members(
        data['context_page_id'], (4001, 4002, 4003, 4004, 4007))
    print(
        '--------------------------------------------------------------shop--------------------------------------------------------------'
    )
    data['shop_id'] = lbl.fit_transform(data['shop_id'])
    # 0 marks delivery scores inside the closed interval [0.96, 0.98].
    data['shop_score_delivery0'] = data['shop_score_delivery'].apply(
        lambda score: 0 if 0.96 <= score <= 0.98 else 1)
    return data
Exemple #3
0
            verbose=True)
    predict['predicted_score'] = clf.predict_proba(predict[features])[:, 1]
    print(predict[['instance_id', 'predicted_score']])
    prediction_file = pd.merge(prediction_format[['instance_id']],
                               predict[['instance_id', 'predicted_score']],
                               on='instance_id',
                               how='left')
    prediction_file.to_csv('data/output/{}.txt'.format(name),
                           sep=' ',
                           index=None)
    return clf


# Load the space-separated round-1 training data.
df = pd.read_csv('data/train/round1_ijcai_18_train_20180301.txt', sep=' ')
# NOTE(review): 8 * 60 * 60 looks like a UTC+8 shift on a seconds-based
# timestamp -- confirm against convert_time (defined elsewhere).
df['context_timestamp'] += 8 * 60 * 60
df = convert_time(df)

# Encode every distinct category string as its index in the sorted list of
# unique values (np.unique returns its result sorted).
item_category_list_unique = list(np.unique(df['item_category_list']))
# Assign the result back rather than using ``inplace=True`` on
# ``df.item_category_list``: an in-place method on a column accessed from
# the frame does not update the frame under pandas Copy-on-Write.
df['item_category_list'] = df['item_category_list'].replace(
    item_category_list_unique,
    list(np.arange(len(item_category_list_unique))))

# Name of the target column.
label = 'is_trade'

start_features = [
    'item_category_list',
    'item_city_id',
    'item_price_level',
    'item_sales_level',
    'item_collected_level',
    'item_pv_level',
    'user_gender_id',
    'user_age_level',