コード例 #1
0
def train_vw(filename, output):

    with open(filename, 'r') as f:
        xx = f.readlines()
        y = [int(x[0]) - 1 for x in xx]

    y = np.asarray(y)
    xx = np.asarray(xx)
    cv = StratifiedKFold(y, 4)

    os.popen('rm -f /home/rodion/facebids/tmp/*')

    with open(output, 'w') as fw:
        fw.write('bidder_id,proba\n')
        for a, b in cv:
            y_a, y_b = y[a], y[b]
            xx_a, xx_b = xx[a], xx[b]

            vw = VW('/home/' + os.getenv('USER') + '/bin/vw', '/home/rodion/facebids/tmp/',
                    {'loss_function': 'hinge', 'passes': '10', 'holdout_off': False,
                     'learning_rate': '0.9961190941783473', 'b': '24'})

            vw.fit(xx_a, y_a)
            tags, probas = vw.predict_tag_proba(xx_b)
            for tag, proba in itertools.izip(tags, probas):
                fw.write(tag + ',' + str(proba[1]) + '\n')
コード例 #2
0
ファイル: classify.py プロジェクト: rodion-zheludkov/kaggle
def classify_test_vw(train_filename, test_filename, proba_filename):
    vw = VW('/home/' + os.getenv('USER') + '/bin/vw', '/home/rodion/facebids/tmp/',
            {'loss_function': 'hinge', 'passes': '10', 'holdout_off': False,
             'learning_rate': '0.9961190941783473', 'b': '24'})

    with open(train_filename, 'r') as f:
        xx = f.readlines()
        y = [int(x[0]) - 1 for x in xx]
        vw.fit(xx, y)

    with open(test_filename, 'r') as f, open(proba_filename, 'w') as fw:
        xx = f.readlines()
        tags, probas = vw.predict_tag_proba(xx)

        fw.write('bidder_id,proba\n')
        for tag, proba in itertools.izip(tags, probas):
            fw.write(tag + ',' + str(proba[1]) + '\n')