Exemple #1
0
    config = configparser.ConfigParser()
    config.read('settings.cfg')
    esUrl = config['DEFAULT']['ESHost']
    if len(argv) > 1:
        esUrl = argv[1]

    es = Elasticsearch(esUrl, timeout=1000)

    # Load features into Elasticsearch
    initDefaultStore(esUrl)
    loadFeatures(esUrl)
    # Parse the judgments
    movieJudgments = judgmentsByQid(
        judgmentsFromFile(filename=HUMAN_JUDGMENTS))
    movieJudgments = duplicateJudgmentsByWeight(movieJudgments)
    trainJudgments, testJudgments = partitionJudgments(movieJudgments,
                                                       testProportion=0.0)

    # Use proposed Elasticsearch queries (1.json.jinja ... N.json.jinja) to generate a training set
    # output as "sample_judgments_wfeatures.txt"
    logFeatures(es, judgmentsByQid=movieJudgments)

    buildFeaturesJudgmentsFile(trainJudgments, filename=TRAIN_JUDGMENTS)
    buildFeaturesJudgmentsFile(testJudgments, filename=TEST_JUDGMENTS)

    # Train each ranklib model type
    for modelType in [8, 9, 6]:
        # 0, MART
        # 1, RankNet
        # 2, RankBoost
Exemple #2
0
        if draw <= testProportion:
            testJudgments[qid] = judgment
        else:
            trainJudgments[qid] = judgment

    return (trainJudgments, testJudgments)


if __name__ == "__main__":
    from elasticsearch import Elasticsearch
    from judgments import judgmentsFromFile, judgmentsByQid, duplicateJudgmentsByWeight
    esUrl = "http://ec2-54-234-184-186.compute-1.amazonaws.com:9616/supersecretsquirrel/"
    es = Elasticsearch(esUrl, timeout=1000)
    # Parse the judgments
    judgments = judgmentsByQid(judgmentsFromFile(filename='osc_judgments.txt'))
    judgments = duplicateJudgmentsByWeight(judgments)
    trainJudgments, testJudgments = partitionJudgments(judgments,
                                                       testProportion=0.00)
    # Use proposed Elasticsearch queries (1.json.jinja ... N.json.jinja) to generate a training set,
    # output below as "osc_judgments_wfeatures_train.txt" and "osc_judgments_wfeatures_test.txt"
    kwDocFeatures(es, index='o19s', searchType='post', judgements=judgments)
    numFeatures = len(judgments[1][0].features)
    print("Training on %s features" % numFeatures)
    buildFeaturesJudgmentsFile(trainJudgments,
                               filename='osc_judgments_wfeatures_train.txt')
    buildFeaturesJudgmentsFile(testJudgments,
                               filename='osc_judgments_wfeatures_test.txt')
    # Train each ranklib model type
    for modelType in [0, 6, 9]:
        # 0, MART
        # 1, RankNet