config = configparser.ConfigParser() config.read('settings.cfg') esUrl = config['DEFAULT']['ESHost'] if len(argv) > 1: esUrl = argv[1] es = Elasticsearch(esUrl, timeout=1000) # Load features into Elasticsearch initDefaultStore(esUrl) loadFeatures(esUrl) # Parse a judgments movieJudgments = judgmentsByQid( judgmentsFromFile(filename=HUMAN_JUDGMENTS)) movieJudgments = duplicateJudgmentsByWeight(movieJudgments) trainJudgments, testJudgments = partitionJudgments(movieJudgments, testProportion=0.0) # Use proposed Elasticsearch queries (1.json.jinja ... N.json.jinja) to generate a training set # output as "sample_judgments_wfeatures.txt" logFeatures(es, judgmentsByQid=movieJudgments) buildFeaturesJudgmentsFile(trainJudgments, filename=TRAIN_JUDGMENTS) buildFeaturesJudgmentsFile(testJudgments, filename=TEST_JUDGMENTS) # Train each ranklib model type for modelType in [8, 9, 6]: # 0, MART # 1, RankNet # 2, RankBoost
# NOTE(review): this line holds the end of a judgment-partitioning routine (its
# start is not visible here) followed by the script entry point. The routine
# stores `judgment` under `qid` in testJudgments when `draw <= testProportion`,
# otherwise in trainJudgments, and returns both mappings as a tuple.
# NOTE(review): the entry point connects to a hard-coded Elasticsearch URL;
# consider moving it to configuration or a command-line argument.
# NOTE(review): `judgments[1][0].features` assumes key 1 exists and has at least
# one entry — confirm against the contents of 'osc_judgments.txt'.
# NOTE(review): testProportion=0.00 presumably leaves the test split empty
# unless `draw` can be <= 0 — confirm how `draw` is produced.
if draw <= testProportion: testJudgments[qid] = judgment else: trainJudgments[qid] = judgment return (trainJudgments, testJudgments) if __name__ == "__main__": from elasticsearch import Elasticsearch from judgments import judgmentsFromFile, judgmentsByQid, duplicateJudgmentsByWeight esUrl = "http://ec2-54-234-184-186.compute-1.amazonaws.com:9616/supersecretsquirrel/" es = Elasticsearch(esUrl, timeout=1000) # Parse a judgments judgments = judgmentsByQid(judgmentsFromFile(filename='osc_judgments.txt')) judgments = duplicateJudgmentsByWeight(judgments) trainJudgments, testJudgments = partitionJudgments(judgments, testProportion=0.00) # Use proposed Elasticsearch queries (1.json.jinja ... N.json.jinja) to generate a training set # output as "osc_judgments_wfeatures.txt" kwDocFeatures(es, index='o19s', searchType='post', judgements=judgments) numFeatures = len(judgments[1][0].features) print("Training on %s features" % numFeatures) buildFeaturesJudgmentsFile(trainJudgments, filename='osc_judgments_wfeatures_train.txt') buildFeaturesJudgmentsFile(testJudgments, filename='osc_judgments_wfeatures_test.txt') # Train each ranklib model type for modelType in [0, 6, 9]: # 0, MART # 1, RankNet