validation_queries.save('data/MQ2007/Fold1/validation') test_queries.save('data/MQ2007/Fold1/test') # ... because loading them will be then faster. training_queries = Queries.load('data/MQ2007/Fold1/training') validation_queries = Queries.load('data/MQ2007/Fold1/validation') test_queries = Queries.load('data/MQ2007/Fold1/test') logging.info('=' * 80) # Set this to True in order to remove queries containing all documents # of the same relevance score -- these are useless for LambdaMART. remove_useless_queries = False # Find constant query-document features. cfs = find_constant_features( [training_queries, validation_queries, test_queries]) # Get rid of constant features and (possibly) remove useless queries. training_queries.adjust(remove_features=cfs, purge=remove_useless_queries) validation_queries.adjust(remove_features=cfs, purge=remove_useless_queries) test_queries.adjust(remove_features=cfs) # Print basic info about query datasets. logging.info('Train queries: %s' % training_queries) logging.info('Valid queries: %s' % validation_queries) logging.info('Test queries: %s' % test_queries) logging.info('=' * 80) param_grid = { 'metric': ['nDCG@10'],
test_queries.save('data/MQ2007/Fold1/test') # ... because loading them will be then faster. training_queries = Queries.load('data/MQ2007/Fold1/training') validation_queries = Queries.load('data/MQ2007/Fold1/validation') test_queries = Queries.load('data/MQ2007/Fold1/test') logging.info('=' * 80) # Set this to True in order to remove queries containing all documents # of the same relevance score -- these are useless for LambdaMART. remove_useless_queries = False # Find constant query-document features. cfs = find_constant_features([training_queries, validation_queries, test_queries]) # Get rid of constant features and (possibly) remove useless queries. training_queries.adjust(remove_features=cfs, purge=remove_useless_queries) validation_queries.adjust(remove_features=cfs, purge=remove_useless_queries) test_queries.adjust(remove_features=cfs) # Print basic info about query datasets. logging.info('Train queries: %s' % training_queries) logging.info('Valid queries: %s' % validation_queries) logging.info('Test queries: %s' % test_queries) logging.info('=' * 80) param_grid = {'metric': ['NDCG@10'],