'[%(pathname)s:%(lineno)d]: %(message)s') ch.setFormatter(formatter) fh.setFormatter(formatter) # add the handlers to logger logger.addHandler(ch) logger.addHandler(fh) logger.info("running %s" % ' '.join(sys.argv)) #Load Reuters-21578 dataset if options.path is None: logger.error("Path to Reuters-21578 dataset not set.") sys.exit(1) #Load feature models and extractor #feature_extractor = LdaBowFeatureExtractor(prefix = options.prefix) #feature_extractor = LdaFeatureExtractor(prefix = options.prefix) feature_extractor = cEsaFeatureExtractor(prefix = options.path + '/models/cesa_on_reuters/wiki') #Load database, training and test data db = FileDatabase.load(options.path + '/reuters.db') split = R8Split(db) training_data = train_data(feature_extractor, split) test_data = test_data(feature_extractor, split) # store training and test data np.save(options.path + '/training_data_cesa.npy', training_data) np.save(options.path + '/test_data_cesa.npy', test_data) logger.info("done.")
# Script fragment: validate the configuration, connect to the database, build
# the feature extractor, and collect the article ids that were ranked for /
# read by a single user.

# Identity comparison is the correct test for None.
if config_ is None:
    logger.error("No config. Exit.")
    sys.exit(1)

# Connect to mongo database
connect(config_['database']['db-name'],
        username=config_['database']['user'],
        password=config_['database']['passwd'],
        port=config_['database']['port'])

# Load feature extractor
# Alternative extractors that were tried previously:
#feature_extractor = EsaFeatureExtractor(prefix = config_['prefix'])
#feature_extractor = TfidfFeatureExtractor(prefix = config_['prefix'])
#feature_extractor = LdaFeatureExtractor(prefix = config_['prefix'])
#feature_extractor = LdaBowFeatureExtractor(prefix = config_['prefix'])
feature_extractor = cEsaFeatureExtractor(prefix=config_['prefix'])

# get user
user = User.objects(email=u"*****@*****.**").first()
# NOTE(review): presumably a mongoengine query, where first() yields None when
# nothing matches -- fail with a clear message instead of an AttributeError
# on `user.id` below.
if user is None:
    logger.error("User not found. Exit.")
    sys.exit(1)

# Ids of the articles referenced by this user's RankedArticle entries
# (generator: consumed exactly once by the Article query below).
ranked_article_ids = (
    a.article.id
    for a in RankedArticle.objects(user_id=user.id).only("article")
)

# Ids of the Article documents whose id is among the ranked ids.
all_article_ids = Set(
    a.id
    for a in Article.objects(id__in=ranked_article_ids).only("id")
)

# Ids of the articles referenced by this user's ReadArticleFeedback entries.
read_article_ids = Set(
    a.article.id
    for a in ReadArticleFeedback.objects(user_id=user.id).only("article")
)