コード例 #1
0
ファイル: convert_corpus.py プロジェクト: JOSMANC/nyan
                               '[%(pathname)s:%(lineno)d]: %(message)s')
 # Both handlers use the same formatter so their output lines look alike.
 ch.setFormatter(formatter)
 fh.setFormatter(formatter)
 # add the handlers to logger
 logger.addHandler(ch)
 logger.addHandler(fh)

 # Record the command line; arguments are passed separately so the logging
 # module performs the interpolation only if the record is emitted.
 logger.info("running %s", ' '.join(sys.argv))

 # Every path used below is derived from options.path, so it is mandatory.
 if options.path is None:
     logger.error("Path to Reuters-21578 dataset not set.")
     sys.exit(1)

 # Load feature models and extractor.
 # Alternative extractors, kept for quick switching:
 #feature_extractor = LdaBowFeatureExtractor(prefix = options.prefix)
 #feature_extractor = LdaFeatureExtractor(prefix = options.prefix)
 feature_extractor = cEsaFeatureExtractor(prefix = options.path + '/models/cesa_on_reuters/wiki')

 # Load database and build the split handed to train_data()/test_data().
 db = FileDatabase.load(options.path + '/reuters.db')
 split = R8Split(db)

 # Use distinct result names: assigning to `test_data` would rebind the
 # helper function of the same name (and fail with UnboundLocalError if this
 # code ever runs inside a function body).
 training_set = train_data(feature_extractor, split)
 test_set = test_data(feature_extractor, split)

 # Store training and test data next to the dataset.
 np.save(options.path + '/training_data_cesa.npy', training_set)
 np.save(options.path + '/test_data_cesa.npy', test_set)

 logger.info("done.")
コード例 #2
0
ファイル: evaluate.py プロジェクト: JOSMANC/nyan
 # Without a configuration nothing below can run; identity comparison is the
 # correct test for None.
 if config_ is None:
     logger.error("No config. Exit.")
     sys.exit(1)

 # Connect to mongo database using the credentials from the config.
 connect(config_['database']['db-name'],
         username= config_['database']['user'],
         password= config_['database']['passwd'],
         port = config_['database']['port'])

 # Load feature extractor.
 # Alternative extractors, kept for quick switching:
 #feature_extractor = EsaFeatureExtractor(prefix = config_['prefix'])
 #feature_extractor = TfidfFeatureExtractor(prefix = config_['prefix'])
 #feature_extractor = LdaFeatureExtractor(prefix = config_['prefix'])
 #feature_extractor = LdaBowFeatureExtractor(prefix = config_['prefix'])
 feature_extractor = cEsaFeatureExtractor(prefix = config_['prefix'])

 # Get the user whose articles are evaluated.
 user = User.objects(email=u"*****@*****.**").first()
 # NOTE(review): assumes .first() yields None when no document matches
 # (MongoEngine-style query set) -- fail with a clear message instead of an
 # AttributeError on `user.id` below.
 if user is None:
     logger.error("User not found. Exit.")
     sys.exit(1)

 # Lazy generator of the ids of all articles ranked for this user; it can be
 # consumed only once (here: by the id__in filter below).
 ranked_article_ids = (a.article.id
                       for a
                       in RankedArticle.objects(user_id = user.id).only("article"))
 # Ids of the ranked articles that are present in the Article collection.
 all_article_ids = Set(a.id
                       for a
                       in Article.objects(id__in = ranked_article_ids).only("id"))

 # Ids of the articles for which this user has read-feedback entries.
 read_article_ids = Set(a.article.id
                        for a
                        in ReadArticleFeedback.objects(user_id = user.id).only("article"))