class ArticleRankerTest(unittest.TestCase):
    """Tests for ``ArticleRanker`` built on an ``EsaFeatureExtractor``.

    ``setUp`` calls ``fill_database()`` and ``tearDown`` calls
    ``clear_database()``, so each test starts from whatever state
    ``fill_database()`` creates.
    """

    def setUp(self):
        """Fill the database, build the ranker and prepare a sample article."""
        fill_database()
        config_ = load_config(file_path="/vagrant/config.yaml", logger=logger)
        self.feature_extractor = EsaFeatureExtractor(prefix=config_['prefix'])
        self.ranker = ArticleRanker(extractor=self.feature_extractor)
        self.article_as_dict = {
            'news_vendor': 'TechCrunch',
            'author': "MG Siegler",
            'link': "http://www.techcrunch.com",
            'headline': "Again Apple",
            'clean_content': "Fooobaaar!",
            'content': "<p>Fooobaaar!</p>",
            'features': {
                'version': '1.0',
                'data': [(1, 0.5), (3, 0.6)],
            },
        }

    def tearDown(self):
        """Remove everything the test wrote."""
        clear_database()

    def test_get_vendor_false(self):
        """An unknown vendor name yields ``None``."""
        vendor = self.ranker.get_vendor({'news_vendor': 'not in db'})
        # assertIsNone checks identity and reports a clearer failure than
        # assertEqual(vendor, None).
        self.assertIsNone(vendor)

    def test_get_vendor(self):
        """The sample article's vendor is found."""
        vendor = self.ranker.get_vendor(self.article_as_dict)
        # NOTE(review): 'vendor config' is presumably the value stored by
        # fill_database() -- confirm against the fixture.
        self.assertEqual(vendor.config, 'vendor config')

    def test_save_article_false(self):
        """Saving an article that only has a headline yields ``None``."""
        vendor = self.ranker.get_vendor(self.article_as_dict)
        stored_article = self.ranker.save_article(
            vendor, {'headline': "Everything else is missing."})
        self.assertIsNone(stored_article)

    def test_save_article(self):
        """Saving the complete sample article keeps its author."""
        vendor = self.ranker.get_vendor(self.article_as_dict)
        stored_article = self.ranker.save_article(vendor, self.article_as_dict)
        self.assertEqual(stored_article.author, 'MG Siegler')

    def test_save_rating(self):
        """Saving a rating adds a ``RankedArticle`` for the user."""
        vendor = self.ranker.get_vendor(self.article_as_dict)
        stored_article = self.ranker.save_article(vendor, self.article_as_dict)
        user = User.objects(email="*****@*****.**").first()

        self.ranker.save_rating(user=user, article=stored_article, rating=1.0)
        user.reload()

        ranked_articles = RankedArticle.objects(user_id=user.id)
        # NOTE(review): the expected total of 3 presumably includes ratings
        # created by fill_database() -- confirm against the fixture.
        self.assertEqual(3, ranked_articles.count())
        self.assertEqual(1.0, ranked_articles[0].rating)

    def test_rank_article(self):
        """Call ``rank_article`` on the sample article (no assertions)."""
        # some error in gensim. probably because some features are not
        # quite right
        self.ranker.rank_article(self.article_as_dict)
class ArticleRankerTest(unittest.TestCase):
    """Tests for ``ArticleRanker`` built on a ``TfidfFeatureExtractor``.

    The sample article is decoded from a JSON message body. ``tearDown``
    does not clear the database, so assertions about stored ratings are
    written relative to the state found at the start of the test.
    """

    def setUp(self):
        """Fill the database, add vendors, build the ranker and the article."""
        fill_database()
        add_vendors()

        settings = load_config(file_path="/vagrant/config.yaml", logger=logger)
        extractor = TfidfFeatureExtractor(prefix=settings['prefix'])
        self.feature_extractor = extractor
        self.ranker = ArticleRanker(extractor=extractor)

        # Raw JSON message body; the fragments are joined at compile time.
        self.body = (
            '{"news_vendor": "boingboing", '
            '"features": '
            '{"version": "TF-IDF-1.1", '
            '"data": [[87, 1.0]]}, '
            '"author": "David Pescovitz", '
            '"headline": "Documentary about Astro Boy creator Osamu\\u00a0Tezuka",'
            ' "content": "<p class=\\"byline permalink\\"><a href=\\"http://boingboing.net/author/david_pescovitz\\" title=\\"Posts by David Pescovitz\\" rel=\\"author\\">David Pescovitz</a> at 9:33 am Wed, Oct 23, 2013 \\n\\n\\n\\n</p>", '
            '"clean_content": "David Pescovitz at 9:33 am Wed, Oct 23, 2013 ",'
            ' "link": "http://rss.feedsportal.com/c/35208/f/653965/s/32d1ba95/sc/38/l/0Lboingboing0Bnet0C20A130C10A0C230Cdocumentary0Eabout0Eastro0Eboy0Ecr0Bhtml/story01.htm"}'
        )
        self.article_as_dict = json.loads(self.body)

    def tearDown(self):
        """Intentionally a no-op: the database is left as the test wrote it."""
        #clear_database()

    def test_get_vendor(self):
        """The vendor looked up for the article carries the article's vendor name."""
        found = self.ranker.get_vendor(self.article_as_dict)
        expected_name = self.article_as_dict.get('news_vendor')
        self.assertEqual(expected_name, found.name)

    def test_save_article(self):
        """Saving the sample article keeps its author."""
        article = self.article_as_dict
        saved = self.ranker.save_article(self.ranker.get_vendor(article), article)
        self.assertEqual(saved.author, 'David Pescovitz')

    def test_save_rating(self):
        """Saving a rating adds exactly one ``RankedArticle`` for the user."""
        article = self.article_as_dict
        saved = self.ranker.save_article(self.ranker.get_vendor(article), article)
        user = User.objects(email="*****@*****.**").first()

        # Store number of ranked items
        ranked_articles = RankedArticle.objects(user_id=user.id)
        count_before = ranked_articles.count()

        # Store new ranked article
        self.ranker.save_rating(user=user, article=saved, rating=1.0)
        user.reload()

        count_after = RankedArticle.objects(user_id=user.id).count()
        self.assertEqual(1, count_after - count_before)
        # The entry at index ``count_before`` is expected to be the new rating.
        self.assertEqual(1.0, ranked_articles[count_before].rating)

    def test_rank_article(self):
        """Call ``rank_article`` on the sample article (no assertions)."""
        # some error in gensim. probably because some features are not
        # quite right
        self.ranker.rank_article(self.article_as_dict)
class StompListener(object):
    """Listener that ranks every article received as a JSON message.

    Construction connects to MongoDB and loads an ``LdaFeatureExtractor``;
    each message body is decoded with ``json.loads`` and passed to an
    ``ArticleRanker``. Connection and send events are echoed to
    ``self.stdout``.
    """

    def __init__(self, config):
        """Connect to the database and build the ranker.

        :param config: mapping with a ``database`` section (``db-name``,
            ``user``, ``passwd``, ``port``) and a ``prefix`` entry.

        Calls ``sys.exit(1)`` if the database connection fails or the
        feature extractor cannot be loaded.
        """
        self.config_ = config
        # Kept as an attribute for external users; the methods below log
        # through the module-level ``logger``.
        self.logger = logging.getLogger("main")
        self.stdout = sys.stdout

        # Connect to mongo database
        try:
            connect(config['database']['db-name'],
                    username=config['database']['user'],
                    password=config['database']['passwd'],
                    port=config['database']['port'])
        except ConnectionError as e:
            logger.error("Could not connect to mongodb: %s", e)
            sys.exit(1)

        logger.info("Load feature extractor.")
        try:
            self.feature_extractor_ = LdaFeatureExtractor(
                prefix=self.config_["prefix"])
        except Exception as inst:
            # Fix: the two adjacent literals used to be joined without a
            # space ("...extractor.Unknown error...").
            logger.error("Could not load feature extractor. "
                         "Unknown error %s: %s", type(inst), inst)
            sys.exit(1)

        self.ranker = ArticleRanker(extractor=self.feature_extractor_)

    def rank_article(self, article_as_dict):
        """Delegate ranking of ``article_as_dict`` to the ``ArticleRanker``."""
        self.ranker.rank_article(article_as_dict)

    @staticmethod
    def on_error(headers, message):
        """Log an error frame.

        The first parameter used to be called ``self`` although the method
        is static; it actually receives the first positional argument of
        the callback (presumably the frame headers -- confirm against the
        STOMP client in use). Positional callers are unaffected.
        """
        logger.error('received an error %s', message)

    def on_message(self, headers, message):
        """Decode the JSON ``message`` body and rank the article it holds."""
        received_message = json.loads(message)

        #save and rank article
        logger.info("*Ranked article* -> %s", message)
        self.rank_article(received_message)

    def __print_async(self, frame_type, headers, body):
        """
        Utility function to print a message and setup the command prompt
        for the next input
        """
        self.__sysout("\r \r", end='')
        self.__sysout(frame_type)
        for header_key, header_value in headers.items():
            self.__sysout('%s: %s' % (header_key, header_value))
        self.__sysout('')
        self.__sysout(body)
        self.__sysout('> ', end='')
        self.stdout.flush()

    def on_connected(self, headers, body):
        """Echo the CONNECTED frame to ``self.stdout``."""
        self.__print_async("CONNECTED", headers, body)

    def __error(self, msg, end="\n"):
        """Write ``msg`` followed by ``end`` to ``self.stdout``."""
        self.stdout.write(str(msg) + end)

    def __sysout(self, msg, end="\n"):
        """Write ``msg`` followed by ``end`` to ``self.stdout``."""
        self.stdout.write(str(msg) + end)

    def on_send(self, headers, body):
        """Echo the SEND frame to ``self.stdout``."""
        self.__print_async("SEND", headers, body)
#ranker.rank_article(article_as_dict) # Save article article_vendor = ranker.get_vendor(article_as_dict) print "VENDOR :", article_vendor.name stored_article = ranker.save_article(article_vendor, article_as_dict) print "AUTHOR:", stored_article.author # Get user user = User.objects(email="*****@*****.**").first() print "USER: "******"N RANKED_ARTICLES: ", ranked_articles.count() # Store new ranked article ranker.save_rating(user=user, article=stored_article, rating=1.0) user.reload() ranker.rank_article(article_as_dict)