def get_result(self, url, html_str, keyword, last_modified):
    '''
    Return the keyword count for a page, reusing a stored result if possible.

    A Record for (url, keyword) is queried first. The stored result is
    reused only when last_modified is given, a stored result exists, and
    the stored last_modified equals the one passed in. In every other
    case the keyword is counted in html_str again.

    Args:
        url (str): The url being analyzed
        html_str (str): The html string requested from url
        keyword (str): The keyword to look for in html_str
        last_modified (str): HTTP last_modified of the website, or None

    Returns:
        tuple: (count (int), need_to_save_db (boolean)). need_to_save_db
            is False when the stored result was reused and True when the
            count was computed from html_str.
    '''
    cached = model.Record(url, keyword)
    cached.query()

    # Checks run left to right and stop at the first failure, so the
    # stored last_modified is only compared when a stored result exists.
    cache_is_fresh = (
        last_modified is not None
        and cached.result is not None
        and cached.last_modified == last_modified
    )
    if cache_is_fresh:
        return (cached.result, False)

    return (self.count_from_string(html_str, keyword), True)
def test_add_record(self):
    '''Add a fixed fake record through model.Record.add().'''
    print("make a fake test record db")
    # NOTE(review): get_result and save_record_to_db pass (url, keyword)
    # in that order; here 'test' sits in the first slot and the host name
    # in the second. Possibly swapped -- confirm against model.Record.
    fake_record = model.Record(
        'test',
        'www.google.com',
        99,
        'Sat, 09 Mar 2019 13:44:12 GMT',
    )
    fake_record.add()
def test_query_no_record(self):
    '''Querying a pair that was never added must leave result as None.'''
    missing = model.Record('test2', 'www.googlegoogle.com')
    missing.query()
    self.assertIsNone(missing.result, msg='Not none for dismatch')
def test_query_record(self):
    '''Query the ('test', 'www.google.com') record and expect result 99.'''
    print("test query")
    expected_result = 99
    stored = model.Record('test', 'www.google.com')
    stored.query()
    self.assertEqual(
        stored.result,
        expected_result,
        msg='write db and read db are not the same',
    )
def save_record_to_db(url, keyword, result, last_modified):
    '''
    Save one analysis record to the Record DB.

    Builds a model.Record from the four arguments, in the order given,
    and calls its add() method.

    Args:
        url: The url that was analyzed
        keyword: The keyword that was looked for
        result: The analysis result to store
        last_modified: The last_modified value to store with the result
    '''
    model.Record(url, keyword, result, last_modified).add()