class TestDataCollection(unittest.TestCase):
    """Exercise ``DataCollection`` against the ``testDatabase``/``testCollection`` pair.

    The connection string is read from the ``MONGO_CONNECTION_STRING``
    environment variable. Every test begins by emptying the collection so
    the assertions do not depend on what an earlier test left behind.
    """

    def setUp(self):
        # Runs before each test: a new DataCollection handle is built every time.
        self.start_url = "https://www.goodreads.com/book/show/53175355-many-points-of-me"
        self.connection_string = os.getenv('MONGO_CONNECTION_STRING')
        self.testDB = DataCollection(
            self.connection_string,
            "testDatabase",
            "testCollection",
        )

    def testPushToBookCollection(self):
        # A document that was just pushed must be reported as existing.
        collection = self.testDB
        collection.empty_data_collection()
        document = {"url": 1, "test": 2}
        collection.push_to_collection(document)
        found = collection.document_already_exist(document)
        self.assertEqual(True, found)

    def testempty_data_collection(self):
        # After emptying, the reported size must be zero.
        collection = self.testDB
        collection.empty_data_collection()
        size = collection.get_collection_size()
        self.assertEqual(0, size)

    def testget_collection_size(self):
        # Two pushes into an emptied collection must yield a size of two.
        collection = self.testDB
        collection.empty_data_collection()
        documents = (
            {"url": 3, "test": 2},
            {"url": 1, "test": 1},
        )
        for document in documents:
            collection.push_to_collection(document)
        self.assertEqual(2, collection.get_collection_size())

    def testdocument_already_exist(self):
        # Only the pushed document is reported as existing; the other is not.
        collection = self.testDB
        collection.empty_data_collection()
        stored = {"url": 3, "test": 2}
        never_stored = {"url": 1, "test": 1}
        collection.push_to_collection(stored)
        self.assertEqual(True, collection.document_already_exist(stored))
        self.assertEqual(False, collection.document_already_exist(never_stored))
def import_json(data_collection_type: str, file_path: str) -> None:
    """Import the entries of a JSON file into the database.

    Each entry of the parsed file is pushed to the ``goodReads`` database
    collection named by ``data_collection_type``, unless
    ``document_already_exist`` reports it as already present. Any ``_id``
    key is removed from an entry before that check.

    If ``data_collection_type`` is neither ``'book'`` nor ``'author'`` an
    error message is printed and nothing is read or written.

    Args:
        data_collection_type (str): Name of data collection, either 'book'
            or 'author'
        file_path (str): Path of json file to extract info from
    """
    if data_collection_type not in ('book', 'author'):
        # f-string so a non-string argument is reported rather than raising
        # TypeError while the message itself is being built.
        print(
            f"Error: no collection named {data_collection_type}, "
            "please enter 'book' or 'author' "
        )
        return

    datacollection = DataCollection(
        MONGO_CONNECTION_STRING, 'goodReads', data_collection_type
    )

    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open(file_path, encoding="utf-8") as file:
        file_data = json.load(file)

    for entry in file_data:
        # Drop any "_id" carried in the file before the duplicate check and
        # the insert.
        entry.pop("_id", None)
        if not datacollection.document_already_exist(entry):
            datacollection.push_to_collection(entry)