def test_fieldfile_nomatch(self):
    """A field file that does not match the CSV's columns must raise ValueError on write."""
    field_file = FieldFile(f("data/AandE_Data_2011-04-10.tff"))
    line_parser = LineToDictParser(field_file)
    csv_reader = FileReader(f('data/inventory.csv'), has_header=True)
    writer = FileWriter(self._col, reader=csv_reader, parser=line_parser)
    with self.assertRaises(ValueError):
        writer.write()
def test_delimiter_header(self):
    """Importing the A&E CSV (comma delimited, with header) inserts exactly 300 docs."""
    docs_before = self._col.count_documents({})
    field_file = FieldFile(f("data/AandE_Data_2011-04-10.tff"))
    csv_reader = FileReader(f('data/AandE_Data_2011-04-10.csv'), has_header=True)
    writer = FileWriter(self._col,
                        reader=csv_reader,
                        parser=LineToDictParser(field_file))
    writer.write()
    docs_added = self._col.count_documents({}) - docs_before
    self.assertEqual(docs_added, 300)
def test_delimiter_no_header(self):
    """A pipe-delimited file with no header line imports all 10k records."""
    docs_before = self._col.count_documents({})
    field_file = FieldFile(f("data/10k.tff"))
    pipe_reader = FileReader(f("data/10k.txt"), has_header=False, delimiter="|")
    writer = FileWriter(self._col,
                        reader=pipe_reader,
                        parser=LineToDictParser(field_file))
    writer.write()
    docs_added = self._col.count_documents({}) - docs_before
    self.assertEqual(docs_added, 10000)
def test_new_delimiter_and_timeformat_header(self):
    """Pipe-delimited MOT data imports fully: written count == inserted count == line count.

    The file has no header line, so every line in the file should become a document.
    """
    start_count = self._col.count_documents({})
    fc = FieldFile(f("data/mot.tff"))
    parser = LineToDictParser(fc)
    reader = FileReader(f('data/mot_test_set_small.csv'), has_header=False, delimiter="|")
    # assertIsInstance replaces type(x) == str: same check, clearer failure message
    self.assertIsInstance(reader.name, str)
    bw = FileWriter(self._col, reader=reader, parser=parser)
    total = bw.write()
    lines = LineCounter(f('data/mot_test_set_small.csv')).line_count
    inserted_count = self._col.count_documents({}) - start_count
    self.assertEqual(inserted_count, total)
    # no header line, so the line count itself is the expected document count
    self.assertEqual(inserted_count, lines)
def test_date(self):
    """Date fields in the inventory CSV are parsed into real datetime values."""
    date_field_file = FieldFile(f("data/inventory_dates.tff"))
    # locator=True adds extra fields that break the comparison below
    parser = LineToDictParser(date_field_file, locator=False)
    reader = FileReader(f("data/inventory.csv"), has_header=True)
    docs_before = self._col.count_documents({})
    writer = FileWriter(self._col, reader=reader, parser=parser)
    docs_written = writer.write()
    total_lines = LineCounter(f("data/inventory.csv")).line_count
    # the header row is never inserted, hence the -1
    self.assertEqual(self._col.count_documents({}) - docs_before, total_lines - 1)
    self.assertEqual(self._col.count_documents({}), docs_written)
    nuts_doc = self._col.find_one({"Last Order": dateutil.parser.parse("29-Feb-2016")})
    self.assertTrue(nuts_doc)
def test_generate_fieldfile(self):
    """generate_field_file() with a custom extension produces a usable field file.

    The generated inventory.testff is removed in a finally block so a failing
    assertion no longer leaks the file into the data directory (the original
    unlink was unreachable after any assertion failure).
    """
    fc = FieldFile.generate_field_file(f("data/inventory.csv"), ext="testff")
    try:
        self.assertEqual(fc.field_filename, f("data/inventory.testff"), fc.field_filename)
        self.assertTrue(os.path.isfile(f("data/inventory.testff")), f("data/inventory.testff"))
        parser = LineToDictParser(fc)
        reader = FileReader(f("data/inventory.csv"), has_header=True)
        start_count = self._col.count_documents({})
        writer = FileWriter(self._col, reader=reader, parser=parser)
        write_count = writer.write()
        line_count = LineCounter(f("data/inventory.csv")).line_count
        new_inserted_count = self._col.count_documents({}) - start_count
        self.assertEqual(new_inserted_count, write_count)
        # header must be subtracted
        self.assertEqual(new_inserted_count, line_count - 1)
    finally:
        os.unlink(f("data/inventory.testff"))
def test_http_import(self):
    """Import 1000 docs over HTTP from the NYC open-data endpoint (skipped when offline).

    Fix: the original bound writer.write()'s return value (the number of docs
    written) to a variable named after_doc_count and subtracted the pre-existing
    collection count from it — correct only when the collection starts empty.
    Now asserts the written count and the actual collection delta separately.
    """
    if check_internet():
        csv_parser = LineToDictParser(self._ff)
        reader = FileReader(
            "https://data.cityofnewyork.us/api/views/biws-g3hs/rows.csv?accessType=DOWNLOAD&bom=true&format=true&delimiter=%3B",
            has_header=True,
            delimiter=';')
        writer = FileWriter(self._collection, reader, csv_parser)
        before_doc_count = self._collection.count_documents({})
        docs_written = writer.write(1000)
        self.assertEqual(docs_written, 1000)
        self.assertEqual(self._collection.count_documents({}) - before_doc_count, 1000)
    else:
        print("Warning:No internet: test_http_import() skipped")
def pre_execute(self, arg):
    """Prepare the reader, parser and writer for importing file *arg*.

    Falls back to the default .tff name derived from *arg* when no field file
    was configured, and raises OSError if the field file does not exist.
    (Removed commented-out debug print; unified logging on f-strings — the
    emitted messages are byte-identical to the old .format() call.)
    """
    super().pre_execute(arg)
    self._log.info(f"Using collection:'{self._collection.full_name}'")
    if self._field_filename is None:
        self._field_filename = FieldFile.make_default_tff_name(arg)
    self._log.info(f"Using field file:'{self._field_filename}'")
    if not os.path.isfile(self._field_filename):
        raise OSError(f"No such field file:'{self._field_filename}'")
    self._fieldinfo = FieldFile(self._field_filename)
    self._reader = FileReader(arg,
                              limit=self._limit,
                              has_header=self._has_header,
                              delimiter=self._delimiter)
    self._parser = LineToDictParser(self._fieldinfo,
                                    locator=self._locator,
                                    timestamp=self._timestamp,
                                    onerror=self._onerror)
    self._writer = FileWriter(self._collection, self._reader, self._parser)
def test_reader(self):
    """Every field declared in a field file appears in the parsed documents.

    Checks a generated field file (inventory) and a pre-built one
    (uk_property_prices), including the int/datetime type conversions.

    Fix: the second loop called enumerate(..., i), seeding the line counter
    with the stale final value of i from the first loop; line numbers passed
    to parse_list must restart at 1 for each file.
    """
    fc = FieldFile.generate_field_file(f("data/inventory.csv"), f("data/inventory_test.tff"))
    ff = FieldFile(fc.field_filename)
    reader = FileReader(f("data/inventory.csv"), has_header=True)
    parser = LineToDictParser(ff)
    for i, row in enumerate(reader.readline(), 1):
        doc = parser.parse_list(row, i)
        for field in ff.fields():
            self.assertTrue(field in doc, f"'{field}'")
    os.unlink(fc.field_filename)

    ff = FieldFile(f("data/uk_property_prices.tff"))
    reader = FileReader(f("data/uk_property_prices.csv"), has_header=True)
    parser = LineToDictParser(ff)
    for i, row in enumerate(reader.readline(), 1):  # was enumerate(..., i): stale counter
        doc = parser.parse_list(row, i)
        for field in ff.fields():
            if field == "txn":  # converted to _id field
                continue
            self.assertTrue(field in doc, f"{field} not present")
            self.assertIsInstance(doc["Price"], int)
            self.assertIsInstance(doc["Date of Transfer"], datetime)
def setUp(self):
    """Create the Mongo client, the HTTP-test db/collection, and the taxi field file."""
    self._client = pymongo.MongoClient()
    self._db = self._client["PYIM_HTTP_TEST"]
    self._collection = self._db["PYIM_HTTP_TEST"]
    taxi_field_file = f("data/2018_Yellow_Taxi_Trip_Data_1000.ff")
    self._ff = FieldFile(taxi_field_file)
    self._parser = LineToDictParser(self._ff)