コード例 #1
0
 def test_fieldfile_nomatch(self):
     """Expect ``ValueError`` when the field file and the CSV do not belong together.

     The writer is built from the A&E field file but reads ``inventory.csv``;
     calling ``write()`` on it must raise ``ValueError``.
     """
     field_file = FieldFile(f("data/AandE_Data_2011-04-10.tff"))
     line_parser = LineToDictParser(field_file)
     csv_reader = FileReader(f('data/inventory.csv'), has_header=True)
     writer = FileWriter(self._col, reader=csv_reader, parser=line_parser)
     # Callable form of assertRaises: write() is invoked by the assertion itself.
     self.assertRaises(ValueError, writer.write)
コード例 #2
0
 def test_delimiter_header(self):
     """Import the A&E CSV (read with a header row) and expect 300 new documents."""
     collection = self._col
     docs_before = collection.count_documents({})

     field_file = FieldFile(f("data/AandE_Data_2011-04-10.tff"))
     line_parser = LineToDictParser(field_file)
     csv_reader = FileReader(f('data/AandE_Data_2011-04-10.csv'), has_header=True)

     FileWriter(collection, reader=csv_reader, parser=line_parser).write()

     # Only the documents added by this test are counted.
     docs_added = collection.count_documents({}) - docs_before
     self.assertEqual(docs_added, 300)
コード例 #3
0
 def test_delimiter_no_header(self):
     """Import the pipe-delimited ``10k.txt`` (no header row) and expect 10000 new documents."""
     collection = self._col
     docs_before = collection.count_documents({})

     field_file = FieldFile(f("data/10k.tff"))
     line_parser = LineToDictParser(field_file)
     text_reader = FileReader(f("data/10k.txt"), has_header=False, delimiter="|")

     FileWriter(collection, reader=text_reader, parser=line_parser).write()

     # Only the documents added by this test are counted.
     docs_added = collection.count_documents({}) - docs_before
     self.assertEqual(docs_added, 10000)
コード例 #4
0
 def test_new_delimiter_and_timeformat_header(self):
     """Import the pipe-delimited MOT sample and check every line became a document.

     The file is read without a header row, so the number of inserted
     documents must equal both the value returned by ``write()`` and the
     line count of the input file.
     """
     start_count = self._col.count_documents({})
     fc = FieldFile(f("data/mot.tff"))
     parser = LineToDictParser(fc)
     reader = FileReader(f('data/mot_test_set_small.csv'), has_header=False, delimiter="|")
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(type(...) == str) which only reports "False is not true".
     self.assertIsInstance(reader.name, str)
     bw = FileWriter(self._col, reader=reader, parser=parser)
     total = bw.write()
     lines = LineCounter(f('data/mot_test_set_small.csv')).line_count
     inserted_count = self._col.count_documents({}) - start_count
     self.assertEqual(inserted_count, total)
     self.assertEqual(inserted_count, lines)
コード例 #5
0
    def test_date(self):
        """Import ``inventory.csv`` with the date-typed field file and verify the result.

        Checks that the number of newly inserted documents equals the number
        of data lines (file lines minus the header) and the value returned by
        ``write()``, and that a document whose "Last Order" field equals the
        parsed date 29-Feb-2016 can be found.
        """
        config = FieldFile(f("data/inventory_dates.tff"))
        parser = LineToDictParser(config, locator=False)  # screws up comparison later if locator is true
        reader = FileReader(f("data/inventory.csv"), has_header=True)
        start_count = self._col.count_documents({})
        writer = FileWriter(self._col, reader=reader, parser=parser)
        docs_written = writer.write()
        line_count = LineCounter(f("data/inventory.csv")).line_count

        # Compare against the delta, not the absolute collection size, so the
        # test does not depend on the collection being empty beforehand.
        inserted_count = self._col.count_documents({}) - start_count
        self.assertEqual(inserted_count, line_count - 1)  # header must be subtracted
        self.assertEqual(inserted_count, docs_written)

        nuts_doc = self._col.find_one({"Last Order": dateutil.parser.parse("29-Feb-2016")})
        # find_one returns None when nothing matches.
        self.assertIsNotNone(nuts_doc)
コード例 #6
0
 def test_generate_fieldfile(self):
     """Generate a field file for ``inventory.csv`` and import the CSV with it.

     Verifies the generated file's name and existence, then checks that the
     number of inserted documents equals both the value returned by
     ``write()`` and the number of data lines (file lines minus the header).
     The generated field file is removed even when an assertion fails.
     """
     generated_path = f("data/inventory.testff")
     fc = FieldFile.generate_field_file(f("data/inventory.csv"), ext="testff")
     try:
         self.assertEqual(fc.field_filename, generated_path, fc.field_filename)
         self.assertTrue(os.path.isfile(generated_path), generated_path)
         parser = LineToDictParser(fc)
         reader = FileReader(f("data/inventory.csv"), has_header=True)
         start_count = self._col.count_documents({})
         writer = FileWriter(self._col, reader=reader, parser=parser)
         write_count = writer.write()
         line_count = LineCounter(f("data/inventory.csv")).line_count
         new_inserted_count = self._col.count_documents({}) - start_count
         self.assertEqual(new_inserted_count, write_count)
         self.assertEqual(new_inserted_count, line_count - 1)  # header must be subtracted
     finally:
         # Guarded so a missing file cannot mask the original assertion failure.
         if os.path.isfile(generated_path):
             os.unlink(generated_path)
コード例 #7
0
    def test_http_import(self):
        """Import 1000 rows from a remote semicolon-delimited CSV over HTTPS.

        The test is reported as skipped (rather than silently passing) when
        ``check_internet()`` indicates there is no connectivity.
        """
        if not check_internet():
            self.skipTest("Warning:No internet: test_http_import() skipped")

        csv_parser = LineToDictParser(self._ff)
        reader = FileReader(
            "https://data.cityofnewyork.us/api/views/biws-g3hs/rows.csv?accessType=DOWNLOAD&bom=true&format=true&delimiter=%3B",
            has_header=True,
            delimiter=';')

        writer = FileWriter(self._collection, reader, csv_parser)
        before_doc_count = self._collection.count_documents({})
        writer.write(1000)
        # Measure the collection itself so the delta is correct even when the
        # collection already held documents before this test ran.
        after_doc_count = self._collection.count_documents({})
        self.assertEqual(after_doc_count - before_doc_count, 1000)
コード例 #8
0
ファイル: command.py プロジェクト: judy2k/pymongoimport
    def pre_execute(self, arg):
        """Prepare the reader, parser and writer for importing ``arg``.

        After delegating to the parent ``pre_execute``, the field file name is
        resolved (defaulting to ``FieldFile.make_default_tff_name(arg)`` when
        none was set), loaded, and used to build the ``FileReader``,
        ``LineToDictParser`` and ``FileWriter`` stored on the instance.

        Raises:
            OSError: if the resolved field file does not exist.
        """
        super().pre_execute(arg)

        collection_name = self._collection.full_name
        self._log.info("Using collection:'{}'".format(collection_name))

        # Fall back to the default field file name derived from the input.
        if self._field_filename is None:
            self._field_filename = FieldFile.make_default_tff_name(arg)
        self._log.info(f"Using field file:'{self._field_filename}'")

        field_file_exists = os.path.isfile(self._field_filename)
        if not field_file_exists:
            raise OSError(f"No such field file:'{self._field_filename}'")

        self._fieldinfo = FieldFile(self._field_filename)

        reader_options = {
            "limit": self._limit,
            "has_header": self._has_header,
            "delimiter": self._delimiter,
        }
        self._reader = FileReader(arg, **reader_options)

        parser_options = {
            "locator": self._locator,
            "timestamp": self._timestamp,
            "onerror": self._onerror,
        }
        self._parser = LineToDictParser(self._fieldinfo, **parser_options)

        self._writer = FileWriter(self._collection, self._reader, self._parser)
コード例 #9
0
    def test_reader(self):
        """Parse two CSV files row by row and check the resulting documents.

        First a field file is generated for ``inventory.csv`` and every field
        it declares must appear in each parsed document; the generated file is
        removed afterwards even if an assertion fails. Then
        ``uk_property_prices.csv`` is parsed with its existing field file:
        every field except ``txn`` must be present, "Price" must be an ``int``
        and "Date of Transfer" a ``datetime``.
        """
        fc = FieldFile.generate_field_file(f("data/inventory.csv"), f("data/inventory_test.tff"))
        try:
            ff = FieldFile(fc.field_filename)
            reader = FileReader(f("data/inventory.csv"), has_header=True)
            parser = LineToDictParser(ff)
            for i, row in enumerate(reader.readline(), 1):
                doc = parser.parse_list(row, i)
                for field in ff.fields():
                    self.assertIn(field, doc, f"'{field}'")
        finally:
            # Guarded so a missing file cannot mask the original failure.
            if os.path.isfile(fc.field_filename):
                os.unlink(fc.field_filename)

        ff = FieldFile(f("data/uk_property_prices.tff"))
        reader = FileReader(f("data/uk_property_prices.csv"), has_header=True)

        parser = LineToDictParser(ff)
        # Numbering restarts at 1 for the new file; the original reused the
        # stale loop variable from the first file as the start value.
        for i, row in enumerate(reader.readline(), 1):
            doc = parser.parse_list(row, i)
            for field in ff.fields():
                if field == "txn":  # converted to _id field
                    continue
                self.assertIn(field, doc, f"{field} not present")
            # Type checks are per document, so run them once per row.
            self.assertIsInstance(doc["Price"], int)
            self.assertIsInstance(doc["Date of Transfer"], datetime)
コード例 #10
0
 def setUp(self):
     """Create the MongoDB handles and the field-file parser used by the tests.

     Connects with a default ``pymongo.MongoClient()`` and uses the same name,
     ``PYIM_HTTP_TEST``, for both the database and the collection.
     """
     client = pymongo.MongoClient()
     database = client["PYIM_HTTP_TEST"]
     field_file = FieldFile(f("data/2018_Yellow_Taxi_Trip_Data_1000.ff"))

     self._client = client
     self._db = database
     self._collection = database["PYIM_HTTP_TEST"]
     self._ff = field_file
     self._parser = LineToDictParser(field_file)