コード例 #1
0
 def test_fieldfile_nomatch(self):
     """Writing inventory.csv with the AandE field file must raise ValueError."""
     field_file = FieldFile(f("data/AandE_Data_2011-04-10.tff"))
     line_parser = LineToDictParser(field_file)
     csv_reader = FileReader(f('data/inventory.csv'), has_header=True)
     writer = FileWriter(self._col, reader=csv_reader, parser=line_parser)
     # The field file describes a different CSV than the one being read.
     self.assertRaises(ValueError, writer.write)
コード例 #2
0
 def test_delimiter_header(self):
     """Import a CSV that has a header line and expect 300 new documents."""
     docs_before = self._col.count_documents({})
     field_file = FieldFile(f("data/AandE_Data_2011-04-10.tff"))
     line_parser = LineToDictParser(field_file)
     csv_reader = FileReader(f('data/AandE_Data_2011-04-10.csv'), has_header=True)
     FileWriter(self._col, reader=csv_reader, parser=line_parser).write()
     docs_added = self._col.count_documents({}) - docs_before
     self.assertEqual(docs_added, 300)
コード例 #3
0
 def test_delimiter_no_header(self):
     """Import a '|'-delimited file without a header and expect 10000 new documents."""
     docs_before = self._col.count_documents({})
     field_file = FieldFile(f("data/10k.tff"))
     line_parser = LineToDictParser(field_file)
     pipe_reader = FileReader(f("data/10k.txt"), has_header=False, delimiter="|")
     FileWriter(self._col, reader=pipe_reader, parser=line_parser).write()
     docs_added = self._col.count_documents({}) - docs_before
     self.assertEqual(docs_added, 10000)
コード例 #4
0
 def test_new_delimiter_and_timeformat_header(self):
     """Import a '|'-delimited file with no header and verify the document counts.

     The number of documents added to the collection must equal both the
     value returned by ``FileWriter.write()`` and the number of lines in the
     input file.
     """
     start_count = self._col.count_documents({})
     fc = FieldFile(f("data/mot.tff"))
     parser = LineToDictParser(fc)
     reader = FileReader(f('data/mot_test_set_small.csv'), has_header=False, delimiter="|")
     # assertIsInstance gives a useful failure message and accepts str subclasses.
     self.assertIsInstance(reader.name, str)
     bw = FileWriter(self._col, reader=reader, parser=parser)
     total = bw.write()
     lines = LineCounter(f('data/mot_test_set_small.csv')).line_count
     inserted_count = self._col.count_documents({}) - start_count
     self.assertEqual(inserted_count, total)
     self.assertEqual(inserted_count, lines)
コード例 #5
0
    def test_date(self):
        """Import inventory.csv with the date field file and check counts and a date lookup.

        Verifies that the number of documents added equals the file's line
        count minus the header line, that it matches the value returned by
        ``write()``, and that a document can be found by its parsed
        ``Last Order`` date.
        """
        config = FieldFile(f("data/inventory_dates.tff"))
        parser = LineToDictParser(config, locator=False)  # screws up comparison later if locator is true
        reader = FileReader(f("data/inventory.csv"), has_header=True)
        start_count = self._col.count_documents({})
        writer = FileWriter(self._col, reader=reader, parser=parser)
        docs_written = writer.write()
        line_count = LineCounter(f("data/inventory.csv")).line_count
        inserted_count = self._col.count_documents({}) - start_count
        self.assertEqual(inserted_count, line_count - 1)  # header must be subtracted
        # Compare the delta, not the absolute count, so the check also holds
        # when the collection already contained documents before this test.
        self.assertEqual(inserted_count, docs_written)

        nuts_doc = self._col.find_one({"Last Order": dateutil.parser.parse("29-Feb-2016")})
        self.assertIsNotNone(nuts_doc)
コード例 #6
0
 def test_generate_fieldfile(self):
     """Generate a field file for inventory.csv and import the CSV with it.

     Checks that the generated file has the expected name and exists on
     disk, then that the number of documents added matches both the value
     returned by ``write()`` and the file's line count minus the header.
     The generated field file is removed even when an assertion fails.
     """
     generated_path = f("data/inventory.testff")
     fc = FieldFile.generate_field_file(f("data/inventory.csv"), ext="testff")
     try:
         self.assertEqual(fc.field_filename, generated_path, fc.field_filename)
         self.assertTrue(os.path.isfile(generated_path), generated_path)
         parser = LineToDictParser(fc)
         reader = FileReader(f("data/inventory.csv"), has_header=True)
         start_count = self._col.count_documents({})
         writer = FileWriter(self._col, reader=reader, parser=parser)
         write_count = writer.write()
         line_count = LineCounter(f("data/inventory.csv")).line_count
         new_inserted_count = self._col.count_documents({}) - start_count
         self.assertEqual(new_inserted_count, write_count)
         self.assertEqual(new_inserted_count, line_count - 1)  # header must be subtracted
     finally:
         # Clean up in all cases so a failed assertion does not leave the
         # generated file behind for later test runs.
         if os.path.isfile(generated_path):
             os.unlink(generated_path)
コード例 #7
0
    def test_http_import(self):
        """Import 1000 rows from a remote ';'-delimited CSV over HTTPS.

        The test is reported as skipped (rather than silently passing) when
        ``check_internet()`` indicates there is no connectivity.
        """
        if not check_internet():
            self.skipTest("No internet: test_http_import() skipped")

        csv_parser = LineToDictParser(self._ff)
        reader = FileReader(
            "https://data.cityofnewyork.us/api/views/biws-g3hs/rows.csv?accessType=DOWNLOAD&bom=true&format=true&delimiter=%3B",
            has_header=True,
            delimiter=';')

        writer = FileWriter(self._collection, reader, csv_parser)
        before_doc_count = self._collection.count_documents({})
        writer.write(1000)
        # Measure the collection itself, as test_local_import does, so the
        # check does not depend on the collection being empty beforehand.
        after_doc_count = self._collection.count_documents({})
        self.assertEqual(after_doc_count - before_doc_count, 1000)
コード例 #8
0
    def test_local_import(self):
        """Write 10 rows from the local taxi CSV and expect 10 new documents."""
        taxi_reader = FileReader(
            f("data/2018_Yellow_Taxi_Trip_Data_1000.csv"),
            has_header=True,
            delimiter=";",
        )
        docs_before = self._collection.count_documents({})

        FileWriter(self._collection, reader=taxi_reader, parser=self._parser).write(10)

        docs_added = self._collection.count_documents({}) - docs_before
        self.assertEqual(docs_added, 10)
コード例 #9
0
ファイル: command.py プロジェクト: judy2k/pymongoimport
    def pre_execute(self, arg):
        """Build the reader, parser and writer used to import ``arg``.

        When no field file name was configured, one is derived from ``arg``
        via ``FieldFile.make_default_tff_name``.

        Raises:
            OSError: if the field file does not exist.
        """
        super().pre_execute(arg)
        collection_name = self._collection.full_name
        self._log.info("Using collection:'{}'".format(collection_name))

        # Fall back to the default field file name for this input.
        if self._field_filename is None:
            self._field_filename = FieldFile.make_default_tff_name(arg)
        self._log.info(f"Using field file:'{self._field_filename}'")

        field_file_exists = os.path.isfile(self._field_filename)
        if not field_file_exists:
            raise OSError(f"No such field file:'{self._field_filename}'")

        self._fieldinfo = FieldFile(self._field_filename)

        reader_options = {
            "limit": self._limit,
            "has_header": self._has_header,
            "delimiter": self._delimiter,
        }
        self._reader = FileReader(arg, **reader_options)

        parser_options = {
            "locator": self._locator,
            "timestamp": self._timestamp,
            "onerror": self._onerror,
        }
        self._parser = LineToDictParser(self._fieldinfo, **parser_options)

        self._writer = FileWriter(self._collection, self._reader, self._parser)
コード例 #10
0
ファイル: command.py プロジェクト: judy2k/pymongoimport
class ImportCommand(Command):
    """Command that imports a delimited file into a pymongo collection.

    ``pre_execute`` builds the reader/parser/writer pipeline for a given
    input, ``execute`` performs the write, and ``post_execute`` records the
    import with the audit object (if any) and logs completion.
    """

    def __init__(self,
                 collection: pymongo.collection.Collection,
                 field_filename: str = None,
                 delimiter: str = ",",
                 has_header: bool = True,
                 onerror: ErrorResponse = ErrorResponse.Warn,
                 limit: int = 0,
                 locator: bool = False,
                 timestamp: DocTimeStamp = DocTimeStamp.NO_TIMESTAMP,
                 audit: object = None,
                 id: object = None):
        """Store the import settings; no file is touched until ``pre_execute``.

        Args:
            collection: target collection; its ``full_name`` is logged.
            field_filename: path of the field file, or None to derive a
                default name from the input in ``pre_execute``.
            delimiter: field delimiter passed to ``FileReader``.
            has_header: passed to ``FileReader``.
            onerror: error policy passed to ``LineToDictParser``.
            limit: passed to ``FileReader``.
            locator: passed to ``LineToDictParser``.
            timestamp: passed to ``LineToDictParser``.
            audit: forwarded to the ``Command`` base class; when truthy,
                ``post_execute`` calls its ``add_command`` method.
            id: forwarded to the ``Command`` base class.
        """
        super().__init__(audit, id)

        self._log = logging.getLogger(__name__)
        self._collection = collection
        self._name = "import"
        self._field_filename = field_filename
        self._delimiter = delimiter
        self._has_header = has_header
        # Initialised here so the ``fieldinfo`` property returns None instead
        # of raising AttributeError before ``pre_execute`` has run.
        self._fieldinfo = None
        self._parser = None
        self._reader = None
        self._writer = None
        self._onerror = onerror
        self._limit = limit
        self._locator = locator
        self._timestamp = timestamp
        self._total_written = 0

    def pre_execute(self, arg):
        """Build the reader, parser and writer used to import ``arg``.

        When no field file name was configured, one is derived from ``arg``
        via ``FieldFile.make_default_tff_name``.

        Raises:
            FileNotFoundError: (a subclass of OSError) if the field file
                does not exist.
        """
        super().pre_execute(arg)
        self._log.info("Using collection:'%s'", self._collection.full_name)

        if self._field_filename is None:
            self._field_filename = FieldFile.make_default_tff_name(arg)

        self._log.info("Using field file:'%s'", self._field_filename)

        if not os.path.isfile(self._field_filename):
            # FileNotFoundError is an OSError, so existing handlers still match.
            raise FileNotFoundError(f"No such field file:'{self._field_filename}'")

        self._fieldinfo = FieldFile(self._field_filename)

        self._reader = FileReader(arg,
                                  limit=self._limit,
                                  has_header=self._has_header,
                                  delimiter=self._delimiter)
        self._parser = LineToDictParser(self._fieldinfo,
                                        locator=self._locator,
                                        timestamp=self._timestamp,
                                        onerror=self._onerror)
        self._writer = FileWriter(self._collection, self._reader, self._parser)

    def execute(self, arg):
        """Run the writer built by ``pre_execute`` and return its result.

        ``arg`` is unused here; the input was bound in ``pre_execute``,
        which must have been called first.
        """
        self._total_written = self._writer.write()

        return self._total_written

    def total_written(self):
        """Return the value produced by the most recent ``execute`` (0 initially)."""
        return self._total_written

    @property
    def fieldinfo(self):
        """The ``FieldFile`` loaded by ``pre_execute``, or None before it has run."""
        return self._fieldinfo

    def post_execute(self, arg):
        """Record the import with the audit object, if any, and log completion."""
        super().post_execute(arg)
        if self._audit:
            self._audit.add_command(self._id, self.name(), {"filename": arg})

        if self._log:
            self._log.info("imported file: '%s'", arg)