Esempio n. 1
0
    def test_from_ISO_8859_1_csv(self):
        """Load the ISO-8859-1 sample CSV and compare its column names.

        Verifies that the predicted coordinate columns are Longitude/Latitude
        and that the dataframe opened from the description exposes the same
        column names the description itself reports.
        """
        sample_path = os.path.realpath(
            this_dir + "/data/area2_yield_ISO-8859-1.csv")
        described = CsvDescribe(sample_path)

        predicted_coords = predictCoordinateColumnNames(
            described.get_column_names())
        self.assertEqual(predicted_coords, ['Longitude', 'Latitude'])

        frame = described.open_pandas_dataframe()
        # Evaluate the dataframe side first, then the description side,
        # before comparing the two lists.
        frame_columns = frame.columns.to_list()
        described_columns = described.get_column_names()
        self.assertListEqual(frame_columns, described_columns)
Esempio n. 2
0
    def test01_csvDescribe_ASCII(self):
        """Describe the `file_csv` fixture and check its reported metadata.

        Expects an ascii encoding, 13756 rows, 24 columns, Lon/Lat as the
        predicted coordinate columns, and a detected header row.
        """
        described = CsvDescribe(file_csv)

        # File-level metadata.
        self.assertEqual(described.file_encoding, 'ascii')
        self.assertEqual(described.row_count, 13756)
        self.assertEqual(described.column_count, 24)

        # Coordinate column prediction based on the reported column names.
        predicted_coords = predictCoordinateColumnNames(
            described.get_column_names())
        self.assertEqual(predicted_coords, ['Lon', 'Lat'])

        self.assertTrue(described.has_column_header)
Esempio n. 3
0
    def test_csvfile_UTF8(self):
        """Describe the ISO-8859-1 sample CSV and check its reported metadata.

        Despite the method name, the fixture used here is the ISO-8859-1
        sample file, and ISO-8859-1 is the encoding the test expects.
        """
        sample_path = os.path.realpath(
            this_dir + "/data/area2_yield_ISO-8859-1.csv")
        described = CsvDescribe(sample_path)

        # File-level metadata.
        self.assertEqual(described.file_encoding, 'ISO-8859-1')
        self.assertEqual(described.row_count, 1543)
        self.assertEqual(described.column_count, 18)

        # Coordinate column prediction based on the reported column names.
        predicted_coords = predictCoordinateColumnNames(
            described.get_column_names())
        self.assertEqual(predicted_coords, ['Longitude', 'Latitude'])

        self.assertTrue(described.has_column_header)
Esempio n. 4
0
    def test_csvfile_UTF8(self):
        """Describe the ISO-8859-1 sample CSV and check its column aliases.

        Despite the method name, the fixture used here is the ISO-8859-1
        sample file. Besides the file metadata, the test checks that the
        second-to-last column name contains a non-ASCII character while its
        alias is pure ASCII, and that the last column name equals its alias.
        """
        sample_path = os.path.realpath(
            this_dir + "/data/area2_yield_ISO-8859-1.csv")
        described = CsvDescribe(sample_path)

        # The fixture is nominally ISO-8859-1, but chardet seems to detect it
        # as ISO-8859-9, which is described as "Largely the same as
        # ISO/IEC 8859-1, replacing the rarely used Icelandic letters with
        # Turkish ones." The assertion therefore expects ISO-8859-9.
        self.assertEqual(described.file_encoding, 'ISO-8859-9')
        self.assertEqual(described.row_count, 1543)
        self.assertEqual(described.column_count, 18)

        predicted_coords = predictCoordinateColumnNames(
            described.get_column_names())
        self.assertEqual(predicted_coords, ['Longitude', 'Latitude'])
        self.assertTrue(described.has_column_header)

        # The last column's alias is identical to its name ...
        last_name = described.get_column_names()[-1]
        last_alias = described.get_alias_column_names()[-1]
        self.assertEqual(last_name, last_alias)

        # ... whereas the second-to-last column's alias differs from its name.
        penultimate_name = described.get_column_names()[-2]
        penultimate_alias = described.get_alias_column_names()[-2]
        self.assertNotEqual(penultimate_name, penultimate_alias)

        # Exact expected values for that column: the raw name carries a
        # superscript three (\xb3), the alias has no space and a plain "3".
        raw_name = described.get_column_names()[-2]
        self.assertEqual(u'Crop Flw(V)(m\xb3/s)', raw_name)
        alias_name = described.get_alias_column_names()[-2]
        self.assertEqual('CropFlw(V)(m3/s)', alias_name)

        # The alias must consist solely of ASCII code points (< 128);
        # the raw column name must contain at least one non-ASCII character.
        ascii_candidate = described.get_alias_column_names()[-2]
        self.assertTrue(all(ord(ch) < 128 for ch in ascii_candidate))
        unicode_candidate = described.get_column_names()[-2]
        self.assertFalse(all(ord(ch) < 128 for ch in unicode_candidate))