Esempio n. 1
0
    def test_simple(self):
        """Load simple.xls and verify rows, column names and types."""
        resource_id = 'test1'
        xls_filepath = get_sample_filepath('simple.xls')
        factories.Resource(id=resource_id)
        loader.load_table(
            xls_filepath,
            resource_id=resource_id,
            mimetype='xls',
            logger=PrintLogger(),
        )

        # The full-text value is slightly different between CKAN 2.7 and 2.8,
        # due to changes in the indexing, so either form is accepted.
        first_row = self._get_records(
            'test1', limit=1, exclude_full_text_column=False)[0]
        accepted_full_text = [
            "'-01':2,3 '00':4,5,6 '1':7 '2011':1 'galway':8",
            "'-01':4,5 '00':6,7,8 '1':1 '2011':3 'galway':2",
        ]
        assert_in(first_row[1], accepted_full_text)

        # Rows as returned without the full-text column.
        expected_rows = [
            (1, datetime.datetime(2011, 1, 1, 0, 0), Decimal('1'), u'Galway'),
            (2, datetime.datetime(2011, 1, 2, 0, 0), Decimal('-1'), u'Galway'),
            (3, datetime.datetime(2011, 1, 3, 0, 0), Decimal('0'), u'Galway'),
            (4, datetime.datetime(2011, 1, 1, 0, 0), Decimal('6'),
             u'Berkeley'),
            (5, datetime.datetime(2011, 1, 2, 0, 0), Decimal('8'),
             u'Berkeley'),
            (6, datetime.datetime(2011, 1, 3, 0, 0), Decimal('5'),
             u'Berkeley'),
        ]
        assert_equal(self._get_records('test1'), expected_rows)

        expected_names = [
            u'_id', u'_full_text', u'date', u'temperature', u'place']
        assert_equal(self._get_column_names('test1'), expected_names)

        expected_types = [
            u'int4', u'tsvector', u'timestamp', u'numeric', u'text']
        assert_equal(self._get_column_types('test1'), expected_types)
    def test_simple(self):
        """Load simple.xls and verify rows, column names and types."""
        resource_id = 'test1'
        xls_filepath = get_sample_filepath('simple.xls')
        factories.Resource(id=resource_id)
        loader.load_table(xls_filepath, resource_id=resource_id,
                          mimetype='xls', logger=PrintLogger())

        # First row, fetched with the full-text column included.
        first_row_with_full_text = (
            1,
            "'-01':2,3 '00':4,5,6 '1':7 '2011':1 'galway':8",
            datetime.datetime(2011, 1, 1, 0, 0),
            Decimal('1'),
            u'Galway',
        )
        assert_equal(
            self._get_records(
                'test1', limit=1, exclude_full_text_column=False),
            [first_row_with_full_text])

        # All rows, fetched without the full-text column.
        expected_rows = [
            (1, datetime.datetime(2011, 1, 1, 0, 0), Decimal('1'), u'Galway'),
            (2, datetime.datetime(2011, 1, 2, 0, 0), Decimal('-1'), u'Galway'),
            (3, datetime.datetime(2011, 1, 3, 0, 0), Decimal('0'), u'Galway'),
            (4, datetime.datetime(2011, 1, 1, 0, 0), Decimal('6'),
             u'Berkeley'),
            (5, datetime.datetime(2011, 1, 2, 0, 0), Decimal('8'),
             u'Berkeley'),
            (6, datetime.datetime(2011, 1, 3, 0, 0), Decimal('5'),
             u'Berkeley'),
        ]
        assert_equal(self._get_records('test1'), expected_rows)

        expected_names = [
            u'_id', u'_full_text', u'date', u'temperature', u'place']
        assert_equal(self._get_column_names('test1'), expected_names)

        expected_types = [
            u'int4', u'tsvector', u'timestamp', u'numeric', u'text']
        assert_equal(self._get_column_types('test1'), expected_types)
Esempio n. 3
0
 def test_no_entries(self):
     """Loading the no_entries.csv sample must raise LoaderError."""
     csv_filepath = get_sample_filepath('no_entries.csv')
     # no datastore table is created - we need to except, or else
     # datastore_active will be set on a non-existent datastore table
     resource_id = 'test1'
     factories.Resource(id=resource_id)
     # The raised exception is never inspected, so it is not bound to a name.
     with assert_raises(LoaderError):
         loader.load_table(csv_filepath, resource_id=resource_id,
                           mimetype='csv', logger=PrintLogger())
Esempio n. 4
0
 def test_boston_311_sample5(self):
     """Load boston_311_sample5.csv and print how long the load took.

     No assertions are made on the loaded data; the test only fails if
     an exception propagates out of it.
     """
     # to create the test file:
     # head -n 100001 ckanext/xloader/tests/samples/boston_311.csv > ckanext/xloader/tests/samples/boston_311_sample5.csv
     csv_filepath = get_sample_filepath('boston_311_sample5.csv')
     resource_id = 'test1'
     factories.Resource(id=resource_id)
     import time
     t0 = time.time()
     # print(...) with a single parenthesised argument produces the same
     # output on Python 2 and Python 3, unlike the bare print statement.
     print('{} Start load'.format(time.strftime('%H:%M:%S', time.localtime(t0))))
     loader.load_table(csv_filepath, resource_id=resource_id,
                       mimetype='csv', logger=PrintLogger())
     print('Load: {}s'.format(time.time() - t0))
Esempio n. 5
0
 def test_boston_311_complete(self):
     """Load the complete boston_311.csv and print how long the load took.

     No assertions are made on the loaded data; the test only fails if
     an exception propagates out of it.
     """
     # to get the test file:
     # curl -o ckanext/xloader/tests/samples/boston_311.csv https://data.boston.gov/dataset/8048697b-ad64-4bfc-b090-ee00169f2323/resource/2968e2c0-d479-49ba-a884-4ef523ada3c0/download/311.csv
     csv_filepath = get_sample_filepath('boston_311.csv')
     resource_id = 'test1'
     factories.Resource(id=resource_id)
     import time
     t0 = time.time()
     # print(...) with a single parenthesised argument produces the same
     # output on Python 2 and Python 3, unlike the bare print statement.
     print('{} Start load'.format(time.strftime('%H:%M:%S', time.localtime(t0))))
     loader.load_table(csv_filepath, resource_id=resource_id,
                       mimetype='csv', logger=PrintLogger())
     print('Load: {}s'.format(time.time() - t0))
Esempio n. 6
0
 def test_no_entries(self):
     """Loading the no_entries.csv sample must raise LoaderError."""
     resource_id = "test1"
     sample_path = get_sample_filepath("no_entries.csv")
     factories.Resource(id=resource_id)
     # No datastore table is created for this file, so the loader has to
     # raise; otherwise datastore_active would be set on a datastore table
     # that does not exist.
     with pytest.raises(LoaderError):
         loader.load_table(sample_path, resource_id=resource_id,
                           mimetype="csv", logger=PrintLogger())
Esempio n. 7
0
    def test_boston_311_sample5(self):
        """Load boston_311_sample5.csv and print the elapsed load time."""
        # The sample file is created with:
        # head -n 100001 ckanext/xloader/tests/samples/boston_311.csv > ckanext/xloader/tests/samples/boston_311_sample5.csv
        import time

        resource_id = "test1"
        sample_path = get_sample_filepath("boston_311_sample5.csv")
        factories.Resource(id=resource_id)

        # Timing starts only after the resource fixture exists.
        started = time.time()
        start_clock = time.strftime("%H:%M:%S", time.localtime(started))
        print("{} Start load".format(start_clock))
        loader.load_table(sample_path, resource_id=resource_id,
                          mimetype="csv", logger=PrintLogger())
        elapsed = time.time() - started
        print("Load: {}s".format(elapsed))
Esempio n. 8
0
    def test_boston_311(self):
        """Load boston_311_sample.csv and verify rows, column names and types.

        Each fetched value is printed before it is asserted on.
        """
        csv_filepath = get_sample_filepath('boston_311_sample.csv')
        resource_id = 'test1'
        factories.Resource(id=resource_id)
        loader.load_table(csv_filepath, resource_id=resource_id,
                          mimetype='csv', logger=PrintLogger())

        records = self._get_records('test1')
        # print(...) with a single parenthesised argument produces the same
        # output on Python 2 and Python 3, unlike the bare print statement.
        print(records)
        assert_equal(
            records,
            [(1, Decimal('101002153891'), datetime.datetime(2017, 7, 6, 23, 38, 43), datetime.datetime(2017, 7, 21, 8, 30), u'', u'ONTIME', u'Open', u' ', u'Street Light Outages', u'Public Works Department', u'Street Lights', u'Street Light Outages', u'PWDx_Street Light Outages', u'PWDx', u'', u'', u'480 Harvard St  Dorchester  MA  02124', Decimal('8'), Decimal('7'), Decimal('4'), u'B3', u'Greater Mattapan', Decimal('9'), u'Ward 14', Decimal('1411'), u'480 Harvard St', Decimal('2124'), Decimal('42.288'), Decimal('-71.0927'), u'Citizens Connect App'),
             (2, Decimal('101002153890'), datetime.datetime(2017, 7, 6, 23, 29, 13), datetime.datetime(2017, 9, 11, 8, 30), u'', u'ONTIME', u'Open', u' ', u'Graffiti Removal', u'Property Management', u'Graffiti', u'Graffiti Removal', u'PROP_GRAF_GraffitiRemoval', u'PROP', u' https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg', u'', u'522 Saratoga St  East Boston  MA  02128', Decimal('1'), Decimal('9'), Decimal('1'), u'A7', u'East Boston', Decimal('1'), u'Ward 1', Decimal('110'), u'522 Saratoga St', Decimal('2128'), Decimal('42.3807'), Decimal('-71.0259'), u'Citizens Connect App'),
             (3, Decimal('101002153889'), datetime.datetime(2017, 7, 6, 23, 24, 20), datetime.datetime(2017, 9, 11, 8, 30), u'', u'ONTIME', u'Open', u' ', u'Graffiti Removal', u'Property Management', u'Graffiti', u'Graffiti Removal', u'PROP_GRAF_GraffitiRemoval', u'PROP', u' https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg', u'', u'965 Bennington St  East Boston  MA  02128', Decimal('1'), Decimal('9'), Decimal('1'), u'A7', u'East Boston', Decimal('1'), u'Ward 1', Decimal('112'), u'965 Bennington St', Decimal('2128'), Decimal('42.386'), Decimal('-71.008'), u'Citizens Connect App')])
        print(self._get_column_names('test1'))
        assert_equal(
            self._get_column_names('test1'),
            [u'_id', u'_full_text', u'CASE_ENQUIRY_ID', u'open_dt', u'target_dt', u'closed_dt', u'OnTime_Status', u'CASE_STATUS', u'CLOSURE_REASON', u'CASE_TITLE', u'SUBJECT', u'REASON', u'TYPE', u'QUEUE', u'Department', u'SubmittedPhoto', u'ClosedPhoto', u'Location', u'Fire_district', u'pwd_district', u'city_council_district', u'police_district', u'neighborhood', u'neighborhood_services_district', u'ward', u'precinct', u'LOCATION_STREET_NAME', u'LOCATION_ZIPCODE', u'Latitude', u'Longitude', u'Source'])
        print(self._get_column_types('test1'))
        assert_equal(self._get_column_types('test1'),
                     [u'int4', u'tsvector',
                      u'numeric', u'timestamp', u'timestamp', u'text', u'text', u'text', u'text', u'text', u'text', u'text', u'text', u'text', u'text', u'text', u'text', u'text', u'numeric', u'numeric', u'numeric', u'text', u'text', u'numeric', u'text', u'numeric', u'text', u'numeric', u'numeric', u'numeric', u'text'])
Esempio n. 9
0
    def test_boston_311(self, Session):
        """Load boston_311_sample.csv and verify rows, column names and types.

        Each fetched value is printed before it is asserted on.
        """
        resource_id = "test1"
        sample_path = get_sample_filepath("boston_311_sample.csv")
        factories.Resource(id=resource_id)
        loader.load_table(sample_path, resource_id=resource_id,
                          mimetype="csv", logger=PrintLogger())

        records = self._get_records(Session, "test1")
        print(records)
        expected_records = [
            (
                1, Decimal("101002153891"),
                datetime.datetime(2017, 7, 6, 23, 38, 43),
                datetime.datetime(2017, 7, 21, 8, 30),
                u"", u"ONTIME", u"Open", u" ",
                u"Street Light Outages", u"Public Works Department",
                u"Street Lights", u"Street Light Outages",
                u"PWDx_Street Light Outages", u"PWDx",
                u"", u"",
                u"480 Harvard St  Dorchester  MA  02124",
                Decimal("8"), Decimal("7"), Decimal("4"),
                u"B3", u"Greater Mattapan", Decimal("9"),
                u"Ward 14", Decimal("1411"),
                u"480 Harvard St", Decimal("2124"),
                Decimal("42.288"), Decimal("-71.0927"),
                u"Citizens Connect App",
            ),
            (
                2, Decimal("101002153890"),
                datetime.datetime(2017, 7, 6, 23, 29, 13),
                datetime.datetime(2017, 9, 11, 8, 30),
                u"", u"ONTIME", u"Open", u" ",
                u"Graffiti Removal", u"Property Management",
                u"Graffiti", u"Graffiti Removal",
                u"PROP_GRAF_GraffitiRemoval", u"PROP",
                u" https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg",  # noqa
                u"",
                u"522 Saratoga St  East Boston  MA  02128",
                Decimal("1"), Decimal("9"), Decimal("1"),
                u"A7", u"East Boston", Decimal("1"),
                u"Ward 1", Decimal("110"),
                u"522 Saratoga St", Decimal("2128"),
                Decimal("42.3807"), Decimal("-71.0259"),
                u"Citizens Connect App",
            ),
            (
                3, Decimal("101002153889"),
                datetime.datetime(2017, 7, 6, 23, 24, 20),
                datetime.datetime(2017, 9, 11, 8, 30),
                u"", u"ONTIME", u"Open", u" ",
                u"Graffiti Removal", u"Property Management",
                u"Graffiti", u"Graffiti Removal",
                u"PROP_GRAF_GraffitiRemoval", u"PROP",
                u" https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg",  # noqa
                u"",
                u"965 Bennington St  East Boston  MA  02128",
                Decimal("1"), Decimal("9"), Decimal("1"),
                u"A7", u"East Boston", Decimal("1"),
                u"Ward 1", Decimal("112"),
                u"965 Bennington St", Decimal("2128"),
                Decimal("42.386"), Decimal("-71.008"),
                u"Citizens Connect App",
            ),
        ]
        assert records == expected_records

        print(self._get_column_names(Session, "test1"))
        expected_names = [
            u"_id", u"_full_text",
            u"CASE_ENQUIRY_ID", u"open_dt", u"target_dt", u"closed_dt",
            u"OnTime_Status", u"CASE_STATUS", u"CLOSURE_REASON",
            u"CASE_TITLE", u"SUBJECT", u"REASON", u"TYPE", u"QUEUE",
            u"Department", u"SubmittedPhoto", u"ClosedPhoto", u"Location",
            u"Fire_district", u"pwd_district", u"city_council_district",
            u"police_district", u"neighborhood",
            u"neighborhood_services_district", u"ward", u"precinct",
            u"LOCATION_STREET_NAME", u"LOCATION_ZIPCODE",
            u"Latitude", u"Longitude", u"Source",
        ]
        assert self._get_column_names(Session, "test1") == expected_names

        print(self._get_column_types(Session, "test1"))
        expected_types = [
            u"int4", u"tsvector",
            u"numeric", u"timestamp", u"timestamp",
            u"text", u"text", u"text", u"text", u"text",
            u"text", u"text", u"text", u"text", u"text",
            u"text", u"text", u"text",
            u"numeric", u"numeric", u"numeric",
            u"text", u"text",
            u"numeric", u"text",
            u"numeric", u"text",
            u"numeric", u"numeric", u"numeric",
            u"text",
        ]
        assert self._get_column_types(Session, "test1") == expected_types
Esempio n. 10
0
    def test_simple(self, Session):
        """Load simple.xls and verify rows, column names and types."""
        resource_id = "test1"
        xls_filepath = get_sample_filepath("simple.xls")
        factories.Resource(id=resource_id)
        loader.load_table(xls_filepath, resource_id=resource_id,
                          mimetype="xls", logger=PrintLogger())

        # The indexed record varies (depending on CKAN version?), e.g.:
        #   "'-01':2,3 '00':4,5,6 '1':7 '2011':1 'galway':8"
        #   "'-01':4,5 '00':6,7,8 '1':1 '2011':3 'galway':2"
        #   "'-01':2,3 '00':5,6 '1':7 '2011':1 'galway':8 't00':4"
        # so only the presence of the 'galway' entry is checked.
        first_row = self._get_records(
            Session, "test1", limit=1, exclude_full_text_column=False)[0]
        assert "'galway':" in first_row[1]

        # All rows, fetched without the full-text column.
        expected_rows = [
            (1, datetime.datetime(2011, 1, 1, 0, 0), Decimal("1"), u"Galway"),
            (2, datetime.datetime(2011, 1, 2, 0, 0), Decimal("-1"), u"Galway"),
            (3, datetime.datetime(2011, 1, 3, 0, 0), Decimal("0"), u"Galway"),
            (4, datetime.datetime(2011, 1, 1, 0, 0), Decimal("6"),
             u"Berkeley"),
            (5, datetime.datetime(2011, 1, 2, 0, 0), Decimal("8"),
             u"Berkeley"),
            (6, datetime.datetime(2011, 1, 3, 0, 0), Decimal("5"),
             u"Berkeley"),
        ]
        assert self._get_records(Session, "test1") == expected_rows

        expected_names = [
            u"_id", u"_full_text", u"date", u"temperature", u"place"]
        assert self._get_column_names(Session, "test1") == expected_names

        expected_types = [
            u"int4", u"tsvector", u"timestamp", u"numeric", u"text"]
        assert self._get_column_types(Session, "test1") == expected_types