def test_simple(self):
    """Load ``simple.xls`` and check the stored rows, column names and types.

    The file is loaded through ``loader.load_table`` into resource
    ``test1``; the results are read back with the ``_get_*`` helpers.
    """
    resource_id = 'test1'
    xls_path = get_sample_filepath('simple.xls')
    factories.Resource(id=resource_id)
    loader.load_table(xls_path, resource_id=resource_id,
                      mimetype='xls', logger=PrintLogger())

    # these are slightly different between CKAN 2.7 and 2.8, due to changes
    # in the indexing, so either value is accepted
    accepted_full_text = [
        "'-01':2,3 '00':4,5,6 '1':7 '2011':1 'galway':8",
        "'-01':4,5 '00':6,7,8 '1':1 '2011':3 'galway':2",
    ]
    first_row = self._get_records(
        'test1', limit=1, exclude_full_text_column=False)[0]
    assert_in(first_row[1], accepted_full_text)

    stored_rows = self._get_records('test1')
    expected_rows = [
        (1, datetime.datetime(2011, 1, 1, 0, 0), Decimal('1'), u'Galway'),
        (2, datetime.datetime(2011, 1, 2, 0, 0), Decimal('-1'), u'Galway'),
        (3, datetime.datetime(2011, 1, 3, 0, 0), Decimal('0'), u'Galway'),
        (4, datetime.datetime(2011, 1, 1, 0, 0), Decimal('6'), u'Berkeley'),
        (5, datetime.datetime(2011, 1, 2, 0, 0), Decimal('8'), u'Berkeley'),
        (6, datetime.datetime(2011, 1, 3, 0, 0), Decimal('5'), u'Berkeley'),
    ]
    assert_equal(stored_rows, expected_rows)

    column_names = self._get_column_names('test1')
    assert_equal(
        column_names,
        [u'_id', u'_full_text', u'date', u'temperature', u'place'])

    column_types = self._get_column_types('test1')
    assert_equal(
        column_types,
        [u'int4', u'tsvector', u'timestamp', u'numeric', u'text'])
def test_simple(self):
    """Load ``simple.xls`` and check the stored rows, column names and types.

    The first row is additionally compared with its full-text column
    included.
    """
    resource_id = 'test1'
    xls_path = get_sample_filepath('simple.xls')
    factories.Resource(id=resource_id)
    loader.load_table(xls_path, resource_id=resource_id,
                      mimetype='xls', logger=PrintLogger())

    # First row only, with the full-text column kept in the result.
    first_row_with_index = self._get_records(
        'test1', limit=1, exclude_full_text_column=False)
    assert_equal(
        first_row_with_index,
        [(1,
          "'-01':2,3 '00':4,5,6 '1':7 '2011':1 'galway':8",
          datetime.datetime(2011, 1, 1, 0, 0),
          Decimal('1'),
          u'Galway')])

    stored_rows = self._get_records('test1')
    expected_rows = [
        (1, datetime.datetime(2011, 1, 1, 0, 0), Decimal('1'), u'Galway'),
        (2, datetime.datetime(2011, 1, 2, 0, 0), Decimal('-1'), u'Galway'),
        (3, datetime.datetime(2011, 1, 3, 0, 0), Decimal('0'), u'Galway'),
        (4, datetime.datetime(2011, 1, 1, 0, 0), Decimal('6'), u'Berkeley'),
        (5, datetime.datetime(2011, 1, 2, 0, 0), Decimal('8'), u'Berkeley'),
        (6, datetime.datetime(2011, 1, 3, 0, 0), Decimal('5'), u'Berkeley'),
    ]
    assert_equal(stored_rows, expected_rows)

    column_names = self._get_column_names('test1')
    assert_equal(
        column_names,
        [u'_id', u'_full_text', u'date', u'temperature', u'place'])

    column_types = self._get_column_types('test1')
    assert_equal(
        column_types,
        [u'int4', u'tsvector', u'timestamp', u'numeric', u'text'])
def test_no_entries(self):
    """Expect ``loader.load_table`` to raise ``LoaderError`` for ``no_entries.csv``."""
    csv_filepath = get_sample_filepath('no_entries.csv')
    # No datastore table is created for this file, so the loader has to
    # raise; otherwise datastore_active would be set for a non-existent
    # datastore table.
    resource_id = 'test1'
    factories.Resource(id=resource_id)
    # The raised exception is never inspected, so it is not bound to a name.
    with assert_raises(LoaderError):
        loader.load_table(csv_filepath, resource_id=resource_id,
                          mimetype='csv', logger=PrintLogger())
def test_boston_311_sample5(self):
    """Load ``boston_311_sample5.csv`` and print how long the load took.

    This test makes no assertions; it only prints a start timestamp and
    the elapsed time in seconds.
    """
    # to create the test file:
    # head -n 100001 ckanext/xloader/tests/samples/boston_311.csv > ckanext/xloader/tests/samples/boston_311_sample5.csv
    csv_filepath = get_sample_filepath('boston_311_sample5.csv')
    resource_id = 'test1'
    factories.Resource(id=resource_id)
    import time
    t0 = time.time()
    # Parenthesised single-argument print produces the same output as the
    # Python 2 statement and is also valid as the Python 3 function.
    print('{} Start load'.format(
        time.strftime('%H:%M:%S', time.localtime(t0))))
    loader.load_table(csv_filepath, resource_id=resource_id,
                      mimetype='csv', logger=PrintLogger())
    print('Load: {}s'.format(time.time() - t0))
def test_boston_311_complete(self):
    """Load the full ``boston_311.csv`` file and print how long the load took.

    This test makes no assertions; it only prints a start timestamp and
    the elapsed time in seconds. The sample file has to be downloaded
    first (see the command below).
    """
    # to get the test file:
    # curl -o ckanext/xloader/tests/samples/boston_311.csv https://data.boston.gov/dataset/8048697b-ad64-4bfc-b090-ee00169f2323/resource/2968e2c0-d479-49ba-a884-4ef523ada3c0/download/311.csv
    csv_filepath = get_sample_filepath('boston_311.csv')
    resource_id = 'test1'
    factories.Resource(id=resource_id)
    import time
    t0 = time.time()
    # Parenthesised single-argument print produces the same output as the
    # Python 2 statement and is also valid as the Python 3 function.
    print('{} Start load'.format(
        time.strftime('%H:%M:%S', time.localtime(t0))))
    loader.load_table(csv_filepath, resource_id=resource_id,
                      mimetype='csv', logger=PrintLogger())
    print('Load: {}s'.format(time.time() - t0))
def test_no_entries(self):
    """Expect ``loader.load_table`` to raise ``LoaderError`` for ``no_entries.csv``."""
    resource_id = "test1"
    empty_csv = get_sample_filepath("no_entries.csv")
    factories.Resource(id=resource_id)

    # No datastore table is created for this file, so the loader has to
    # raise; otherwise datastore_active would be set for a non-existent
    # datastore table.
    with pytest.raises(LoaderError):
        loader.load_table(
            empty_csv,
            resource_id=resource_id,
            mimetype="csv",
            logger=PrintLogger(),
        )
def test_boston_311_sample5(self):
    """Load ``boston_311_sample5.csv`` and print how long the load took.

    This test makes no assertions; it only prints a start timestamp and
    the elapsed time in seconds.
    """
    # to create the test file:
    # head -n 100001 ckanext/xloader/tests/samples/boston_311.csv > ckanext/xloader/tests/samples/boston_311_sample5.csv
    import time

    resource_id = "test1"
    sample_path = get_sample_filepath("boston_311_sample5.csv")
    factories.Resource(id=resource_id)

    started_at = time.time()
    start_stamp = time.strftime("%H:%M:%S", time.localtime(started_at))
    print("{} Start load".format(start_stamp))

    loader.load_table(
        sample_path,
        resource_id=resource_id,
        mimetype="csv",
        logger=PrintLogger(),
    )

    elapsed = time.time() - started_at
    print("Load: {}s".format(elapsed))
def test_boston_311(self):
    """Load ``boston_311_sample.csv`` and check rows, column names and types.

    The loaded records, column names and column types are printed before
    each comparison.
    """
    csv_filepath = get_sample_filepath('boston_311_sample.csv')
    resource_id = 'test1'
    factories.Resource(id=resource_id)
    loader.load_table(csv_filepath, resource_id=resource_id,
                      mimetype='csv', logger=PrintLogger())

    records = self._get_records('test1')
    # Parenthesised single-argument print produces the same output as the
    # Python 2 statement and is also valid as the Python 3 function.
    print(records)
    assert_equal(
        records,
        [(1, Decimal('101002153891'),
          datetime.datetime(2017, 7, 6, 23, 38, 43),
          datetime.datetime(2017, 7, 21, 8, 30),
          u'', u'ONTIME', u'Open', u' ',
          u'Street Light Outages', u'Public Works Department',
          u'Street Lights', u'Street Light Outages',
          u'PWDx_Street Light Outages', u'PWDx',
          u'', u'',
          u'480 Harvard St Dorchester MA 02124',
          Decimal('8'), Decimal('7'), Decimal('4'),
          u'B3', u'Greater Mattapan', Decimal('9'),
          u'Ward 14', Decimal('1411'),
          u'480 Harvard St', Decimal('2124'),
          Decimal('42.288'), Decimal('-71.0927'),
          u'Citizens Connect App'),
         (2, Decimal('101002153890'),
          datetime.datetime(2017, 7, 6, 23, 29, 13),
          datetime.datetime(2017, 9, 11, 8, 30),
          u'', u'ONTIME', u'Open', u' ',
          u'Graffiti Removal', u'Property Management',
          u'Graffiti', u'Graffiti Removal',
          u'PROP_GRAF_GraffitiRemoval', u'PROP',
          u' https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg',
          u'',
          u'522 Saratoga St East Boston MA 02128',
          Decimal('1'), Decimal('9'), Decimal('1'),
          u'A7', u'East Boston', Decimal('1'),
          u'Ward 1', Decimal('110'),
          u'522 Saratoga St', Decimal('2128'),
          Decimal('42.3807'), Decimal('-71.0259'),
          u'Citizens Connect App'),
         (3, Decimal('101002153889'),
          datetime.datetime(2017, 7, 6, 23, 24, 20),
          datetime.datetime(2017, 9, 11, 8, 30),
          u'', u'ONTIME', u'Open', u' ',
          u'Graffiti Removal', u'Property Management',
          u'Graffiti', u'Graffiti Removal',
          u'PROP_GRAF_GraffitiRemoval', u'PROP',
          u' https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg',
          u'',
          u'965 Bennington St East Boston MA 02128',
          Decimal('1'), Decimal('9'), Decimal('1'),
          u'A7', u'East Boston', Decimal('1'),
          u'Ward 1', Decimal('112'),
          u'965 Bennington St', Decimal('2128'),
          Decimal('42.386'), Decimal('-71.008'),
          u'Citizens Connect App')]
    )

    print(self._get_column_names('test1'))
    assert_equal(
        self._get_column_names('test1'),
        [u'_id', u'_full_text', u'CASE_ENQUIRY_ID', u'open_dt',
         u'target_dt', u'closed_dt', u'OnTime_Status', u'CASE_STATUS',
         u'CLOSURE_REASON', u'CASE_TITLE', u'SUBJECT', u'REASON',
         u'TYPE', u'QUEUE', u'Department', u'SubmittedPhoto',
         u'ClosedPhoto', u'Location', u'Fire_district', u'pwd_district',
         u'city_council_district', u'police_district', u'neighborhood',
         u'neighborhood_services_district', u'ward', u'precinct',
         u'LOCATION_STREET_NAME', u'LOCATION_ZIPCODE', u'Latitude',
         u'Longitude', u'Source'])

    print(self._get_column_types('test1'))
    assert_equal(
        self._get_column_types('test1'),
        [u'int4', u'tsvector',
         u'numeric', u'timestamp', u'timestamp',
         u'text', u'text', u'text', u'text', u'text',
         u'text', u'text', u'text', u'text', u'text',
         u'text', u'text', u'text',
         u'numeric', u'numeric', u'numeric',
         u'text', u'text',
         u'numeric', u'text', u'numeric', u'text',
         u'numeric', u'numeric', u'numeric',
         u'text'])
def test_boston_311(self, Session):
    """Load ``boston_311_sample.csv`` and check rows, column names and types.

    The loaded records, column names and column types are printed before
    each comparison.
    """
    resource_id = "test1"
    sample_path = get_sample_filepath("boston_311_sample.csv")
    factories.Resource(id=resource_id)
    loader.load_table(
        sample_path,
        resource_id=resource_id,
        mimetype="csv",
        logger=PrintLogger(),
    )

    records = self._get_records(Session, "test1")
    print(records)
    expected_records = [
        (
            1, Decimal("101002153891"),
            datetime.datetime(2017, 7, 6, 23, 38, 43),
            datetime.datetime(2017, 7, 21, 8, 30),
            u"", u"ONTIME", u"Open", u" ",
            u"Street Light Outages", u"Public Works Department",
            u"Street Lights", u"Street Light Outages",
            u"PWDx_Street Light Outages", u"PWDx",
            u"", u"",
            u"480 Harvard St Dorchester MA 02124",
            Decimal("8"), Decimal("7"), Decimal("4"),
            u"B3", u"Greater Mattapan", Decimal("9"),
            u"Ward 14", Decimal("1411"),
            u"480 Harvard St", Decimal("2124"),
            Decimal("42.288"), Decimal("-71.0927"),
            u"Citizens Connect App",
        ),
        (
            2, Decimal("101002153890"),
            datetime.datetime(2017, 7, 6, 23, 29, 13),
            datetime.datetime(2017, 9, 11, 8, 30),
            u"", u"ONTIME", u"Open", u" ",
            u"Graffiti Removal", u"Property Management",
            u"Graffiti", u"Graffiti Removal",
            u"PROP_GRAF_GraffitiRemoval", u"PROP",
            u" https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg",  # noqa
            u"",
            u"522 Saratoga St East Boston MA 02128",
            Decimal("1"), Decimal("9"), Decimal("1"),
            u"A7", u"East Boston", Decimal("1"),
            u"Ward 1", Decimal("110"),
            u"522 Saratoga St", Decimal("2128"),
            Decimal("42.3807"), Decimal("-71.0259"),
            u"Citizens Connect App",
        ),
        (
            3, Decimal("101002153889"),
            datetime.datetime(2017, 7, 6, 23, 24, 20),
            datetime.datetime(2017, 9, 11, 8, 30),
            u"", u"ONTIME", u"Open", u" ",
            u"Graffiti Removal", u"Property Management",
            u"Graffiti", u"Graffiti Removal",
            u"PROP_GRAF_GraffitiRemoval", u"PROP",
            u" https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg",  # noqa
            u"",
            u"965 Bennington St East Boston MA 02128",
            Decimal("1"), Decimal("9"), Decimal("1"),
            u"A7", u"East Boston", Decimal("1"),
            u"Ward 1", Decimal("112"),
            u"965 Bennington St", Decimal("2128"),
            Decimal("42.386"), Decimal("-71.008"),
            u"Citizens Connect App",
        ),
    ]
    assert records == expected_records

    # The helper is called once for the printout and once for the check.
    print(self._get_column_names(Session, "test1"))
    expected_names = [
        u"_id", u"_full_text", u"CASE_ENQUIRY_ID", u"open_dt",
        u"target_dt", u"closed_dt", u"OnTime_Status", u"CASE_STATUS",
        u"CLOSURE_REASON", u"CASE_TITLE", u"SUBJECT", u"REASON",
        u"TYPE", u"QUEUE", u"Department", u"SubmittedPhoto",
        u"ClosedPhoto", u"Location", u"Fire_district", u"pwd_district",
        u"city_council_district", u"police_district", u"neighborhood",
        u"neighborhood_services_district", u"ward", u"precinct",
        u"LOCATION_STREET_NAME", u"LOCATION_ZIPCODE", u"Latitude",
        u"Longitude", u"Source",
    ]
    assert self._get_column_names(Session, "test1") == expected_names

    print(self._get_column_types(Session, "test1"))
    expected_types = [
        u"int4", u"tsvector",
        u"numeric", u"timestamp", u"timestamp",
        u"text", u"text", u"text", u"text", u"text",
        u"text", u"text", u"text", u"text", u"text",
        u"text", u"text", u"text",
        u"numeric", u"numeric", u"numeric",
        u"text", u"text",
        u"numeric", u"text", u"numeric", u"text",
        u"numeric", u"numeric", u"numeric",
        u"text",
    ]
    assert self._get_column_types(Session, "test1") == expected_types
def test_simple(self, Session):
    """Load ``simple.xls`` and check the stored rows, column names and types.

    The file is loaded through ``loader.load_table`` into resource
    ``test1``; the results are read back with the ``_get_*`` helpers.
    """
    resource_id = "test1"
    xls_path = get_sample_filepath("simple.xls")
    factories.Resource(id=resource_id)
    loader.load_table(
        xls_path,
        resource_id=resource_id,
        mimetype="xls",
        logger=PrintLogger(),
    )

    first_row = self._get_records(
        Session, "test1", limit=1, exclude_full_text_column=False
    )[0]
    # Only a substring of the second field is checked.
    # Indexed record looks like this (depending on CKAN version?):
    # "'-01':2,3 '00':4,5,6 '1':7 '2011':1 'galway':8"
    # "'-01':4,5 '00':6,7,8 '1':1 '2011':3 'galway':2"
    # "'-01':2,3 '00':5,6 '1':7 '2011':1 'galway':8 't00':4"
    assert "'galway':" in first_row[1]

    stored_rows = self._get_records(Session, "test1")
    expected_rows = [
        (1, datetime.datetime(2011, 1, 1, 0, 0), Decimal("1"), u"Galway"),
        (2, datetime.datetime(2011, 1, 2, 0, 0), Decimal("-1"), u"Galway"),
        (3, datetime.datetime(2011, 1, 3, 0, 0), Decimal("0"), u"Galway"),
        (4, datetime.datetime(2011, 1, 1, 0, 0), Decimal("6"), u"Berkeley"),
        (5, datetime.datetime(2011, 1, 2, 0, 0), Decimal("8"), u"Berkeley"),
        (6, datetime.datetime(2011, 1, 3, 0, 0), Decimal("5"), u"Berkeley"),
    ]
    assert stored_rows == expected_rows

    column_names = self._get_column_names(Session, "test1")
    assert column_names == [
        u"_id", u"_full_text", u"date", u"temperature", u"place",
    ]

    column_types = self._get_column_types(Session, "test1")
    assert column_types == [
        u"int4", u"tsvector", u"timestamp", u"numeric", u"text",
    ]