Example #1
0
    def test_simple(self):
        # Load simple.csv through loader.load_csv, then check the stored
        # rows, the column names and the column types for the resource.
        resource_id = 'test1'
        sample_path = get_sample_filepath('simple.csv')
        factories.Resource(id=resource_id)
        loader.load_csv(sample_path, resource_id=resource_id,
                        mimetype='text/csv', logger=loader.PrintLogger())

        # First row only, fetched without excluding the full-text column.
        first_row = self._get_records(resource_id, limit=1,
                                      exclude_full_text_column=False)
        expected_first_row = [
            (1, "'-01':2,3 '1':4 '2011':1 'galway':5", u'2011-01-01', u'1',
             u'Galway')
        ]
        assert_equal(first_row, expected_first_row)

        # All rows, fetched with the helper's default arguments.
        all_rows = self._get_records(resource_id)
        expected_rows = [
            (1, u'2011-01-01', u'1', u'Galway'),
            (2, u'2011-01-02', u'-1', u'Galway'),
            (3, u'2011-01-03', u'0', u'Galway'),
            (4, u'2011-01-01', u'6', u'Berkeley'),
            (5, u'2011-01-02', u'8', u'Berkeley'),
            (6, u'2011-01-03', u'5', u'Berkeley'),
        ]
        assert_equal(all_rows, expected_rows)

        column_names = self._get_column_names(resource_id)
        assert_equal(
            column_names,
            [u'_id', u'_full_text', u'date', u'temperature', u'place'])

        # Every CSV column is expected to be stored as text here.
        column_types = self._get_column_types(resource_id)
        assert_equal(
            column_types,
            [u'int4', u'tsvector', u'text', u'text', u'text'])
Example #2
0
    def test_boston_311(self):
        # Load the Boston 311 sample CSV through loader.load_csv, then check
        # the stored rows, the column names and the column types.
        csv_filepath = get_sample_filepath('boston_311_sample.csv')
        resource_id = 'test1'
        factories.Resource(id=resource_id)
        loader.load_csv(csv_filepath,
                        resource_id=resource_id,
                        mimetype='text/csv',
                        logger=loader.PrintLogger())

        records = self._get_records(resource_id)
        # The print calls use the single-argument call form, which prints
        # the same text under the Python 2 print statement and the Python 3
        # print function. The output is kept as a debugging aid.
        print(records)
        assert_equal(records, [
            (1, u'101002153891', u'2017-07-06 23:38:43',
             u'2017-07-21 08:30:00', None, u'ONTIME', u'Open', u' ',
             u'Street Light Outages', u'Public Works Department',
             u'Street Lights', u'Street Light Outages',
             u'PWDx_Street Light Outages', u'PWDx', None, None,
             u'480 Harvard St  Dorchester  MA  02124', u'8', u'07', u'4',
             u'B3', u'Greater Mattapan', u'9', u'Ward 14', u'1411',
             u'480 Harvard St', u'02124', u'42.288', u'-71.0927',
             u'Citizens Connect App'),
            (2, u'101002153890', u'2017-07-06 23:29:13',
             u'2017-09-11 08:30:00', None, u'ONTIME', u'Open', u' ',
             u'Graffiti Removal', u'Property Management', u'Graffiti',
             u'Graffiti Removal', u'PROP_GRAF_GraffitiRemoval', u'PROP',
             u' https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg',
             None, u'522 Saratoga St  East Boston  MA  02128', u'1', u'09',
             u'1', u'A7', u'East Boston', u'1', u'Ward 1', u'0110',
             u'522 Saratoga St', u'02128', u'42.3807', u'-71.0259',
             u'Citizens Connect App'),
            (3, u'101002153889', u'2017-07-06 23:24:20',
             u'2017-09-11 08:30:00', None, u'ONTIME', u'Open', u' ',
             u'Graffiti Removal', u'Property Management', u'Graffiti',
             u'Graffiti Removal', u'PROP_GRAF_GraffitiRemoval', u'PROP',
             u' https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg',
             None, u'965 Bennington St  East Boston  MA  02128', u'1', u'09',
             u'1', u'A7', u'East Boston', u'1', u'Ward 1', u'0112',
             u'965 Bennington St', u'02128', u'42.386', u'-71.008',
             u'Citizens Connect App')
        ])

        # Fetch once and reuse for both the debug output and the assertion.
        column_names = self._get_column_names(resource_id)
        print(column_names)
        assert_equal(column_names, [
            u'_id', u'_full_text', u'CASE_ENQUIRY_ID', u'open_dt',
            u'target_dt', u'closed_dt', u'OnTime_Status', u'CASE_STATUS',
            u'CLOSURE_REASON', u'CASE_TITLE', u'SUBJECT', u'REASON', u'TYPE',
            u'QUEUE', u'Department', u'SubmittedPhoto', u'ClosedPhoto',
            u'Location', u'Fire_district', u'pwd_district',
            u'city_council_district', u'police_district', u'neighborhood',
            u'neighborhood_services_district', u'ward', u'precinct',
            u'LOCATION_STREET_NAME', u'LOCATION_ZIPCODE', u'Latitude',
            u'Longitude', u'Source'
        ])

        column_types = self._get_column_types(resource_id)
        print(column_types)
        # Each record tuple holds _id plus the data columns (no _full_text),
        # so the number of text columns is len(record) - 1.
        assert_equal(column_types, [u'int4', u'tsvector'] +
                     [u'text'] * (len(records[0]) - 1))
Example #3
0
 def test_shapefile_zip(self):
     # A zipped shapefile passed to load_csv as text/csv must raise
     # LoaderError whose message carries the PostgreSQL load failure.
     resource_id = 'test1'
     zip_path = get_sample_filepath('polling_locations.shapefile.zip')
     factories.Resource(id=resource_id)
     with assert_raises(LoaderError) as raised:
         loader.load_csv(zip_path, resource_id=resource_id,
                         mimetype='text/csv', logger=loader.PrintLogger())
     expected_fragment = ('Error during the load into PostgreSQL: '
                          'unquoted carriage return found in data')
     assert_in(expected_fragment, str(raised.exception))
Example #4
0
 def test_geojson(self):
     # A GeoJSON file passed to load_csv as text/csv must raise LoaderError
     # complaining that its opening text is not a valid field name.
     resource_id = 'test1'
     geojson_path = get_sample_filepath('polling_locations.geojson')
     factories.Resource(id=resource_id)
     with assert_raises(LoaderError) as raised:
         loader.load_csv(geojson_path, resource_id=resource_id,
                         mimetype='text/csv', logger=loader.PrintLogger())
     message = str(raised.exception)
     assert_in('Error with field definition', message)
     assert_in('"{"type":"FeatureCollection"" is not a valid field name',
               message)
Example #5
0
 def test_kml(self):
     # A KML file passed to load_csv as text/csv must raise LoaderError
     # complaining that its XML declaration is not a valid field name.
     resource_id = 'test1'
     kml_path = get_sample_filepath('polling_locations.kml')
     factories.Resource(id=resource_id)
     with assert_raises(LoaderError) as raised:
         loader.load_csv(kml_path, resource_id=resource_id,
                         mimetype='text/csv', logger=loader.PrintLogger())
     message = str(raised.exception)
     assert_in('Error with field definition', message)
     assert_in(
         '"<?xml version="1.0" encoding="utf-8" ?>" is not a valid field name',
         message)
Example #6
0
    def test_reload_with_overridden_types(self):
        # Load simple.csv, set type overrides on the first two data fields,
        # load the same file again and check the overridden column types.
        resource_id = 'test1'
        sample_path = get_sample_filepath('simple.csv')
        factories.Resource(id=resource_id)
        loader.load_csv(sample_path, resource_id=resource_id,
                        mimetype='text/csv', logger=loader.PrintLogger())

        # Change types, as it would be done by Data Dictionary.
        # limit=0: only the field definitions are needed, not the rows.
        datastore_search = p.toolkit.get_action('datastore_search')
        search_result = datastore_search(
            None, {'resource_id': resource_id, 'limit': 0})
        # Skip the fields whose id starts with an underscore
        # (_id / _full_text).
        data_fields = [
            field for field in search_result['fields']
            if not field['id'].startswith('_')
        ]
        data_fields[0]['info'] = {'type_override': 'timestamp'}
        data_fields[1]['info'] = {'type_override': 'numeric'}
        datastore_create = p.toolkit.get_action('datastore_create')
        datastore_create(
            {'ignore_auth': True},
            {'resource_id': resource_id, 'force': True,
             'fields': data_fields})

        # Load it again with new types
        loader.load_csv(sample_path, resource_id=resource_id,
                        mimetype='text/csv', logger=loader.PrintLogger())

        row_count = len(self._get_records(resource_id))
        assert_equal(row_count, 6)
        column_names = self._get_column_names(resource_id)
        assert_equal(
            column_names,
            [u'_id', u'_full_text', u'date', u'temperature', u'place'])
        column_types = self._get_column_types(resource_id)
        assert_equal(
            column_types,
            [u'int4', u'tsvector', u'timestamp', u'numeric', u'text'])
Example #7
0
    def test_reload(self):
        # Load simple.csv twice into the same resource and check that the
        # table still has six rows and the same column names and types.
        resource_id = 'test1'
        sample_path = get_sample_filepath('simple.csv')
        factories.Resource(id=resource_id)
        loader.load_csv(sample_path, resource_id=resource_id,
                        mimetype='text/csv', logger=loader.PrintLogger())

        # Load it again unchanged
        loader.load_csv(sample_path, resource_id=resource_id,
                        mimetype='text/csv', logger=loader.PrintLogger())

        row_count = len(self._get_records(resource_id))
        assert_equal(row_count, 6)
        column_names = self._get_column_names(resource_id)
        assert_equal(
            column_names,
            [u'_id', u'_full_text', u'date', u'temperature', u'place'])
        column_types = self._get_column_types(resource_id)
        assert_equal(
            column_types,
            [u'int4', u'tsvector', u'text', u'text', u'text'])