def test_reload_with_overridden_types(self):
    """Reload a CSV after Data Dictionary type overrides are set.

    Loads simple.csv, overrides the first two column types via
    ``datastore_create`` (as the Data Dictionary UI would), reloads the
    same file, and checks that the table columns adopt the overridden
    types and that rows containing nulls are still full-text indexed.
    """
    if not p.toolkit.check_ckan_version(min_version='2.7'):
        raise SkipTest(
            'Requires CKAN 2.7 - see https://github.com/ckan/ckan/pull/3557'
        )
    csv_filepath = get_sample_filepath('simple.csv')
    resource_id = 'test1'
    factories.Resource(id=resource_id)
    loader.load_csv(csv_filepath, resource_id=resource_id,
                    mimetype='text/csv', logger=PrintLogger())

    # Change types, as it would be done by Data Dictionary
    rec = p.toolkit.get_action('datastore_search')(None, {
        'resource_id': resource_id,
        'limit': 0
    })
    fields = [f for f in rec['fields'] if not f['id'].startswith('_')]
    fields[0]['info'] = {'type_override': 'timestamp'}
    fields[1]['info'] = {'type_override': 'numeric'}
    p.toolkit.get_action('datastore_create')({
        'ignore_auth': True
    }, {
        'resource_id': resource_id,
        'force': True,
        'fields': fields
    })

    # Load it again with new types
    fields = loader.load_csv(csv_filepath, resource_id=resource_id,
                             mimetype='text/csv', logger=PrintLogger())
    loader.create_column_indexes(fields=fields, resource_id=resource_id,
                                 logger=PrintLogger())

    assert_equal(len(self._get_records('test1')), 6)
    assert_equal(
        self._get_column_names('test1'),
        [u'_id', u'_full_text', u'date', u'temperature', u'place'])
    assert_equal(self._get_column_types('test1'),
                 [u'int4', u'tsvector', u'timestamp', u'numeric', u'text'])

    # check that rows with nulls are indexed correctly
    records = self._get_records('test1', exclude_full_text_column=False)
    # parenthesized form works under both Python 2 and Python 3
    print(records)
    assert_equal(records[4][1], "'berkeley':1")
    assert_equal(records[5][1], "'-01':2 '-03':3 '00':4,5,6 '2011':1 '5':7")
def test_simple_with_indexing(self):
    """Load simple.csv, build column indexes, and verify the first row's
    full-text search vector."""
    sample_path = get_sample_filepath('simple.csv')
    res_id = 'test1'
    factories.Resource(id=res_id)
    loaded_fields = loader.load_csv(sample_path,
                                    resource_id=res_id,
                                    mimetype='text/csv',
                                    logger=PrintLogger())
    loader.create_column_indexes(fields=loaded_fields,
                                 resource_id=res_id,
                                 logger=PrintLogger())
    first_row = self._get_records(
        'test1', limit=1, exclude_full_text_column=False)[0]
    assert_equal(first_row[1], "'-01':2,3 '1':4 '2011':1 'galway':5")
def test_simple_with_indexing(self, Session):
    """Load simple.csv, build column indexes, and verify the first row's
    full-text search vector."""
    sample_path = get_sample_filepath("simple.csv")
    res_id = "test1"
    factories.Resource(id=res_id)
    loaded_fields = loader.load_csv(
        sample_path,
        resource_id=res_id,
        mimetype="text/csv",
        logger=PrintLogger(),
    )
    loader.create_column_indexes(fields=loaded_fields,
                                 resource_id=res_id,
                                 logger=PrintLogger())
    first_row = self._get_records(
        Session, "test1", limit=1, exclude_full_text_column=False)[0]
    assert first_row[1] == "'-01':2,3 '1':4 '2011':1 'galway':5"
def test_reload_with_overridden_types(self, Session):
    """Reload a CSV after Data Dictionary type overrides are set.

    Overrides the first two column types via ``datastore_create``, reloads
    the same file, and checks that the table columns adopt the overridden
    types and that rows containing nulls are still full-text indexed.
    """
    sample_path = get_sample_filepath("simple.csv")
    res_id = "test1"
    factories.Resource(id=res_id)
    loader.load_csv(
        sample_path,
        resource_id=res_id,
        mimetype="text/csv",
        logger=PrintLogger(),
    )

    # Simulate Data Dictionary overrides on the first two data columns.
    search_result = p.toolkit.get_action("datastore_search")(None, {
        "resource_id": res_id,
        "limit": 0
    })
    data_fields = [
        f for f in search_result["fields"] if not f["id"].startswith("_")
    ]
    data_fields[0]["info"] = {"type_override": "timestamp"}
    data_fields[1]["info"] = {"type_override": "numeric"}
    p.toolkit.get_action("datastore_create")(
        {"ignore_auth": True},
        {
            "resource_id": res_id,
            "force": True,
            "fields": data_fields,
        },
    )

    # Reload the same file; the overrides should now drive column types.
    data_fields = loader.load_csv(
        sample_path,
        resource_id=res_id,
        mimetype="text/csv",
        logger=PrintLogger(),
    )
    loader.create_column_indexes(fields=data_fields,
                                 resource_id=res_id,
                                 logger=PrintLogger())

    assert len(self._get_records(Session, "test1")) == 6
    expected_names = [u"_id", u"_full_text", u"date", u"temperature", u"place"]
    assert self._get_column_names(Session, "test1") == expected_names
    expected_types = [u"int4", u"tsvector", u"timestamp", u"numeric", u"text"]
    assert self._get_column_types(Session, "test1") == expected_types

    # Rows containing nulls must still be indexed correctly.
    rows = self._get_records(Session, "test1", exclude_full_text_column=False)
    print(rows)
    assert rows[4][1] == "'berkeley':1"
    assert rows[5][1] == "'-01':2 '-03':3 '00':4,5,6 '2011':1 '5':7"