Code example #1
    def test_reload_with_overridden_types(self):
        """Reloading a CSV after Data Dictionary type overrides should
        recreate the table with the overridden column types and index
        rows containing NULLs correctly.
        """
        if not p.toolkit.check_ckan_version(min_version='2.7'):
            raise SkipTest(
                'Requires CKAN 2.7 - see https://github.com/ckan/ckan/pull/3557'
            )
        csv_filepath = get_sample_filepath('simple.csv')
        resource_id = 'test1'
        factories.Resource(id=resource_id)
        loader.load_csv(csv_filepath,
                        resource_id=resource_id,
                        mimetype='text/csv',
                        logger=PrintLogger())
        # Change types, as it would be done by Data Dictionary
        rec = p.toolkit.get_action('datastore_search')(None, {
            'resource_id': resource_id,
            'limit': 0
        })
        # Only user columns get overrides; datastore-internal columns
        # (those prefixed with '_') are left alone.
        fields = [f for f in rec['fields'] if not f['id'].startswith('_')]
        fields[0]['info'] = {'type_override': 'timestamp'}
        fields[1]['info'] = {'type_override': 'numeric'}
        p.toolkit.get_action('datastore_create')({
            'ignore_auth': True
        }, {
            'resource_id': resource_id,
            'force': True,
            'fields': fields
        })

        # Load it again with new types
        fields = loader.load_csv(csv_filepath,
                                 resource_id=resource_id,
                                 mimetype='text/csv',
                                 logger=PrintLogger())
        loader.create_column_indexes(fields=fields,
                                     resource_id=resource_id,
                                     logger=PrintLogger())

        assert_equal(len(self._get_records('test1')), 6)
        assert_equal(
            self._get_column_names('test1'),
            [u'_id', u'_full_text', u'date', u'temperature', u'place'])
        assert_equal(self._get_column_types('test1'),
                     [u'int4', u'tsvector', u'timestamp', u'numeric', u'text'])

        # check that rows with nulls are indexed correctly
        records = self._get_records('test1', exclude_full_text_column=False)
        # print() call form works on both Python 2 and 3 (was a py2-only
        # print statement).
        print(records)
        assert_equal(records[4][1], "'berkeley':1")
        assert_equal(records[5][1],
                     "'-01':2 '-03':3 '00':4,5,6 '2011':1 '5':7")
Code example #2
    def test_simple_with_indexing(self):
        """Loading a simple CSV and indexing its columns should produce
        the expected _full_text index value for the first row.
        """
        filepath = get_sample_filepath('simple.csv')
        res_id = 'test1'
        factories.Resource(id=res_id)
        loaded_fields = loader.load_csv(filepath,
                                        resource_id=res_id,
                                        mimetype='text/csv',
                                        logger=PrintLogger())
        loader.create_column_indexes(fields=loaded_fields,
                                     resource_id=res_id,
                                     logger=PrintLogger())

        first_row = self._get_records(
            'test1', limit=1, exclude_full_text_column=False)[0]
        assert_equal(first_row[1], "'-01':2,3 '1':4 '2011':1 'galway':5")
Code example #3
    def test_simple_with_indexing(self, Session):
        """Load a simple CSV, index its columns, and verify the
        _full_text index value of the first record.
        """
        filepath = get_sample_filepath("simple.csv")
        res_id = "test1"
        factories.Resource(id=res_id)
        created_fields = loader.load_csv(
            filepath,
            resource_id=res_id,
            mimetype="text/csv",
            logger=PrintLogger(),
        )
        loader.create_column_indexes(
            fields=created_fields,
            resource_id=res_id,
            logger=PrintLogger(),
        )

        records = self._get_records(
            Session, "test1", limit=1, exclude_full_text_column=False)
        expected = "'-01':2,3 '1':4 '2011':1 'galway':5"
        assert records[0][1] == expected
Code example #4
    def test_reload_with_overridden_types(self, Session):
        """Reload a CSV after Data Dictionary type overrides and check
        that the overridden column types are applied and that rows
        containing NULLs are indexed correctly.
        """
        filepath = get_sample_filepath("simple.csv")
        res_id = "test1"
        factories.Resource(id=res_id)
        loader.load_csv(
            filepath,
            resource_id=res_id,
            mimetype="text/csv",
            logger=PrintLogger(),
        )

        # Change types, as it would be done by Data Dictionary
        search_result = p.toolkit.get_action("datastore_search")(
            None, {"resource_id": res_id, "limit": 0})
        # Overrides apply only to user columns, not the datastore's
        # '_'-prefixed internal columns.
        user_fields = [
            field for field in search_result["fields"]
            if not field["id"].startswith("_")
        ]
        user_fields[0]["info"] = {"type_override": "timestamp"}
        user_fields[1]["info"] = {"type_override": "numeric"}
        p.toolkit.get_action("datastore_create")(
            {"ignore_auth": True},
            {
                "resource_id": res_id,
                "force": True,
                "fields": user_fields,
            },
        )

        # Load it again with new types
        reloaded_fields = loader.load_csv(
            filepath,
            resource_id=res_id,
            mimetype="text/csv",
            logger=PrintLogger(),
        )
        loader.create_column_indexes(
            fields=reloaded_fields,
            resource_id=res_id,
            logger=PrintLogger(),
        )

        assert len(self._get_records(Session, "test1")) == 6
        expected_names = [
            u"_id", u"_full_text", u"date", u"temperature", u"place"]
        assert self._get_column_names(Session, "test1") == expected_names
        expected_types = [
            u"int4", u"tsvector", u"timestamp", u"numeric", u"text"]
        assert self._get_column_types(Session, "test1") == expected_types

        # check that rows with nulls are indexed correctly
        records = self._get_records(
            Session, "test1", exclude_full_text_column=False)
        print(records)
        assert records[4][1] == "'berkeley':1"
        assert records[5][1] == "'-01':2 '-03':3 '00':4,5,6 '2011':1 '5':7"