def build_context(self):
    '''Build the context dictionary used for calls made as u'harvest'.

    The returned dict carries the model, the session, the u'harvest'
    user name, a freshly built default package schema, 'validate' set
    to False and 'api_version' set to 1.
    '''
    return dict(
        model=model,
        session=Session,
        user=u'harvest',
        schema=default_package_schema(),
        validate=False,
        api_version=1,
    )
 def form_to_db_schema(self):
     '''Return the default package schema plus a 'vocab_tags' field.

     The added field is validated with ignore_missing followed by
     convert_to_tags(TEST_VOCAB_NAME).
     '''
     schema = default_package_schema()
     vocab_tag_validators = [
         ignore_missing,
         convert_to_tags(TEST_VOCAB_NAME),
     ]
     schema.update(vocab_tags=vocab_tag_validators)
     return schema
Esempio n. 3
0
    def _create_or_update_package(self, package_dict, harvest_object):
        '''
            Creates a new package or updates an existing one according to the
            package dictionary provided. The package dictionary should look like
            the REST API response for a package:

            http://ckan.net/api/rest/package/statistics-catalunya

            Note that the package_dict must contain an id, which will be used to
            check if the package needs to be created or updated (use the remote
            dataset id).

            If the remote server provides the modification date of the remote
            package, add it to package_dict['metadata_modified'].

            Returns True when the package was created, updated or left
            untouched because it was already up to date. When the package
            fails validation the error is logged and saved against the
            harvest object, and None is returned.

        '''
        try:
            # Change the default schema so that an id is accepted
            schema = default_package_schema()
            schema["id"] = [ignore_missing, unicode]

            context = {
                'model': model,
                'session': Session,
                'user': u'harvest',
                'api_version': '2',
                'schema': schema,
                # package_show receives only the context, so the id of the
                # package to look up travels in it
                'id': package_dict['id'],
            }

            # Check if package exists
            try:
                existing_package_dict = package_show(context)
                # Update when no remote modification date was supplied or
                # when the remote date is newer than the stored one
                if 'metadata_modified' not in package_dict or \
                   package_dict['metadata_modified'] > existing_package_dict['metadata_modified']:
                    log.info('Package with GUID %s exists and needs to be updated',
                             harvest_object.guid)
                    # Update package
                    updated_package = package_update_rest(package_dict, context)

                    harvest_object.package_id = updated_package['id']
                    harvest_object.save()
                else:
                    log.info('Package with GUID %s not updated, skipping...',
                             harvest_object.guid)

            except NotFound:
                # Package needs to be created; the lookup id is dropped from
                # the context before calling the create action
                del context['id']
                log.info('Package with GUID %s does not exist, let\'s create it',
                         harvest_object.guid)
                new_package = package_create_rest(package_dict, context)
                harvest_object.package_id = new_package['id']
                harvest_object.save()

            return True

        except ValidationError as e:
            log.exception(e)
            self._save_object_error(
                'Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict),
                harvest_object, 'Import')
            return None
Esempio n. 4
0
 def db_to_form_schema(self):
     '''Return the default package schema adjusted for showing in the form.

     'tags' is replaced by a sub-schema whose '__extras' validators are
     keep_extras and free_tags_only, and a 'vocab_tags_selected' field is
     added, validated with convert_from_tags(TEST_VOCAB_NAME) followed by
     ignore_missing.
     '''
     schema = default_package_schema()
     schema['tags'] = {'__extras': [keep_extras, free_tags_only]}
     schema['vocab_tags_selected'] = [
         convert_from_tags(TEST_VOCAB_NAME),
         ignore_missing,
     ]
     return schema
Esempio n. 5
0
 def build_context(self):
     '''Assemble the context dictionary used for calls made as u'harvest'.

     Besides the model and session it holds a freshly built default
     package schema, 'validate' set to False and 'api_version' set to 1.
     '''
     context = {}
     context['model'] = model
     context['session'] = Session
     context['user'] = u'harvest'
     context['schema'] = default_package_schema()
     context['validate'] = False
     context['api_version'] = 1
     return context
 def db_to_form_schema(self):
     '''Return the default package schema adjusted for showing in the form.

     The 'tags' entry is swapped for a sub-schema whose '__extras'
     validators are keep_extras and free_tags_only; 'vocab_tags_selected'
     is added with convert_from_tags(TEST_VOCAB_NAME) then ignore_missing.
     '''
     schema = default_package_schema()
     tag_schema = {'__extras': [keep_extras, free_tags_only]}
     selected_validators = [
         convert_from_tags(TEST_VOCAB_NAME),
         ignore_missing,
     ]
     schema.update(tags=tag_schema,
                   vocab_tags_selected=selected_validators)
     return schema
Esempio n. 7
0
    def test_1_package_schema(self):
        '''Run a dictized package through default_package_schema validation.

        Four passes are checked: the dictized 'annakarenina' package renamed
        to 'anna2'; the validated data renamed back to the existing name with
        its title and one resource url removed; that data again with the
        package's own id supplied; and finally a name made of characters the
        schema rejects.
        '''
        def run_validation(data_dict):
            # A fresh default schema is built for every validation pass
            return validate(data_dict, default_package_schema(), self.context)

        annakarenina = model.Session.query(model.Package) \
            .filter_by(name='annakarenina').first()
        package_id = annakarenina.id

        dictized = package_dictize(annakarenina, self.context)
        self.remove_changable_columns(dictized)

        dictized['name'] = 'anna2'
        # we need to remove these as they have been added
        for added_key in ('relationships_as_object',
                          'relationships_as_subject'):
            del dictized[added_key]

        expected_resources = [
            {'alt_url': u'alt123',
             'description': u'Full text. Needs escaping: " Umlaut: \xfc',
             'format': u'plain text',
             'hash': u'abc123',
             'size_extra': u'123',
             'url': u'http://www.annakarenina.com/download/x=1&y=2'},
            {'alt_url': u'alt345',
             'description': u'Index of the novel',
             'format': u'JSON',
             'hash': u'def456',
             'size_extra': u'345',
             'url': u'http://www.annakarenina.com/index.json'},
        ]
        expected = {
            'extras': [{'key': u'genre', 'value': u'"romantic novel"'},
                       {'key': u'original media', 'value': u'"book"'}],
            'groups': [{u'name': u'david', u'title': u"Dave's books"},
                       {u'name': u'roger', u'title': u"Roger's books"}],
            'license_id': u'other-open',
            'name': u'anna2',
            'notes': u'Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n',
            'resources': expected_resources,
            'tags': [{'name': u'Flexible \u30a1'},
                     {'name': u'russian'},
                     {'name': u'tolstoy'}],
            'title': u'A Novel By Tolstoy',
            'url': u'http://www.annakarenina.com',
            'version': u'0.7a',
        }

        converted_data, errors = run_validation(dictized)
        assert converted_data == expected, pformat(converted_data)
        assert not errors, errors

        # Feed the validated output back in: an existing name, no title,
        # a changed url on the first resource and no url on the second
        data = converted_data
        data['name'] = u'annakarenina'
        del data["title"]
        data["resources"][0]["url"] = 'fsdfafasfsaf'
        del data["resources"][1]["url"]

        converted_data, errors = run_validation(data)
        assert errors == {
            'name': [u'That URL is already in use.'],
            'resources': [{}, {'url': [u'Missing value']}],
        }, pformat(errors)

        # With the package's own id supplied, only the missing resource
        # url is expected to be reported
        data["id"] = package_id

        converted_data, errors = run_validation(data)
        assert errors == {
            'resources': [{}, {'url': [u'Missing value']}],
        }, pformat(errors)

        data['name'] = '????jfaiofjioafjij'

        converted_data, errors = run_validation(data)
        assert errors == {
            'name': [u'Url must be purely lowercase alphanumeric (ascii) characters and these symbols: -_'],
            'resources': [{}, {'url': [u'Missing value']}],
        }, pformat(errors)
Esempio n. 8
0
 def form_to_db_schema(self):
     '''Return the default package schema plus a 'vocab_tags' field.

     'vocab_tags' is validated with ignore_missing followed by
     convert_to_tags(TEST_VOCAB_NAME).
     '''
     schema = default_package_schema()
     schema['vocab_tags'] = [
         ignore_missing,
         convert_to_tags(TEST_VOCAB_NAME),
     ]
     return schema
Esempio n. 9
0
    def _create_or_update_package(self, package_dict, harvest_object):
        '''
        Creates a new package or updates an existing one according to the
        package dictionary provided. The package dictionary should look like
        the REST API response for a package:

        http://ckan.net/api/rest/package/statistics-catalunya

        Note that the package_dict must contain an id, which will be used to
        check if the package needs to be created or updated (use the remote
        dataset id).

        If the remote server provides the modification date of the remote
        package, add it to package_dict['metadata_modified'].

        The tags in package_dict are munged and de-duplicated in place.

        Returns True once the package has been created or updated and the
        harvest object has been flagged as the current one. Returns None
        when the existing package is already up to date, or when the
        package fails validation (the error is then logged and saved
        against the harvest object).

        '''
        try:
            # Change default schema
            schema = default_package_schema()
            schema['id'] = [ignore_missing, unicode]
            schema['__junk'] = [ignore]

            # Check API version; fall back to the defaults when there is
            # no (or an empty) harvester config
            #TODO: use site user when available
            config = self.config or {}
            api_version = config.get('api_version', '2')
            user_name = config.get('user', u'harvest')

            context = {
                'model': model,
                'session': Session,
                'user': user_name,
                'api_version': api_version,
                'schema': schema,
            }

            # Munge the tags and drop the duplicates
            package_dict['tags'] = list(set(
                munge_tag(tag) for tag in package_dict.get('tags', [])))

            # Check if package exists
            data_dict = {'id': package_dict['id']}
            try:
                existing_package_dict = get_action('package_show')(context, data_dict)
                # Update when no remote modification date was supplied or
                # when the remote date is newer than the stored one
                if 'metadata_modified' not in package_dict or \
                   package_dict['metadata_modified'] > existing_package_dict.get('metadata_modified'):
                    log.info('Package with GUID %s exists and needs to be updated',
                             harvest_object.guid)
                    # Update package
                    context['id'] = package_dict['id']
                    new_package = get_action('package_update_rest')(context, package_dict)

                else:
                    log.info('Package with GUID %s not updated, skipping...',
                             harvest_object.guid)
                    return None

            except NotFound:
                # Package needs to be created

                # Check if name has not already been used
                package_dict['name'] = self._check_name(package_dict['name'])

                log.info('Package with GUID %s does not exist, let\'s create it',
                         harvest_object.guid)
                new_package = get_action('package_create_rest')(context, package_dict)
                harvest_object.package_id = new_package['id']

            # Flag the other objects linking to this package as not current anymore
            from ckanext.harvest.model import harvest_object_table
            conn = Session.connection()
            flag_not_current = update(harvest_object_table) \
                    .where(harvest_object_table.c.package_id==bindparam('b_package_id')) \
                    .values(current=False)
            conn.execute(flag_not_current, b_package_id=new_package['id'])
            Session.commit()

            # Flag this as the current harvest object

            harvest_object.package_id = new_package['id']
            harvest_object.current = True
            harvest_object.save()

            return True

        except ValidationError as e:
            log.exception(e)
            self._save_object_error(
                'Invalid package with GUID %s: %r' % (harvest_object.guid, e.error_dict),
                harvest_object, 'Import')
            return None
Esempio n. 10
0
    def test_1_package_schema(self):
        '''Run a dictized package through default_package_schema validation.

        Four passes are checked: the dictized 'annakarenina' package renamed
        to 'anna2'; the validated data renamed back to the existing name with
        its title and one resource url removed; that data again with the
        package's own id supplied; and finally a name made of characters the
        schema rejects.
        '''
        def run_validation(data_dict):
            # A fresh default schema is built for every validation pass
            return validate(data_dict, default_package_schema(), self.context)

        annakarenina = model.Session.query(model.Package) \
            .filter_by(name='annakarenina').first()
        package_id = annakarenina.id

        dictized = package_dictize(annakarenina, self.context)
        self.remove_changable_columns(dictized)

        pprint(dictized)

        dictized['name'] = 'anna2'

        expected_resources = [
            {'alt_url': u'alt123',
             'description': u'Full text. Needs escaping: " Umlaut: \xfc',
             'format': u'plain text',
             'hash': u'abc123',
             'size_extra': u'123',
             'url': u'http://www.annakarenina.com/download/x=1&y=2'},
            {'alt_url': u'alt345',
             'description': u'Index of the novel',
             'format': u'json',
             'hash': u'def456',
             'size_extra': u'345',
             'url': u'http://www.annakarenina.com/index.json'},
        ]
        expected = {
            'extras': [{'key': u'genre', 'value': u'"romantic novel"'},
                       {'key': u'original media', 'value': u'"book"'}],
            'groups': [{'name': u'david'}, {'name': u'roger'}],
            'license_id': u'other-open',
            'name': u'anna2',
            'notes': u'Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n \nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n',
            'resources': expected_resources,
            'tags': [{'name': u'Flexible \u30a1'},
                     {'name': u'russian'},
                     {'name': u'tolstoy'}],
            'title': u'A Novel By Tolstoy',
            'url': u'http://www.annakarenina.com',
            'version': u'0.7a',
        }

        converted_data, errors = run_validation(dictized)

        pprint(errors)
        assert converted_data == expected, pformat(converted_data)
        assert not errors, errors

        # Feed the validated output back in: an existing name, no title,
        # a changed url on the first resource and no url on the second
        data = converted_data
        data['name'] = u'annakarenina'
        del data["title"]
        data["resources"][0]["url"] = 'fsdfafasfsaf'
        del data["resources"][1]["url"]

        converted_data, errors = run_validation(data)

        # Only the name clash is expected; no 'resources' errors are
        # asserted for the removed url
        assert errors == {
            'name': [u'That URL is already in use.'],
        }, pformat(errors)

        # With the package's own id supplied no errors are expected
        data["id"] = package_id

        converted_data, errors = run_validation(data)
        assert errors == {}, pformat(errors)

        data['name'] = '????jfaiofjioafjij'

        converted_data, errors = run_validation(data)
        assert errors == {
            'name': [u'Url must be purely lowercase alphanumeric (ascii) characters and these symbols: -_'],
        }, pformat(errors)
Esempio n. 11
0
    def _create_or_update_package(self, package_dict, harvest_object):
        '''
        Creates a new package or updates an existing one according to the
        package dictionary provided. The package dictionary should look like
        the REST API response for a package:

        http://ckan.net/api/rest/package/statistics-catalunya

        Note that the package_dict must contain an id, which will be used to
        check if the package needs to be created or updated (use the remote
        dataset id).

        If the remote server provides the modification date of the remote
        package, add it to package_dict['metadata_modified'].

        The tags in package_dict are munged and de-duplicated in place.

        Returns True once the package has been created or updated and the
        harvest object has been flagged as the current one. Returns None
        when the existing package is already up to date, or when the
        package fails validation (the error is then logged and saved
        against the harvest object).

        '''
        try:
            # Change default schema
            schema = default_package_schema()
            schema['id'] = [ignore_missing, unicode]
            schema['__junk'] = [ignore]

            # Check API version; fall back to the defaults when there is
            # no (or an empty) harvester config
            #TODO: use site user when available
            config = self.config or {}
            api_version = config.get('api_version', '2')
            user_name = config.get('user', u'harvest')

            context = {
                'model': model,
                'session': Session,
                'user': user_name,
                'api_version': api_version,
                'schema': schema,
            }

            # Munge the tags and drop the duplicates
            package_dict['tags'] = list(set(
                munge_tag(tag) for tag in package_dict.get('tags', [])))

            # Check if package exists
            data_dict = {'id': package_dict['id']}
            try:
                existing_package_dict = get_action('package_show')(context,
                                                                   data_dict)
                # Update when no remote modification date was supplied or
                # when the remote date is newer than the stored one
                if 'metadata_modified' not in package_dict or \
                   package_dict['metadata_modified'] > existing_package_dict.get('metadata_modified'):
                    log.info(
                        'Package with GUID %s exists and needs to be updated',
                        harvest_object.guid)
                    # Update package
                    context['id'] = package_dict['id']
                    new_package = get_action('package_update_rest')(
                        context, package_dict)

                else:
                    log.info('Package with GUID %s not updated, skipping...',
                             harvest_object.guid)
                    return None

            except NotFound:
                # Package needs to be created

                # Check if name has not already been used
                package_dict['name'] = self._check_name(package_dict['name'])

                log.info(
                    'Package with GUID %s does not exist, let\'s create it',
                    harvest_object.guid)
                new_package = get_action('package_create_rest')(context,
                                                                package_dict)
                harvest_object.package_id = new_package['id']

            # Flag the other objects linking to this package as not current anymore
            from ckanext.harvest.model import harvest_object_table
            conn = Session.connection()
            flag_not_current = update(harvest_object_table) \
                    .where(harvest_object_table.c.package_id==bindparam('b_package_id')) \
                    .values(current=False)
            conn.execute(flag_not_current, b_package_id=new_package['id'])
            Session.commit()

            # Flag this as the current harvest object

            harvest_object.package_id = new_package['id']
            harvest_object.current = True
            harvest_object.save()

            return True

        except ValidationError as e:
            log.exception(e)
            self._save_object_error(
                'Invalid package with GUID %s: %r' %
                (harvest_object.guid, e.error_dict), harvest_object, 'Import')
            return None
Esempio n. 12
0
File: forms.py Progetto: arkka/ckan
 def form_to_db_schema(self):
     '''Return the default package schema with a group 'capacity' field.

     The 'capacity' entry is added to the 'groups' sub-schema and is
     validated with ignore_missing followed by unicode.
     '''
     schema = default_package_schema()
     group_schema = schema['groups']
     group_schema['capacity'] = [ignore_missing, unicode]
     return schema
Esempio n. 13
0
    def test_1_package_schema(self):
        """Run a dictized package through default_package_schema validation.

        Four passes are checked: the dictized "annakarenina" package renamed
        to "anna2"; the validated data renamed back to the existing name with
        its title and one resource url removed; that data again with the
        package's own id supplied; and finally a name made of characters the
        schema rejects.
        """
        def run_validation(data_dict):
            # A fresh default schema is built for every validation pass
            return validate(data_dict, default_package_schema(), self.context)

        annakarenina = model.Session.query(model.Package) \
            .filter_by(name="annakarenina").first()
        package_id = annakarenina.id

        dictized = package_dictize(annakarenina, self.context)
        self.remove_changable_columns(dictized)

        pprint(dictized)

        dictized["name"] = "anna2"

        expected_resources = [
            {"alt_url": u"alt123",
             "description": u'Full text. Needs escaping: " Umlaut: \xfc',
             "format": u"plain text",
             "hash": u"abc123",
             "size_extra": u"123",
             "url": u"http://www.annakarenina.com/download/x=1&y=2"},
            {"alt_url": u"alt345",
             "description": u"Index of the novel",
             "format": u"json",
             "hash": u"def456",
             "size_extra": u"345",
             "url": u"http://www.annakarenina.com/index.json"},
        ]
        expected = {
            "extras": [{"key": u"genre", "value": u'"romantic novel"'},
                       {"key": u"original media", "value": u'"book"'}],
            "groups": [{u"name": u"david", u"title": u"Dave's books"},
                       {u"name": u"roger", u"title": u"Roger's books"}],
            "license_id": u"other-open",
            "name": u"anna2",
            "notes": u"Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n",
            "resources": expected_resources,
            "tags": [{"name": u"Flexible \u30a1"},
                     {"name": u"russian"},
                     {"name": u"tolstoy"}],
            "title": u"A Novel By Tolstoy",
            "url": u"http://www.annakarenina.com",
            "version": u"0.7a",
        }

        converted_data, errors = run_validation(dictized)

        pprint(errors)
        assert converted_data == expected, pformat(converted_data)
        assert not errors, errors

        # Feed the validated output back in: an existing name, no title,
        # a changed url on the first resource and no url on the second
        data = converted_data
        data["name"] = u"annakarenina"
        del data["title"]
        data["resources"][0]["url"] = "fsdfafasfsaf"
        del data["resources"][1]["url"]

        converted_data, errors = run_validation(data)

        # Only the name clash is expected; no 'resources' errors are
        # asserted for the removed url
        assert errors == {
            "name": [u"That URL is already in use."],
        }, pformat(errors)

        # With the package's own id supplied no errors are expected
        data["id"] = package_id

        converted_data, errors = run_validation(data)
        assert errors == {}, pformat(errors)

        data["name"] = "????jfaiofjioafjij"

        converted_data, errors = run_validation(data)
        assert errors == {
            "name": [u"Url must be purely lowercase alphanumeric (ascii) characters and these symbols: -_"],
        }, pformat(errors)
Esempio n. 14
0
 def form_to_db_schema(self):
     '''Return the default package schema with a group 'capacity' field.

     'capacity' is added to the 'groups' sub-schema, validated with
     ignore_missing followed by unicode.
     '''
     schema = default_package_schema()
     capacity_validators = [ignore_missing, unicode]
     schema['groups']['capacity'] = capacity_validators
     return schema
Esempio n. 15
0
 def db_to_form_schema(self):
     '''Optional hook that reshapes data from the database into a
     format suitable for the form.

     Returns the default package schema with a 'capacity' entry added to
     its 'groups' sub-schema, validated with ignore_missing followed by
     unicode.
     '''
     schema = default_package_schema()
     group_schema = schema['groups']
     group_schema['capacity'] = [ignore_missing, unicode]
     return schema