Example #1
    def test_powerview_add_resource_valid(self):
        '''Adding a resource to powerview changes the resource list returned
        for the powerview.'''
        sysadmin = Sysadmin()
        r1 = Resource()

        create_dict = factories.PowerView()

        nosetools.assert_equal(create_dict['resources'], [])
        nosetools.assert_equal(PowerviewResourceAssociation.count(), 0)

        toolkit.get_action('powerview_add_resource')(
            context={'user': sysadmin['name']},
            data_dict={
                'id': create_dict['id'],
                'resource_id': r1['id']
            }
        )

        updated_dict = toolkit.get_action('powerview_show')(
            context={'user': sysadmin['name']},
            data_dict={'id': create_dict['id']}
        )

        nosetools.assert_equal(PowerviewResourceAssociation.count(), 1)

        nosetools.assert_equal(updated_dict['resources'], [r1['id']])
Example #2
    def test_powerview_add_resource_multiple_add(self):
        '''Attempting to add the same resource multiple times to a powerview
        raises an error.'''
        sysadmin = Sysadmin()
        r1 = Resource()

        create_dict = factories.PowerView()
        nosetools.assert_equal(create_dict['resources'], [])

        toolkit.get_action('powerview_add_resource')(
            context={'user': sysadmin['name']},
            data_dict={
                'id': create_dict['id'],
                'resource_id': r1['id']
            }
        )
        nosetools.assert_equal(PowerviewResourceAssociation.count(), 1)

        # try to add resources to same powerview again...
        with nosetools.assert_raises(ValidationError):
            toolkit.get_action('powerview_add_resource')(
                context={'user': sysadmin['name']},
                data_dict={
                    'id': create_dict['id'],
                    'resource_id': r1['id']
                }
            )
Example #3
def m19115store_create(context, data_dict):
    '''Adds a new 19115store.

    :param name: the 19115store name
    :type name: string
    :param api_url: the API URL
    :type api_url: string
    :param package_id: an existing package id
    :type package_id: string

    :returns: the newly created data object
    :rtype: dictionary
    '''
    if 'name' not in data_dict:
        raise p.toolkit.ValidationError({'name': ['name required']})
    if 'api_url' not in data_dict:
        raise p.toolkit.ValidationError({'api_url': ['api_url required']})

    package_create_response = toolkit.get_action('package_create')(context, data_dict)
    log.info(package_create_response)

    if package_create_response['id'] is None:
        raise p.toolkit.ValidationError({'package_id': ['package_id is none']})

    package_id = package_create_response['id']
    resource_name = data_dict.get('name').lower()
    resource_url = data_dict.get('api_url')
    resource_webstore_url = data_dict.get('api_url')
    resource_dict = {
        'package_id': package_id,
        'name': resource_name,
        'url': resource_url,
        'webstore_url': resource_webstore_url,
        'resource_type': '19115store',
        'format': 'JSON',
    }
    resource_create_response = toolkit.get_action('resource_create')(context, resource_dict)
    log.info(resource_create_response)
    return {'package_id': package_id}
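
A minimal usage sketch for this action (assuming it is registered through a plugin's IActions implementation; the user name and URL here are illustrative):

result = toolkit.get_action('m19115store_create')(
    {'user': 'some-sysadmin'},  # illustrative context
    {'name': 'Example 19115 Store',
     'api_url': 'https://example.org/api'})
# result == {'package_id': <id of the newly created package>}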
Example #4
    def test_powerview_add_resource_add_resource_to_existing_list(self):
        '''Adding a resource to powerview maintains existing resources.'''
        sysadmin = Sysadmin()
        r1 = Resource()
        r2 = Resource()
        r3 = Resource()

        create_dict = factories.PowerView(resources=[r1['id'], r2['id']])
        nosetools.assert_equal(set(create_dict['resources']),
                               set([r1['id'], r2['id']]))
        nosetools.assert_equal(PowerviewResourceAssociation.count(), 2)

        toolkit.get_action('powerview_add_resource')(
            context={'user': sysadmin['name']},
            data_dict={
                'id': create_dict['id'],
                'resource_id': r3['id']
            }
        )

        updated_dict = toolkit.get_action('powerview_show')(
            context={'user': sysadmin['name']},
            data_dict={'id': create_dict['id']}
        )

        nosetools.assert_equal(PowerviewResourceAssociation.count(), 3)
        nosetools.assert_equal(set(updated_dict['resources']),
                               set([r1['id'], r2['id'], r3['id']]))
Example #5
    def test_harvest_db_logger(self):
        # Create source and check if harvest_log table is populated
        data_dict = SOURCE_DICT.copy()
        data_dict['source_type'] = 'test'
        source = factories.HarvestSourceObj(**data_dict)
        content = 'Harvest source created: %s' % source.id
        log = harvest_model.Session.query(harvest_model.HarvestLog).\
                filter(harvest_model.HarvestLog.content==content).first()
                
        self.assertIsNotNone(log)
        self.assertEqual(log.level, 'INFO')
        
        context = {
            'model': model,
            'session': model.Session,
            'ignore_auth': True,
        }

        data = toolkit.get_action('harvest_log_list')(context, {})
        self.assertTrue(len(data) > 0)
        self.assertIn('level', data[0])
        self.assertIn('content', data[0])
        self.assertIn('created', data[0])
        self.assertTrue(data[0]['created'] > data[1]['created'])
        
        per_page = 1
        data = toolkit.get_action('harvest_log_list')(context, {'level': 'info', 'per_page': per_page})
        self.assertEqual(len(data), per_page)
        self.assertEqual(data[0]['level'], 'INFO')
Example #6
    def purge_all(self):

        response = self.ask(
            'You are about to remove all datasets and datastore tables. Are you sure you want to continue?'
        )

        # If the user confirms the action, delete the datastore tables and purge the packages
        if response:

            pkgs = toolkit.get_action('current_package_list_with_resources')(self.context, {})

            for pkg_dict in pkgs:
                for resource in pkg_dict['resources']:
                    try:
                        toolkit.get_action('datastore_delete')(self.context, {'resource_id': resource['id'], 'force': True})
                    except logic.NotFound:
                        # Ignore missing datastore tables
                        pass

                # Load the package model and purge it
                pkg = model.Package.get(pkg_dict['id'])

                if pkg:
                    rev = model.repo.new_revision()
                    pkg.purge()
                    model.repo.commit_and_remove()
                    print '%s purged' % pkg_dict['name']
Example #7
    def delete(self, dataset_id, issue_number):
        dataset = self._before_dataset(dataset_id)
        if 'cancel' in request.params:
            h.redirect_to('issues_show',
                          dataset_id=dataset_id,
                          issue_number=issue_number)

        if request.method == 'POST':
            try:
                toolkit.get_action('issue_delete')(
                    data_dict={'issue_number': issue_number,
                               'dataset_id': dataset_id}
                )
            except toolkit.NotAuthorized:
                msg = _('Unauthorized to delete issue {0}').format(
                    issue_number)
                toolkit.abort(401, msg)

            h.flash_notice(
                _('Issue {0} has been deleted.').format(issue_number)
            )
            h.redirect_to('issues_dataset', dataset_id=dataset_id)
        else:
            return render('issues/confirm_delete.html',
                          extra_vars={
                              'issue_number': issue_number,
                              'pkg': dataset,
                          })
Example #8
    def before_view(self, pkg_dict):
        if not self.is_supported_package_type(pkg_dict):
            return pkg_dict

        # create resource views if necessary
        user = tk.get_action('get_site_user')({'ignore_auth': True}, {})
        context = {
            'model': model,
            'session': model.Session,
            'user': user['name']
        }
        tk.check_access('package_create_default_resource_views', context)

        # get the dataset via API, as the pkg_dict does not contain all fields
        dataset = tk.get_action('package_show')(
            context,
            {'id': pkg_dict['id']}
        )

        # Make sure resource views are created before showing a dataset
        tk.get_action('package_create_default_resource_views')(
            context,
            {'package': dataset}
        )

        return pkg_dict
Example #9
  def command(self):
    """
    """
    self._load_config()
    log = logging.getLogger(__name__)

    import ckan.model as model


    log.info('ADDING GROUPS (Eurovoc Domains)')
    root = ET.parse('ckanext/eurovoc/eurovoc_xml/dom_en.xml').getroot()

    for record in root.iter('RECORD'):
      id = record.find('DOMAINE_ID').text
      title = record.find('LIBELLE').text.title()
      name = slugify(title).lower()
      desc = 'Eurovoc Domain: ' + id + ' ' + title
      grp_dict = {'id': id, 'title': title, 'name': name, 'type': 'group', 'extras': [{'key': 'Eurovoc Domain', 'value': title}, {'key': 'Eurovoc Domain ID', 'value': id}]}
      log.info('Creating group: ' + id + ' - ' + title)
      context = {'user': '******', 'model': model, 'session': model.Session}
      try:
        toolkit.get_action('group_create')(context, grp_dict)
      except Exception:
        # ignore errors (e.g. the group already exists)
        pass

    log.info('ADDING VOCABULARY THESAURUS')

    context = {'user': '******', 'model': model, 'session': model.Session}
    voc_dict = {'name': 'eurovoc_thesaurus'}
    try:
      voc = toolkit.get_action('vocabulary_create')(context, voc_dict)
    except ValidationError:
      # the vocabulary already exists
      voc = toolkit.get_action('vocabulary_show')(context, {'id': 'eurovoc_thesaurus'})
Example #10
def get_package_dict(datasetID):
    user = tk.get_action('get_site_user')({}, {})
    context = {'user': user['name']}
    try:
        return tk.get_action('package_show')(context, {'id': datasetID})
    except Exception:
        # e.g. dataset not found or not authorized
        return {}
Example #11
def create_orgs(organization_id, site_user):
    api_url = config.get('ckanext.glasgow.metadata_api', '').rstrip('/')
    api_endpoint = '{}/Metadata/Organisation/{}'.format(api_url, organization_id)

    request = requests.get(api_endpoint, verify=False)
    try:
        result = _fetch_from_ec(request)
        org = result['MetadataResultSet'][0]
    except (KeyError, IndexError):
        print 'failed to fetch org {} from EC. Response {}'.format(organization_id, str(result))
        return

    context = {
        'model': model,
        'session': model.Session,
        'user': site_user,
        'local_action': True,
    }
    org_name = get_org_name(org, 'Title')
    data_dict = {
        'id': org['Id'],
        'title': org['Title'],
        'name': org_name,
    }

    try:
        toolkit.get_action('organization_create')(context, data_dict)
        context.pop('local_action', None)
        return toolkit.get_action('organization_show')(context, {'id': organization_id})
    except toolkit.ValidationError:
        print 'failed to create org {}'.format(organization_id)
Example #12
def _search_issues(dataset_id, status=issuemodel.ISSUE_STATUS.open,
                   sort='newest', spam_state=None, q='', page=1,
                   per_page=get_issues_per_page()[0]):
    # use the function params to set default for our arguments to our
    # data_dict if needed
    params = locals().copy()

    # convert per_page, page parameters to api limit/offset
    limit = per_page
    offset = (page - 1) * limit
    params.pop('page', None)
    params.pop('per_page', None)
    params['offset'] = offset

    issues = toolkit.get_action('issue_search')(data_dict=params)
    issue_count = toolkit.get_action('issue_count')(data_dict=params)

    pagination = Pagination(page, limit, issue_count)

    template_variables = {
        'issues': issues,
        'status': status,
        'sort': sort,
        'q': q,
        'pagination': pagination,
    }
    if spam_state:
        template_variables['spam_state'] = spam_state
    return template_variables
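
For illustration: page=3 with per_page=20 converts to limit=20 and offset=(3 - 1) * 20 = 40, so the first two pages of results are skipped.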
Example #13
    def command(cls, config_ini, org_names):
        common.load_config(config_ini)
        common.register_translator()
        from ckan.plugins import toolkit
        from ckan import model
        orgs = [toolkit.get_action('organization_show')(
                data_dict={'id': org_name})
                for org_name in org_names]
        source_org, dest_org = orgs
        assert source_org
        assert dest_org
        search_results = toolkit.get_action('package_search')(
            data_dict=dict(fq='publisher:%s' % source_org['name'], rows=1000))
        print 'Datasets: %s' % search_results['count']
        stats = Stats()
        if len(search_results['results']) != search_results['count']:
            assert 0, 'need to implement paging'

        #context = {
        #    'user': get_script_user(__name__)['name'],
        #    'ignore_auth': True,
        #    'model': model}
        rev = model.repo.new_revision()
        rev.author = 'script-%s.py' % __file__
        for dataset in search_results['results']:
            model.Package.get(dataset['id']).owner_org = dest_org['id']
            #dataset_ = toolkit.get_action('package_patch')(
            #    context=context,
            #    data_dict=dict(id=dataset['id'], owner_org=dest_org['id']))
            print stats.add('Changed owner_org', dataset['name'])
        print stats.report()
        print 'Writing'
        model.Session.commit()
Example #14
def ccca_get_orgs():
    """ Delivers a user-dependent list of organizations and users"""

    try:
        all_users = tk.get_action('user_list')({}, {})
    except Exception:
        return None

    # make the return dict
    user_orgs = {}

    for user in all_users:
        orgs_for_user = []
        try:
            u_orgs = tk.get_action('organization_list_for_other_user')({}, {'user_id': user['id']})
        except Exception:
            continue

        for u_org in u_orgs:
            org = u_org['organization']
            org_sum = {}
            org_sum['name'] = org['name']
            org_sum['display_name'] = org['display_name']
            org_sum['url'] = h.url_for(controller='organization', action='read', id=org['name'])
            orgs_for_user.append(org_sum)

        user_orgs[user['name']] = orgs_for_user

    return user_orgs
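
The returned mapping is keyed by user name; a sketch of its shape (values are illustrative):

# {'some_user': [{'name': 'org-name',
#                 'display_name': 'Org Name',
#                 'url': '/organization/org-name'}, ...]}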
Example #15
  def _asset_from_user(self, data, additional):
    # check that the required fields are present
    for field in ('url',):
      if field not in data:
        raise AssetAbsentFieldsException(field)

    # create resource if not exists
    if not additional['resources']:
      new_id = _make_uuid()
      parent = toolkit.get_action('resource_create')(additional['context'], {
        'package_id':additional['package_id'],
        'id':new_id,
        'url': _site_url() + '/datastore/dump/' + new_id,
        'name':'Assets',
        'resource_type':'asset',
      })
    # get resource if exists
    else:
      parent = toolkit.get_action('resource_show')(additional['context'], {'id': additional['resources'][0].id})

    parent_id = parent['id']
    # create datastore if not exists
    if not parent.get('datastore_active'):
      _default_datastore_create(additional['context'], parent_id)
    additional['parent_id'] = parent_id
    # add asset to datastore
    return self._add_new_asset(data, additional)
Example #16
    def review(self, id):
        """
        sends review notification to all journal admins
        """

        context = self._context()

        try:
            tk.check_access('package_update', context, {'id': id})
        except tk.NotAuthorized:
            tk.abort(403, 'Unauthorized')

        c.pkg_dict = tk.get_action('package_show')(context, {'id': id})

        # avoid multiple notifications (eg. when someone calls review directly)
        if c.pkg_dict.get('dara_edawax_review', 'false') == 'true':
            h.flash_error("Package has already been sent to review")
            redirect(id)

        user_name = tk.c.userobj.fullname or tk.c.userobj.email
        admins = get_group_or_org_admin_ids(c.pkg_dict['owner_org'])
        addresses = map(lambda admin_id: model.User.get(admin_id).email, admins)
        note = n.review(addresses, user_name, id)

        if note:
            c.pkg_dict['dara_edawax_review'] = 'true'
            tk.get_action('package_update')(context, c.pkg_dict)
            h.flash_success('Notification to Editors sent.')
        else:
            h.flash_error('ERROR: Mail could not be sent. Please try again later or contact the site admin.')

        redirect(id)
Example #17
    def download_and_extract(*args, **kwargs):
        # Simulate a change to the package by another party during
        # the download and extraction process.
        toolkit.get_action('package_patch')({'user': sysadmin['name']},
                                            {'id': res_dict['package_id'],
                                             'title': 'A changed title'})
        return {'fulltext': 'foobar'}
Example #18
    def test_update_view_action_success(self, flash_mock):
        """Test the update view action directly (successful test)"""
        resource_view_create = toolkit.get_action('resource_view_create')
        resource_view_update = toolkit.get_action('resource_view_update')
        # First create a resource view
        data_dict = dict(self.base_data_dict.items() + {'title': 'test4'}.items())
        resource_view = resource_view_create(TestViewCreated.context, data_dict)
        # Now try to update it!
        data_dict['id'] = resource_view['id']
        data_dict['longitude_field'] = 'long2'
        resource_view_update(TestViewCreated.context, data_dict)
        # Check we have lat/long values. This is done more extensively in test_actions.
        metadata = MetaData()
        table = Table(self.resource['resource_id'], metadata, autoload=True, autoload_with=TestViewCreated.engine)
        s = select([
            table.c['latitude'],
            table.c['long2'],
            func.st_x(table.c['_geom']).label('x'),
            func.st_y(table.c['_geom']).label('y'),
        ]).where(table.c['_the_geom_webmercator'] != None)
        r = TestViewCreated.engine.execute(s)
        try:
            assert_equal(r.rowcount, 2)
            for row in r:
                assert_equal(float(row['x']), float(row['long2']))
                assert_equal(float(row['y']), float(row['latitude']))
        finally:
            r.close()
        # Check we have a message to inform us all went well
        assert_true(flash_mock.called)
        assert_equal(flash_mock.call_args[1]['category'], 'alert-success')
Example #19
def resource_schema_fkey_delete(context, data_dict):
    '''Delete a resource's schema's foreign key.

    :param resource_id: the ID of the resource
    :type resource_id: string

    :param fkey_uid: the fkey_uid of the foreign key to delete
    :type fkey_uid: string
    '''
    try:
        data_dict, errors = dictization_functions.validate(data_dict,
            schema.resource_schema_fkey_delete_schema(), context)
    except exceptions.InvalidResourceIDException:
        raise toolkit.ValidationError(toolkit._("Invalid resource_id"))
    if errors:
        raise toolkit.ValidationError(errors)

    resource_id = data_dict['resource_id']

    schema_ = toolkit.get_action('resource_schema_show')(context,
        {'resource_id': resource_id})

    current = schema_.get('foreignKeys', [])
    fkeys = [i for i in current if i['fkey_uid'] != data_dict['fkey_uid']]
    schema_['foreignKeys'] = fkeys
    schema_ = json.dumps(schema_)

    resource_dict = toolkit.get_action('resource_show')(context,
        {'id': resource_id})

    toolkit.get_action('resource_update')(context,
        {'id': resource_id, 'url': resource_dict['url'],
         'name': resource_dict['name'], 'schema': schema_})
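
A brief usage sketch (the fkey_uid value identifies the foreign key to remove, e.g. as returned when it was created):

toolkit.get_action('resource_schema_fkey_delete')(
    context, {'resource_id': resource_id, 'fkey_uid': fkey_uid})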
Example #20
def update_package_published_status(package_id, status):
    '''
    Updates the published status for a given package_id
    status:
        True -> set published status to true
        False -> set published status to false
    '''

    pkg = toolkit.get_action('package_show')(None, {'id': package_id})
    # Build a filtered copy rather than removing items from the list while
    # iterating over it, which would skip elements.
    extras = [e for e in pkg.get('extras', []) if e.get('key') != 'published']

    tags = pkg.get('tags')

    if status:
        tags.append({'name': 'published'})
        new_dict = {u'key': u'published', u'value': u'true'}
    else:
        tags = [t for t in tags if t['name'] != 'published']
        new_dict = {u'key': u'published', u'value': u'false'}
    extras.insert(0, new_dict)

    toolkit.get_action('package_patch')(None, {'id': package_id, 'extras': extras, 'tags': tags})

    return True
Example #21
def dataset_version_create(context, data_dict):
    id = data_dict.get('id')
    parent_name = data_dict.get('base_name')

    owner_org = data_dict.get('owner_org')

    parent_dict = {
        'name': parent_name,
    }

    if owner_org:
        parent_dict['owner_org'] = owner_org
        parent_dict['private'] = True
    else:
        parent_dict['private'] = False

    parent = _get_or_create_parent_dataset(
        context,
        parent_dict
    )

    toolkit.get_action('package_relationship_create')(
        _get_context(context), {
            'subject': id,
            'object': parent['id'],
            'type': 'child_of',
        }
    )
Example #22
    def vocabulary(self, name):
        """Retrieve or create a vocabulary"""
        context = {"user": self.admin_user().name}
        try:
            return toolkit.get_action("vocabulary_show")(context, {"id": name})
        except toolkit.ObjectNotFound:
            # the vocabulary does not exist yet, so create it
            return toolkit.get_action("vocabulary_create")(context, {"name": name, "tags": []})
Example #23
def _prepare_celery(context, data_dict, task_type):
  task_id = str(uuid.uuid4())
  user = context['auth_user_obj']
  if not user.sysadmin:
    raise toolkit.NotAuthorized

  userapikey = user.apikey

  celery_context = json.dumps({
      'site_url': config.get('ckan.site_url'),
      'apikey': userapikey,
  })

  task_status = {
      'entity_id': data_dict.get('resource', 'without resource'),
      'entity_type': u'resource',
      'task_type': task_type,
      'key': u'celery_task_id',
      'value': data_dict.get('word', ''),
      'state': 'Preparing',
      'error': u'task_id:%s' % task_id,
      'last_updated': datetime.now().isoformat()
  }

  toolkit.get_action('task_status_update')(context, task_status)

  return task_id, celery_context
Example #24
def resource_schema_pkey_delete(context, data_dict):
    '''Delete a resource's schema's primary key.

    :param resource_id: the ID of the resource
    :type resource_id: string

    '''
    try:
        data_dict, errors = dictization_functions.validate(data_dict,
            schema.resource_schema_pkey_delete_schema(), context)
    except exceptions.InvalidResourceIDException:
        raise toolkit.ValidationError(toolkit._("Invalid resource_id"))
    assert not errors  # Nothing in resource_schema_pkey_delete_schema ever
                       # adds anything to the errors dict.

    resource_id = data_dict.pop('resource_id')

    schema_ = toolkit.get_action('resource_schema_show')(context,
        {'resource_id': resource_id})

    if 'primaryKey' in schema_:
        del schema_['primaryKey']
    schema_ = json.dumps(schema_)

    resource_dict = toolkit.get_action('resource_show')(context,
        {'id': resource_id})

    toolkit.get_action('resource_update')(context,
        {'id': resource_id, 'url': resource_dict['url'],
         'name': resource_dict['name'], 'schema': schema_})
Example #25
    def resource_datapreview(self, resource_id, id):
        '''
        Embedded page for a resource data-preview.

        Depending on the type, different previews are loaded.  This could be an
        img tag where the image is loaded directly or an iframe that embeds a
        webpage, recline or a pdf preview.
        '''

        context = {
            'model': model,
            'session': model.Session,
            'user': c.user or c.author,
            'auth_user_obj': c.userobj
        }

        try:
            c.resource = toolkit.get_action('resource_show')(context,
                                                     {'id': resource_id})
            c.package = toolkit.get_action('package_show')(context, {'id': id})

            data_dict = {'resource': c.resource, 'package': c.package}

            c.resource_json = json.dumps(c.resource)
        except NotFound:
            abort(404, _('Resource not found'))
        except NotAuthorized:
            abort(401, _('Unauthorized to read resource %s') % id)
        else:
            return render('recline_interlink.html')
Example #26
  def manage_assets(self, id, resource_id):
    # inits context
    self._init_context()

    if not c.userobj or not c.userobj.sysadmin:
      base.abort(404)
    try:
      toolkit.c.pkg_dict = toolkit.get_action('package_show')(None, {'id': id})
      toolkit.c.resource = toolkit.get_action('resource_show')(None, {'id': resource_id})
    except toolkit.ObjectNotFound:
      base.abort(404, _('Resource not found'))
    except toolkit.NotAuthorized:
      base.abort(401, _('Unauthorized to edit this resource'))

    page = int(request.params.get('page',1))
    assets = []
    try:
      result = toolkit.get_action('datastore_search')(self.context,{
        'id':resource_id,
        'limit':ASSETS_PER_PAGE,
        'offset':(page-1)*ASSETS_PER_PAGE,
        'sort':'_id asc'
      })
      assets.extend(result['records'])
    except toolkit.ObjectNotFound:
      return base.render('package/manage_assets.html')
    hidden_assets = []
    hidden = DFMPSearchQuery.run({
      'q':'id:{res_id}'.format(res_id=resource_id),
      'rows':100,
      'start':0,
      'fq':'+state:hidden',
    })['results']
    if hidden:
      for item in hidden:
        hidden_assets.append(json.loads(item['data_dict']))

    extra_vars = {
      'assets': assets,
      'hidden_assets': hidden_assets,
      'action_url': h.url_for('ajax_actions'),
    }

    def pager_url(q=None, page=None):
      params = [
        ('page', page),
      ]
      url = h.url_for('manage_assets', id=id, resource_id=resource_id)
      return url_with_params(url, params)

    c.page = h.Page(
        collection=assets,
        page=page,
        url=pager_url,
        item_count=result.get('total', 0),
        items_per_page=ASSETS_PER_PAGE,
    )

    return base.render('package/manage_assets.html', extra_vars=extra_vars)
Example #27
  def _listener_route(self, action, id, resource_id):
    if not c.userobj or not c.userobj.sysadmin:
      base.abort(404)

    if action == 'terminate':
      task = session.query(model.TaskStatus)\
        .filter(
          model.TaskStatus.task_type=='twitter_streaming',
          model.TaskStatus.entity_id==resource_id)\
        .first()
      if not task:
        h.flash_error("Can't find listener")
      if task:
        pid = task.error or '' 
        if not pid:
          h.flash_error("Can't get PID of process")
        else:
          h.flash_success('Success')
          toolkit.get_action('task_status_update')(None, {
            'entity_id': resource_id,
            'task_type': 'twitter_streaming',
            'key': 'celery_task_id',
            'state': 'Terminated',
            'value': 'Ready for start',
            'error': pid,
            'last_updated': datetime.datetime.now().isoformat(),
            'entity_type': 'resource'
          })
          if os.system('kill -9 %s' % pid):
            toolkit.get_action('celery_revoke')(self.context, {'id': pid, 'resource': resource_id})
    base.redirect(h.url_for('getting_tweets', id=id, resource_id=resource_id))
Example #28
def resource_schema_pkey_create(context, data_dict):
    '''Add a primary key to a resource's schema.

    :param resource_id: the ID of the resource
    :type resource_id: string

    :param pkey: the primary key, either the name of one of the fields or a
        list of field names from the resource's schema
    :type pkey: string or iterable of strings

    :returns: the primary key that was created
    :rtype: string or list of strings

    '''
    # Fail if the resource already has a primary key.
    resource_id = toolkit.get_or_bust(data_dict, 'resource_id')
    try:
        pkey = toolkit.get_action('resource_schema_pkey_show')(context,
            {'resource_id': resource_id})
    except exceptions.InvalidResourceIDException:
        raise toolkit.ValidationError(toolkit._("Invalid resource_id"))
    if pkey is not None:
        raise toolkit.ValidationError(toolkit._("The resource already has a "
                                                "primary key"))

    # Otherwise create is the same as update.
    return toolkit.get_action('resource_schema_pkey_update')(context,
                                                             data_dict)
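
A short usage sketch based on the docstring (the field names are illustrative):

pkey = toolkit.get_action('resource_schema_pkey_create')(
    context, {'resource_id': resource_id, 'pkey': ['id', 'date']})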
Example #29
    def download_tabular_data_format(self, package_id):
        '''Return the given package as a Tabular Data Format ZIP file.

        '''
        context = {
            'model': model,
            'session': model.Session,
            'user': toolkit.c.user or toolkit.c.author,
        }
        r = toolkit.response
        r.content_disposition = 'attachment; filename={0}.zip'.format(
            package_id)
        r.content_type = 'application/octet-stream'

        # Make a zipstream and put it in the context. This means the
        # package_to_tabular_data_format action will add files into 
        # the zipstream for us.
        pkg_zipstream = zipstream.ZipFile(mode='w',
                                          compression=zipstream.ZIP_DEFLATED)
        context['pkg_zipstream'] = pkg_zipstream

        toolkit.get_action('package_to_tabular_data_format')(context,
            {'id': package_id})

        return pkg_zipstream
Example #30
def reauthor(dataset, author_mail, msg, context):
    """
    notify author that dataset should be revised
    """

    body = u"""
Dear Author,

the Editors of '{journal}' have requested that you revise the replication files
named '{title}' which you submitted to the ZBW--Journal Data Archive.

URL: {url}

{message}
"""

    def create_message():
        if msg:
            return u"Message: \n========\n\n{}".format(msg)
        return u""

    pkg = tk.get_action('package_show')(context, {'id': dataset})
    org_id = pkg.get('owner_org', pkg.get('group_id', False))
    org = tk.get_action('organization_show')(context, {'id': org_id})
    d = {'journal': org['title'], 'url': package_url(dataset), 'title': pkg.get('name'),
         'message': create_message()}
    body = body.format(**d)
    message = MIMEText(body.encode('utf-8'), 'plain', 'utf-8')
    message['Subject'] = Header(u"ZBW Journal Data Archive: Please revise your uploaded dataset")
    message['From'] = config.get('smtp.mail_from')
    message['To'] = Header(author_mail, 'utf-8')
    message['Date'] = Utils.formatdate(time())
    message['X-Mailer'] = "CKAN {} [Plugin edawax]".format(ckan_version)

    return sendmail(author_mail, message)  # boolean
Example #31
class LanddbcustomizePlugin(plugins.SingletonPlugin,
                            toolkit.DefaultDatasetForm, DefaultTranslation):
    plugins.implements(plugins.IConfigurer)
    plugins.implements(plugins.IDatasetForm)
    plugins.implements(plugins.ITemplateHelpers)
    plugins.implements(plugins.IFacets)
    plugins.implements(plugins.IPackageController, inherit=True)
    plugins.implements(plugins.ITranslation)

    if update_vocab:
        #translation of terms (these cannot use ITranslate)
        try:
            user = toolkit.get_action('get_site_user')({
                'ignore_auth': True
            }, {})
            context = {'user': user['name']}

            term_translation_update = toolkit.get_action(
                'term_translation_update')
            for tagList in [
                    regions_tags, datasources_tags, updatefreqs_tags,
                    custom_tags
            ]:
                for tag in tagList:
                    data = {
                        'term': tag[0],
                        'term_translation': tag[0],
                        'lang_code': "zh_Hant_TW",
                    }
                    term_translation_update(context, data)

                    data = {
                        'term': tag[0],
                        'term_translation': tag[1],
                        'lang_code': "zh_Hans_CN",
                    }
                    term_translation_update(context, data)

                    data = {
                        'term': tag[0],
                        'term_translation': tag[2],
                        'lang_code': "en",
                    }
                    term_translation_update(context, data)
        except Exception:
            print("Vocab not updated")

    # create_vocab('regions', [ t[0] for t in regions_tags] )

    # IConfigurer

    def update_config(self, config_):
        toolkit.add_template_directory(config_, 'templates')
        toolkit.add_public_directory(config_, 'public')
        #toolkit.add_resource('fanstatic', 'landdbcustomize')
        toolkit.add_resource('assets', 'landdbcustomize')

    # IFacet
    def dataset_facets(self, facets_dict, package_type):
        new_facets_dict = extend_facets_dict(facets_dict)
        return new_facets_dict

    def group_facets(self, facets_dict, group_type, package_type):
        # somehow have to edit the facets_dict inplace instead of making a new one
        # https://github.com/ckan/ckan/issues/2713
        n = len(facets_dict)
        items = []
        for i in range(n):
            items.append(facets_dict.popitem(last=False))

        facets_dict['vocab_regions'] = toolkit._(u'Region')
        facets_dict['vocab_datasources'] = toolkit._(u'Data source')
        facets_dict['vocab_updatefreqs'] = toolkit._(u'Update frequency')

        for k, v in items:
            # print("facet dict",k,v)
            if k == 'groups':
                facets_dict[k] = toolkit._(u'Topics')
                continue
            facets_dict[k] = v

        return facets_dict

    def organization_facets(self, facets_dict, organization_type,
                            package_type):
        # somehow have to edit the facets_dict inplace instead of making a new one
        # https://github.com/ckan/ckan/issues/2713
        n = len(facets_dict)
        items = []
        for i in range(n):
            items.append(facets_dict.popitem(last=False))

        facets_dict['vocab_regions'] = toolkit._(u'Region')
        facets_dict['vocab_datasources'] = toolkit._(u'Data source')
        facets_dict['vocab_updatefreqs'] = toolkit._(u'Update frequency')

        for k, v in items:
            # print("facet dict",k,v)
            if k == 'groups':
                facets_dict[k] = toolkit._(u'Topics')
                continue
            facets_dict[k] = v

        return facets_dict

    # IPackageController
    def before_search(self, search_params):
        extras = search_params.get('extras')
        if not extras:
            # There are no extras in the search params, so do nothing.
            return search_params

        start_date = extras.get('ext_startdate')
        end_date = extras.get('ext_enddate')

        if not start_date and not end_date:
            # The user didn't select a start or an end date, so do nothing.
            return search_params
        if not start_date:
            start_date = '*'
        if not end_date:
            end_date = '*'

        # Add a date-range query with the selected start and/or end dates into the Solr facet queries.
        fq = search_params.get('fq', u'')
        fq = u'{fq} +extras_start_date:[* TO {ed}] +extras_end_date:[{sd} TO *]'.format(
            fq=fq, sd=start_date, ed=end_date)
        search_params['fq'] = fq

        return search_params
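
    # For illustration: with ext_startdate=2020-01-01T00:00:00Z and no end date,
    # the fq above becomes
    #   <fq> +extras_start_date:[* TO *] +extras_end_date:[2020-01-01T00:00:00Z TO *]
    # i.e. it matches datasets whose date range overlaps the selected interval.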

    # ITemplateHelpers
    # inform the template of our custom vocab
    def get_helpers(self):
        return {
            'regions': regions,
            'datasources': datasources,
            'updatefreqs': updatefreqs,
        }

    # IDatasetForm

    def _modify_package_schema(self, schema):
        # custom tags
        schema.update({
            'title_en': [
                toolkit.get_validator('ignore_missing'),
                toolkit.get_converter('convert_to_extras'),
            ],
            'region': [
                toolkit.get_validator('ignore_missing'),
                toolkit.get_converter('convert_to_tags')('regions'),
                regions_autoadd,
                toolkit.get_converter('convert_to_tags')('regions'),
            ],
            'datasource': [
                toolkit.get_validator('ignore_missing'),
                toolkit.get_converter('convert_to_tags')('datasources')
            ],
            'updatefreq': [
                toolkit.get_validator('ignore_missing'),
                toolkit.get_converter('convert_to_tags')('updatefreqs')
            ],
            'start_date': [
                toolkit.get_validator('ignore_missing'),
                toolkit.get_validator('isodate'),
                toolkit.get_converter('convert_to_extras'),
            ],
            'end_date': [
                toolkit.get_validator('ignore_missing'),
                toolkit.get_validator('isodate'),
                toolkit.get_converter('convert_to_extras')
            ],
            'last_update_date': [
                toolkit.get_validator('ignore_missing'),
                toolkit.get_validator('isodate'),
                toolkit.get_converter('convert_to_extras')
            ],
        })
        return schema

    def create_package_schema(self):
        # let's grab the default schema in our plugin
        schema = super(LanddbcustomizePlugin, self).create_package_schema()
        schema = self._modify_package_schema(schema)
        return schema

    def update_package_schema(self):
        # let's grab the default schema in our plugin
        schema = super(LanddbcustomizePlugin, self).update_package_schema()
        schema = self._modify_package_schema(schema)
        return schema

    def show_package_schema(self):
        schema = super(LanddbcustomizePlugin, self).show_package_schema()

        # this line prevents custom tags from showing up in the "tags" field
        schema['tags']['__extras'].append(
            toolkit.get_converter('free_tags_only'))
        # add custom tags
        schema.update({
            'title_en': [
                toolkit.get_converter('convert_from_extras'),
                toolkit.get_validator('ignore_missing')
            ],
            'region': [
                toolkit.get_converter('convert_from_tags')('regions'),
                toolkit.get_validator('ignore_missing')
            ],
            'datasource': [
                toolkit.get_converter('convert_from_tags')('datasources'),
                toolkit.get_validator('ignore_missing')
            ],
            'updatefreq': [
                toolkit.get_converter('convert_from_tags')('updatefreqs'),
                toolkit.get_validator('ignore_missing')
            ],
            'start_date': [
                toolkit.get_converter('convert_from_extras'),
                # toolkit.get_validator('isodate'),
                toolkit.get_validator('ignore_missing')
            ],
            'end_date': [
                toolkit.get_converter('convert_from_extras'),
                # toolkit.get_validator('isodate'),
                toolkit.get_validator('ignore_missing')
            ],
            'last_update_date': [
                toolkit.get_converter('convert_from_extras'),
                # toolkit.get_validator('isodate'),
                toolkit.get_validator('ignore_missing')
            ],
        })

        return schema

    def before_view(self, dataset_dict):

        desired_lang_code = request.environ['CKAN_LANG']
        fallback_lang_code = config.get('ckan.locale_default', 'en')

        translated_title = dataset_dict.get("title_" + desired_lang_code, None)
        if translated_title:
            dataset_dict["title"] = translated_title

        return dataset_dict

    def is_fallback(self):
        # Return True to register this plugin as the default handler for
        # package types not handled by any other IDatasetForm plugin.
        return True

    def package_types(self):
        # This plugin doesn't handle any special package types, it just
        # registers itself as the default (above).
        return []
Example #32
def action_user_update(context, data_dict):
    ''' Modified from CKAN: user_update

    Update a user account.

    Normal users can only update their own user accounts. Sysadmins can update
    any user account.

    For further parameters see ``user_create()``.

    :param id: the name or id of the user to update
    :type id: string

    :returns: the updated user account
    :rtype: dictionary

    '''
    model = context['model']
    user = context['user']
    session = context['session']
    schema = context.get('schema') or logic.schema.default_update_user_schema()
    # Modify the schema by adding translation related keys
    add_translation_modify_schema(schema)

    upload = uploader.Upload('user')
    upload.update_data_dict(data_dict, 'image_url', 'image_upload',
                            'clear_upload')

    ignore_missing = toolkit.get_validator('ignore_missing')
    convert_to_extras = toolkit.get_converter('convert_to_extras')

    schema['job_title'] = [ignore_missing, unicode, convert_to_extras]
    schema['telephone_number'] = [ignore_missing, unicode, convert_to_extras]
    schema['main_organization'] = [ignore_missing, unicode, convert_to_extras]

    schema['image_url'] = [ignore_missing, unicode, convert_to_extras]

    schema['linkedin'] = [ignore_missing, unicode, convert_to_extras]
    schema['facebook'] = [ignore_missing, unicode, convert_to_extras]
    schema['twitter'] = [ignore_missing, unicode, convert_to_extras]

    schema['blog'] = [ignore_missing, to_list_json, convert_to_extras]
    schema['www_page'] = [ignore_missing, to_list_json, convert_to_extras]

    # Add the localized keys for the localized fields to the schema
    schema = add_languages_modify(schema, _localized_fields)

    not_empty = toolkit.get_validator('not_empty')
    schema['fullname'] = [not_empty, unicode]

    id = logic.get_or_bust(data_dict, 'id')

    user_obj = model.User.get(id)
    context['user_obj'] = user_obj
    if user_obj is None:
        raise NotFound('User was not found.')

    # If the translations are not in the data_dict, the user has not added any translations or the user has deleted all translations.
    # Therefore, the translations are not sent with the POST so we need to empty and update the translations here.
    if 'translations' not in data_dict:
        data_dict['translations'] = []

    toolkit.check_access('user_update', context, data_dict)

    data, errors = validate(data_dict, schema, context)
    if errors:
        session.rollback()
        raise ValidationError(errors)

    for extra in data.get('extras', []):
        user_obj.extras[extra['key']] = extra['value']

    user = model_save.user_dict_save(data, context)

    activity_dict = {
        'user_id': user.id,
        'object_id': user.id,
        'activity_type': 'changed user'
    }

    activity_create_context = {
        'model': model,
        'user': user,
        'defer_commit': True,
        'ignore_auth': True,
        'session': session
    }

    toolkit.get_action('activity_create')(activity_create_context,
                                          activity_dict)

    # Attempt to update drupal user
    _update_drupal_user(context, data_dict)

    # TODO: Also create an activity detail recording what exactly changed in
    # the user.

    upload.upload(uploader.get_max_image_size())
    if not context.get('defer_commit'):
        model.repo.commit()
    user_data = user_dictize(user, context)

    for key, value in user.extras.iteritems():
        if key in user_data:
            log.warning(
                "Trying to override user data with extra variable '%s'", key)
            continue
        user_data[key] = value
    return user_data
Example #33
def basket_list(user):
    baskets = tk.get_action('basket_list')({}, {'user_id': user})
    return baskets
Example #34
    def index(self, id):
        """ Clone the specified data set record.
            Arguments:
              id (string): URL/slug of the data set.
            Returns:
              string: JSON response.
              Successful clone return value: 
                  {'status': 'success', 
                   'redirect_url': <URL of data set edit page>
                  }
              Data validation error return value:
                  {'status': 'error',
                   'errors': {<field1>: [<validation error message>],
                              <field2>: [<validation error message>]}
                  }
              Any other (unexpected) error:
                  {'status': 'error',
                   'errorMessage': <message>
                  }
        """
        logger = logging.getLogger(__name__)

        if toolkit.request.method == 'POST':
            try:
                # TODO: handle publication
                pkg = toolkit.get_action('package_show')(None, dict(id=id))

                cfg_adst = config.get('ckanext.openalberta.clonable_ds_types',
                                      'opendata,publication')
                allowed_types = set(re.split('\s*,\s*', cfg_adst))
                if pkg['type'] not in allowed_types:
                    logger.warn(
                        'Requested cloning of unsupported package type (%s). Supported types: %s.',
                        pkg['type'], cfg_adst)
                    return {
                        'status':
                        'error',
                        'errorMessage':
                        _('This package type is not allowed to be cloned.')
                    }

                pkg['title'] = toolkit.request.params.getone('title')
                pkg['name'] = toolkit.request.params.getone('name')
                pkg['date_created'] = pkg['date_modified'] = datetime.now()
                pkg['state'] = 'draft'
                del pkg['id']

                action = toolkit.get_action('package_create')
                newpkg = action(self._context, pkg)
                return {
                    'status':
                    'success',
                    'redirect_url':
                    h.url_for(controller='package',
                              action='edit',
                              id=newpkg['name'])
                }
            except toolkit.ValidationError as ve:
                errflds = set(ve.error_dict.keys()) - {'title', 'name'}
                if errflds:
                    # There are validation errors other than title and name (slug).
                    # If this happens, it means something is wrong with the package
                    return {
                        'status':
                        'error',
                        'errorMessage':
                        _('The data set is in an invalid state. Please correct it before trying to clone.'
                          )
                    }
                return {'status': 'error', 'errors': ve.error_dict}
            except Exception:
                logger.exception('Error in PackageCloneController:index')
                return {'status': 'error', 'errorMessage': _UNEXPECTED_ERROR}

        else:
            toolkit.abort(403, _NOT_AUTHORIZED)
Example #35
def embedKnowledge(data_dict):

    # [TODO] Get Dataset CSV Resource url from id of resource

    # Set visibility of loading gear
    loading = 'src/ckanext-liveschema_theme/ckanext/liveschema_theme/fanstatic/loading.css'
    loadingFile = open(loading, 'w')
    loadingFile.write(data_dict["loading"])
    loadingFile.close()

    # Name of folder for intermediate results
    path = "src/ckanext-liveschema_theme/ckanext/liveschema_theme/public/resources/" + data_dict[
        "dataset_name"] + "/"
    # Create Directory if not already present
    if not os.path.isdir(path):
        os.makedirs(path)

    # Create the dataframe from the CSV file
    triples = pd.read_csv(data_dict["dataset_link"], nrows=10001)

    if (len(triples) > 10000):
        # Get the final version of the package
        CKANpackage = toolkit.get_action('package_show')(
            data_dict={"id": data_dict["dataset_name"]})
        # Iterate over all the resources
        for resource in CKANpackage["resources"]:
            # Remove eventual temp resources left in case of error
            if resource["format"] == "temp":
                toolkit.get_action("resource_delete")(
                    context={"ignore_auth": True},
                    data_dict={"id": resource["id"]})

        # Remove visibility of loading gear
        if (os.path.isfile(loading)):
            os.remove(loading)

        # Return without continuing the execution
        return

    # Name of the training file
    parsedTSV = path + data_dict["dataset_name"] + ".tsv"

    with open(parsedTSV, "w+") as train:
        # Iterate over every triples row
        for index, row in triples.iterrows():
            subj = str(row["Subject"])
            pred = str(row["Predicate"])
            obj = str(row["Object"]).replace('\r\n', '\n').replace('\n', ' | ')
            train.write(subj + "\t" + pred + "\t" + obj + "\n")

    # Call function with python3 to execute real embedder
    out = subprocess32.call(
        "python3 src/ckanext-liveschema_theme/ckanext/liveschema_theme/logic/knowledgeEmbedder.py "
        + data_dict["dataset_name"] + " !" + data_dict["options"]["strModel"] +
        " !" + data_dict["options"]["embedding_dim"] + " !" +
        data_dict["options"]["normalization_of_entities"] + " !" +
        data_dict["options"]["scoring_function"] + " !" +
        data_dict["options"]["margin_loss"] + " !" +
        data_dict["options"]["random_seed"] + " !" +
        data_dict["options"]["num_epochs"] + " !" +
        data_dict["options"]["learning_rate"] + " !" +
        data_dict["options"]["batch_size"] + " !" +
        data_dict["options"]["test_set_ratio"] + " !" +
        data_dict["options"]["filter_negative_triples"] + " !" +
        data_dict["options"]["maximum_number_of_hpo_iters"],
        shell=True)

    # Check if execution went well
    if (not out):
        # Name of the embedding Model
        embeddingModel = "trained_model.pkl"

        # Upload trained model
        upload = cgi.FieldStorage()
        upload.filename = embeddingModel
        upload.file = file(
            os.path.normpath(os.path.expanduser(path + embeddingModel)))
        data = {
            "id": data_dict["res_id_model"],
            "format": "EMB",
            'url': embeddingModel,  #'will-be-overwritten-automatically',
            'upload': upload
        }
        toolkit.get_action('resource_patch')(
            context={'ignore_auth': True}, data_dict=data)

        # Name of the embedding
        embeddingName = data_dict["dataset_name"] + "_Emb_" + data_dict[
            "options"]["strModel"] + ".xlsx"

        # Create the Excel file for the embedding
        embeddings = xlsxwriter.Workbook(path + embeddingName)
        # Add bold cell format
        bold = embeddings.add_format({'bold': True})

        # Create the sheet for the EntitiesToEntities relations
        entities = embeddings.add_worksheet("Entities To Entities")

        # Create the sheet for the Entities embeddings
        entityEmb = embeddings.add_worksheet("Entities")

        # Create the sheet for the RelationsToRelations
        relations = embeddings.add_worksheet("Relations to Relations")

        # Create the sheet for the Relations embeddings
        relationEmb = embeddings.add_worksheet("Relations")

        # Retrieve Entities Embedding from json file
        with open(path + 'entities_to_embeddings.json') as entitiesE:
            entitiesEJSON = json.load(entitiesE)

        # Get all entities names from embeddings, sorted
        entitiesNames = sorted(entitiesEJSON.keys(), cmp=locale.strcoll)

        # Header Cell of Entities is bold
        entities.write(0, 0, 'Entity|Entity', bold)
        index = 1
        # Iterate over the data and write it out row by row.
        for entity in entitiesNames:
            # Save Entity Embeddings
            entityEmb.write(0, index - 1, entity, bold)
            entityEmb.write_column(
                1, index - 1,
                list(entitiesEJSON[list(entitiesNames)[index - 1]]))
            # Initialise Entities to Entities relations
            entities.write(index, 0, entity)
            entities.write(0, index, entity)
            index += 1

        # Iterate over every cell of the DataFrame
        for i in range(0, index - 1):
            for j in range(i, index - 1):
                # Work only with elements not on the diagonal and not already checked
                if (i != j):
                    # Transform the embeddings into a numpy array
                    arrayI = np.array(entitiesEJSON[list(entitiesNames)[i]])
                    arrayJ = np.array(entitiesEJSON[list(entitiesNames)[j]])
                    # Compute the Euclidean norm between these 2 arrays
                    norm = np.linalg.norm(arrayI - arrayJ)
                    # Update both the combinations with the norm
                    entities.write(i + 1, j + 1, norm)
                    entities.write(j + 1, i + 1, norm)
                else:
                    entities.write(i + 1, i + 1, 0)

        # Retrieve Relations Embedding from json file
        with open(path + 'relations_to_embeddings.json') as relationsE:
            relationsEJSON = json.load(relationsE)

        # Get all relations names from embeddings, sorted
        relationsNames = sorted(relationsEJSON.keys(), cmp=locale.strcoll)

        # Header Cell is bold
        relations.write(0, 0, 'Relation|Relation', bold)
        index = 1
        # Iterate over the data and write it out row by row.
        for relation in relationsNames:
            # Save Relations Embeddings
            relationEmb.write(0, index - 1, relation, bold)
            relationEmb.write_column(
                1, index - 1,
                list(relationsEJSON[list(relationsNames)[index - 1]]))
            # Initialise Relations to Relations relations
            relations.write(index, 0, relation)
            relations.write(0, index, relation)
            index += 1

        # Iterate over every cell of the DataFrame
        for i in range(0, index - 1):
            for j in range(i, index - 1):
                # Work only with elements not on the diagonal and not already checked
                if (i != j):
                    # Transform the embeddings into a numpy array
                    arrayI = np.array(relationsEJSON[list(relationsNames)[i]])
                    arrayJ = np.array(relationsEJSON[list(relationsNames)[j]])
                    # Compute the Euclidean norm between these 2 arrays
                    norm = np.linalg.norm(arrayI - arrayJ)
                    # Update both the combination with the norm
                    relations.write(i + 1, j + 1, norm)
                    relations.write(j + 1, i + 1, norm)
                else:
                    relations.write(i + 1, i + 1, 0)

        # Close the embeddings Excel file
        embeddings.close()

        # Upload the embeddings Excel file to LiveSchema
        upload = cgi.FieldStorage()
        upload.filename = embeddingName
        upload.file = file(
            os.path.normpath(os.path.expanduser(path + embeddingName)))
        data = {
            'id': data_dict["res_id"],
            'format': "EMB",
            'url': embeddingName,  # will be overwritten automatically
            'upload': upload
        }
        toolkit.get_action('resource_patch')(
            context={'ignore_auth': True}, data_dict=data)

        # Add file to DataStore using DataPusher
        import ckanext.datapusher.logic.action as dpaction
        dpaction.datapusher_submit(
            context={'ignore_auth': True},
            data_dict={'resource_id': str(data_dict["res_id"])})

        # Create a Data Explorer view of the resource
        toolkit.get_action('resource_view_create')(
            context={'ignore_auth': True},
            data_dict={
                'resource_id': str(data_dict["res_id"]),
                'title': "Data Explorer",
                'view_type': "recline_view"
            })

    else:  # If there has been a problem with the execution
        # Remove temp resources
        toolkit.get_action('resource_delete')(
            context={'ignore_auth': True},
            data_dict={'id': data_dict["res_id"]})
        toolkit.get_action('resource_delete')(
            context={'ignore_auth': True},
            data_dict={'id': data_dict["res_id_model"]})

    # Get the final version of the package
    CKANpackage = toolkit.get_action('package_show')(
        data_dict={"id": data_dict["dataset_name"]})
    # Iterate over all the resources
    for resource in CKANpackage["resources"]:
        # Remove any temp resources left behind in case of error
        if resource["format"] == "temp" and resource["resource_type"] == "Emb":
            toolkit.get_action("resource_delete")(
                context={"ignore_auth": True},
                data_dict={"id": resource["id"]})

    # Remove intermediate results
    try:
        shutil.rmtree(path)
    except OSError as e:
        print("Error: %s : %s" % (path, e.strerror))

    # Remove visibility of loading gear
    if os.path.isfile(loading):
        os.remove(loading)
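
# The nested loops above fill the symmetric distance matrices one cell at a
# time. For larger vocabularies the same Euclidean distances can be computed
# in one vectorized step; a minimal sketch, assuming all embeddings have the
# same length (`emb_json` and `names` stand in for entitiesEJSON/entitiesNames
# or their relation counterparts):

import numpy as np

def distance_matrix(emb_json, names):
    # Stack the embeddings into an (n, d) array, one row per name
    vectors = np.array([emb_json[name] for name in names], dtype=float)
    # Pairwise squared distances via broadcasting:
    # |a - b|^2 = |a|^2 + |b|^2 - 2 * a.b
    sq = np.sum(vectors ** 2, axis=1)
    d2 = sq[:, None] + sq[None, :] - 2.0 * vectors.dot(vectors.T)
    # Clip tiny negative values caused by floating-point error before sqrt
    return np.sqrt(np.clip(d2, 0.0, None))

# The resulting array could then be written to the worksheet row by row,
# replacing the per-cell norm computation.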
Ejemplo n.º 36
0
    def copy(self, id):
        context = {
            'model': m,
            'session': m.Session,
            'user': p.toolkit.c.user or p.toolkit.c.author,
            'auth_user_obj': p.toolkit.c.userobj,
            'save': 'save' in t.request.params,
        }

        # check permissions
        try:
            t.check_access('package_create', context)
        except t.NotAuthorized:
            t.abort(401, t._('Unauthorized to copy this package'))

        data_dict = {'id': id}
        data = t.get_action('package_show')(None, data_dict)

        # change dataset title and name
        data['name'] = '{}-copy'.format(data['name'])
        while True:
            try:
                _pkg = t.get_action('package_show')(None, {
                    'name_or_id': data['name']
                })
            except l.NotFound:
                break
            else:
                import random
                data['name'] = '{}-copy-{}'.format(data['name'],
                                                   random.randint(1, 100))

        data['title'] = 'Copy of {0}'.format(data['title'])

        # remove unnecessary attributes from the dataset
        remove_attrs = [
            'id', 'revision_id', 'metadata_created', 'metadata_modified',
            'resources', 'revision_timestamp'
        ]
        for attr in remove_attrs:
            if attr in data:
                del data[attr]

        if data and 'type' in data:
            package_type = data['type']
        else:
            package_type = self._guess_package_type(True)

        data = data or clean_dict(
            dict_fns.unflatten(
                tuplize_dict(
                    parse_params(t.request.params,
                                 ignore_keys=CACHE_PARAMETERS))))
        c.resources_json = h.json.dumps(data.get('resources', []))

        # convert tags if not supplied in data
        if data and not data.get('tag_string'):
            data['tag_string'] = ', '.join(
                h.dict_list_reduce(data.get('tags', {}), 'name'))

        # if we are creating from a group then this allows the group to be
        # set automatically
        data['group_id'] = t.request.params.get('group') or \
                           t.request.params.get('groups__0__id')

        # in the phased add dataset we need to know that
        # we have already completed stage 1
        stage = ['active']
        if data.get('state', '').startswith('draft'):
            stage = ['active', 'complete']

        form_snippet = self._package_form(package_type=package_type)
        form_vars = {
            'data': data,
            'errors': {},
            'error_summary': {},
            'action': 'new',
            'stage': stage,
            'dataset_type': package_type,
        }

        c.errors_json = h.json.dumps({})

        # override form action to use built-in package controller
        c.form_action = t.url_for(controller='package', action='new')

        self._setup_template_variables(context, {}, package_type=package_type)
        new_template = self._new_template(package_type)
        extra_vars = {
            'form_vars': form_vars,
            'form_snippet': form_snippet,
            'dataset_type': package_type
        }

        return t.render(new_template, extra_vars=extra_vars)
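
# The probe-until-free naming loop above also appears in copy_resources()
# (Ejemplo n.º 42 below); it could be factored into a helper. A minimal
# sketch under the same assumptions as the surrounding code (`t` is the
# plugins toolkit and `l` exposes NotFound):

import random

def _unused_package_name(base_name):
    # Try '<base>-copy' first, then random suffixes until package_show
    # raises NotFound, i.e. the candidate name is free
    name = '{}-copy'.format(base_name)
    while True:
        try:
            t.get_action('package_show')(None, {'name_or_id': name})
        except l.NotFound:
            return name
        name = '{}-copy-{}'.format(base_name, random.randint(1, 100))

# Unlike the inline loop, this sketch suffixes the base name on each retry
# instead of growing the candidate name.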
Ejemplo n.º 37
0
    def migrate_package_to_s3(self, context, package_name):
        """migrate_package_to_s3 - Migrates package to S3 by calling
        resource_update on each resource.
        """
        # Obtain logger
        logger = logging.getLogger(__name__)
        logger.info("Starting package migration to S3 for package %s",
                    package_name)
        try:
            pkg = toolkit.get_action('package_show')(context, {
                'id': package_name
            })
            if pkg.get('num_resources') > 0:
                for resource in pkg.get('resources'):
                    # If the resource is already uploaded to S3, don't reupload
                    if self.skip_existing_s3_upload and \
                            resource['url_type'] == 's3':
                        logger.info(
                            "Resource %s is already on S3, skipping to next "
                            "resource.", resource.get('name', ''))
                        continue

                    # If filetype of resource is blacklisted, skip the
                    # upload to S3
                    if not upload.is_blacklisted(resource):
                        try:
                            logger.info(
                                "Attempting to migrate resource %s to S3...",
                                resource.get('name', ''))
                            self.change_to_s3(context, resource)
                            logger.info(
                                "Successfully migrated resource %s to S3.",
                                resource.get('name', ''))
                        except Exception as error:
                            logger.error(
                                "Error when migrating resource %s - %s",
                                resource.get('name', ''), error)
                            raise error
                    else:
                        logger.info(
                            "Resource %s is blacklisted, skipping to next "
                            "resource.", resource.get('name', ''))

                        # Upload resource zipfile to S3
                        # If not blacklisted, will be done automatically as
                        # part of resource_update.
                        upload.upload_resource_zipfile_to_s3(context, resource)

                # After updating all the resources, upload package zipfile to S3
                upload.upload_package_zipfile_to_s3(context, pkg)

        except Exception as error:
            logger.error("Error when migrating package %s with error %s",
                         package_name, error)
            self.pkg_crashes_w_error.append({
                'pkg_name': package_name,
                'error': error
            })
        finally:
            # Cleanup sqlalchemy session
            # Required to prevent errors when uploading remaining packages
            model.Session.remove()
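
# A hedged sketch of how a migration command might drive the method above;
# the command instance `cmd` is a placeholder, not part of the original
# extension, while get_site_user and package_list are standard CKAN actions:

site_user = toolkit.get_action('get_site_user')({'ignore_auth': True}, {})
context = {'ignore_auth': True, 'user': site_user['name']}
for package_name in toolkit.get_action('package_list')(context, {}):
    cmd.migrate_package_to_s3(context, package_name)

# Packages that crashed were collected for later review
for crash in cmd.pkg_crashes_w_error:
    print('{0}: {1}'.format(crash['pkg_name'], crash['error']))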
Ejemplo n.º 38
0
 def change_to_s3(self, context, resource):
     """change_to_s3 - performs resource_update. The before and
     after update hooks
     upload the resource and the resource/package zipfiles to S3
     """
     toolkit.get_action('resource_update')(context, resource)
Ejemplo n.º 39
0
def create_thumbnail(package_id, resource_id=None, width=None, height=None):
    '''Creates a thumbnail in a dataset and returns its url

    :rtype: string
    '''
    if not c.user:
        return None

    if width is None:
        cfg_width = config.get('ckan.datasetthumbnail.thumbnail_width', 140)
        width = toolkit.asint(cfg_width)

    if height is None:
        cfg_height = config.get('ckan.datasetthumbnail.thumbnail_height',
                                int(width * 1.415))
        height = toolkit.asint(cfg_height)

    package = toolkit.get_action('package_show')(
        context={'ignore_auth': True}, data_dict={'id': package_id})

    resource = None
    if resource_id is not None:
        resource = toolkit.get_action('resource_show')(
            context={'ignore_auth': True}, data_dict={'id': resource_id})

    if resource is None:
        for pkg_resource in package['resources']:
            if pkg_resource['format'] in ('JPEG', 'PNG'):
                resource = pkg_resource
                break

    if resource is not None:
        headers = {}
        if resource['url_type'] == 'upload':
            if hasattr(c, 'userobj') and hasattr(c.userobj, 'apikey'):
                headers['Authorization'] = c.userobj.apikey

        try:
            response = requests.get(resource['url'],
                                    headers=headers,
                                    stream=True)
        except requests.exceptions.RequestException:
            # Silently fail on any request exception on the basis that it's
            # better to have a working page with missing thumbnails than a
            # broken one.
            return

        if response.status_code == 200:
            # Create an in-memory file object in which to save the image
            original_fp = StringIO()

            for chunk in response.iter_content(1024):
                original_fp.write(chunk)
            original_fp.flush()

            image = None

            try:
                image = Image.open(original_fp)
            except IOError:
                # If an image can't be parsed from the response, give up
                return None

            image.thumbnail((width, height))

            thumbnail_fp = StringIO()
            thumbnail_fp.name = 'thumbnail.png'
            image.save(thumbnail_fp, format='PNG')

            thumbnail_resource = {
                'package_id': package['id'],
                'url': 'thumbnail.png',
                'url_type': 'upload',
                'format': 'png',
                'name': 'thumbnail.png',
                'upload': _UploadLocalFileStorage(thumbnail_fp)
            }

            created_resource = toolkit.get_action('resource_create')(
                context={'ignore_auth': True}, data_dict=thumbnail_resource)
            thumbnail_fp.close()
            original_fp.close()

            return created_resource['url']

    return None
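
# A hedged usage sketch for create_thumbnail(); the dataset name and the
# fallback path below are illustrative only:

# Hypothetical call site, e.g. from a template helper
thumbnail_url = create_thumbnail('my-dataset')  # defaults: first JPEG/PNG, 140px wide
if thumbnail_url is None:
    # No logged-in user, no image resource, or the download/parse failed
    thumbnail_url = '/images/placeholder.png'  # illustrative fallback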
Ejemplo n.º 40
0
def _add_member_to_groups(member, context):
    groups = tk.get_action('group_list')(context, {})
    for group in groups:
        _add_member_to_group(member, group, context)
Ejemplo n.º 41
0
def _get_action(action, data_dict):
    return toolkit.get_action(action)(_get_context(), data_dict)
Ejemplo n.º 42
0
    def copy_resources(self, id, data=None, errors=None, error_summary=None):
        context = {
            'model': m,
            'session': m.Session,
            'user': p.toolkit.c.user or p.toolkit.c.author,
            'auth_user_obj': p.toolkit.c.userobj,
            'save': 'save' in t.request.params,
        }

        # check permissions
        try:
            t.check_access('package_create', context)
        except t.NotAuthorized:
            t.abort(401, t._('Unauthorized to copy this package'))

        # get package type
        if data and 'type' in data:
            package_type = data['type']
        else:
            package_type = self._guess_package_type(True)

        resources = None
        if data is None:
            data = t.get_action('package_show')(None, {'id': id})
            # generate new unused package name
            data['title'] = 'Copy of {0}'.format(data['title'])
            data['name'] = '{}-copy'.format(data['name'])
            while True:
                try:
                    _ = t.get_action('package_show')(None, {
                        'name_or_id': data['name']
                    })
                except l.NotFound:
                    break
                else:
                    import random
                    data['name'] = '{}-copy-{}'.format(data['name'],
                                                       random.randint(1, 100))

            # remove unnecessary attributes from the dataset
            remove_attrs = [
                'id', 'revision_id', 'metadata_created', 'metadata_modified',
                'revision_timestamp'
            ]
            for attr in remove_attrs:
                if attr in data:
                    del data[attr]

            # process package resources
            resources = data.pop('resources', [])
            remove_attrs = ('id', 'revision_id', 'created', 'last_modified',
                            'package_id')
            for resource in resources:
                for attr in remove_attrs:
                    if attr in resource:
                        del resource[attr]

            c.resources_json = h.json.dumps(resources)

        form_snippet = 'package/copy_package_form.html'
        c.form_action = t.url_for(
            controller='ckanext.sokigo.controller:CopyController',
            action='copy_resources',
            id=id)

        if context['save'] and t.request.method == 'POST':
            data = clean_dict(
                dict_fns.unflatten(
                    tuplize_dict(
                        parse_params(t.request.POST,
                                     ignore_keys=CACHE_PARAMETERS))))

            data['resources'] = resources

            # convert tags if not supplied in data
            if data and not data.get('tag_string'):
                data['tag_string'] = ', '.join(
                    h.dict_list_reduce(data.get('tags', {}), 'name'))

            # if we are creating from a group then this allows the group to be
            # set automatically
            data['group_id'] = t.request.params.get('group') or \
                               t.request.params.get('groups__0__id')

            try:
                pkg_dict = t.get_action('package_create')(context, data)
            except l.NotAuthorized:
                t.abort(403, _('Unauthorized to read package %s') % '')
            except l.NotFound as e:
                t.abort(404, _('Dataset not found'))
            except dict_fns.DataError:
                t.abort(400, _(u'Integrity Error'))
            except SearchIndexError as e:
                try:
                    exc_str = text_type(repr(e.args))
                except Exception:  # We don't like bare excepts
                    exc_str = text_type(str(e))
                t.abort(500,
                        _(u'Unable to add package to search index.') + exc_str)
            except t.ValidationError as e:
                data['state'] = 'none'
                c.data = data
                c.errors_json = h.json.dumps(e.error_dict)
                form_vars = {
                    'data': data,
                    'errors': e.error_dict,
                    'error_summary': e.error_summary,
                    'action': 'new',
                    'stage': data['state'],
                    'dataset_type': package_type
                }

                extra_vars = {
                    'form_vars': form_vars,
                    'form_snippet': form_snippet,
                    'dataset_type': package_type
                }

                return t.render('package/copy.html', extra_vars=extra_vars)

            else:
                h.redirect_to(controller='package',
                              action='read',
                              id=pkg_dict['name'])

        c.data = data
        c.errors_json = h.json.dumps(errors)
        form_vars = {
            'data': data,
            'errors': errors or {},
            'error_summary': error_summary or {},
            'action': 'new',
            'stage': data['state'],
            'dataset_type': package_type
        }

        extra_vars = {
            'form_vars': form_vars,
            'form_snippet': form_snippet,
            'dataset_type': package_type
        }

        return t.render('package/copy.html', extra_vars=extra_vars)
Ejemplo n.º 43
0
def _create_or_update_dataset(dataset):
    context = {}
    user = tk.get_action('get_site_user')({'ignore_auth': True}, {})
    context.update({'user': user['name']})

    harvester = SwissDCATRDFHarvester()
    name = harvester._gen_new_name(dataset['title'])

    package_plugin = lib_plugins.lookup_package_plugin('dataset')
    data_dict = {
        'identifier': dataset['identifier'],
        'include_private': True,
        'include_drafts': True,
    }

    try:
        existing_dataset = tk.get_action('ogdch_dataset_by_identifier')(
            context,
            data_dict
        )
        context['schema'] = package_plugin.update_package_schema()

        # Don't change the dataset name even if the title has changed
        dataset['name'] = existing_dataset['name']
        dataset['id'] = existing_dataset['id']
        # Don't make a dataset public if it wasn't already
        is_private = existing_dataset['private']
        dataset['private'] = is_private

        map_existing_resources_to_new_dataset(dataset, existing_dataset)

        tk.get_action('package_update')(context, dataset)

        success_message = 'Updated dataset %s.' % dataset['name']
        if is_private:
            success_message += ' The dataset visibility is private.'

        h.flash_success(success_message)

    except NotFound as e:
        package_schema = package_plugin.create_package_schema()
        context['schema'] = package_schema

        # We need to explicitly provide a package ID
        dataset['id'] = str(uuid.uuid4())
        package_schema['id'] = [str]
        dataset['name'] = name
        # Create datasets as private initially
        dataset['private'] = True

        try:
            tk.get_action('package_create')(context, dataset)
        except ValidationError as e:
            h.flash_error(
                'Error importing dataset %s: %r' %
                (dataset.get('name', ''), e.error_summary))

            return

        h.flash_success(
            'Created dataset %s. The dataset visibility is private.' %
            dataset['name'])

    except Exception as e:
        h.flash_error(
            'Error importing dataset %s: %r' %
            (dataset.get('name', ''), e))
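
# A hedged sketch of how a caller might feed a parsed dataset dict to the
# helper above; every field value here is a placeholder:

# Hypothetical caller, e.g. after parsing a DCAT source
dataset = {
    'identifier': 'example-dataset@example-org',  # placeholder identifier
    'title': {'en': 'Example dataset'},           # assumed multilingual title
    # ... further fields expected by the package schema ...
}
_create_or_update_dataset(dataset)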
Ejemplo n.º 44
0
 def _get_action(self, action, data_dict):
     return toolkit.get_action(action)(self._get_ctx(), data_dict)
Ejemplo n.º 45
0
    def _get_or_create_user(self, env):
        #WSGI Variables
        #Shib-Application-ID            'default'
        #Shib-Authentication-Instant    '2012-08-13T12:04:22.492Z'
        #Shib-Authentication-Method     'urn:oasis:names:tc:SAML:2.0:ac:classes:PasswordProtectedTransport'
        #Shib-AuthnContext-Class        'urn:oasis:names:tc:SAML:2.0:ac:classes:PasswordProtectedTransport'
        #Shib-Identity-Provider         'https://idp.example.com/idp/shibboleth'
        #Shib-Session-ID                '_7ec5a681e6dbae627c1cefcc7cb4d56a'
        #Shib-Session-Index             '39dafd8477850f5e0b968e3561570197f2109948c1d374a7a2b4c9a7adbf8628'
        #cn                             'My Other Self'
        #givenName                      'My Other Self'
        #mail                           '*****@*****.**'

        eppn = env.get(self.eppn, None)
        fullname = env.get(self.fullname, None)
        email = env.get(self.mail, None)

        if not eppn or not fullname:
            log.debug(
                'Environ does not contain eppn or cn attributes, user not loaded.')
            return None

        user = model.Session.query(model.User).autoflush(False) \
            .filter_by(name=eppn).first()

        # Check if user information from shibboleth has changed
        if user:
            if user.fullname != fullname or user.email != email:
                log.debug('User attributes modified, updating.')
                user.fullname = fullname
                user.email = email

        else:  # user is None
            log.debug('User does not exist, creating a new one.')

            basename = eppn
            username = basename
            suffix = 0
            while not model.User.check_name_available(username):
                suffix += 1
                username = basename + str(suffix)

            user = model.User(name=username,
                              fullname=fullname,
                              email=email,
                              openid=eppn)

            model.Session.add(user)
            model.Session.flush()
            log.info('Created new user {usr}'.format(usr=fullname))

        groups = env.get(self.groups, None)

        if groups:
            groups = groups.split(";")
            log.debug("groups: {}".format(sorted(groups)))
            orgs = toolkit.get_action('group_list')(data_dict={
                'all_fields': True,
                'include_extras': True
            })
            log.debug("orgs: {}".format(orgs))
            add_member = toolkit.get_action('group_member_create')

            # Ensure there's a pylons.translator object
            registry = Registry()
            registry.prepare()
            registry.register(pylons.translator, MockTranslator())

            for o in orgs:
                for e in o['extras']:
                    if e['key'] == 'ecgroup' and e['value'] in groups:
                        log.debug("Adding {} to {}".format(user.name, o['name']))
                        add_member(context={
                            'user': '******'
                        }, data_dict={
                            'id': o['name'],
                            'username': user.name,
                            'role': 'member'
                        })

        model.Session.commit()
        model.Session.remove()
        return user
Ejemplo n.º 46
0
def _add_members_to_group(group, context):
    members = tk.get_action('user_list')(context, {})
    for member in members:
        if not member['sysadmin']:
            _add_member_to_group(member.get('id'), group, context)
Ejemplo n.º 47
0
def generateFCA(data_dict):
    # [TODO] Get Dataset CSV Resource url from id of resource

    # Set visibility of loading gear
    loading = 'src/ckanext-liveschema_theme/ckanext/liveschema_theme/fanstatic/loading.css'
    loadingFile = open(loading, 'w')
    loadingFile.write(data_dict["loading"])
    loadingFile.close()

    # Create the dataframe from the CSV file
    triples = pd.read_csv(data_dict["dataset_link"], nrows=10001)

    if len(triples) > 10000:
        # The dataset is too large to process: clean up and abort
        # Get the final version of the package
        CKANpackage = toolkit.get_action('package_show')(
            data_dict={"id": data_dict["dataset_name"]})
        # Iterate over all the resources
        for resource in CKANpackage["resources"]:
            # Remove any temp resources left behind in case of error
            if resource["format"] == "temp":
                toolkit.get_action("resource_delete")(
                    context={"ignore_auth": True},
                    data_dict={"id": resource["id"]})

        # Remove visibility of loading gear
        if os.path.isfile(loading):
            os.remove(loading)

        # Return without continuing the execution
        return

    # Sort the DataFrame
    triples = triples.sort_values("ObjectTerm")

    # Create the DataFrame used to create the FCA matrix
    matrix = pd.DataFrame(columns=["TypeTerm", "PropertiesTokens"])

    # Create the strings used to store multiple values in the same row, using " - " as separator
    obj = ""
    propTokens = ""
    # Dictionary used to store the triple
    dict_ = dict()

    # Iterate over every row of triples
    for index, row in triples.iterrows():
        # Decide whether the triple has to be saved; if a predicate selection
        # exists, keep only triples whose predicate was selected
        bool_ = False
        strPredicates = data_dict["strPredicates"]
        # If there is no predicate selection then save every triple
        if not strPredicates.split():
            bool_ = True
        # Otherwise check whether this row's predicate is among the selection
        else:
            for pred in strPredicates.split():
                if (pred == str(row["PredicateTerm"])
                        or pred == str(row["Predicate"])):
                    bool_ = True
                    break
        # Save the triple only if both subject and object are URIs
        if (bool_ and str(row["Subject"]).startswith("http")
                and str(row["Object"]).startswith("http")):
            # If the object value on the row has changed (first row or a new object)
            if row["ObjectTerm"] != obj:
                # If the name of the object is not empty
                if obj:
                    # Add to the dictionary the latest values of the row
                    dict_["PropertiesTokens"] = propTokens[3:]
                    # Store the row in the matrix
                    matrix = matrix.append(dict_, ignore_index=True)
                # Reset the name of the new object
                obj = row["ObjectTerm"]
                # Reset the other values of the row
                propTokens = ""
                # Store in the dictionary the fixed values of the row
                #dict_ = {"Type": " " + row["Object"], "TypeTerm": row["ObjectTerm"]}
                dict_ = {"TypeTerm": row["ObjectTerm"]}

            # Tokenize the SubjectTerm on capital letters, obtaining the simple
            # words of its composition as strings separated by " "
            pTok = tokenTerm(row["SubjectTerm"])
            # Append the tokens to the row's accumulating string
            propTokens = propTokens + " - " + pTok

    # Update the last row with the latest info
    dict_["PropertiesTokens"] = propTokens[3:]
    # Store the last row in the matrix
    matrix = matrix.append(dict_, ignore_index=True)

    # Set used to avoid creating the same column more than once
    tokSet = set()
    # Iterate over every row of the matrix
    for index, row in matrix.iterrows():
        # Create a list of tokens from that row's PropertiesTokens cell
        toks = [
            x for x in row["PropertiesTokens"].replace("- ", "").split(" ")
            if x
        ]
        # For every token in toks
        for tok in toks:
            # Check whether that token is already a column
            setInd = len(tokSet)
            tokSet.add(tok)
            # If the token is new
            if setInd < len(tokSet):
                # Create a column of 0 for that token
                matrix[tok] = 0
            # Increment the cell at (row, tok), counting that token's
            # occurrences for the row
            matrix.at[index, tok] = matrix.at[index, tok] + 1

    # Drop PropertiesTokens since it is no longer needed
    matrix.drop("PropertiesTokens", axis=1, inplace=True)

    # Parse the FCA matrix into the csv file
    matrix.to_csv(
        os.path.normpath(
            os.path.expanduser(
                "src/ckanext-liveschema_theme/ckanext/liveschema_theme/public/"
                + data_dict["dataset_name"] + "_FCA.csv")))

    # Upload the csv file to LiveSchema
    upload = cgi.FieldStorage()
    upload.filename = data_dict["dataset_name"] + "_FCA.csv"
    upload.file = file(
        "src/ckanext-liveschema_theme/ckanext/liveschema_theme/public/" +
        data_dict["dataset_name"] + "_FCA.csv")
    data = {
        'id': data_dict["res_id"],
        'url': data_dict["dataset_name"] + "_FCA.csv",  # will be overwritten automatically
        'upload': upload,
        'format': "FCA"
    }
    toolkit.get_action('resource_patch')(
        context={'ignore_auth': True}, data_dict=data)

    # Add file to DataStore using DataPusher
    import ckanext.datapusher.logic.action as dpaction
    dpaction.datapusher_submit(
        context={'ignore_auth': True},
        data_dict={'resource_id': str(data_dict["res_id"])})

    # Create a Data Explorer view of the resource
    toolkit.get_action('resource_view_create')(
        context={'ignore_auth': True},
        data_dict={
            'resource_id': str(data_dict["res_id"]),
            'title': "Data Explorer",
            'view_type': "recline_view"
        })

    # Remove the temporary csv file from the server
    os.remove("src/ckanext-liveschema_theme/ckanext/liveschema_theme/public/" +
              data_dict["dataset_name"] + "_FCA.csv")

    # Get the final version of the package
    CKANpackage = toolkit.get_action('package_show')(
        data_dict={"id": data_dict["dataset_name"]})
    # Iterate over all the resources
    for resource in CKANpackage["resources"]:
        # Remove any temp resources left behind in case of error
        if resource["format"] == "temp" and resource["resource_type"] == "FCA":
            toolkit.get_action("resource_delete")(
                context={"ignore_auth": True},
                data_dict={"id": resource["id"]})

    # Remove visibility of loading gear
    if os.path.isfile(loading):
        os.remove(loading)
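
# generateFCA() calls a tokenTerm() helper that is not shown here; the
# comments above say it splits a term on capital letters. A minimal sketch
# under that assumption (the exact regex is a guess, not the original
# implementation):

import re

def tokenTerm(term):
    # Split a CamelCase term such as "hasTopConcept" into its words and
    # return them as one space-separated string: "has Top Concept"
    tokens = re.findall(r'[A-Z]?[a-z0-9]+|[A-Z]+(?![a-z])', str(term))
    return " ".join(tokens)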
Ejemplo n.º 48
0
    def dashboard(self):
        context = {'user': c.user, 'auth_user_obj': c.userobj}
        c.harvest_source_infos = tk.get_action(
            'get_harvest_source_infos_for_user')(context, {})  # noqa

        return render('harvester_dashboard/list.html')
Ejemplo n.º 49
0
    def resource_read(self, id, resource_id):
        context = {
            'model': model,
            'session': model.Session,
            'user': c.user,
            'auth_user_obj': c.userobj,
            'for_view': True
        }
        if context['user']:
            # If the user is logged in, record that they viewed this resource
            viewer_id = context['auth_user_obj'].id
            save_view_details(viewer_id, resource_id, context)
        try:
            c.package = toolkit.get_action('package_show')(context, {'id': id})
        except (NotFound, NotAuthorized):
            abort(404, _('Dataset not found'))

        for resource in c.package.get('resources', []):
            if resource['id'] == resource_id:
                c.resource = resource
                break
        if not c.resource:
            abort(404, _('Resource not found'))

        # required for nav menu
        c.pkg = context['package']
        c.pkg_dict = c.package
        dataset_type = c.pkg.type or 'dataset'

        # get package license info
        license_id = c.package.get('license_id')
        try:
            c.package['isopen'] = model.Package.\
                get_license_register()[license_id].isopen()
        except KeyError:
            c.package['isopen'] = False

        # TODO: find a nicer way of doing this
        c.datastore_api = '%s/api/action' % \
            config.get('ckan.site_url', '').rstrip('/')

        c.resource['can_be_previewed'] = self._resource_preview({
            'resource': c.resource,
            'package': c.package
        })

        resource_views = toolkit.get_action('resource_view_list')(
            context, {
                'id': resource_id
            })
        c.resource['has_views'] = len(resource_views) > 0

        current_resource_view = None
        view_id = request.GET.get('view_id')
        if c.resource['can_be_previewed'] and not view_id:
            current_resource_view = None
        elif c.resource['has_views']:
            if view_id:
                current_resource_view = [
                    rv for rv in resource_views if rv['id'] == view_id
                ]
                if len(current_resource_view) == 1:
                    current_resource_view = current_resource_view[0]
                else:
                    abort(404, _('Resource view not found'))
            else:
                current_resource_view = resource_views[0]

        vars = {
            'resource_views': resource_views,
            'current_resource_view': current_resource_view,
            'dataset_type': dataset_type
        }

        template = self._resource_template(dataset_type)
        return render(template, extra_vars=vars)
Ejemplo n.º 50
0
    def edit(self, id=None, data=None, errors=None, error_summary=None):
        context = {
            'save': 'save' in request.params,
            'schema': self._edit_form_to_db_schema(),
            'model': model,
            'session': model.Session,
            'user': c.user,
            'auth_user_obj': c.userobj
        }
        if id is None:
            if c.userobj:
                id = c.userobj.id
            else:
                abort(400, _('No user specified'))
        data_dict = {'id': id}

        try:
            check_access('user_update', context, data_dict)
        except NotAuthorized:
            abort(403, _('Unauthorized to edit a user.'))

        if context['save'] and not data:
            return self._save_edit(id, context)

        try:
            old_data = toolkit.get_action('user_show')(context, data_dict)

            schema = self._db_to_edit_form_schema()
            if schema:
                old_data, errors = \
                    dictization_functions.validate(old_data, schema, context)

            c.display_name = old_data.get('display_name')
            c.user_name = old_data.get('name')

            extra_data = user_analytics_present(context)
            if extra_data:
                old_data['occupation'] = extra_data.occupation

            data = data or old_data

        except NotAuthorized:
            abort(403, _('Unauthorized to edit user %s') % '')
        except NotFound:
            abort(404, _('User not found'))

        user_obj = context.get('user_obj')

        if not (authz.is_sysadmin(c.user) or c.user == user_obj.name):
            abort(403,
                  _('User %s not authorized to edit %s') % (str(c.user), id))

        errors = errors or {}
        vars = {
            'data': data,
            'errors': errors,
            'error_summary': error_summary,
            'origin': origin,
            'countries': allCountries,
            'occupations': occupations
        }

        self._setup_template_variables(
            {
                'model': model,
                'session': model.Session,
                'user': c.user
            }, data_dict)

        c.is_myself = True
        c.show_email_notifications = asbool(
            config.get('ckan.activity_streams_email_notifications'))
        c.form = render(self.edit_user_form, extra_vars=vars)

        return render('user/edit.html')
Ejemplo n.º 51
0
 def _get_action_request_data(self, api_action):
     function = toolkit.get_action(api_action)
     side_effect_free = getattr(function, 'side_effect_free', False)
     request_data = self._get_request_data(try_url_params=side_effect_free)
     return request_data
Ejemplo n.º 52
0
 def get_homepage_datasets(self, *args, **kwargs):
     psearch = toolkit.get_action("package_search")
     psearch_ret = psearch(data_dict={"sort": "timestamp desc", "rows": 5})
     results = psearch_ret['results']
     return results
Ejemplo n.º 53
0
class InitEurovoc(CkanCommand):
  """
  """
  summary = __doc__.split('\n')[0]
  usage = __doc__
  max_args = 0
  min_args = 0

  def __init__(self, name):
    super(InitEurovoc, self).__init__(name)

  def command(self):
    """
    """
    self._load_config()
    log = logging.getLogger(__name__)

    import ckan.model as model


    log.info('ADDING GROUPS (Eurovoc Domains)')
    root = ET.parse('ckanext/eurovoc/eurovoc_xml/dom_en.xml').getroot()

    for record in root.iter('RECORD'):
      id = record.find('DOMAINE_ID').text
      title = record.find('LIBELLE').text.title()
      name = slugify(title).lower()
      desc = 'Eurovoc Domain: ' + id + ' ' + title
      grp_dict = {
        'id': id,
        'title': title,
        'name': name,
        'type': 'group',
        'extras': [{'key': 'Eurovoc Domain', 'value': title},
                   {'key': 'Eurovoc Domain ID', 'value': id}]
      }
      log.info('Creating group: ' + id + ' - ' + title)
      context = {'user': '******', 'model': model, 'session': model.Session}
      try:
        toolkit.get_action('group_create')(context, grp_dict)
      except ValidationError:
        # Group already exists; ignore and continue
        pass

    log.info('ADDING VOCABULARY THESAURUS')

    context = {'user': '******', 'model': model, 'session': model.Session}
    voc_dict = {'name': 'eurovoc_thesaurus'}
    try:
      voc = toolkit.get_action('vocabulary_create')(context, voc_dict)
    except ValidationError:
      # Vocabulary already exists; load it instead
      voc = toolkit.get_action('vocabulary_show')(context, {'id': 'eurovoc_thesaurus'})

    thesroot = ET.parse('ckanext/eurovoc/eurovoc_xml/thes_en.xml').getroot()

    for record in thesroot.iter('RECORD'):
      id = record.find('THESAURUS_ID').text
      title = record.find('LIBELLE').text.title()
      name = slugify(title)
      name_new = re.sub(r'(:S|\'[sS])', r's', title)
      name_new = re.sub(u'[^a-zA-Z0-9]', r' ', name_new)
      name_new = re.sub(u'\s+', r'-', name_new)
      log.info('Creating tag: ' + name_new)
      context = {'user': '******', 'model': model, 'session': model.Session}
      del_dict = {'id': name, 'vocabulary_id': voc['id']}
      try:
        toolkit.get_action('tag_delete')(context, del_dict)
      except Exception:
        # Tag did not exist yet; nothing to delete
        pass
      del_dict['id'] = name_new
      try:
        toolkit.get_action('tag_delete')(context, del_dict)
      except Exception:
        # Tag did not exist yet; nothing to delete
        pass
      tag_dict = {'name': name_new, 'vocabulary_id': voc['id']}
      context = {'user': '******', 'model': model, 'session': model.Session}
      toolkit.get_action('tag_create')(context, tag_dict)
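
# To make the three substitutions above concrete: possessives collapse to a
# plain 's', non-alphanumerics become spaces, and runs of whitespace become
# single hyphens. A worked example on an illustrative label (not taken from
# the Eurovoc XML):

import re

title = "Europe's Regions & Cities"
name_new = re.sub(r'(:S|\'[sS])', r's', title)       # "Europes Regions & Cities"
name_new = re.sub(u'[^a-zA-Z0-9]', r' ', name_new)   # "Europes Regions   Cities"
name_new = re.sub(u'\s+', r'-', name_new)            # "Europes-Regions-Cities"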
Ejemplo n.º 54
0
def dataontosearch_dataset_search(context, data_dict):
    '''
    Perform a semantic search using DataOntoSearch.

    The parameters and returned JSON is designed to be compatible with the
    regular search (package_search) as far as possible, though only the q
    parameter is supported. Some additional information from DataOntoSearch is
    also available.

    :param q: the query to use when searching
    :type q: string
    :rtype: dictionary with 'concepts' that matched the query, a 'count' of
        results and 'results' with a list of datasets that matched. For each
        dataset, their similarity 'score' and similar 'concepts' are available
        in addition to the usual information given in package_show. For each
        concept, their RDF IRI is available as 'uri', human-readable label as
        'label' and similarity score as 'similarity'
    '''
    toolkit.check_access(u'dataontosearch_dataset_search', context, data_dict)

    query = toolkit.get_or_bust(data_dict, u'q')

    parameters = {
        u'q': query,
        u'd': 0,
    }
    if get_use_autotag():
        parameters[u'a'] = 1

    r = make_search_get_request(u'/search', parameters)
    r.raise_for_status()
    data = r.json()

    results = data[u'results']
    query_concepts = data[u'concepts']

    processed_results = []

    for result in results:
        # Extract the ID of this dataset
        dataset_id = result[u'uri'].split(u'/')[-1]

        # Fetch information about this dataset
        try:
            dataset_info = toolkit.get_action(u'package_show')(
                None, {
                    u'id': dataset_id,
                })
        except toolkit.ObjectNotFound:
            # Perhaps not part of this CKAN? This should generally not happen,
            # and can indicate some trouble with configurations in
            # DataOntoSearch or changed ID or name in CKAN
            logger.warning(
                u'Skipping dataset %(uri)s returned from DataOntoSearch, not '
                u'found in CKAN', {u'uri': result[u'uri']},
                exc_info=True)
            continue
        except toolkit.NotAuthorized:
            # This may be a private dataset or something, so don't show it
            # TODO: Does CKAN reveal that the dataset exists, but is private?
            logger.debug(
                u'Skipping dataset %(uri)s since user is not authorized to see '
                u'it', {u'uri': result[u'uri']},
                exc_info=True)
            continue

        # Enrich with information from DataOntoSearch's result
        extra_info = {
            u'concepts': result[u'concepts'],
            u'score': result[u'score'],
        }
        dataset_info.update(extra_info)

        # Processed!
        processed_results.append(dataset_info)

    return {
        u'count': len(processed_results),
        u'results': processed_results,
        u'concepts': query_concepts,
        # Include dummy data for keys present in package_search
        u'sort': u'',
        u'facets': {},
        u'search_facets': {}
    }
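
# A hedged sketch of invoking the action above through the plugins toolkit;
# the user name and query are illustrative:

import ckan.plugins.toolkit as toolkit

result = toolkit.get_action(u'dataontosearch_dataset_search')(
    {u'user': u'some-user'},        # hypothetical context
    {u'q': u'bicycle traffic'})     # the only supported search parameter

print(u'{0} dataset(s) matched'.format(result[u'count']))
for dataset in result[u'results']:
    # Each entry is a package_show dict enriched with 'score' and 'concepts'
    print(u'{0} (score {1})'.format(dataset[u'name'], dataset[u'score']))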
Ejemplo n.º 55
0
    def publish(self, doi, pkg=None, context=None, *args, **kwargs):
        # Avoid the shared mutable default argument pitfall
        if context is None:
            context = {}

        update_doi = kwargs.get('update', False)

        # dataset data
        package_id = pkg['id']
        url = config.get('ckan.site_url', '') + '/dataset/' + pkg.get(
            'name', pkg['id'])

        if self.url_prefix:
            url = self.url_prefix + pkg.get('name', pkg['id'])

        if update_doi:
            log.debug("*** Updating id = {0}, url = {1}".format(
                package_id, url))
            # check that the published ids match this dataset
            published_ids = self.get_doi_identifiers(doi)
            if published_ids and package_id not in published_ids and pkg.get(
                    'name') not in published_ids:
                return None, 'Dataset ids ({0}, {1}) do not match published ids: [{2}]'.format(
                    package_id, pkg.get('name'), ', '.join(published_ids))
        else:
            log.debug("Publishing id = {0}, url = {1}".format(package_id, url))

        # get converted package
        metadata_format = 'datacite'

        try:
            converted_package = toolkit.get_action('package_export')(
                context, {
                    'id': package_id,
                    'format': metadata_format
                })
        except toolkit.ObjectNotFound:
            return None, 'Dataset not found'

        xml = converted_package.replace('\n', '').replace('\t', '')

        # Validate
        try:
            converted_record = XMLRecord.from_record(
                Record(
                    MetadataFormats().get_metadata_formats(metadata_format)[0],
                    xml))
            validation_result = converted_record.validate()
            log.debug("Validation result: {0}".format(validation_result))
        except Exception as e:
            log.error("Converted Validation FAILED, exception: {0}".format(e))
            traceback.print_exc()
            validation_result = False

        if not validation_result:
            return None, 'Dataset XML validation failed'

        # encode 64
        xml_bytes = xml
        if isinstance(xml, str):
            xml_bytes = xml.encode('utf-8')
        xml_encoded = base64.b64encode(xml_bytes)

        # prepare JSON
        headers = {"Content-Type": "application/vnd.api+json"}
        auth = HTTPBasicAuth(self.account_name, self.account_password)

        data = collections.OrderedDict()
        data['id'] = doi
        data['type'] = 'dois'
        data['attributes'] = collections.OrderedDict()
        # TODO check for update if this state is correct
        if update_doi:
            data['attributes']['event'] = ""
        else:
            data['attributes']['event'] = "publish"
        data['attributes']['doi'] = doi
        data['attributes']['url'] = url
        data['attributes']['xml'] = xml_encoded.decode()
        args = {'data': data}

        args_json = json.dumps(args)
        # log.debug(args_json)

        datacite_url_endpoint = self.datacite_url
        if update_doi:
            datacite_url_endpoint = self.datacite_url + '/' + doi
        log.debug(
            " REST request send to URL: {0}".format(datacite_url_endpoint))

        if update_doi:
            r = requests.put(datacite_url_endpoint,
                             headers=headers,
                             auth=auth,
                             data=args_json)
        else:
            r = requests.post(datacite_url_endpoint,
                              headers=headers,
                              auth=auth,
                              data=args_json)

        # print(r.status_code)
        # print(r.json())

        if r.status_code in (200, 201):
            published_doi = r.json().get('data').get('id')
            return published_doi, None
        else:
            if update_doi:
                return None, 'Error updating to DataCite: HTTP Code: {0}, error: {1}'.format(
                    r.status_code, r.json())
            else:
                return None, 'Error publishing to DataCite: HTTP Code: {0}, error: {1}'.format(
                    r.status_code, r.json())
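
# A hedged usage sketch for publish(); the publisher instance, DOI and
# dataset id below are placeholders:

# Hypothetical call site; `publisher` is an instance of the class above
pkg_dict = toolkit.get_action('package_show')({}, {'id': 'my-dataset'})
published_doi, error = publisher.publish(
    '10.12345/example-doi',  # placeholder DOI
    pkg=pkg_dict,
    context={},
    update=False)            # set update=True to update an existing DOI
if error:
    log.error("DOI publication failed: %s", error)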
Ejemplo n.º 56
0
    def identify(self):
        '''Identify which user (if any) is logged in via simple SSO header.
        If a logged in user is found, set toolkit.c.user to be their user name.
        '''
        logger = logging.getLogger(__name__)
        shib_partyid = request.headers.get('partyId')
        logger.debug(request.headers)
        if not getattr(c, u'user', None):
            c.user = None
        if shib_partyid is None:
            logger.debug("ESAS Identity not Found in HEADER")
        if shib_partyid is not None:
            logger.debug("ESAS Identity Found in Header")
            shib_email = request.headers.get('email')
            gives = text_to_id(request.headers.get('Legalgivennames'))
            fams = text_to_id(request.headers.get('Legalfamilyname'))
            nonumgives = re.sub('[0-9]+', '', gives)
            nonumfam = re.sub('[0-9]+', '', fams)
            shib_username = (alphabet_position(gives) + '_' +
                             alphabet_position(fams)).lower()
            shib_fullname = nonumgives + ' ' + nonumfam
            logger.debug("partyId = \"{0}\"".format(shib_partyid))
            logger.debug("email = \"{0}\"".format(shib_email))
            logger.debug("username = \"{0}\"".format(shib_username))
            logger.debug("fullname = \"{0}\"".format(shib_fullname))
            check_user = get_user_by_userid(shib_partyid)
            # ESAS user is logged in and exists in CKAN
            if c.user and check_user and c.user == check_user['name']:
                logger.debug(
                    "User logged in already username = \"{0}\"".format(
                        check_user['name']))
                # Check if ESAS email for user has changed.
                # If it has changed then update user email to match
                # CKAN is not system of record for email.
                # Changes as needed to match ESAS header.
                current_email = get_email_by_userid(shib_partyid)
                if shib_email != current_email:
                    logger.info("ESAS: A user account has changed email.")
                    check_user = toolkit.get_action('user_update')(
                        context={
                            'ignore_auth': True,
                            'user': '******'
                        },
                        data_dict={
                            'id': shib_partyid,
                            'email': shib_email
                        })
            elif c.user and check_user and c.user != check_user['name']:
                # User already logged in and ESAS header does not match;
                # switch to the username from the ESAS header
                logger.info(
                    "ESAS: User already logged in to CKAN - \"{0}\"".format(
                        c.user))
                logger.info("ESAS: Username in header - \"{0}\"".format(
                    check_user['name']))
                logger.info("ESAS: User being set to username in ESAS header.")
                c.user = check_user['name']

            elif check_user is not None and c.user is None:
                # User not logged in and ESAS header exists
                c.user = check_user['name']
            else:
                # A user with this username doesn't yet exist in CKAN
                # - so create one.
                logger.info("ESAS: user not found. Creating new CKAN user.")
                check_user = toolkit.get_action('user_create')(
                    context={
                        'ignore_auth': False,
                        'user': '******'
                    },
                    data_dict={
                        'email': shib_email,
                        'id': shib_partyid,
                        'name': shib_username,
                        'fullname': shib_fullname,
                        'password': generate_password()
                    })
                logger.debug("username = \"{0}\"".format(check_user['name']))
Ejemplo n.º 57
0
 def _check_access(self, id):
     context = self._context()
     pkg = tk.get_action('package_show')(context, {'id': id})
     if not check_journal_role(pkg, 'admin') and not h.check_access('sysadmin'):
         tk.abort(401, 'Unauthorized to manage DOIs')
Ejemplo n.º 58
0
    def register(self, id, template):
        """
        register at da|ra
        """

        self._check_access(id)
        context = self._context()

        if params()['test'] or params()['test_register']:
            doi_key = 'dara_DOI_Test'
            a = {201: ('dara_registered_test', 'Dataset registered (Test)'),
                 200: ('dara_updated_test', 'Dataset updated (Test)')}
        else:
            doi_key = 'dara_DOI'
            a = {201: ('dara_registered', 'Dataset registered'),
                 200: ('dara_updated', 'Dataset updated')}

        def store():
            d = doi.pkg_doi(c.pkg_dict)
            c.pkg_dict.update({doi_key: d})
            date = '{:%Y-%m-%d %H:%M:%S}'.format(datetime.now())
            k = get_in([dara, 0], a)
            c.pkg_dict[k] = date
            tk.get_action('package_update')(context, c.pkg_dict)

        def response():
            if dara in a:
                store()
                h.flash_success(get_in([dara, 1], a))
            else:
                h.flash_error("ERROR! Sorry, dataset has not been registered "
                              "or updated. Please contact your friendly "
                              "sysadmin. ({})".format(dara))
            tk.redirect_to('dara_doi', id=id)

        def register_resources():
            def reg(resource):
                resource_id = resource['id']
                c.resource = tk.get_action('resource_show')(context, {'id': resource_id})
                xml = self.xml(id, 'package/resource.xml')
                dara = darapi(auth(), xml, test=params()['test'],
                              register=params()['register'])
                if dara in a:
                    c.resource[doi_key] = doi.res_doi(c.resource)
                    tk.get_action('resource_update')(context, c.resource)
                else:
                    h.flash_error("ERROR! Resource {} could not be registered "
                                  "({}). Dataset has not been registered."
                                  .format(resource_id, dara))
                    tk.redirect_to('dara_doi', id=id)

            c.pkg_dict = tk.get_action('package_show')(context, {'id': id})
            resources = filter(lambda res: res['id'] in tk.request.params,
                               c.pkg_dict['resources'])
            map(reg, resources)

        # first register resources
        register_resources()

        # Register the package. We must first fetch the package with the
        # updated resources so that their DOIs/URLs are included.
        c.pkg_dict = tk.get_action('package_show')(context, {'id': id})
        dara = darapi(auth(), self.xml(id, template),
                    test=params()['test'], register=params()['register'])
        response()
Ejemplo n.º 59
0
 def _get_action(cls, action_name):
     return tk.get_action(action_name)
Ejemplo n.º 60
0
def get_harvester_count():
    return len(get_action('harvest_source_list')({}, {'all': True}))