Example #1
0
File: get.py Project: tbalaz/test
def harvest_source_show(context, data_dict):
    '''
    Returns the metadata of a harvest source

    ``check_access`` is called for ``harvest_source_show`` first. The source
    is then looked up with ``HarvestSource.get`` and stored under
    ``context['source']`` before being passed to ``harvest_source_dictize``.

    :param id: the id or name of the harvest source
    :type id: string
    :param attr: optional value forwarded to ``HarvestSource.get`` as
        ``attr`` (None when absent)

    :returns: harvest source metadata
    :rtype: dictionary
    :raises NotFound: if the lookup does not return a source
    '''
    check_access('harvest_source_show', context, data_dict)

    source_ref = data_dict.get('id')
    lookup_attr = data_dict.get('attr')

    harvest_source = HarvestSource.get(source_ref, attr=lookup_attr)
    # The lookup result is stored before the existence check, so the key is
    # written on the NotFound path as well.
    context['source'] = harvest_source

    if not harvest_source:
        raise NotFound

    # Only supplies a value when the caller has not put one in the context.
    context.setdefault('include_status', True)

    return harvest_source_dictize(harvest_source, context)
Example #2
0
def harvest_source_create(context,data_dict):
    '''
    Create a new harvest source from ``data_dict`` and return it dictized.

    ``check_access`` is called for ``harvest_source_create``, then
    ``data_dict`` is validated against ``context['schema']`` (or
    ``default_harvest_source_schema()`` when the context has none).

    ``url`` (stripped of surrounding whitespace) and ``type`` are always
    taken from the validated data. The remaining fields are copied only when
    they are present and not None.

    :returns: the result of ``harvest_source_dictize`` for the saved source
    :raises ValidationError: if validation reports errors; the session is
        rolled back first
    '''
    log.info('Creating harvest source: %r', data_dict)
    check_access('harvest_source_create',context,data_dict)

    session = context['session']
    schema = context.get('schema') or default_harvest_source_schema()

    data, errors = validate(data_dict, schema)

    if errors:
        session.rollback()
        # ``warn`` is a deprecated alias of ``warning`` on stdlib loggers.
        log.warning('Harvest source does not validate: %r', errors)
        raise ValidationError(errors,_error_summary(errors))

    source = HarvestSource()
    source.url = data['url'].strip()
    source.type = data['type']

    optional_fields = ('active','title','description','user_id','publisher_id','config')
    for field in optional_fields:
        if field in data and data[field] is not None:
            setattr(source, field, data[field])

    # Unlike the loop above, this applies the validated value even when it is
    # None, as long as the caller supplied the key at all.
    if 'active' in data_dict:
        source.active = data['active']

    source.save()
    log.info('Harvest source created: %s', source.id)

    return harvest_source_dictize(source,context)
Example #3
0
def harvest_source_for_a_dataset(context, data_dict):
    """
    For a given dataset, return the harvest source that
    created or last updated it, otherwise NotFound.

    TODO: Deprecated, harvest source id is added as an extra to each dataset
    automatically

    :param id: the dataset id, matched against ``package_id`` in the query
    :returns: the result of ``harvest_source_dictize`` for the source
    :raises NotFound: if the query returns no row
    """
    session = context["session"]
    dataset_id = data_dict.get("id")

    # Ordered by HarvestObject.gathered descending, so the first row is the
    # newest match.
    source = (
        session.query(HarvestSource)
        .join(HarvestObject)
        .filter_by(package_id=dataset_id)
        .order_by(HarvestObject.gathered.desc())
        .first()
    )

    if not source:
        raise NotFound

    return harvest_source_dictize(source, context)
Example #4
0
def harvest_source_create(context, data_dict):
    '''
    Create a new harvest source from ``data_dict`` and return it dictized.

    After ``check_access`` for ``harvest_source_create``, ``data_dict`` is
    validated against ``context['schema']``, falling back to
    ``default_harvest_source_schema()``.

    ``url`` (whitespace-stripped) and ``type`` always come from the validated
    data; the other fields are copied only when present and not None.

    :returns: the result of ``harvest_source_dictize`` for the saved source
    :raises ValidationError: if validation reports errors; the session is
        rolled back first
    '''
    log.info('Creating harvest source: %r', data_dict)
    check_access('harvest_source_create', context, data_dict)

    session = context['session']
    schema = context.get('schema') or default_harvest_source_schema()

    data, errors = validate(data_dict, schema)

    if errors:
        session.rollback()
        # ``warn`` is a deprecated alias of ``warning`` on stdlib loggers.
        log.warning('Harvest source does not validate: %r', errors)
        raise ValidationError(errors, _error_summary(errors))

    source = HarvestSource()
    source.url = data['url'].strip()
    source.type = data['type']

    optional_fields = (
        'active', 'title', 'description', 'user_id', 'publisher_id', 'config',
    )
    for name in optional_fields:
        if name in data and data[name] is not None:
            setattr(source, name, data[name])

    # Applied whenever the caller sent the key, even if the validated value
    # is None (the loop above skips None).
    if 'active' in data_dict:
        source.active = data['active']

    source.save()
    log.info('Harvest source created: %s', source.id)

    return harvest_source_dictize(source, context)
Example #5
0
File: get.py Project: tbalaz/test
def harvest_source_list(context, data_dict):
    '''
    Return the dictized harvest sources given by ``_get_sources_for_user``.

    ``check_access`` is called for ``harvest_source_list`` first.

    :returns: a list with one ``harvest_source_dictize`` result per source
    '''
    check_access('harvest_source_list',context,data_dict)

    sources = _get_sources_for_user(context, data_dict)

    # Set only after the sources were fetched; every dictize call below sees
    # detailed=False.
    context['detailed'] = False

    return [harvest_source_dictize(source, context) for source in sources]
Example #6
0
def harvest_source_show(context, data_dict):
    '''
    Look up a single harvest source and return it dictized.

    ``check_access`` is called for ``harvest_source_show`` before the lookup.

    :param id: value passed to ``HarvestSource.get`` to locate the source
    :param attr: optional value forwarded to ``HarvestSource.get`` as
        ``attr`` (None when absent)

    :returns: the result of ``harvest_source_dictize`` for the source
    :raises NotFound: if the lookup does not return a source
    '''
    check_access('harvest_source_show', context, data_dict)

    source_ref = data_dict.get('id')
    lookup_attr = data_dict.get('attr')
    found = HarvestSource.get(source_ref, attr=lookup_attr)

    if found:
        return harvest_source_dictize(found, context)

    raise NotFound
Example #7
0
def harvest_source_update(context, data_dict):
    '''
    Update an existing harvest source and return it dictized.

    After ``check_access`` for ``harvest_source_update``, the source is
    looked up by ``data_dict['id']`` and ``data_dict`` is validated against
    ``context['schema']`` (or ``default_harvest_source_schema()``).

    Validated fields that are present and not None are copied onto the
    source; ``active`` and ``config`` are applied whenever the caller sent
    the key. If the saved source is not active, its jobs with status ``New``
    are marked ``Aborted``.

    :returns: the result of ``harvest_source_dictize`` for the source
    :raises NotFound: if no source is found for the given id
    :raises ValidationError: if validation reports errors; the session is
        rolled back first
    '''
    check_access('harvest_source_update', context, data_dict)

    model = context['model']
    session = context['session']

    source_id = data_dict.get('id')
    schema = context.get('schema') or default_harvest_source_schema()

    log.info('Harvest source %s update: %r', source_id, data_dict)
    source = HarvestSource.get(source_id)
    if not source:
        log.error('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    data, errors = validate(data_dict, schema)

    if errors:
        session.rollback()
        raise ValidationError(errors, _error_summary(errors))

    fields = ('url', 'title', 'type', 'description', 'user_id', 'publisher_id')
    for field in fields:
        if field in data and data[field] is not None:
            value = data[field]
            if field == 'url':
                # Strip on a local copy instead of rewriting the validated dict.
                value = value.strip()
            setattr(source, field, value)

    # These two are applied even when the validated value is None, provided
    # the caller sent the key.
    if 'active' in data_dict:
        source.active = data['active']

    if 'config' in data_dict:
        source.config = data['config']

    source.save()
    # Abort any pending jobs
    if not source.active:
        jobs = HarvestJob.filter(source=source, status=u'New')
        log.info(
            'Harvest source %s not active, so aborting %i outstanding jobs',
            source_id, jobs.count())
        # No truthiness guard needed: iterating an empty result is a no-op.
        for job in jobs:
            job.status = u'Aborted'
            job.save()

    # Ensure sqlalchemy writes to the db immediately, since the gather/fetch
    # runs in a different process and needs the latest source info. Not sure if
    # this works, but try it.
    model.repo.commit_and_remove()

    return harvest_source_dictize(source, context)
Example #8
0
def harvest_source_show(context, data_dict):
    '''
    Fetch one harvest source and return its dictized form.

    ``p.toolkit.check_access`` is called for ``harvest_source_show`` before
    the lookup.

    :param id: value passed to ``HarvestSource.get`` to locate the source
    :param attr: optional value forwarded to ``HarvestSource.get`` as
        ``attr`` (None when absent)

    :returns: the result of ``harvest_source_dictize`` for the source
    :raises NotFound: if the lookup does not return a source
    '''
    p.toolkit.check_access('harvest_source_show', context, data_dict)

    harvest_source = HarvestSource.get(
        data_dict.get('id'),
        attr=data_dict.get('attr'),
    )

    if not harvest_source:
        raise NotFound

    dictized = harvest_source_dictize(harvest_source, context)
    return dictized
Example #9
0
def harvest_source_list(context, data_dict):
    '''
    TODO: Use package search

    Return the dictized harvest sources given by ``_get_sources_for_user``,
    after ``check_access`` for ``harvest_source_list``.

    :param return_last_job_status: optional; converted with
        ``p.toolkit.asbool`` and passed as the third argument to
        ``harvest_source_dictize`` (False when absent)

    :returns: a list with one ``harvest_source_dictize`` result per source
    '''
    check_access('harvest_source_list', context, data_dict)

    user_sources = _get_sources_for_user(context, data_dict)

    raw_flag = data_dict.get('return_last_job_status', False)
    include_last_job_status = p.toolkit.asbool(raw_flag)

    dictized_sources = []
    for harvest_source in user_sources:
        dictized_sources.append(
            harvest_source_dictize(harvest_source, context, include_last_job_status)
        )
    return dictized_sources
Example #10
0
def harvest_source_list(context, data_dict):
    '''
    Return the dictized harvest sources given by ``_get_sources_for_user``.

    ``p.toolkit.check_access`` is called for ``harvest_source_list`` first.

    :returns: a list with one ``harvest_source_dictize`` result per source
    '''
    p.toolkit.check_access('harvest_source_list',context,data_dict)

    sources = _get_sources_for_user(context, data_dict)

    # Set only after the sources were fetched; every dictize call below sees
    # detailed=False.
    context['detailed'] = False
    return [harvest_source_dictize(source, context) for source in sources]
Example #11
0
def harvest_source_list(context, data_dict):
    '''
    Return the dictized harvest sources given by ``_get_sources_for_user``.

    ``check_access`` is called for ``harvest_source_list`` first.

    :returns: a list with one ``harvest_source_dictize`` result per source
    '''
    check_access('harvest_source_list', context, data_dict)

    sources = _get_sources_for_user(context, data_dict)

    # Set only after the sources were fetched; every dictize call below sees
    # detailed=False.
    context['detailed'] = False
    return [harvest_source_dictize(source, context) for source in sources]
Example #12
0
def harvest_source_update(context,data_dict):
    '''
    Update an existing harvest source and return it dictized.

    After ``check_access`` for ``harvest_source_update``, the source is
    looked up by ``data_dict['id']`` and ``data_dict`` is validated against
    ``context['schema']`` (or ``default_harvest_source_schema()``).

    Validated fields that are present and not None are copied onto the
    source; ``active`` and ``config`` are applied whenever the caller sent
    the key. If the saved source is not active, its jobs with status ``New``
    are marked ``Aborted``.

    :returns: the result of ``harvest_source_dictize`` for the source
    :raises NotFound: if no source is found for the given id
    :raises ValidationError: if validation reports errors; the session is
        rolled back first
    '''
    check_access('harvest_source_update',context,data_dict)

    model = context['model']
    session = context['session']

    source_id = data_dict.get('id')
    schema = context.get('schema') or default_harvest_source_schema()

    log.info('Harvest source %s update: %r', source_id, data_dict)
    source = HarvestSource.get(source_id)
    if not source:
        log.error('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    data, errors = validate(data_dict, schema)

    if errors:
        session.rollback()
        raise ValidationError(errors,_error_summary(errors))

    fields = ('url','title','type','description','user_id','publisher_id')
    for field in fields:
        if field in data and data[field] is not None:
            value = data[field]
            if field == 'url':
                # Strip on a local copy instead of rewriting the validated dict.
                value = value.strip()
            setattr(source, field, value)

    # These two are applied even when the validated value is None, provided
    # the caller sent the key.
    if 'active' in data_dict:
        source.active = data['active']

    if 'config' in data_dict:
        source.config = data['config']

    source.save()
    # Abort any pending jobs
    if not source.active:
        jobs = HarvestJob.filter(source=source,status=u'New')
        log.info('Harvest source %s not active, so aborting %i outstanding jobs', source_id, jobs.count())
        # No truthiness guard needed: iterating an empty result is a no-op.
        for job in jobs:
            job.status = u'Aborted'
            job.save()

    # Ensure sqlalchemy writes to the db immediately, since the gather/fetch
    # runs in a different process and needs the latest source info. Not sure if
    # this works, but try it.
    model.repo.commit_and_remove()

    return harvest_source_dictize(source,context)
Example #13
0
def harvest_source_list(context, data_dict):
    '''
    TODO: Use package search

    Return the dictized harvest sources given by ``_get_sources_for_user``,
    after ``check_access`` for ``harvest_source_list``.

    :returns: a list with one ``harvest_source_dictize`` result per source
    '''
    check_access('harvest_source_list', context, data_dict)

    sources = _get_sources_for_user(context, data_dict)

    return [harvest_source_dictize(source, context) for source in sources]
Example #14
0
def harvest_source_list(context, data_dict):
    '''
    TODO: Use package search

    Return the dictized harvest sources given by ``_get_sources_for_user``,
    after ``check_access`` for ``harvest_source_list``.

    :returns: a list with one ``harvest_source_dictize`` result per source
    '''
    check_access('harvest_source_list',context,data_dict)

    sources = _get_sources_for_user(context, data_dict)

    return [harvest_source_dictize(source, context) for source in sources]
Example #15
0
def harvest_source_list(context, data_dict):
    '''
    TODO: Use package search

    Return the dictized harvest sources given by ``_get_sources_for_user``.

    :param organization_id: optional; forwarded to ``_get_sources_for_user``
        (None when absent)
    :param return_last_job_status: optional; converted with
        ``p.toolkit.asbool`` and passed as the third argument to
        ``harvest_source_dictize`` (False when absent)

    :returns: a list with one ``harvest_source_dictize`` result per source
    '''
    # NOTE(review): this function makes no check_access call of its own -
    # confirm authorization is enforced elsewhere.
    org_id = data_dict.get('organization_id')
    # Read from config on every call; 100 is used when the option is unset.
    max_sources = config.get('ckan.harvest.harvest_source_limit', 100)

    user_sources = _get_sources_for_user(
        context,
        data_dict,
        organization_id=org_id,
        limit=max_sources,
    )

    raw_flag = data_dict.get('return_last_job_status', False)
    include_last_job_status = p.toolkit.asbool(raw_flag)

    dictized_sources = []
    for harvest_source in user_sources:
        dictized_sources.append(
            harvest_source_dictize(harvest_source, context, include_last_job_status)
        )
    return dictized_sources
Example #16
0
def harvest_source_list(context, data_dict):
    """
    TODO: Use package search

    Return the dictized harvest sources given by ``_get_sources_for_user``,
    after ``check_access`` for ``harvest_source_list``.

    :returns: a list with one ``harvest_source_dictize`` result per source
    """
    check_access("harvest_source_list", context, data_dict)

    sources = _get_sources_for_user(context, data_dict)

    # Set only after the sources were fetched; every dictize call below sees
    # detailed=False.
    context["detailed"] = False
    return [harvest_source_dictize(source, context) for source in sources]
Example #17
0
def harvest_source_update(context,data_dict):
    '''
    Update an existing harvest source and return it dictized.

    After ``check_access`` for ``harvest_source_update``, the source is
    looked up by ``data_dict['id']`` and ``data_dict`` is validated against
    ``context['schema']`` (or ``default_harvest_source_schema()``).

    Validated fields that are present and not None are copied onto the
    source; ``active`` and ``config`` are applied whenever the caller sent
    the key. If the saved source is not active, its jobs with status ``New``
    are marked ``Aborted``.

    :returns: the result of ``harvest_source_dictize`` for the source
    :raises NotFound: if no source is found for the given id
    :raises ValidationError: if validation reports errors; the session is
        rolled back first
    '''
    check_access('harvest_source_update',context,data_dict)

    session = context['session']

    source_id = data_dict.get('id')
    schema = context.get('schema') or default_harvest_source_schema()

    log.info('Harvest source %s update: %r', source_id, data_dict)
    source = HarvestSource.get(source_id)
    if not source:
        log.error('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    data, errors = validate(data_dict, schema)

    if errors:
        session.rollback()
        raise ValidationError(errors,_error_summary(errors))

    fields = ('url','title','type','description','user_id','publisher_id')
    for field in fields:
        if field in data and data[field] is not None:
            setattr(source, field, data[field])

    # These two are applied even when the validated value is None, provided
    # the caller sent the key.
    if 'active' in data_dict:
        source.active = data['active']

    if 'config' in data_dict:
        source.config = data['config']

    source.save()
    # Abort any pending jobs
    if not source.active:
        jobs = HarvestJob.filter(source=source,status=u'New')
        log.info('Harvest source %s not active, so aborting %i outstanding jobs', source_id, jobs.count())
        # No truthiness guard needed: iterating an empty result is a no-op.
        for job in jobs:
            job.status = u'Aborted'
            job.save()

    return harvest_source_dictize(source,context)
Example #18
0
def harvest_source_for_a_dataset(context, data_dict):
    '''For a given dataset, return the harvest source that
    created or last updated it, otherwise NotFound.

    :param id: the dataset id, matched against ``package_id`` in the query
    :returns: the result of ``harvest_source_dictize`` for the source
    :raises NotFound: if the query returns no row
    '''
    session = context['session']

    dataset_id = data_dict.get('id')

    # Ordered by HarvestObject.gathered descending, so the first row is the
    # newest match.
    query = (
        session.query(HarvestSource)
        .join(HarvestObject)
        .filter_by(package_id=dataset_id)
        .order_by(HarvestObject.gathered.desc())
    )
    source = query.first()

    if not source:
        raise NotFound

    return harvest_source_dictize(source,context)
Example #19
0
def harvest_source_for_a_dataset(context, data_dict):
    '''For a given dataset, return the harvest source that
    created or last updated it, otherwise NotFound.

    :param id: the dataset id, matched against ``package_id`` in the query
    :returns: the result of ``harvest_source_dictize`` for the source
    :raises NotFound: if the query returns no row
    '''
    session = context['session']

    dataset_id = data_dict.get('id')

    # Ordered by HarvestObject.gathered descending, so the first row is the
    # newest match.
    query = (
        session.query(HarvestSource)
        .join(HarvestObject)
        .filter_by(package_id=dataset_id)
        .order_by(HarvestObject.gathered.desc())
    )
    source = query.first()

    if not source:
        raise NotFound

    return harvest_source_dictize(source, context)
Example #20
0
File: get.py Project: tbalaz/test
def harvest_source_for_a_dataset(context, data_dict):
    '''For a given dataset, return the harvest source that
    created or last updated it, otherwise NotFound.

    ``context['include_status']`` is set to False unless the caller already
    supplied a truthy value for it.

    :param id: the dataset id, matched against ``package_id`` in the query
    :returns: the result of ``harvest_source_dictize`` for the source
    :raises NotFound: if the query returns no row
    '''
    session = context['session']

    dataset_id = data_dict.get('id')

    # Ordered by HarvestObject.gathered descending, so the first row is the
    # newest match.
    query = (
        session.query(HarvestSource)
        .join(HarvestObject)
        .filter_by(package_id=dataset_id)
        .order_by(HarvestObject.gathered.desc())
    )
    source = query.first()

    if not source:
        raise NotFound

    if not context.get('include_status'):
        # By default we don't want to know the harvest
        # source status - this is an expensive call.
        context['include_status'] = False
    return harvest_source_dictize(source, context)