Python HarvestJob.filter 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: ckanext.harvestodm.model

클래스/타입: HarvestJob

메소드/함수: filter

hotexamples.com에서의 예제들: 3

Python HarvestJob.filter 예제 3개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 ckanext.harvestodm.model.HarvestJob.filter의 실제 사용 예제 가운데 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

get(5)

filter(3)

save(1)

source(1)

자주 사용되는 메소드들

get (5)

filter (3)

save (1)

source (1)

예제 #1

파일 보기

파일: plugin.py 프로젝트: opendatamonitor/ckanext-harvestodm

def _delete_harvest_source_object(context, data_dict):
    '''
        Deletes an actual HarvestSource object with the id provided on the
        data dict of the harvest_source dataset. Similarly to the datasets,
        the source object is not actually deleted, just flagged as inactive.
        All validation and authorization checks should be used by now, so
        this function is not to be used directly to delete harvest sources.

        Side effects, in order:

        1. The documents of the ``jobs`` collection of the ``odm`` Mongo
           database whose ``base_url`` equals ``data_dict['url']`` are
           removed. This runs before the source lookup, so it also happens
           when the source turns out not to exist.
        2. The source is saved with ``active = False``.
        3. Every job of the source with status ``New`` is saved with status
           ``Aborted``.

        :param context: Not used by this function
        :param data_dict: A standard package data_dict; its ``id`` and
            ``url`` entries are read

        :raises KeyError: if ``data_dict`` has no ``url`` entry
        :raises ObjectNotFound: if no HarvestSource is found for the id

        :returns: The deleted HarvestSource object
        :rtype: HarvestSource object
    '''

    source_id = data_dict.get('id')

    log.info('Deleting harvest source: %s', source_id)

    # Remove the Mongo job documents registered for this source URL. The
    # return value of remove() is not needed, so it is not bound to a name.
    client.odm.jobs.remove({"base_url": data_dict['url']})

    source = HarvestSource.get(source_id)
    if not source:
        # Logger.warning is the supported method; Logger.warn is a
        # deprecated alias of it.
        log.warning('Harvest source %s does not exist', source_id)
        raise p.toolkit.ObjectNotFound('Harvest source %s does not exist' % source_id)

    # Don't actually delete the record, just flag it as inactive
    source.active = False
    source.save()

    # Abort any pending jobs
    jobs = HarvestJob.filter(source=source, status=u'New')
    if jobs:
        log.info('Aborting %i jobs due to deleted harvest source', jobs.count())
        for job in jobs:
            job.status = u'Aborted'
            job.save()

    log.debug('Harvest source %s deleted', source_id)

    return source

예제 #2

파일 보기

파일: plugin.py 프로젝트: opendatamonitor/ckanext-harvestodm

def _update_harvest_source_object(context, data_dict):
    '''
        Updates an actual HarvestSource object with the data dict
        of the harvest_source dataset. All validation and authorization
        checks should be used by now, so this function is not to be used
        directly to update harvest sources.

        After the HarvestSource has been added to the session, the matching
        document of the ``jobs`` collection in the ``odm`` Mongo database
        (looked up by ``cat_url``) is rebuilt from the source and from
        ``data_dict['__extras']``. If no such document exists, Mongo is
        left untouched.

        :param context: Not used by this function
        :param data_dict: A standard package data_dict. When a Mongo job
            document exists, ``__extras`` must hold ``catalogue_country``,
            ``language``, ``catalogue_date_created`` and
            ``catalogue_date_updated``.

        :raises NotFound: if no HarvestSource is found for the id
        :raises KeyError: if a required ``__extras`` entry is missing or
            the language name has no entry in the language mapping

        :returns: The updated HarvestSource object
        :rtype: HarvestSource object
    '''
    # Language names as received in the data dict -> two-letter codes
    # stored in the Mongo job document
    language_mappings = {
        'English': 'en', 'Bulgarian': 'bg', 'Croatian': 'hr', 'Czech': 'cs',
        'Danish': 'da', 'Icelandic': 'is', 'German': 'de', 'Greek': 'el',
        'Spanish': 'es', 'Estonian': 'et', 'Finnish': 'fi', 'French': 'fr',
        'Hungarian': 'hu', 'Italian': 'it', 'Lithuanian': 'lt',
        'Latvian': 'lv', 'Maltese': 'mt', 'Dutch': 'nl', 'Polish': 'pl',
        'Portuguese': 'pt', 'Romanian': 'ro', 'Slovak': 'sk',
        'Swedish': 'sv', 'Ukrainian': 'uk', 'Norwegian': 'no',
    }

    source_id = data_dict.get('id')
    log.info('Harvest source %s update: %r', source_id, data_dict)
    source = HarvestSource.get(source_id)
    if not source:
        log.error('Harvest source %s does not exist', source_id)
        raise logic.NotFound('Harvest source %s does not exist' % source_id)

    fields = ['url', 'title', 'description', 'user_id',
              'publisher_id', 'frequency']
    for f in fields:
        if f in data_dict and data_dict[f] is not None:
            if f == 'url':
                # The stripped value is also written back to data_dict
                data_dict[f] = data_dict[f].strip()
            setattr(source, f, data_dict[f])

    # Avoids clashes with the dataset type
    if 'source_type' in data_dict:
        source.type = data_dict['source_type']

    if 'config' in data_dict:
        source.config = data_dict['config']

    # Don't change state unless explicitly set in the dict
    if 'state' in data_dict:
        source.active = data_dict.get('state') == 'active'

    # Don't commit yet, let package_create do it
    source.add()

    # Abort any pending jobs
    if not source.active:
        jobs = HarvestJob.filter(source=source, status=u'New')
        log.info('Harvest source %s not active, so aborting %i outstanding jobs', source_id, jobs.count())
        if jobs:
            for job in jobs:
                job.status = u'Aborted'
                job.add()

    client = pymongo.MongoClient(str(mongoclient), int(mongoport))
    try:
        db_jobs = client.odm.jobs

        base_url = _catalogue_base_url(source.url, source.type)
        log.debug('Catalogue base url of harvest source %s: %s',
                  source_id, base_url)

        job1 = db_jobs.find_one({"cat_url": base_url})
        if job1 is not None:
            extras = data_dict['__extras']
            # description and user are carried over from the existing
            # document; everything else comes from the source / data dict
            job = {
                "cat_url": str(base_url),
                "base_url": str(source.url),
                "type": str(source.type),
                "id": str(source.id),
                "description": str(job1['description']),
                "frequency": str(source.frequency),
                "title": str(source.title),
                'country': str(extras['catalogue_country']),
                'language': language_mappings[str(extras['language'])],
                'catalogue_date_created': str(extras['catalogue_date_created']),
                'catalogue_date_updated': str(extras['catalogue_date_updated']),
                'user': str(job1['user']),
            }
            # Optional entries are copied only when the old document has them
            for key in ('harmonisation', 'official'):
                if key in job1:
                    job[key] = job1[key]
            if 'date_harvested' in job1:
                job['date_harvested'] = job1['date_harvested']
            else:
                job['date_harvested'] = datetime.datetime.now()

            # Replace the old document with the rebuilt one
            db_jobs.remove({'id': job1['id']})
            db_jobs.save(job)
    finally:
        # The client is created for this call only, so release its
        # connections whatever happened above
        client.close()

    return source


def _catalogue_base_url(url, source_type):
    '''
        Returns the value used to look up a source in the ``cat_url`` field
        of the Mongo ``jobs`` collection.

        For sources of type ``html`` whose url starts with ``http://`` or
        ``https://`` this is the scheme plus everything up to (not
        including) the first following ``/``. In every other case the url
        is returned unchanged, so the result is always defined.

        :param url: The url of the harvest source
        :param source_type: The type of the harvest source

        :returns: The catalogue base url
    '''
    if source_type != 'html':
        return url

    # Match the scheme at the start of the url only; a substring test would
    # misread urls that merely contain "https" somewhere in their path.
    for scheme in ('https://', 'http://'):
        if url.startswith(scheme):
            host = url[len(scheme):].split('/', 1)[0]
            return scheme + host

    return url

예제 #3

파일 보기

파일: dictization.py 프로젝트: opendatamonitor/ckanext-harvestodm

def _get_source_status(source, context):
    '''
    TODO: Deprecated, use harvest_source_show_status instead

    Builds a dict summarising the harvest jobs of ``source``.

    The dict always has the keys ``job_count``, ``next_harvest``,
    ``last_harvest_request``, ``last_harvest_statistics``,
    ``last_harvest_errors``, ``overall_statistics`` and ``packages``.
    When the source has no jobs at all, a ``msg`` key is added and the
    remaining values keep their empty defaults.

    :param source: The harvest source the jobs are filtered by
    :param context: Dict read for ``model`` and for ``detailed`` (defaults
        to True). Statistics and error messages of the last finished job
        and the package names are only filled in when ``detailed`` is
        truthy.

    :returns: The status dict
    :rtype: dict
    '''

    model = context.get('model')
    detailed = context.get('detailed', True)

    total_jobs = HarvestJob.filter(source=source).count()

    status = {
        'job_count': 0,
        'next_harvest': '',
        'last_harvest_request': '',
        'last_harvest_statistics': {'added': 0, 'updated': 0, 'errors': 0, 'deleted': 0},
        'last_harvest_errors': {'gather': [], 'object': []},
        'overall_statistics': {'added': 0, 'errors': 0},
        'packages': [],
    }

    # Nothing more to report for a source that never had a job
    if not total_jobs:
        status['msg'] = 'No jobs yet'
        return status
    status['job_count'] = total_jobs

    # A job with status New counts as the next scheduled harvest
    pending_job = HarvestJob.filter(source=source, status=u'New').first()
    status['next_harvest'] = 'Scheduled' if pending_job else 'Not yet scheduled'

    # Most recently created job with status Finished
    finished_jobs = HarvestJob.filter(source=source, status=u'Finished')
    last_job = finished_jobs.order_by(HarvestJob.created.desc()).first()

    if not last_job:
        status['last_harvest_request'] = 'Not yet harvested'
        return status

    #TODO: Should we encode the dates as strings?
    status['last_harvest_request'] = str(last_job.gather_finished)

    if detailed:
        # Statistics of the last finished job
        job_stats = harvest_job_dictize(last_job, context)['stats']
        last_stats = status['last_harvest_statistics']
        last_stats['added'] = job_stats.get('new', 0)
        last_stats['updated'] = job_stats.get('updated', 0)
        last_stats['deleted'] = job_stats.get('deleted', 0)
        last_stats['errors'] = (job_stats.get('errored', 0) +
                                len(last_job.gather_errors))

        # The gather errors hang off the job itself; the object errors
        # have to be queried through the objects of the job.
        last_errors = status['last_harvest_errors']
        job_object_errors = model.Session.query(HarvestObjectError) \
            .join(HarvestObject) \
            .filter(HarvestObject.job==last_job)

        for gather_error in last_job.gather_errors:
            last_errors['gather'].append(gather_error.message)

        for object_error in job_object_errors:
            last_errors['object'].append({
                'object_id': object_error.object.id,
                'object_guid': object_error.object.guid,
                'message': object_error.message,
            })

    # Overall statistics
    overall = status['overall_statistics']

    active_packages = model.Session.query(distinct(HarvestObject.package_id), Package.name) \
        .join(Package).join(HarvestSource) \
        .filter(HarvestObject.source==source) \
        .filter(HarvestObject.current==True) \
        .filter(Package.state==u'active')

    overall['added'] = active_packages.count()
    if detailed:
        for row in active_packages:
            status['packages'].append(row.name)

    gather_error_count = model.Session.query(HarvestGatherError) \
        .join(HarvestJob).join(HarvestSource) \
        .filter(HarvestJob.source==source).count()

    object_error_count = model.Session.query(HarvestObjectError) \
        .join(HarvestObject).join(HarvestJob).join(HarvestSource) \
        .filter(HarvestJob.source==source).count()

    overall['errors'] = gather_error_count + object_error_count

    return status