コード例 #1
0
ファイル: crawlers.py プロジェクト: PandaPoPy/web
    def add_job(self, job_item):
        """
        Add a crawled job item to the PyJobsWeb database.

        Skips the item when a job with the same url already exists,
        otherwise builds a Job row and commits the transaction.

        :param job_item: Scrapy pyjobs_crawlers item object (dict-like)
        :return: None
        """
        # The job url doubles as its public identifier.
        job_public_id = job_item['url']

        if self.job_exist(job_public_id):
            # Parenthesized print works on both Python 2 and 3.
            print('Skip existing item')
            return

        job = Job()
        # Optional attributes: copied only when present on the item.
        attributes = ['title', 'description', 'company', 'address', 'company_url',
                      'publication_datetime', 'publication_datetime_is_fake']
        for attribute in attributes:
            if attribute in job_item:
                setattr(job, attribute, job_item[attribute])

        # Mandatory attributes: a KeyError here means the item is malformed.
        job.url = job_item['url']
        job.source = job_item['source']
        job.crawl_datetime = job_item['initial_crawl_datetime']

        if 'tags' in job_item:
            import json
            # Tags are persisted as a JSON-encoded list of {tag, weight} dicts.
            tags = [{'tag': t.tag, 'weight': t.weight} for t in job_item['tags']]
            job.tags = json.dumps(tags)

        DBSession.add(job)
        transaction.commit()
コード例 #2
0
ファイル: __init__.py プロジェクト: PandaPoPy/web
def save_item_as_job(item):
    """
    Persist a crawled job item as a Job row in the database.

    Skips the item when a job with the same url is already stored,
    otherwise builds a Job row and commits the transaction.

    :param item: Scrapy item (dict-like) describing a job offer
    :return: None
    """
    # De-duplicate on the job url.
    existing = DBSession.query(Job).filter(Job.url == item['url']).count()
    if existing:
        # Parenthesized print works on both Python 2 and 3.
        print('Skip existing item')
        return

    job = Job()
    # Optional attributes: copied only when present on the item.
    attributes = ['title', 'description', 'company', 'address', 'company_url',
                  'publication_datetime']
    for attribute in attributes:
        if attribute in item:
            setattr(job, attribute, item[attribute])

    # Mandatory attributes: a KeyError here means the item is malformed.
    job.url = item['url']
    job.crawl_datetime = item['initial_crawl_datetime']

    if 'tags' in item:
        import json
        # Tags are persisted as a JSON-encoded list of {tag, weight} dicts.
        tags = [{'tag': t.tag, 'weight': t.weight} for t in item['tags']]
        job.tags = json.dumps(tags)

    DBSession.add(job)
    transaction.commit()