def add_job(self, job_item):
    """
    Add a job to the PyJobsWeb database.

    :param job_item: Scrapy pyjobs_crawlers item object
    :return: None
    """
    job_public_id = job_item['url']
    if self.job_exist(job_public_id):
        print('Skip existing item')
        return

    job = Job()
    attributes = ['title', 'description', 'company', 'address',
                  'company_url', 'publication_datetime',
                  'publication_datetime_is_fake']

    # Populate job attributes only when the item provides them
    for attribute in attributes:
        if attribute in job_item:
            setattr(job, attribute, job_item[attribute])

    job.url = job_item['url']
    job.source = job_item['source']
    job.crawl_datetime = job_item['initial_crawl_datetime']

    if 'tags' in job_item:
        import json  # lazy import: only needed when the item carries tags
        tags = [{'tag': t.tag, 'weight': t.weight}
                for t in job_item['tags']]
        job.tags = json.dumps(tags)

    DBSession.add(job)
    transaction.commit()
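# A minimal sketch of the job_exist() check that add_job() relies on. The body
# is an assumption, not the project's confirmed implementation: it mirrors the
# URL-based duplicate query used in save_item_as_job() below.
def job_exist(self, job_public_id):
    """Return True if a job with this public id (its URL) is already stored."""
    return DBSession.query(Job) \
        .filter(Job.url == job_public_id) \
        .count() > 0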
def save_item_as_job(item):
    # def uid(item):
    #     return '{}--{}'.format(item['source'], item['source_local_uid'])

    # Skip items whose URL is already stored
    existing = DBSession.query(Job).filter(Job.url == item['url']).count()
    if existing:
        print('Skip existing item')
        return

    job = Job()
    attributes = ['title', 'description', 'company', 'address',
                  'company_url', 'publication_datetime']

    # Populate job attributes only when the item provides them
    for attribute in attributes:
        if attribute in item:
            setattr(job, attribute, item[attribute])

    job.url = item['url']
    job.crawl_datetime = item['initial_crawl_datetime']

    if 'tags' in item:
        import json  # lazy import: only needed when the item carries tags
        tags = [{'tag': t.tag, 'weight': t.weight}
                for t in item['tags']]
        job.tags = json.dumps(tags)

    DBSession.add(job)
    transaction.commit()
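# A minimal usage sketch, assuming add_job() and job_exist() are methods of a
# Scrapy item pipeline; the class name DatabasePipeline is hypothetical.
# Scrapy calls process_item() once for each item yielded by a spider.
class DatabasePipeline(object):
    # add_job() and job_exist(), as defined above, would live here as methods

    def process_item(self, item, spider):
        self.add_job(item)  # persist the pyjobs_crawlers item as a Job row
        return item         # hand the item on to any later pipelines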