def set_geolocation(cls, company_id, lat, lon):
    transaction.begin()
    DBSession.query(cls) \
        .filter(cls.id == company_id) \
        .update({'latitude': lat,
                 'longitude': lon,
                 'geolocation_is_valid': True})
    transaction.commit()
def get_not_pushed_on_twitter(cls, limit=None):
    # Build the query once; only apply LIMIT when one was requested.
    query = DBSession.query(cls) \
        .filter_by(pushed_on_twitter=False) \
        .order_by(cls.id.asc())
    if limit:
        query = query.limit(limit)
    return query
def update_last_sync(cls, job_id, timestamp):
    transaction.begin()
    # Record the sync time; explicitly re-assigning last_modified to itself
    # keeps the column at its current value (e.g. so an onupdate default
    # does not bump it during this UPDATE).
    DBSession.query(cls) \
        .filter(cls.id == job_id) \
        .update({'last_sync': timestamp,
                 'last_modified': cls.last_modified})
    # If the row was last modified before this sync, raise last_modified to
    # the sync timestamp so the row no longer counts as dirty.
    DBSession.query(cls) \
        .filter(cls.id == job_id) \
        .filter(cls.last_modified < timestamp) \
        .update({'last_modified': timestamp})
    transaction.commit()
def rss(self, limit=50, source=None, *args, **kwargs):
    """
    RSS feed of jobs

    :param source: source name
    :param limit: number of displayed jobs
    :return: RSS feed content
    """
    site_url = config.get('site.domain_base_url')
    feed = feedgenerator.Rss201rev2Feed(
        title=u"pyjobs : le job qu'il vous faut en python",
        link=site_url,
        description=u"Agrégation de jobs python",
        language=u"fr",
        feed_url=u"http://www.pyjobs.fr/rss?limit=%s" % limit)

    jobs = DBSession.query(model.JobAlchemy) \
        .order_by(model.JobAlchemy.publication_datetime.desc())
    # Apply the source filter before LIMIT: SQLAlchemy raises an error when
    # filter() is called on a query that already has LIMIT applied.
    if source is not None:
        jobs = jobs.filter(model.JobAlchemy.source == source)
    jobs = jobs.limit(limit)

    for job in jobs:
        job_slug = slugify(job.title)
        feed.add_item(
            title=job.title,
            link=get_job_url(job.id, job_title=job.title, absolute=True),
            description=job.description,
            pubdate=job.publication_datetime,
            unique_id="%s/job/%d/%s" % (site_url, job.id, job_slug))

    return feed.writeString('utf-8')
def rss(self, limit=50, source=None):
    """
    RSS feed of jobs

    :param source: source name
    :param limit: number of displayed jobs
    :return: RSS feed content
    """
    site_url = config.get("site.domain_base_url")
    feed = feedgenerator.Rss201rev2Feed(
        title=u"PyJobs: Le job qu'il vous faut en python",
        link=site_url,
        description=u"Agrégation de jobs python",
        language=u"fr",
        feed_url=u"http://www.pyjobs.fr/rss?limit=%s" % limit,
    )

    jobs = DBSession.query(Job).order_by(Job.publication_datetime.desc())
    # Filter before applying LIMIT; filtering an already-limited query
    # raises an error in SQLAlchemy.
    if source is not None:
        jobs = jobs.filter(Job.source == source)
    jobs = jobs.limit(limit)

    for job in jobs:
        job_slug = slugify(job.title)
        feed.add_item(
            title=job.title,
            link=job.url,
            description=job.description,
            pubdate=job.publication_datetime,
            unique_id="%s/job/%d/%s" % (site_url, job.id, job_slug),
        )

    return feed.writeString("utf-8")
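# Hedged note (an assumption, not taken from the original source): in a
# TurboGears2 controller, a feed method like rss() above is normally exposed
# with an explicit content type, e.g. @expose(content_type='application/rss+xml'),
# so clients receive the writeString() payload as RSS rather than HTML.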
def save_item_as_job(item):
    # def uid(item):
    #     return '{}--{}'.format(item['source'], item['source_local_uid'])

    existing = DBSession.query(Job).filter(Job.url == item['url']).count()
    if existing:
        print 'Skip existing item'
        return

    job = Job()
    attributes = ['title', 'description', 'company', 'address',
                  'company_url', 'publication_datetime']
    # Populate job attributes when the item contains them
    for attribute in attributes:
        if attribute in item:
            setattr(job, attribute, item[attribute])

    job.url = item['url']
    job.crawl_datetime = item['initial_crawl_datetime']

    if 'tags' in item:
        import json
        tags = [{'tag': t.tag, 'weight': t.weight} for t in item['tags']]
        job.tags = json.dumps(tags)

    DBSession.add(job)
    transaction.commit()
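# Hedged example (not from the original source) of the item mapping that
# save_item_as_job expects. The field names come from the attribute list
# above; the values and the Tag namedtuple are made up for illustration.
import collections
import datetime

Tag = collections.namedtuple('Tag', ['tag', 'weight'])
item = {
    'url': 'http://example.com/job/42',
    'title': u'Développeur Python',
    'description': u'CDI à Paris',
    'company': u'ACME',
    'publication_datetime': datetime.datetime(2015, 6, 1, 9, 0),
    'initial_crawl_datetime': datetime.datetime(2015, 6, 1, 12, 0),
    'tags': [Tag(tag=u'python', weight=3), Tag(tag=u'django', weight=1)],
}
save_item_as_job(item)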
def index(self, source=None):
    jobs = DBSession.query(Job).order_by(Job.publication_datetime.desc())
    if source is not None:
        jobs = jobs.filter(Job.source == source)
    return dict(sources=SOURCES, jobs=jobs)
def job(self, job_id, job_title=None, previous=None):
    """
    Job detail page

    :param job_id: Job identifier
    :param job_title: Job title (optional) for pretty url
    :return: dict
    """
    try:
        job = DBSession.query(Job).filter_by(id=job_id).one()
    except NoResultFound:
        # Without this, `job` would be unbound below. This answers the
        # original TODO ("TurboGears 404?") with tg's abort(), which raises
        # an HTTP 404 (requires: from tg import abort).
        abort(404)
    return dict(job=job, sources=SOURCES)
def sources(self):
    sources_last_crawl = {}
    for source_name in SOURCES:
        try:
            sources_last_crawl[source_name] = (
                DBSession.query(Log.datetime)
                .filter(Log.source == source_name)
                .order_by(Log.datetime.desc())
                .limit(1)
                .one()[0]
            )
        except NoResultFound:
            sources_last_crawl[source_name] = None
    return dict(sources=SOURCES,
                existing_fields=existing_fields,
                sources_last_crawl=sources_last_crawl)
def origine_des_annonces_diffusees(self, *args, **kwargs):
    sources_last_crawl = {}
    sorted_sources = collections.OrderedDict(
        sorted(SOURCES.items(), key=lambda x: x[1].label))
    for source_name in sorted_sources:
        try:
            sources_last_crawl[source_name] = DBSession.query(Log.datetime) \
                .filter(Log.source == source_name) \
                .order_by(Log.datetime.desc()) \
                .limit(1) \
                .one()[0]
        except NoResultFound:
            sources_last_crawl[source_name] = None
    return dict(sources=sorted_sources,
                existing_fields=existing_fields,
                sources_last_crawl=sources_last_crawl)
def sources(self):
    sources_last_crawl = {}
    sorted_sources = collections.OrderedDict(
        sorted(SOURCES.items(), key=lambda x: x[1].label))
    for source_name in sorted_sources:
        try:
            sources_last_crawl[source_name] = DBSession.query(Log.datetime) \
                .filter(Log.source == source_name) \
                .order_by(Log.datetime.desc()) \
                .limit(1) \
                .one()[0]
        except NoResultFound:
            sources_last_crawl[source_name] = None
    return dict(
        sources=sorted_sources,
        existing_fields=existing_fields,
        sources_last_crawl=sources_last_crawl
    )
def logs(self, source=None, last_days=1, *args, **kwargs):
    logs_query = DBSession.query(Log) \
        .order_by(Log.datetime.desc()) \
        .filter(Log.datetime >= datetime.datetime.now()
                + datetime.timedelta(days=-int(last_days))) \
        .filter(Log.message.in_(('CRAWL_LIST_START',
                                 'CRAWL_LIST_FINISHED',
                                 'ERROR_UNEXPECTED_END',
                                 'ERROR_CRAWNLING')))  # sic: must match the
                                                       # string the crawler logs
    if source is not None:
        logs_query = logs_query.filter(Log.source == source)
    return dict(sources=SOURCES, logs=logs_query.all(), last_days=last_days)
def logs(self, source=None, last_days=1):
    logs_query = DBSession.query(Log) \
        .order_by(Log.datetime.desc()) \
        .filter(Log.datetime >= datetime.datetime.now()
                + datetime.timedelta(days=-int(last_days))) \
        .filter(Log.message.in_(('CRAWL_LIST_START',
                                 'CRAWL_LIST_FINISHED',
                                 'ERROR_UNEXPECTED_END',
                                 'ERROR_CRAWNLING')))
    if source is not None:
        logs_query = logs_query.filter(Log.source == source)
    return dict(
        sources=SOURCES,
        logs=logs_query.all(),
        last_days=last_days
    )
def by_user_name(cls, username):
    """Return the user object whose user name is ``username``."""
    return DBSession.query(cls).filter_by(user_name=username).first()
def by_email_address(cls, email):
    """Return the user object whose email address is ``email``."""
    return DBSession.query(cls).filter_by(email_address=email).first()
def get_job_offer(cls, offer_id):
    return DBSession.query(cls).filter(cls.id == offer_id).one()
def test_query_obj(self):
    """Model objects can be queried"""
    obj = DBSession.query(self.klass).one()
    for key, value in self.attrs.items():
        eq_(getattr(obj, key), value)
def get_invalid_addresses(cls):
    return DBSession.query(cls).filter_by(address_is_valid=False)
def get_company(cls, company_id):
    return DBSession.query(cls).filter(cls.id == company_id).one()
def job_offer_exists(cls, url):
    return DBSession.query(cls).filter(cls.url == url).count()
def set_address_is_valid(cls, company_id, is_valid):
    transaction.begin()
    DBSession.query(cls) \
        .filter(cls.id == company_id) \
        .update({'address_is_valid': is_valid})
    transaction.commit()
def get_dirty_rows(cls):
    return DBSession.query(cls) \
        .filter(cls.validated) \
        .filter(cls.last_modified > cls.last_sync) \
        .order_by(cls.id.asc())
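# Hedged sketch (not from the original source) of the sync pass these helpers
# imply: push each dirty row, then stamp it with update_last_sync so that
# last_modified > last_sync no longer holds. push_row() and JobAlchemy are
# assumed names.
import datetime

for row in JobAlchemy.get_dirty_rows():
    push_row(row)  # hypothetical: send the row to the external consumer
    JobAlchemy.update_last_sync(row.id, datetime.datetime.utcnow())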
def get_all_job_offers(cls):
    return DBSession.query(cls).order_by(cls.publication_datetime.desc())
def get_validated_companies(cls):
    return DBSession.query(cls) \
        .filter_by(validated=True) \
        .order_by(cls.name.asc())
def set_pushed_on_twitter(cls, offer_id, pushed_on_twitter):
    transaction.begin()
    DBSession.query(cls) \
        .filter(cls.id == offer_id) \
        .update({'pushed_on_twitter': pushed_on_twitter})
    transaction.commit()
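# Hedged sketch (not from the original source) of the Twitter push loop that
# get_not_pushed_on_twitter and set_pushed_on_twitter suggest; tweet_offer()
# and JobOfferAlchemy are assumed names.
for offer in JobOfferAlchemy.get_not_pushed_on_twitter(limit=10):
    tweet_offer(offer)  # hypothetical: post the offer on Twitter
    JobOfferAlchemy.set_pushed_on_twitter(offer.id, True)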
def reset_last_sync(cls):
    transaction.begin()
    DBSession.query(cls) \
        .filter(cls.validated) \
        .update({'last_sync': base_time()})
    transaction.commit()
def get_validated_company(cls, company_id):
    return DBSession.query(cls) \
        .filter(cls.id == company_id) \
        .filter_by(validated=True) \
        .one()
def get_pending_geolocations(cls):
    return DBSession.query(cls) \
        .filter_by(address_is_valid=True) \
        .filter_by(geolocation_is_valid=False) \
        .filter_by(validated=True) \
        .order_by(cls.id.asc())
def set_geolocation_is_valid(cls, offer_id, is_valid):
    transaction.begin()
    DBSession.query(cls) \
        .filter(cls.id == offer_id) \
        .update({'geolocation_is_valid': is_valid})
    transaction.commit()
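# Hedged sketch (not from the original source) of the geocoding pass implied
# by get_pending_geolocations, set_geolocation and set_address_is_valid;
# geocode() and CompanyAlchemy are assumed names, and set_geolocation already
# marks the geolocation valid on success.
for company in CompanyAlchemy.get_pending_geolocations():
    coords = geocode(company.address)  # hypothetical: returns (lat, lon) or None
    if coords:
        lat, lon = coords
        CompanyAlchemy.set_geolocation(company.id, lat, lon)
    else:
        # the address did not geocode; flag it so it leaves the pending set
        CompanyAlchemy.set_address_is_valid(company.id, False)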