def insurers(year='*'):
    """ Returns the insurers of the given or the latest year.

    :param year: a specific year to restrict the result to, ``'*'`` for
        insurers of all years (the default), or a falsy value to use the
        latest year available in the database.
    :return: an unevaluated query over the matching Insurer rows.
    """
    query = session.query(Insurer)

    # '*' means "no year restriction" -> keep the all-years behavior
    if year == '*':
        return query

    # fall back to the latest year if no year was given
    year = year or latest_year()
    assert year

    # the original implementation never applied this filter, returning
    # every insurer regardless of the requested year
    return query.filter(Insurer.year == year)
def distinct_insurers():
    """ Returns every distinct (insurer_id, name) pair found in the
    database, merging values across all years (SELECT DISTINCT),
    ordered by name.
    """
    return (
        session.query(Insurer.insurer_id, Insurer.name)
        .order_by(Insurer.name)
        .distinct()
        .all()
    )
def insurance_types(year=None):
    """ Returns the possible insurance types of the given year,
    defaulting to the latest year available.
    """
    year = year or latest_year()
    assert year

    rows = (
        session.query(Premium.insurance_type)
        .filter(Premium.year == year)
        .distinct()
        .order_by(Premium.insurance_type)
        .all()
    )
    return unpack(rows)
def years():
    """ Returns the years available in the database, in descending
    order. Not to be confused with ship.db.load.available_years which
    returns the years available as rawdata files.
    """
    # query the smallest table, as every table should contain data
    # for each possible year
    rows = (
        session.query(Insurer.year)
        .distinct()
        .order_by(desc(Insurer.year))
        .all()
    )
    return unpack(rows)
def franchises(age=None, year=None):
    """ Returns a list of possible franchises for the given or the
    latest year. Kids may have different franchises than adults, so the
    list is narrowed further when an age is passed.
    """
    year = year or latest_year()
    assert year

    query = session.query(Premium.franchise).filter(Premium.year == year)

    # narrow down to the relevant age group if an age was supplied
    if age is not None:
        query = query.filter(Premium.age_group == age_group(age))

    query = query.distinct().order_by(Premium.franchise)
    return unpack(query.all())
def insurer(self):
    """ Returns the single Insurer record matching this object's
    insurer_id and year (raises if there is not exactly one match).
    """
    return session.query(Insurer).filter(
        Insurer.insurer_id == self.insurer_id,
        Insurer.year == self.year
    ).one()
def __init__(self, query=None):
    """ Wraps the given query, defaulting to a fresh query over all
    Premium records.

    :param query: an existing query to wrap, or None to start from
        ``session.query(Premium)``.
    """
    # test against None explicitly - a truthiness test would silently
    # replace a passed-in query object that happens to be falsy
    self.q = session.query(Premium) if query is None else query
def __init__(self, query=None):
    """ Wraps the given query, defaulting to a fresh query over all
    Town records.

    :param query: an existing query to wrap, or None to start from
        ``session.query(Town)``.
    """
    # test against None explicitly - a truthiness test would silently
    # replace a passed-in query object that happens to be falsy
    self.q = session.query(Town) if query is None else query
def countries():
    """ Returns a list of available countries (premiums in the 'EU'
    group).
    """
    query = (
        session.query(Premium.country)
        .filter(Premium.group == 'EU')
        .distinct()
    )
    return unpack(query.all())
def cantons():
    """ Returns a list of available cantons (premiums in the 'CH'
    group).
    """
    query = (
        session.query(Premium.canton)
        .filter(Premium.group == 'CH')
        .distinct()
    )
    return unpack(query.all())
def __call__(self, year='*', update=False, limit=0):
    """ Loads the rawdata csv of the given year into the database,
    or of every available year when year is '*'.

    :param year: the year to load, or '*' to recurse over all years
        returned by available_years().
    :param update: if True, existing records are merged instead of the
        whole year being skipped when records already exist.
    :param limit: for testing only - stop after roughly this many lines
        (the check is aligned to the 1000-line chunk steps).
    :return: 1 if the year was loaded, 0 if skipped; for '*' the sum of
        the per-year results.
    """
    if year and year != '*':
        # continue with year if file is found
        csv_path = file_path(year, self.type)
        if not csv_path:
            return 0
    else:
        # no year given, go through all years
        results = []
        for year in available_years():
            results.append(self.__call__(year, update, limit))
        return sum(results)

    # if there's already a record of the given year, maybe stop
    if not update:
        if self.model is Premium:
            query = session.query(Premium).filter(Premium.year == year)
            query = query.filter(Premium.group == self.type.upper())
        else:
            query = session.query(self.model).filter(
                self.model.year == year
            )

        if query.first():
            return 0

    # the csv module does not support unicode so we need to decode
    # unicode strings on the fly
    def lines():
        decode = lambda s: s.decode('utf-8')
        _lines = csv.reader(open(csv_path, 'rb'), delimiter=';')
        for line in _lines:
            yield map(decode, line)

    lineindex = 0

    # load lines chunked to keep the memory usage under control
    for chunk in chunked(lines(), 1000):
        try:
            for line in chunk:
                lineindex += 1

                try:
                    obj = self.factory(line)
                    obj.year = year
                except ValueError:
                    # there are a number of empty lines in the rawdata
                    # files which we can safely skip over
                    logger.warning("invalid line %i in %s, skipping" % (
                        lineindex, csv_path
                    ))
                    continue

                if update:
                    session.merge(obj)
                else:
                    session.add(obj)
        except:
            # roll back the partial chunk before re-raising
            session.rollback()
            raise
        else:
            # I cannot for the life of me figure out why sqlite won't
            # accept nested transactions here. So I make due with
            # sequential commits
            session.commit()

        # respect the limit, which is aligned to the chunk steps
        if limit > 0 and lineindex > limit:
            break

    return 1
def __call__(self, year='*', update=False, limit=0):
    """ Loads the rawdata csv of the given year into the database, or
    of every available year when year is '*'.

    Returns 1 if the year was loaded, 0 if it was skipped (no csv file
    found, or records already present and update is False); for '*'
    the sum of the per-year results.
    """
    # limit is for testing only
    if year and year != '*':
        # continue with year if file is found
        csv_path = file_path(year, self.type)
        if not csv_path:
            return 0
    else:
        # no year given, go through all years
        results = []
        for year in available_years():
            results.append(self.__call__(year, update, limit))
        return sum(results)

    # if there's a record of the given year, maybe stop
    if not update:
        if self.model is Premium:
            # Premium rows additionally carry the loader's group
            query = session.query(Premium).filter(Premium.year == year)
            query = query.filter(Premium.group == self.type.upper())
        else:
            query = session.query(
                self.model).filter(self.model.year == year)

        if query.first():
            return 0

    # the csv module does not support unicode so we need to decode
    # unicode strings on the fly
    # NOTE(review): the file handle opened here is never explicitly
    # closed; presumably fine for a short-lived loader - verify.
    def lines():
        decode = lambda s: s.decode('utf-8')
        _lines = csv.reader(open(csv_path, 'rb'), delimiter=';')
        for ix, line in enumerate(_lines):
            yield map(decode, line)

    lineindex = 0

    # load lines chunked to keep the memory usage under control
    for chunk in chunked(lines(), 1000):
        try:
            for line in chunk:
                lineindex += 1

                try:
                    # factory is expected to raise ValueError on the
                    # empty/invalid lines mentioned below
                    obj = self.factory(line)
                    obj.year = year
                except ValueError:
                    # there are a number of empty lines in the rawdata
                    # files which we can safely skip over
                    logger.warn(
                        "invalid line %i in %s, skipping"
                        % (lineindex, csv_path))
                    continue

                if update:
                    session.merge(obj)
                else:
                    session.add(obj)
        except:
            # undo the partial chunk before propagating the error
            session.rollback()
            raise
        else:
            # I cannot for the life of me figure out why sqlite won't
            # accept nested transactions here. So I make due with
            # sequential commits
            session.commit()

        # respect the limit, which is aligned to the chunk steps
        if limit > 0 and lineindex > limit:
            break

    return 1