def load_into_db(self, normed_data):
    """Store normalised rows in the ``sqlite_ciafactbook.db`` SQLite database.

    Every item of ``normed_data`` is read positionally as
    ``(series name, country name, value, year)``.  For each row the
    matching ``db.Series`` and ``db.Country`` are looked up by name (and
    created when the query returns nothing), then a ``db.Value`` tying
    them together is instantiated.  The session is flushed once, after all
    rows have been handled.

    :param normed_data: iterable of indexable rows as described above.
    """
    dburi = 'sqlite:///%s' % 'sqlite_ciafactbook.db'
    import db

    # The repository object is not used afterwards.
    # NOTE(review): presumably constructing it binds the db module to
    # ``dburi`` -- confirm against db.Repository.
    db.Repository(dburi)

    # Per-call caches so each name is queried at most once.
    series_by_name = {}
    countries_by_name = {}

    def get_or_create(cache, name, model):
        """Return the ``model`` object called ``name``, memoised in ``cache``."""
        if name not in cache:
            existing = model.query.filter_by(name=name).first()
            cache[name] = existing if existing else model(name=name)
        return cache[name]

    for row in normed_data:
        # Series is resolved before country, as in the original code.
        db.Value(
            series=get_or_create(series_by_name, row[0], db.Series),
            country=get_or_create(countries_by_name, row[1], db.Country),
            value=row[2],
            year=row[3],
        )
    db.Session.flush()
def _process_sheet(self, tabular_data):
    """Load one sheet of tabular data into the database as a ``db.PesaTable``.

    * The table title is taken from the first cell of the first row.
    * Footnotes are gathered by walking the rows from the bottom until a
      row with more than two cells is met; non-blank first cells are
      stored (bottom-up order) as a JSON list on ``table.footnotes``.
    * From row index 6 onwards, every row whose second cell is non-empty
      yields one ``db.Expenditure`` per year, pairing the years 2002-2010
      with cells 1-9 of the row.

    The session is flushed once at the end.

    :param tabular_data: object whose ``data`` attribute is a list of rows
        (each row a list of cells).
    """
    import db

    # The repository object is not used afterwards.
    # NOTE(review): presumably constructing it binds the db module to the
    # database at ``dbpath`` -- confirm against db.Repository.
    db.Repository('sqlite:///%s' % dbpath)

    years = range(2002, 2011)
    cells = tabular_data.data
    table = db.PesaTable(title=cells[0][0])

    footnotes = []
    for trailing_row in reversed(cells):
        if len(trailing_row) > 2:
            # Reached the data rows: no more footnotes above this point.
            break
        # An entirely empty trailing row has no first cell to inspect.
        text = trailing_row[0].strip() if trailing_row else ''
        if text:
            footnotes.append(text)
    table.footnotes = simplejson.dumps(footnotes)

    for row in cells[6:]:
        # Subheadings and footnotes have no (or an empty) second cell;
        # the length check keeps one-cell rows from raising IndexError.
        if len(row) > 1 and row[1]:
            series_name = row[0]
            for year, cell in zip(years, row[1:10]):
                db.Expenditure(
                    title=series_name,
                    date=unicode(year),
                    amount=swiss.floatify(cell),
                    pesatable=table,
                )
    db.Session.flush()
class TestRepository:
    """Round-trip check of the Country / Series / Value domain objects."""

    repo = db.Repository(dburi)

    def test_domain_model(self):
        """Create one value, flush and clear the session, then read it back."""
        argentina = db.Country(code=1, name='Argentina')
        gdp = db.Series(code=694, name='GDP', is_goal=True)
        sample = db.Value(
            country=argentina,
            series=gdp,
            year=1990,
            value=0.5,
        )
        db.Session.flush()
        db.Session.clear()

        stored = db.Value.query.all()
        assert len(stored) == 1
        only = stored[0]
        assert only.value == 0.5
        assert only.country.name == 'Argentina'
subf = CRA_DATA["functions/" + slugify(function) + "/" + slugify(subfunction)] g = Graph(identifier="%s" % subf) g.add((subf, RDF.type, CRA["SubFunction"])) g.add((subf, RDFS.label, Literal(subfunction))) g.add((subf, DC["title"], Literal(subfunction))) g.add((subf, DC["identifier"], Literal(slugify(subfunction)))) g.add((subf, CRA["function"], CRA_DATA["functions/" + slugify(function)])) g.add((subf, SCV["dataset"], cra)) return g if __name__ == '__main__': cache = swiss.Cache('cache') dburi = 'sqlite:///%s' % cache.cache_path('ukgov_finances_cra.db') db.Repository(dburi) logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s:%(levelname)s - %(message)s") log = logging.getLogger("cra[rdf]") from py4s import FourStore store = FourStore("ckan") store.connect() cursor = store.cursor() t0 = datetime.now() cursor.delete_model(CRA_SCHEMA_URI) cursor.add_model(schema())
def load_normed_data_into_db():
    """Load ``data.csv`` via ``load_normed_data`` on a ``db.Repository(dburi)``."""
    import db

    source_file = 'data.csv'
    db.Repository(dburi).load_normed_data(source_file)
def load(self):
    '''Load the CSV retrieved from ``url`` into ``db.Area`` rows.

    The file is fetched through ``cache.retrieve(url)``, its first line
    (headings) is skipped, and every remaining row with a non-blank
    department becomes one ``db.Area``.  Columns 10 onwards are treated as
    yearly amounts starting at 2003; each amount that ``swiss.floatify``
    turns into a truthy value is attached as a ``db.Expenditure``.  The
    session is committed and removed whenever the row index is a multiple
    of 5000, and committed once more at the end.

    Design notes (kept from the original author):

    Looks like LA is very limited and is always associated with a given
    "department" -- so this is really a classifier for the account.

    Simplest normalization:
        * years
        * dept FK

    Expenditure
        * subfunc
        * year
        * caporcur
        * region: usual ones ... (ID or Non-ID not needed ...)
        * programme FK
          (does the same programme ever occur within two different
          departments?)

    Programme
        * department

    Department?

    What questions do I want to ask:
        * Basically we want to browse in by facets
        * Region, func, subfunc, ...
    '''
    import db

    def _clean(_str):
        """Strip surrounding whitespace and coerce to ``unicode``."""
        return unicode(_str.strip())

    fp = cache.retrieve(url)
    # theoretically we'd have distributions to dept from CG as well ...
    # (account would be 'CG'); only the 'LA' account is handled here.
    # dept -> account
    # Tag accounts:
    #   subfunc
    # Tags relate to other tags ...
    with open(fp) as csv_file:  # closed even if a row fails to load
        reader = csv.reader(csv_file)
        # The repository object is not used afterwards.
        # NOTE(review): presumably constructing it binds the db module to
        # ``dburi`` -- confirm against db.Repository.
        db.Repository(dburi)
        # skip headings
        next(reader)
        for count, row in enumerate(reader):
            deptcode = _clean(row[0])
            dept = _clean(row[1])
            # have some blank rows at end
            if not dept:
                continue
            function = _clean(row[2])
            subfunction = _clean(row[3])
            pog = _clean(row[4])
            poga = _clean(row[5])  # take verbose one
            caporcur = _clean(row[7])
            region = _clean(row[9])
            area = db.Area(
                title=poga,
                deptcode=deptcode,
                department=dept,
                function=function,
                subfunction=subfunction,
                pog=pog,
                cap_or_cur=caporcur,
                region=region,
            )
            # Columns from index 10 on: one amount per year from 2003.
            for offset, raw_amount in enumerate(row[10:]):
                amount = swiss.floatify(raw_amount)
                if amount:  # do not bother with null or zero amounts
                    area.expenditures.append(
                        db.Expenditure(amount=amount, year=2003 + offset))
            if count % 5000 == 0:
                # Periodic commit; the session is removed after each one.
                print('Completed: %s' % count)
                db.Session.commit()
                db.Session.remove()
    db.Session.commit()
def getSurvivalHighScores(self):
    """Return the result of ``getSurvivalHighscore()`` on a new ``db.Repository``."""
    repository = db.Repository()
    return repository.getSurvivalHighscore()
def getQuizHighScores(self):
    """Return the result of ``getQuizHighscore()`` on a new ``db.Repository``."""
    repository = db.Repository()
    return repository.getQuizHighscore()
def addToGoodQuestions(self, data):
    """Pass ``data`` to ``addToGoodQuestions`` on a new ``db.Repository``.

    Nothing is returned.
    """
    repository = db.Repository()
    repository.addToGoodQuestions(data)
def addQuizHighScore(self, data):
    """Pass ``data`` to ``addQuizHighScore`` on a new ``db.Repository``.

    Nothing is returned.
    """
    repository = db.Repository()
    repository.addQuizHighScore(data)
def addSurvivalHichScore(self, data):
    """Pass ``data`` to ``addSurvivalHichScore`` on a new ``db.Repository``.

    The "Hich" spelling is kept as-is: it is this method's public name
    and the name of the repository method it calls.  Nothing is returned.
    """
    repository = db.Repository()
    repository.addSurvivalHichScore(data)
def deployGoodQuestion(self):
    """Return ``getAllGoodQuestionsInOrderOfBestQuestions()`` from a new ``db.Repository``."""
    repository = db.Repository()
    return repository.getAllGoodQuestionsInOrderOfBestQuestions()