def test_sqlite3db_copy_on_exit(self):
    """
    Test copy_on_exit.

    Fills an in-memory database opened with ``copy_on_exit=target``, then
    checks that ``target`` exists once the context manager has exited and
    that the inserted row can be read back from that file.
    """
    # mktemp only hands out a name; the test expects sqlite3db to create
    # the file when the first context exits.
    target = tempfile.mktemp()
    try:
        with sqlite3db(":memory:", copy_on_exit=target) as cursor:
            cursor.execute("CREATE TABLE test (i INTEGER, t TEXT)")
            cursor.execute("INSERT INTO test VALUES (?, ?)", (1, "Hello World"))

        self.assertTrue(os.path.exists(target))

        with sqlite3db(target) as cursor:
            cursor.execute("SELECT i, t FROM test LIMIT 1")
            row = cursor.fetchone()
            self.assertEqual((1, "Hello World"), row)
    finally:
        # Do not leave the copied database behind in the temp directory.
        if os.path.exists(target):
            os.remove(target)
def run(self):
    """
    Copy the seekmap regions of all ids, date by date, into the output.

    The distinct dates are taken from the second column of the input file.
    For every date, the ``SWBOpenDataMarc`` and ``SWBOpenDataSeekMapDB``
    tasks are built, the (offset, length) pairs of that date's ids are
    looked up in batches of ``self.limit`` ids, and the matching regions of
    the marc file are copied to the output via ``copyregions``.

    Raises:
        ValueError: if ``self.limit`` is 0 (``range`` rejects a zero step).
    """
    datefile = shellout("cut -f2 {input} | LANG=C sort | LANG=C uniq > {output}",
                        input=self.input().path)
    with open(datefile) as handle:
        dates = [line.strip() for line in handle]

    with self.output().open('w') as output:
        for date in dates:
            dateobj = datetime.date(*map(int, date.split('-')))
            marc = SWBOpenDataMarc(date=dateobj)
            sdb = SWBOpenDataSeekMapDB(date=dateobj)
            luigi.build([marc, sdb], local_scheduler=True)

            with open(marc.output().path) as marcfile:
                with sqlite3db(sdb.output().path) as cursor:
                    # NOTE(review): `df` is not defined inside this method;
                    # presumably a DataFrame with `id` and `date` columns
                    # provided elsewhere -- confirm.
                    idset = df[df.date == date].id.values.tolist()

                    # Walk over *all* batches. A batch without any match
                    # must not end the loop, otherwise the remaining ids
                    # of this date would be skipped silently.
                    for offset in range(0, len(idset), self.limit):
                        batch = idset[offset:offset + self.limit]
                        cursor.execute(""" SELECT offset, length FROM seekmap WHERE id IN (%s)""" % (
                            ','.join("'%s'" % id_ for id_ in batch)))
                        rows = cursor.fetchall()
                        if rows:
                            copyregions(marcfile, output, rows)
def run(self):
    """
    Collect the seekmap entry of every id listed in the 'surface' input
    and hand all of them, in input order, to ``copyregions`` together with
    the 'file' input and the output.
    """
    query = "SELECT offset, length FROM seekmap where id = ?"

    with self.input().get('surface').open() as surface, \
            self.output().open('w') as sink, \
            self.input().get('file').open() as source, \
            sqlite3db(self.input().get('seekmap').path) as cursor:

        def region_of(identifier):
            """ Return the first seekmap row stored for `identifier`. """
            cursor.execute(query, (identifier,))
            # NOTE(review): fetchone() gives None when nothing matches, so
            # the collected list may contain None -- confirm that
            # copyregions copes with that.
            return cursor.fetchone()

        regions = [region_of(entry.id)
                   for entry in surface.iter_tsv(cols=('id', 'date'))]
        copyregions(source, sink, regions)
def run(self):
    """
    Join the "gndto" input against the store produced by ``tabtokv``.

    ``tabtokv`` is run on the "dbppics" input and its output is opened as
    an sqlite3 database. For every (dbp, gnd) row of "gndto", each distinct
    value stored under the dbp key is written as a (gnd, value) TSV line.
    """
    store = shellout(""" tabtokv -f "1,3" -o {output} {input}""",
                     input=self.input().get("dbppics").path)
    lookup = """select value from store where key = ?"""

    with self.input().get("gndto").open() as mapping, \
            sqlite3db(store) as cursor, \
            self.output().open("w") as sink:
        for pair in mapping.iter_tsv(cols=("dbp", "gnd")):
            cursor.execute(lookup, (pair.dbp,))
            # set() drops duplicate result rows before writing
            distinct = set(cursor.fetchall())
            for record in distinct:
                sink.write_tsv(pair.gnd, record[0])
def run(self):
    """
    Load every input relation into its own table of one sqlite3 database.

    For each (relation, target) pair of the input, a table named after the
    relation (with ":" replaced by "_") is created with columns ``s`` and
    ``o``, filled from the two-column TSV target and indexed on (s), (o),
    (s, o) and (o, s). The database is assembled in a temporary file that
    is moved to the output path at the end.
    """
    # Imported locally so this method does not depend on a module-level
    # import of `os`.
    import os

    fd, stopover = tempfile.mkstemp(prefix="siskin-")
    # mkstemp returns an already opened descriptor; only the path is used
    # here, so close it right away instead of leaking it.
    os.close(fd)

    # (index name suffix, indexed columns)
    indexes = (("s", "s"), ("o", "o"), ("s_o", "s, o"), ("o_s", "o, s"))

    with sqlite3db(stopover) as cursor:
        # items() behaves like iteritems() here and is available on both
        # Python 2 and Python 3 dictionaries.
        for relation, target in self.input().items():
            table = relation.replace(":", "_")
            cursor.execute("""CREATE TABLE IF NOT EXISTS %s (s TEXT, o TEXT)""" % table)
            with target.open() as handle:
                for row in handle.iter_tsv(cols=("s", "o")):
                    cursor.execute("""INSERT INTO %s (s, o) VALUES (?, ?)""" % table, row)
            cursor.connection.commit()

            # Indexes are created after the bulk insert of the table.
            for suffix, columns in indexes:
                cursor.execute("""CREATE INDEX IF NOT EXISTS idx_%s_%s on %s (%s)""" % (
                    table, suffix, table, columns))
            cursor.connection.commit()

    luigi.File(stopover).move(self.output().path)
def test_ptimed(self):
    """
    Test the persistent timed on a tempfile db.

    Calls ``Dummy.dummy_sum`` once and then inspects table ``t`` of
    ``BENCHMARK_DB``: exactly one row is expected, carrying the qualified
    method name, an elapsed value between 0 and 1, a date starting with
    today's UTC date and the status 'green'.
    """
    self.assertTrue(os.path.exists(BENCHMARK_DB))
    dummy = Dummy()
    dummy.dummy_sum()

    with sqlite3db(BENCHMARK_DB) as cursor:
        cursor.execute("SELECT count(*) FROM t")
        self.assertEqual(1, cursor.fetchone()[0])

        cursor.execute("SELECT * FROM t")
        name, elapsed, added, status = cursor.fetchone()
        self.assertEqual(name, 'benchmark_test.Dummy.dummy_sum')
        # assertGreater/assertLess report both operands on failure
        self.assertGreater(elapsed, 0)
        self.assertLess(elapsed, 1)
        self.assertEqual('green', status)

        # the stored date is compared against the current date in UTC
        today = datetime.datetime.now(pytz.utc).date()
        self.assertTrue(added.startswith(str(today)),
                        msg='added={0}, today={1}'.format(added, today))
def test_ptimed(self):
    """
    Test the persistent timed on a tempfile db.

    Calls ``Dummy.dummy_sum`` once and then inspects table ``t`` of
    ``BENCHMARK_DB``: exactly one row is expected, carrying the qualified
    method name, an elapsed value between 0 and 1, a date starting with
    today's UTC date and the status 'green'.
    """
    self.assertTrue(os.path.exists(BENCHMARK_DB))
    dummy = Dummy()
    dummy.dummy_sum()

    with sqlite3db(BENCHMARK_DB) as cursor:
        cursor.execute("SELECT count(*) FROM t")
        self.assertEqual(1, cursor.fetchone()[0])

        cursor.execute("SELECT * FROM t")
        name, elapsed, added, status = cursor.fetchone()
        self.assertEqual(name, 'benchmark_test.Dummy.dummy_sum')
        # assertGreater/assertLess report both operands on failure
        self.assertGreater(elapsed, 0)
        self.assertLess(elapsed, 1)
        self.assertEqual('green', status)

        # the stored date is compared against the current date in UTC
        today = datetime.datetime.now(pytz.utc).date()
        self.assertTrue(added.startswith(str(today)),
                        msg='added={0}, today={1}'.format(added, today))
def decorator(*args, **kwargs):
    """
    Benchmark decorator.

    Calls the wrapped ``method`` with the given arguments, stores a
    (key, elapsed seconds, date, status) row in table ``t`` of the sqlite3
    database at ``path`` and, if ``log`` is True, emits a coloured debug
    message. Returns whatever ``method`` returned.
    """
    with Timer() as timer:
        result = method(*args, **kwargs)

    # args[0] is expected to be the instance the wrapped method runs on
    module = args[0].__module__
    klass = args[0].__class__.__name__
    fun = method.__name__
    key = '{0}.{1}.{2}'.format(module, klass, fun)
    value = timer.elapsed_s

    # just a quick visual impression, everything that takes more
    # than 10s is yellow, more than 1min, red.
    if value > 60:
        status = 'red'
    elif value > 10:
        status = 'yellow'
    else:
        status = 'green'

    # A path without a directory part has an empty dirname, which
    # os.makedirs rejects, so only create a directory when there is one.
    # If the directory shows up between the check and the call, keep going.
    dirname = os.path.dirname(path)
    if dirname and not os.path.exists(dirname):
        try:
            os.makedirs(dirname)
        except OSError:
            if not os.path.isdir(dirname):
                raise

    with sqlite3db(path) as cursor:
        cursor.execute(""" CREATE TABLE IF NOT EXISTS t (key TEXT, value REAL, date TEXT, status TEXT) """)
        cursor.execute(
            """ INSERT INTO t (key, value, date, status) VALUES (?, ?, datetime('now'), ?) """,
            (key, value, status))

    if log is True:
        msg = '[%s.%s] %0.5f' % (klass, fun, timer.elapsed_s)
        # the log colour follows the timer's own thresholds
        if timer.elapsed_s <= timer.green:
            logger.debug(green(msg))
        elif timer.elapsed_s <= timer.yellow:
            logger.debug(yellow(msg))
        else:
            logger.debug(red(msg))

    return result
def run(self):
    """
    Store the blank-line separated records of the input in an sqlite3 table.

    The input is split into groups of consecutive non-blank lines. If the
    first (stripped) line of a group contains an ``rdf:about`` attribute
    with a d-nb.info GND URL, the captured identifier and the stripped
    lines joined by newlines are inserted into table ``gnd``. Groups
    without such a first line are skipped. The database is assembled in a
    temporary file that is moved to the output path at the end.
    """
    # Imported locally so this method does not depend on a module-level
    # import of `os`.
    import os

    fd, stopover = tempfile.mkstemp(prefix="siskin-")
    # mkstemp returns an already opened descriptor; only the path is used
    # here, so close it right away instead of leaking it.
    os.close(fd)

    pattern = re.compile("""rdf:about="http://d-nb.info/gnd/([0-9X-]+)">""")

    with sqlite3db(stopover) as cursor:
        cursor.execute("""CREATE TABLE gnd (id text PRIMARY KEY, content blob)""")
        cursor.execute("""CREATE INDEX IF NOT EXISTS idx_gnd_id ON gnd (id)""")

        with self.input().open() as handle:
            # groupby alternates between runs of whitespace-only lines and
            # runs of content lines; only the latter are records.
            for is_blank, group in itertools.groupby(handle, key=str.isspace):
                if is_blank:
                    continue
                lines = [line.strip() for line in group]
                match = pattern.search(lines[0])
                if match:
                    row = (match.group(1), "\n".join(lines))
                    cursor.execute("INSERT INTO gnd VALUES (?, ?)", row)

    luigi.File(stopover).move(self.output().path)
def decorator(*args, **kwargs):
    """
    Benchmark decorator.

    Calls the wrapped ``method`` with the given arguments, stores a
    (key, elapsed seconds, date, status) row in table ``t`` of the sqlite3
    database at ``path`` and, if ``log`` is True, emits a coloured debug
    message. Returns whatever ``method`` returned.
    """
    with Timer() as timer:
        result = method(*args, **kwargs)

    # args[0] is expected to be the instance the wrapped method runs on
    module = args[0].__module__
    klass = args[0].__class__.__name__
    fun = method.__name__
    key = '{0}.{1}.{2}'.format(module, klass, fun)
    value = timer.elapsed_s

    # just a quick visual impression, everything that takes more
    # than 10s is yellow, more than 1min, red.
    if value > 60:
        status = 'red'
    elif value > 10:
        status = 'yellow'
    else:
        status = 'green'

    # A path without a directory part has an empty dirname, which
    # os.makedirs rejects, so only create a directory when there is one.
    # If the directory shows up between the check and the call, keep going.
    dirname = os.path.dirname(path)
    if dirname and not os.path.exists(dirname):
        try:
            os.makedirs(dirname)
        except OSError:
            if not os.path.isdir(dirname):
                raise

    with sqlite3db(path) as cursor:
        cursor.execute(""" CREATE TABLE IF NOT EXISTS t (key TEXT, value REAL, date TEXT, status TEXT) """)
        cursor.execute(""" INSERT INTO t (key, value, date, status) VALUES (?, ?, datetime('now'), ?) """,
                       (key, value, status))

    if log is True:
        msg = '[%s.%s] %0.5f' % (klass, fun, timer.elapsed_s)
        # the log colour follows the timer's own thresholds
        if timer.elapsed_s <= timer.green:
            logger.debug(green(msg))
        elif timer.elapsed_s <= timer.yellow:
            logger.debug(yellow(msg))
        else:
            logger.debug(red(msg))

    return result
def run(self):
    """
    Export the finc_mapping rows with source_id '28' as TSV.

    Every output line holds the finc_id and the record_id, the latter
    prefixed with 'ai-28-'.
    """
    query = """SELECT finc_id, record_id FROM finc_mapping WHERE source_id = ?"""

    # sqlite3db yields a cursor, hence the name
    with sqlite3db(self.input().path) as cursor, \
            self.output().open('w') as sink:
        cursor.execute(query, ('28',))
        for finc_id, record_id in cursor.fetchall():
            sink.write_tsv(finc_id, 'ai-28-%s' % (record_id,))
def test_sqlite3db(self):
    """
    Test CM yields correct object.

    The object bound by ``with sqlite3db(...)`` must be an instance of
    exactly ``sqlite3.Cursor``.
    """
    # Imported locally so this method does not depend on a module-level
    # import of `os`.
    import os

    path = tempfile.mktemp()
    try:
        with sqlite3db(path) as cursor:
            self.assertEqual(sqlite3.Cursor, cursor.__class__)
    finally:
        # Do not leave the database file behind in the temp directory.
        if os.path.exists(path):
            os.remove(path)