def run(self):
    """
    Resolve every (id, date) row of the 'surface' input against the
    SQLite store of the 'db' input and write the base64-decoded records,
    back to back, to the task output.

    The data is written to a temporary file first and only moved to the
    output path after all rows have been processed.

    Raises RuntimeError if the store has no record for an (id, date) pair.
    """
    query = "SELECT record from store where id = ? and secondary = ?"
    with sqlitedb(self.input().get('db').path) as cursor:
        with self.input().get('surface').open() as handle:
            # NamedTemporaryFile owns and closes its file handle; a bare
            # mkstemp() call with the descriptor thrown away would leak
            # one open file descriptor per run.
            with tempfile.NamedTemporaryFile(prefix='siskin-', delete=False) as output:
                stopover = output.name
                for row in handle.iter_tsv(cols=('id', 'date')):
                    cursor.execute(query, (row.id, row.date))
                    result = cursor.fetchone()
                    if result is None:
                        # fetchone() yields None when nothing matches; fail
                        # with the offending key instead of an opaque
                        # "NoneType is not subscriptable" TypeError.
                        raise RuntimeError('no record in store for id=%s, date=%s'
                                           % (row.id, row.date))
                    output.write(base64.b64decode(result[0]))
    luigi.File(stopover).move(self.output().path)
def run(self):
    """
    Map GND identifiers to the values stored for their DBpedia keys.

    The 'dbppics' input is converted with `tabtokv` into a file that is
    then opened as an SQLite key-value store. For each (dbp, gnd) row of
    the 'gndto' input, one TSV line `gnd <TAB> value` is written per
    distinct value found under the key `dbp`.
    """
    dbppics_path = self.input().get('dbppics').path
    # NOTE(review): shellout presumably returns the path it substituted
    # for {output}; it is used as the database path below.
    kv = shellout(""" tabtokv -f "1,3" -o {output} {input}""", input=dbppics_path)
    lookup = """select value from store where key = ?"""

    with self.input().get('gndto').open() as handle:
        with sqlitedb(kv) as cursor:
            with self.output().open('w') as output:
                for row in handle.iter_tsv(cols=('dbp', 'gnd')):
                    cursor.execute(lookup, (row.dbp,))
                    # A set drops duplicate result rows for the same key.
                    distinct_hits = set(cursor.fetchall())
                    for hit in distinct_hits:
                        output.write_tsv(row.gnd, hit[0])
def run(self):
    """
    Copy selected regions of the 'file' input to the task output.

    For every row of the 'surface' input, the (offset, length) pair stored
    for its id is fetched from the 'seekmap' SQLite database. The collected
    pairs are handed to `copyregions` together with the open source file
    and the open output. The 'date' column is parsed but not used here.
    """
    query = "SELECT offset, length FROM seekmap where id = ?"

    with self.input().get('surface').open() as handle:
        with self.output().open('w') as output:
            with self.input().get('file').open() as fh:
                with sqlitedb(self.input().get('seekmap').path) as cursor:

                    def region_for(identifier):
                        # One lookup per id; the result is None when the
                        # seekmap has no entry for it.
                        cursor.execute(query, (identifier,))
                        return cursor.fetchone()

                    regions = [region_for(row.id)
                               for row in handle.iter_tsv(cols=('id', 'date'))]
                    copyregions(fh, output, regions)
def run(self):
    """
    Load every input relation into its own table of one SQLite database.

    Each key of `self.input()` names a relation; colons in the name are
    replaced by underscores to form the table name. The (s, o) rows of the
    corresponding target are inserted into that table, which is then
    indexed on s, o, (s, o) and (o, s). The database is built in a
    temporary file and moved to the output path at the end.
    """
    # Reserve a temp path without keeping a descriptor open: discarding the
    # descriptor returned by mkstemp() would leak it.
    with tempfile.NamedTemporaryFile(prefix='siskin-', delete=False) as tmp:
        stopover = tmp.name

    index_templates = (
        """CREATE INDEX IF NOT EXISTS idx_%s_s on %s (s)""",
        """CREATE INDEX IF NOT EXISTS idx_%s_o on %s (o)""",
        """CREATE INDEX IF NOT EXISTS idx_%s_s_o on %s (s, o)""",
        """CREATE INDEX IF NOT EXISTS idx_%s_o_s on %s (o, s)""",
    )

    with sqlitedb(stopover) as cursor:
        for relation, target in self.input().iteritems():
            # Table names cannot be bound as SQL parameters, hence the
            # string formatting; the names come from the task's own
            # input keys.
            table = relation.replace(':', '_')
            cursor.execute("""CREATE TABLE IF NOT EXISTS %s (s TEXT, o TEXT)""" % table)

            # Build the statement once per table, not once per row.
            insert = """INSERT INTO %s (s, o) VALUES (?, ?)""" % table
            with target.open() as handle:
                for row in handle.iter_tsv(cols=('s', 'o')):
                    cursor.execute(insert, row)
            cursor.connection.commit()

            for template in index_templates:
                cursor.execute(template % (table, table))
            cursor.connection.commit()
    luigi.File(stopover).move(self.output().path)
def run(self):
    """
    Index the records of the input file by GND id in an SQLite database.

    The input is split into records at whitespace-only lines. If the first
    line of a record contains an `rdf:about="http://d-nb.info/gnd/<id>">`
    attribute, the record (lines stripped and joined by newlines) is stored
    under that id in table `gnd`; records without a match are skipped.
    The database is built in a temporary file and moved to the output path
    at the end.
    """
    # Reserve a temp path without keeping a descriptor open: discarding the
    # descriptor returned by mkstemp() would leak it.
    with tempfile.NamedTemporaryFile(prefix='siskin-', delete=False) as tmp:
        stopover = tmp.name

    pattern = re.compile("""rdf:about="http://d-nb.info/gnd/([0-9X-]+)">""")

    with sqlitedb(stopover) as cursor:
        cursor.execute("""CREATE TABLE gnd (id text PRIMARY KEY, content blob)""")
        cursor.execute("""CREATE INDEX IF NOT EXISTS idx_gnd_id ON gnd (id)""")
        with self.input().open() as handle:
            # groupby yields alternating runs of blank and non-blank lines;
            # each non-blank run is one record.
            for is_blank, group in itertools.groupby(handle, key=str.isspace):
                if is_blank:
                    continue
                lines = [line.strip() for line in group]
                match = pattern.search(lines[0])
                if match:
                    row = (match.group(1), '\n'.join(lines))
                    cursor.execute("INSERT INTO gnd VALUES (?, ?)", row)
    luigi.File(stopover).move(self.output().path)
def run(self):
    """
    Export the finc id mapping for source 28 as TSV.

    Reads all (finc_id, record_id) rows of `finc_mapping` whose source_id
    is '28' from the input database and writes one line per row:
    `finc_id <TAB> ai-28-<record_id>`.
    """
    query = """SELECT finc_id, record_id FROM finc_mapping WHERE source_id = ?"""
    with sqlitedb(self.input().path) as cursor:
        with self.output().open('w') as output:
            cursor.execute(query, ('28',))
            for finc_id, record_id in cursor.fetchall():
                output.write_tsv(finc_id, 'ai-28-%s' % record_id)