コード例 #1
0
ファイル: ebl.py プロジェクト: evelynweiser/siskin
 def run(self):
     """Write the decoded records for every surface row to the task output.

     For each (id, date) row of the 'surface' input, the matching `record`
     is selected from the `store` table of the 'db' input (matched on `id`
     and `secondary`), base64-decoded and appended to a temporary file.
     The temporary file is moved to the output path once all rows have
     been processed.
     """
     import os  # local import: only needed to release the mkstemp descriptor

     fd, stopover = tempfile.mkstemp(prefix='siskin-')
     # mkstemp hands back an already-open OS-level descriptor; the file is
     # reopened by name below, so close the descriptor here to avoid leaking it.
     os.close(fd)

     query = "SELECT record from store where id = ? and secondary = ?"
     with sqlitedb(self.input().get('db').path) as cursor:
         with self.input().get('surface').open() as handle:
             with open(stopover, 'wb') as output:
                 for row in handle.iter_tsv(cols=('id', 'date')):
                     cursor.execute(query, (row.id, row.date))
                     result = cursor.fetchone()
                     # NOTE(review): if no row matches, result is presumably
                     # None and the next line raises TypeError -- confirm that
                     # every surface row is guaranteed to exist in the store.
                     output.write(base64.b64decode(result[0]))
     luigi.File(stopover).move(self.output().path)
コード例 #2
0
ファイル: gnd.py プロジェクト: evelynweiser/siskin
 def run(self):
     """Write one (gnd, value) TSV row per distinct store value of each dbp key.

     The path returned by `shellout` (for a `tabtokv` command run over the
     'dbppics' input) is opened with `sqlitedb`. For every (dbp, gnd) row of
     the 'gndto' input, all `value` entries stored under the `dbp` key are
     fetched, de-duplicated, and written next to the row's `gnd`.
     """
     source = self.input().get('dbppics').path
     kvdb = shellout(""" tabtokv -f "1,3" -o {output} {input}""", input=source)
     lookup = """select value from store where key = ?"""

     with self.input().get('gndto').open() as handle, \
             sqlitedb(kvdb) as cursor, \
             self.output().open('w') as output:
         for row in handle.iter_tsv(cols=('dbp', 'gnd')):
             cursor.execute(lookup, (row.dbp,))
             # set() collapses duplicate result rows before writing.
             for hit in set(cursor.fetchall()):
                 output.write_tsv(row.gnd, hit[0])
コード例 #3
0
ファイル: oso.py プロジェクト: evelynweiser/siskin
 def run(self):
     """Copy the seekmap regions of all surface ids from 'file' to the output.

     For every (id, date) row of the 'surface' input, the (offset, length)
     pair stored for that id in the `seekmap` table is collected; the full
     list is then passed, together with the opened 'file' input and the
     output handle, to `copyregions`.
     """
     with self.input().get('surface').open() as handle, \
             self.output().open('w') as output, \
             self.input().get('file').open() as fh, \
             sqlitedb(self.input().get('seekmap').path) as cursor:

         def locate(record_id):
             # Fetch the first seekmap row for a single record id.
             cursor.execute("SELECT offset, length FROM seekmap where id = ?", (record_id,))
             return cursor.fetchone()

         regions = [locate(row.id) for row in handle.iter_tsv(cols=('id', 'date'))]
         copyregions(fh, output, regions)
コード例 #4
0
ファイル: gnd.py プロジェクト: evelynweiser/siskin
 def run(self):
     """Load every input relation into its own indexed (s, o) sqlite table.

     Each key of `self.input()` names a relation; its target is read as
     two-column TSV (s, o) and inserted into a table named after the
     relation with ':' replaced by '_'. After loading, indexes on (s), (o),
     (s, o) and (o, s) are created. The database is built in a temporary
     file which is moved to the output path at the end.
     """
     import os  # local import: only needed to release the mkstemp descriptor

     fd, stopover = tempfile.mkstemp(prefix='siskin-')
     # mkstemp hands back an already-open OS-level descriptor; sqlitedb opens
     # the file by path, so close the descriptor here to avoid leaking it.
     os.close(fd)

     # Column combinations to index, in creation order.
     index_columns = (('s',), ('o',), ('s', 'o'), ('o', 's'))

     with sqlitedb(stopover) as cursor:
         for relation, target in self.input().iteritems():
             # SQL placeholders cannot bind identifiers, so the table name
             # (derived from the input key) is interpolated into the statements.
             table = relation.replace(':', '_')
             cursor.execute("""CREATE TABLE IF NOT EXISTS %s (s TEXT, o TEXT)""" % table)

             # Build the INSERT statement once per table instead of once per row.
             insert = """INSERT INTO %s (s, o) VALUES (?, ?)""" % table
             with target.open() as handle:
                 for row in handle.iter_tsv(cols=('s', 'o')):
                     cursor.execute(insert, row)
             cursor.connection.commit()

             for columns in index_columns:
                 cursor.execute("""CREATE INDEX IF NOT EXISTS idx_%s_%s on %s (%s)""" % (
                     table, '_'.join(columns), table, ', '.join(columns)))
             cursor.connection.commit()
     luigi.File(stopover).move(self.output().path)
コード例 #5
0
ファイル: gnd.py プロジェクト: evelynweiser/siskin
    def run(self):
        """Store each blank-line-separated input chunk in a sqlite `gnd` table.

        The input is split into groups of consecutive non-blank lines. When
        the first (stripped) line of a group contains an
        `rdf:about="http://d-nb.info/gnd/<id>">` attribute, the captured id
        and the group's stripped lines joined by newlines are inserted as one
        row. Groups whose first line does not match are skipped. The database
        is built in a temporary file which is moved to the output path.
        """
        import os  # local import: only needed to release the mkstemp descriptor

        fd, stopover = tempfile.mkstemp(prefix='siskin-')
        # mkstemp hands back an already-open OS-level descriptor; sqlitedb opens
        # the file by path, so close the descriptor here to avoid leaking it.
        os.close(fd)
        pattern = re.compile("""rdf:about="http://d-nb.info/gnd/([0-9X-]+)">""")

        with sqlitedb(stopover) as cursor:
            cursor.execute("""CREATE TABLE gnd (id text  PRIMARY KEY, content blob)""")
            cursor.execute("""CREATE INDEX IF NOT EXISTS idx_gnd_id ON gnd (id)""")

            with self.input().open() as handle:
                # groupby alternates between runs of blank and non-blank lines.
                for is_blank, chunk in itertools.groupby(handle, key=str.isspace):
                    if is_blank:
                        continue
                    # A real list is required because the first line is indexed
                    # below; a lazy map object would not support lines[0].
                    lines = [line.strip() for line in chunk]
                    match = pattern.search(lines[0])
                    if match:
                        row = (match.group(1), '\n'.join(lines))
                        cursor.execute("INSERT INTO gnd VALUES (?, ?)", row)

        luigi.File(stopover).move(self.output().path)
コード例 #6
0
ファイル: doaj.py プロジェクト: evelynweiser/siskin
 def run(self):
     """Write a TSV mapping finc_id to 'ai-28-<record_id>' for source_id '28'.

     All (finc_id, record_id) rows of `finc_mapping` with source_id '28' are
     read from the input database and written to the output, with the
     record id prefixed by 'ai-28-'.
     """
     query = """SELECT finc_id, record_id FROM finc_mapping WHERE source_id = ?"""
     with sqlitedb(self.input().path) as cursor, \
             self.output().open('w') as output:
         cursor.execute(query, ('28',))
         for finc_id, record_id in cursor.fetchall():
             output.write_tsv(finc_id, 'ai-28-%s' % record_id)