Ejemplo n.º 1
0
    def test_sqlite3db_copy_on_exit(self):
        """ Test copy_on_exit.

        Fills an in-memory database inside the context manager, expects the
        file named by ``copy_on_exit`` to exist once the block is left, and
        then reads the inserted row back from that file.
        """
        import shutil

        # A private temporary directory yields a target path that does not
        # exist yet, without the race condition of the deprecated
        # tempfile.mktemp(). The directory (and the copied database in it)
        # is removed when the test finishes, whatever its outcome.
        tmpdir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, tmpdir, True)
        target = os.path.join(tmpdir, "copy_on_exit.db")

        with sqlite3db(":memory:", copy_on_exit=target) as cursor:
            cursor.execute("CREATE TABLE test (i INTEGER, t TEXT)")
            cursor.execute("INSERT INTO test VALUES (?, ?)",
                           (1, "Hello World"))

        self.assertTrue(os.path.exists(target))
        with sqlite3db(target) as cursor:
            cursor.execute("SELECT i, t FROM test LIMIT 1")
            row = cursor.fetchone()
            # assertEqual replaces the deprecated assertEquals alias.
            self.assertEqual((1, "Hello World"), row)
Ejemplo n.º 2
0
    def run(self):
        """
        Copy the seekmap regions of all ids, date by date, into the output.

        The distinct values of the second input column are read as dates in
        ``YYYY-MM-DD`` form. For each date the matching ``SWBOpenDataMarc``
        and ``SWBOpenDataSeekMapDB`` tasks are built, the ids for that date
        are looked up in the ``seekmap`` table in batches of ``self.limit``,
        and the resulting ``(offset, length)`` rows are handed to
        ``copyregions`` together with the MARC file and the output handle.
        """
        output = shellout("cut -f2 {input} | LANG=C sort | LANG=C uniq > {output}", input=self.input().path)
        with open(output) as handle:
            dates = [line.strip() for line in handle]

        with self.output().open('w') as output:
            for date in dates:
                dateobj = datetime.date(*map(int, date.split('-')))
                marc = SWBOpenDataMarc(date=dateobj)
                sdb = SWBOpenDataSeekMapDB(date=dateobj)
                luigi.build([marc, sdb], local_scheduler=True)
                with open(marc.output().path) as handle:
                    with sqlite3db(sdb.output().path) as cursor:
                        # NOTE(review): `df` is not defined in this method;
                        # presumably a DataFrame with `date` and `id`
                        # columns provided elsewhere -- confirm.
                        idset = df[df.date == date].id.values.tolist()
                        limit, offset = self.limit, 0
                        while True:
                            batch = idset[offset:offset + limit]
                            # Stop when the ids are exhausted, not when a
                            # batch happens to match nothing: ending on an
                            # empty result would silently skip every batch
                            # that follows.
                            if not batch:
                                break
                            # NOTE(review): the IN list is built by string
                            # formatting; an id containing a single quote
                            # would break the statement.
                            quoted = ','.join("'%s'" % value for value in batch)
                            cursor.execute("""
                                SELECT offset, length
                                FROM seekmap WHERE id IN (%s)""" % quoted)
                            rows = cursor.fetchall()
                            if rows:
                                copyregions(handle, output, rows)
                            offset += limit
Ejemplo n.º 3
0
 def run(self):
     """
     Collect the seekmap row of every id listed in the 'surface' input and
     pass all of them, in input order, to copyregions in a single call.
     """
     with self.input().get('surface').open() as surface, \
          self.output().open('w') as sink, \
          self.input().get('file').open() as source, \
          sqlite3db(self.input().get('seekmap').path) as cursor:

         def region_for(identifier):
             # First matching (offset, length) row, or whatever fetchone()
             # yields when the id has no entry.
             cursor.execute("SELECT offset, length FROM seekmap where id = ?", (identifier,))
             return cursor.fetchone()

         regions = [region_for(entry.id)
                    for entry in surface.iter_tsv(cols=('id', 'date'))]
         copyregions(source, sink, regions)
Ejemplo n.º 4
0
 def run(self):
     """
     Join the 'gndto' rows against a key-value store built from 'dbppics'.

     For every (dbp, gnd) input row, each distinct value stored under the
     dbp key is written to the output as a (gnd, value) line.
     """
     kv = shellout(""" tabtokv -f "1,3" -o {output} {input}""", input=self.input().get("dbppics").path)
     with self.input().get("gndto").open() as source, \
          sqlite3db(kv) as cursor, \
          self.output().open("w") as sink:
         for entry in source.iter_tsv(cols=("dbp", "gnd")):
             cursor.execute("""select value from store where key = ?""", (entry.dbp,))
             # Rows are single-column tuples; the set drops duplicates.
             for (value,) in set(cursor.fetchall()):
                 sink.write_tsv(entry.gnd, value)
Ejemplo n.º 5
0
 def run(self):
     """
     Load every input relation into its own indexed table of one database.

     Each (relation, target) pair of the input becomes a table named after
     the relation (with ':' replaced by '_') holding the two TSV columns
     ``s`` and ``o``. Four indexes are created per table. The database is
     assembled in a temporary file and moved to the output path at the end.
     """
     import os

     fd, stopover = tempfile.mkstemp(prefix="siskin-")
     # mkstemp hands back an open descriptor; only the path is needed here,
     # so close it right away instead of leaking it.
     os.close(fd)

     with sqlite3db(stopover) as cursor:
         for relation, target in self.input().iteritems():
             table = relation.replace(":", "_")
             cursor.execute("""CREATE TABLE IF NOT EXISTS %s (s TEXT, o TEXT)""" % table)
             insert = """INSERT INTO %s (s, o) VALUES (?, ?)""" % table
             with target.open() as handle:
                 cursor.executemany(insert, handle.iter_tsv(cols=("s", "o")))
             cursor.connection.commit()
             # Index only after the bulk load has been committed.
             cursor.execute("""CREATE INDEX IF NOT EXISTS idx_%s_s on %s (s)""" % (table, table))
             cursor.execute("""CREATE INDEX IF NOT EXISTS idx_%s_o on %s (o)""" % (table, table))
             cursor.execute("""CREATE INDEX IF NOT EXISTS idx_%s_s_o on %s (s, o)""" % (table, table))
             cursor.execute("""CREATE INDEX IF NOT EXISTS idx_%s_o_s on %s (o, s)""" % (table, table))
             cursor.connection.commit()
     luigi.File(stopover).move(self.output().path)
Ejemplo n.º 6
0
 def test_ptimed(self):
     """ Test the persistent timed on a tempfile db.

     Runs ``Dummy.dummy_sum`` once and checks that the benchmark database
     then holds exactly one row with the expected name, a duration between
     zero and one, the status 'green' and a timestamp from today (UTC).
     """
     self.assertTrue(os.path.exists(BENCHMARK_DB))
     dummy = Dummy()
     dummy.dummy_sum()
     with sqlite3db(BENCHMARK_DB) as cursor:
         cursor.execute("SELECT count(*) FROM t")
         self.assertEqual(1, cursor.fetchone()[0])
         cursor.execute("SELECT * FROM t")
         name, elapsed, added, status = cursor.fetchone()
         self.assertEqual(name, 'benchmark_test.Dummy.dummy_sum')
         # Dedicated comparison asserts report the offending value on
         # failure, unlike assertTrue on a boolean expression.
         self.assertGreater(elapsed, 0)
         self.assertLess(elapsed, 1)
         self.assertEqual('green', status)
         today = datetime.datetime.now(pytz.utc).date()
         self.assertTrue(added.startswith(str(today)),
                         msg='added={0}, today={1}'.format(added, today))
Ejemplo n.º 7
0
 def test_ptimed(self):
     """ Test the persistent timed on a tempfile db.

     After a single call to ``Dummy.dummy_sum`` the benchmark database is
     expected to contain one row: the qualified method name, an elapsed
     time strictly between 0 and 1, a timestamp starting with today's UTC
     date, and the status 'green'.
     """
     self.assertTrue(os.path.exists(BENCHMARK_DB))
     dummy = Dummy()
     dummy.dummy_sum()
     with sqlite3db(BENCHMARK_DB) as cursor:
         cursor.execute("SELECT count(*) FROM t")
         self.assertEqual(1, cursor.fetchone()[0])
         cursor.execute("SELECT * FROM t")
         name, elapsed, added, status = cursor.fetchone()
         self.assertEqual(name, 'benchmark_test.Dummy.dummy_sum')
         # assertGreater/assertLess show the actual value when they fail.
         self.assertGreater(elapsed, 0)
         self.assertLess(elapsed, 1)
         self.assertEqual('green', status)
         today = datetime.datetime.now(pytz.utc).date()
         self.assertTrue(added.startswith(str(today)),
                         msg='added={0}, today={1}'.format(added, today))
Ejemplo n.º 8
0
    def decorator(*args, **kwargs):
        """ Benchmark decorator.

        Calls the wrapped ``method`` under a ``Timer``, stores one row
        (key, elapsed seconds, current time, status) in the sqlite database
        at ``path`` and, when ``log`` is True, emits a colored debug line.
        ``method``, ``path`` and ``log`` come from the enclosing scope.
        The first positional argument is expected to be the instance the
        method is bound to; its module and class name form the key.

        Returns whatever the wrapped method returns.
        """
        with Timer() as timer:
            result = method(*args, **kwargs)

        module = args[0].__module__
        klass = args[0].__class__.__name__
        fun = method.__name__

        key = '{0}.{1}.{2}'.format(module, klass, fun)
        value = timer.elapsed_s
        # just a quick visual impression, everything that takes more
        # than 10s is yellow, more than 1min, red.
        if value > 60:
            status = 'red'
        elif value > 10:
            status = 'yellow'
        else:
            status = 'green'

        # A bare filename has no directory part; os.makedirs('') would
        # raise, so only create a directory when there is one to create.
        directory = os.path.dirname(path)
        if directory and not os.path.isdir(directory):
            try:
                os.makedirs(directory)
            except OSError:
                # Another process may have created it in the meantime.
                if not os.path.isdir(directory):
                    raise

        with sqlite3db(path) as cursor:
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS
                t (key TEXT, value REAL, date TEXT, status TEXT)
            """)
            cursor.execute(
                """
                INSERT INTO t (key, value, date, status)
                VALUES (?, ?, datetime('now'), ?)
            """, (key, value, status))

        if log is True:
            msg = '[%s.%s] %0.5f' % (klass, fun, timer.elapsed_s)
            # The log color follows the Timer's own thresholds, which are
            # independent of the 10s/60s limits used for the stored status.
            if timer.elapsed_s <= timer.green:
                logger.debug(green(msg))
            elif timer.elapsed_s <= timer.yellow:
                logger.debug(yellow(msg))
            else:
                logger.debug(red(msg))

        return result
Ejemplo n.º 9
0
    def run(self):
        """
        Store the input's blank-line separated records in a sqlite table.

        The input is split into groups of consecutive non-blank lines. When
        the first line of a group contains an ``rdf:about`` attribute with a
        d-nb.info GND URL, the identifier from that URL becomes the ``id``
        and the stripped, newline-joined lines become the ``content`` of a
        row in table ``gnd``. Groups without a match are skipped. The
        database is built in a temporary file and moved to the output path.
        """
        import os

        fd, stopover = tempfile.mkstemp(prefix="siskin-")
        # Only the path is used; close the descriptor mkstemp opened so it
        # is not leaked.
        os.close(fd)
        pattern = re.compile("""rdf:about="http://d-nb.info/gnd/([0-9X-]+)">""")

        with sqlite3db(stopover) as cursor:
            cursor.execute("""CREATE TABLE gnd (id text  PRIMARY KEY, content blob)""")
            cursor.execute("""CREATE INDEX IF NOT EXISTS idx_gnd_id ON gnd (id)""")

            with self.input().open() as handle:
                # Whitespace-only lines act as record separators.
                for blank, lines in itertools.groupby(handle, key=str.isspace):
                    if blank:
                        continue
                    lines = [line.strip() for line in lines]
                    match = pattern.search(lines[0])
                    if match:
                        row = (match.group(1), "\n".join(lines))
                        cursor.execute("INSERT INTO gnd VALUES (?, ?)", row)

        luigi.File(stopover).move(self.output().path)
Ejemplo n.º 10
0
    def decorator(*args, **kwargs):
        """ Benchmark decorator.

        Times one call of the wrapped ``method``, appends a row (key,
        elapsed seconds, current time, status) to table ``t`` of the sqlite
        database at ``path`` and, when ``log`` is True, writes a colored
        debug message. ``method``, ``path`` and ``log`` are taken from the
        enclosing scope. The key is built from the module and class of the
        first positional argument plus the method name.

        Returns the wrapped method's result unchanged.
        """
        with Timer() as timer:
            result = method(*args, **kwargs)

        module = args[0].__module__
        klass = args[0].__class__.__name__
        fun = method.__name__

        key = '{0}.{1}.{2}'.format(module, klass, fun)
        value = timer.elapsed_s
        # just a quick visual impression, everything that takes more
        # than 10s is yellow, more than 1min, red.
        if value > 60:
            status = 'red'
        elif value > 10:
            status = 'yellow'
        else:
            status = 'green'

        # Skip directory creation for a bare filename (empty dirname would
        # make os.makedirs raise) and tolerate a concurrent creation.
        directory = os.path.dirname(path)
        if directory and not os.path.isdir(directory):
            try:
                os.makedirs(directory)
            except OSError:
                if not os.path.isdir(directory):
                    raise

        with sqlite3db(path) as cursor:
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS
                t (key TEXT, value REAL, date TEXT, status TEXT)
            """)
            cursor.execute("""
                INSERT INTO t (key, value, date, status)
                VALUES (?, ?, datetime('now'), ?)
            """, (key, value, status))

        if log is True:
            msg = '[%s.%s] %0.5f' % (klass, fun, timer.elapsed_s)
            # Coloring uses the Timer's thresholds, not the 10s/60s ones
            # that determine the stored status.
            if timer.elapsed_s <= timer.green:
                logger.debug(green(msg))
            elif timer.elapsed_s <= timer.yellow:
                logger.debug(yellow(msg))
            else:
                logger.debug(red(msg))

        return result
Ejemplo n.º 11
0
 def run(self):
     """
     Export the finc_mapping entries of source '28' as TSV.

     Each output line holds the finc_id and the record_id prefixed with
     'ai-28-'.
     """
     with sqlite3db(self.input().path) as cursor, \
          self.output().open('w') as sink:
         cursor.execute("""SELECT finc_id, record_id FROM finc_mapping WHERE source_id = ?""", ('28',))
         for finc_id, record_id in cursor.fetchall():
             sink.write_tsv(finc_id, 'ai-28-%s' % record_id)
Ejemplo n.º 12
0
 def test_sqlite3db(self):
     """ Test CM yields correct object.

     The object bound by the ``with`` statement must be exactly a
     ``sqlite3.Cursor``.
     """
     import os
     import shutil

     # Use a fresh path inside a private temporary directory instead of the
     # race-prone, deprecated tempfile.mktemp(); the directory and the
     # database created in it are removed after the test.
     tmpdir = tempfile.mkdtemp()
     self.addCleanup(shutil.rmtree, tmpdir, True)
     with sqlite3db(os.path.join(tmpdir, "test.db")) as cursor:
         self.assertEqual(sqlite3.Cursor, cursor.__class__)