Example #1
0
class SqlStepShould(unittest.TestCase):
    def setUp(self):
        self.schema = JsonObject(TABLE_NAME, Property('id', 'VARCHAR(36)'))
        self.database = Database(psycopg2.connect(CONNECTION))
        self.database.open()
        table = TargetTable(self.schema, self.database)
        table.create()
        self.database.commit()

    def tearDown(self):
        self.database.open()
        self.database.execute('DROP TABLE IF EXISTS %s', (AsIs(TABLE_NAME),))
        self.database.commit()

    def test_run(self):
        SqlStep(self.database, ("INSERT INTO %s VALUES('%s')", TABLE_NAME, 1),
                ("INSERT INTO %s VALUES('%s')", TABLE_NAME, 2),
                ("INSERT INTO {0} VALUES ('3')".format(TABLE_NAME))).run()

        self.database.open()
        self.database.execute(
            "SELECT COUNT(*) FROM %s",
            (AsIs(TABLE_NAME),))
        count = int(self.database.cursor.fetchone()[0])

        self.assertEqual(3, count)

        self.database.execute(
            "SELECT * FROM %s",
            (AsIs(TABLE_NAME),))
        values = list(self.database.cursor.fetchall())

        self.assertEqual([('1',), ('2',), ('3',)], values)

    def test_execute(self):
        self.database.open()
        sql = SqlStep(self.database)
        sql.execute(("INSERT INTO %s VALUES('%s')", TABLE_NAME, 1))
        sql.execute(("INSERT INTO %s VALUES('%s')", TABLE_NAME, 2))
        sql.execute(("INSERT INTO {0} VALUES ('3')".format(TABLE_NAME)))
        self.database.commit()

        self.database.open()
        self.database.execute(
            "SELECT COUNT(*) FROM %s",
            (AsIs(TABLE_NAME),))
        count = int(self.database.cursor.fetchone()[0])

        self.assertEqual(3, count)

        self.database.execute(
            "SELECT * FROM %s",
            (AsIs(TABLE_NAME),))
        values = list(self.database.cursor.fetchall())

        self.assertEqual([('1',), ('2',), ('3',)], values)
Example #2
0
class SqlStepShould(unittest.TestCase):
    def setUp(self):
        self.schema = JsonObject(TABLE_NAME, Property('id', 'VARCHAR(36)'))
        self.database = Database(psycopg2.connect(CONNECTION))
        self.database.open()
        table = TargetTable(self.schema, self.database)
        table.create()
        self.database.commit()

    def tearDown(self):
        self.database.open()
        self.database.execute('DROP TABLE IF EXISTS %s', (AsIs(TABLE_NAME), ))
        self.database.commit()

    def test_run(self):
        SqlStep(self.database, ("INSERT INTO %s VALUES('%s')", TABLE_NAME, 1),
                ("INSERT INTO %s VALUES('%s')", TABLE_NAME, 2),
                ("INSERT INTO {0} VALUES ('3')".format(TABLE_NAME))).run()

        self.database.open()
        self.database.execute("SELECT COUNT(*) FROM %s", (AsIs(TABLE_NAME), ))
        count = int(self.database.cursor.fetchone()[0])

        self.assertEqual(3, count)

        self.database.execute("SELECT * FROM %s", (AsIs(TABLE_NAME), ))
        values = list(self.database.cursor.fetchall())

        self.assertEqual([('1', ), ('2', ), ('3', )], values)

    def test_execute(self):
        self.database.open()
        sql = SqlStep(self.database)
        sql.execute(("INSERT INTO %s VALUES('%s')", TABLE_NAME, 1))
        sql.execute(("INSERT INTO %s VALUES('%s')", TABLE_NAME, 2))
        sql.execute(("INSERT INTO {0} VALUES ('3')".format(TABLE_NAME)))
        self.database.commit()

        self.database.open()
        self.database.execute("SELECT COUNT(*) FROM %s", (AsIs(TABLE_NAME), ))
        count = int(self.database.cursor.fetchone()[0])

        self.assertEqual(3, count)

        self.database.execute("SELECT * FROM %s", (AsIs(TABLE_NAME), ))
        values = list(self.database.cursor.fetchall())

        self.assertEqual([('1', ), ('2', ), ('3', )], values)
Example #3
0
class SqlManifest(object):
    def __init__(self, metadata, source, schema, bucket, db_connection):
        self.metadata = metadata
        self.source = source
        self.schema = schema
        self.bucket = bucket

        if isinstance(db_connection, Database):
            self.database = db_connection
        else:
            self.database = Database(db_connection)

        self.file_name = '{0}_manifest.json'.format(schema.table)
        self.journal_file_name = '{0}_journal.db'.format(schema.table)

    @property
    def all_keys(self):
        return (k.name for k in self.bucket.list(self.source)
                if not k.name.endswith('/'))

    @property
    def manifest_key(self):
        return normalize_path('{0}/{1}'.format(self.metadata, self.file_name))

    @property
    def journal_key(self):
        return normalize_path('{0}/{1}'.format(self.metadata,
                                               self.journal_file_name))

    @property
    def manifest_url(self):
        return 's3://{0}{1}'.format(self.bucket.name, self.manifest_key)

    def journal(self):
        journal = self.bucket.get(self.journal_key)
        if journal.exists():
            journal.get_contents_to_filename(self.journal_file_name)
            self.database.open()
            self.database.execute('SELECT key FROM journal')
            return (row[0] for row in self.database.fetchall())
        else:
            self.database.open()
            self.database.execute(
                'CREATE TABLE IF NOT EXISTS journal (key TEXT)')
            self.database.commit()
            self.database.close()
            return []

    def get(self):
        updated_journal = self.all_keys
        journal = self.journal()
        keys = list(set(updated_journal) - set(journal))

        return {
            'manifest': {
                'entries': ({
                    'url': 's3://{0}/{1}'.format(self.bucket.name, key),
                    'mandatory': True
                } for key in keys)
            },
            'updated_journal': updated_journal
        }

    def save(self):
        entries = self.get()['manifest']['entries']
        offset = 0
        offsets = []
        last_entry = None

        with open(self.file_name, 'wb') as f:
            f.seek(0)
            f.truncate()
            offset += self.__write(f, '{\n')
            offsets.append(offset)
            offset += self.__write(f, '"entries": [\n')
            offsets.append(offset)

            for entry in entries:
                line = json.dumps(entry) + ',\n'
                offset += self.__write(f, line)
                offsets.append(offset)
                last_entry = line
            f.seek(offsets[-2])
            f.truncate()
            if last_entry is not None:
                line = last_entry[0:-2] + '\n'
                f.write(line)
            f.write(']}')

        self.bucket.get(self.manifest_key).set_contents_from_filename(
            self.file_name)

    def commit(self, saved_keys):
        self.database.open()
        self.database.execute('DELETE FROM journal')
        for key in saved_keys:
            self.database.execute('INSERT INTO journal VALUES (?)', (key, ))
        self.database.commit()
        self.database.close()
        self.bucket.get(self.journal_key).set_contents_from_filename(
            self.journal_file_name)

    def exists(self):
        return self.bucket.get(self.manifest_key).exists()

    def journal_exists(self):
        return self.bucket.get(self.journal_key).exists()

    @staticmethod
    def __write(fd, line):
        fd.write(line)
        return len(line)