Example #1
0
    def init_connection(self, db):
        from api_etl.config import Config
        self.config = Config()

        self.database_name = db
        owner = self.config.database('owner')
        self.conn = psycopg2.connect("dbname={} user={}".format(db, owner))
Example #2
0
class PostgresLoader(Loader):
    """

    """
    def init_connection(self, db):
        from api_etl.config import Config
        self.config = Config()

        self.database_name = db
        owner = self.config.database('owner')
        self.conn = psycopg2.connect("dbname={} user={}".format(db, owner))

    def close_connection(self):
        self.conn.close()

    def table_exists(self, service_manifest):
        print "  Checking if table {} exists in DB {}".format(service_manifest.name, self.database_name)
        q = """
            SELECT EXISTS (
               SELECT 1
               FROM   information_schema.tables
               WHERE  table_schema = 'public'
               AND    table_name = '{}'
            );
        """.strip().format(service_manifest.name)

        cur = self.conn.cursor()
        cur.execute(q)
        result = cur.fetchone()
        cur.close()

        return result[0]

    def create_table(self, service_manifest, input_file):
        print "  Creating table {} in DB {}".format(service_manifest.name, self.database_name)
        reader = csv.reader(open(input_file))
        q = self._create_sql(service_manifest, reader.next())

        cur = self.conn.cursor()
        cur.execute(q)
        self.conn.commit()
        cur.close()

    def load_data(self, service_manifest, source_file, encoding):
        pk = service_manifest.table_settings["pk_name"]

        print "  Loading data into table {}".format(service_manifest.name)
        reader = csv.DictReader(open(source_file), encoding=encoding)
        inserted = 0
        for row in reader:
            if not self._row_exists(row, pk, service_manifest.name):
                self._insert_row(service_manifest.name, row)
                inserted += 1

        if inserted:
            self.conn.commit()

        print "  Inserted {} rows into database".format(inserted)

    def _insert_row(self, tablename, row):
        cols = []
        vals = []

        cols = [k for k in row.keys()]
        for c in cols:
            v = row[c].replace("'", "''")
            vals.append(u"'{}'".format(v).strip())

        if 'latitude' in cols and 'longitude' in cols:
            if row['latitude'] and row['longitude']:
                cols.append('latlong')
                vals.append(u"ST_PointFromText('POINT(%(longitude)s %(latitude)s)', 4326)" % row)

        cols = ",".join(cols)
        vals = ','.join(vals)
        q = u"""
            INSERT INTO {tbl}({cols})
            VALUES({vals});
        """.format(tbl=tablename, cols=cols, vals=vals).strip()

        cur = self.conn.cursor()
        cur.execute(q)
        cur.close()


    def _row_exists(self, row, pk, tablename):
        if isinstance(pk, str):
            pk = [pk]

        cur = self.conn.cursor()
        query = "SELECT count(*) FROM {tbl} WHERE ".format(tbl=tablename)

        where_clauses = []
        for key in pk:
             where_clauses.append(u"{col} = '{val}'".format(col=key, val=row[key]))

        query += " AND ".join(where_clauses)

        cur.execute(query)
        result = cur.fetchone()
        cur.close()

        return result[0]

    def _get_pk_name(self, service_manifest):
        table_settings = service_manifest.table_settings
        return table_settings['pk_name']

    def _create_sql(self, service_manifest, headers):
        # TODO: We should here check the schema for required fields so can can NOT NULL
        # the relevant columns - making sure to slugify them first ...
        columns = []

        pkname = self._get_pk_name(service_manifest)

        table_settings = service_manifest.table_settings
        if 'index' in table_settings:
            indices = [i.strip() for i in table_settings['index'].split(',')]
        else:
            indices = []
        print "  Indices are {}".format(indices)

        for h in headers:
            columns.append("{} TEXT".format(h))

        if 'latitude' in headers and 'longitude' in headers:
            columns.append("LatLong geography(POINT, 4326)")

        if isinstance(pkname, list):
            columns.append("PRIMARY KEY ({})".format(", ".join(pkname)))
        else:
            columns.append("PRIMARY KEY ({})".format(pkname))

        q = """
            CREATE TABLE {}({});\n
        """.strip().format(service_manifest.name, ",\n".join(columns))
        idx = []
        for i in indices:
            s = "CREATE INDEX ON {} ((lower({})));".format(service_manifest.name, i)
            idx.append(s)

        idx = ";\n".join(idx)

        grant_q = "GRANT SELECT ON ALL TABLES IN SCHEMA public TO {};".format(self.config.database('reader_username'))

        return q + idx + grant_q