Example #1
0
    def __init__(self, dsn, setup_conn):
        """ Initialise the bookkeeping attributes and create the
            DBConnection for `dsn` (using a DictCursor factory).

            `setup_conn` is only used to look up the OIDs of the hstore
            types, which are then registered on the new connection.
        """
        self.current_ids = None
        self.wait_time = 0
        self.conn = DBConnection(dsn, cursor_factory=psycopg2.extras.DictCursor)

        # register_hstore cannot look the type OIDs up by itself on the
        # asynchronous connection, so query them through setup_conn.
        with setup_conn.cursor() as cur:
            hstore_types = {
                'oid': cur.scalar("SELECT 'hstore'::regtype::oid"),
                'array_oid': cur.scalar("SELECT 'hstore[]'::regtype::oid"),
            }

        psycopg2.extras.register_hstore(self.conn.conn, **hstore_types)
def test_bad_query_ignore(temp_db):
    """ Send an invalid query over a connection created with
        `ignore_sql_errors=True` and wait for it to complete.

        There are no assertions: the test passes when none of the
        calls raises.
    """
    dsn = 'dbname=' + temp_db
    db_connection = DBConnection(dsn, ignore_sql_errors=True)

    with closing(db_connection) as conn:
        conn.connect()
        conn.perform('SELECT efasfjsea')
        conn.wait()
Example #3
0
def add_tiger_data(data_dir, config, threads):
    """ Import tiger data from directory or tar file `data dir`.

        Runs 'tiger_import_start.sql', then feeds every SQL file returned
        by `handle_tarfile_or_directory` through a pool of
        max(1, threads - 1) database connections and finally runs
        'tiger_import_finish.sql'. Returns without touching the database
        when no SQL files are found.

        Every input file is closed once it has been processed and the
        tar file (if any) is closed on all exit paths.
    """
    dsn = config.get_libpq_dsn()
    sql_files, tar = handle_tarfile_or_directory(data_dir)

    if not sql_files:
        # Nothing to import, but do not leave an opened tar file behind.
        if tar:
            tar.close()
        return

    try:
        with connect(dsn) as conn:
            sql = SQLPreprocessor(conn, config)
            sql.run_sql_file(conn, 'tiger_import_start.sql')

        # Reading sql_files and then for each file line handling
        # sql_query in <threads - 1> chunks.
        sel = selectors.DefaultSelector()
        place_threads = max(1, threads - 1)

        # Creates a pool of database connections
        for _ in range(place_threads):
            conn = DBConnection(dsn)
            conn.connect()
            sel.register(conn, selectors.EVENT_WRITE, conn)

        for sql_file in sql_files:
            file = tar.extractfile(sql_file) if tar else open(sql_file)
            # Release the file handle as soon as the file is processed.
            # NOTE(review): assumes handle_threaded_sql_statements has
            # finished reading `file` when it returns - confirm.
            with file:
                handle_threaded_sql_statements(sel, file)

        # Unregistering pool of database connections
        handle_unregister_connection_pool(sel, place_threads)
    finally:
        # Close the archive even when the import was interrupted.
        if tar:
            tar.close()

    print('\n')
    LOG.warning("Creating indexes on Tiger data")
    with connect(dsn) as conn:
        sql = SQLPreprocessor(conn, config)
        sql.run_sql_file(conn, 'tiger_import_finish.sql')
Example #4
0
def conn(temp_db):
    """ Yield a DBConnection for the database `temp_db` and close it
        once the consumer is done with it.
    """
    connection = DBConnection('dbname=' + temp_db)
    try:
        yield connection
    finally:
        connection.close()
Example #5
0
def load_data(dsn, threads):
    """ Copy data from place into the placex and
        location_property_osmline tables.

        The rows of `place` are split by `osm_id` modulo into
        max(1, threads - 1) chunks, each inserted into `placex` over its
        own DBConnection. One more connection fills
        `location_property_osmline`. The function blocks until all
        statements have been waited for, printing a dot per polling
        round, and then runs ANALYSE.

        The selector and all connections are released again, even when
        one of the statements fails.
    """
    sel = selectors.DefaultSelector()
    # Connections that were opened but not yet closed.
    pending = []

    def _launch(sql):
        # Start `sql` on a fresh connection and watch it for completion.
        conn = DBConnection(dsn)
        conn.connect()
        pending.append(conn)
        conn.perform(sql)
        sel.register(conn, selectors.EVENT_READ, conn)

    try:
        # Then copy data from place to placex in <threads - 1> chunks.
        place_threads = max(1, threads - 1)
        for imod in range(place_threads):
            _launch(
                pysql.SQL("""INSERT INTO placex ({columns})
                           SELECT {columns} FROM place
                           WHERE osm_id % {total} = {mod}
                             AND NOT (class='place' and (type='houses' or type='postcode'))
                             AND ST_IsValid(geometry)
                      """).format(columns=_COPY_COLUMNS,
                                  total=pysql.Literal(place_threads),
                                  mod=pysql.Literal(imod)))

        # Address interpolations go into another table.
        _launch("""INSERT INTO location_property_osmline (osm_id, address, linegeo)
                      SELECT osm_id, address, geometry FROM place
                      WHERE class='place' and type='houses' and osm_type='W'
                            and ST_GeometryType(geometry) = 'ST_LineString'
                 """)

        # Now wait for all of them to finish.
        while pending:
            for key, _ in sel.select(1):
                conn = key.data
                sel.unregister(conn)
                try:
                    conn.wait()
                finally:
                    # Close the connection even if its statement failed.
                    pending.remove(conn)
                    conn.close()
            print('.', end='', flush=True)
        print('\n')
    finally:
        sel.close()
        # Only non-empty when an error interrupted the import.
        for conn in pending:
            conn.close()

    with connect(dsn) as conn:
        with conn.cursor() as cur:
            cur.execute('ANALYSE')
Example #6
0
 def __init__(self, dsn, pool_size):
     """ Create `pool_size` DBConnection objects for `dsn`, store the
         result of `_yield_free_worker()` and reset the wait time.
     """
     workers = []
     for _ in range(pool_size):
         workers.append(DBConnection(dsn))

     self.threads = workers
     self.free_workers = self._yield_free_worker()
     self.wait_time = 0
Example #7
0
class PlaceFetcher:
    """ Asynchronous connection that fetches place details for processing.

        Can be used as a context manager; leaving the context waits for
        a pending request and closes the connection.
    """
    def __init__(self, dsn, setup_conn):
        """ Create the DBConnection for `dsn` and register the hstore
            types on it. `setup_conn` is only used to look up the
            OIDs of the hstore types.
        """
        # Accumulated time in seconds spent waiting in get_batch().
        self.wait_time = 0
        # None: no batch requested, []: details requested but not yet
        # read, otherwise the rows of the current batch.
        self.current_ids = None
        self.conn = DBConnection(dsn, cursor_factory=psycopg2.extras.DictCursor)

        with setup_conn.cursor() as cur:
            # need to fetch those manually because register_hstore cannot
            # fetch them on an asynchronous connection below.
            hstore_oid = cur.scalar("SELECT 'hstore'::regtype::oid")
            hstore_array_oid = cur.scalar("SELECT 'hstore[]'::regtype::oid")

        psycopg2.extras.register_hstore(self.conn.conn, oid=hstore_oid,
                                        array_oid=hstore_array_oid)

    def close(self):
        """ Close the underlying asynchronous connection.
            Calling it again on a closed fetcher does nothing.
        """
        if self.conn:
            self.conn.close()
            self.conn = None


    def fetch_next_batch(self, cur, runner):
        """ Send a request for the next batch of places.
            If details for the places are required, they will be fetched
            asynchronously.

            Up to 100 rows are read from `cur`. When `runner` has a
            `get_place_details` method, it is called with the fetcher's
            connection and the rows; otherwise the rows themselves
            become the batch.

            Returns true if there is still data available.
        """
        ids = cur.fetchmany(100)

        if not ids:
            self.current_ids = None
            return False

        if hasattr(runner, 'get_place_details'):
            runner.get_place_details(self.conn, ids)
            # Empty list marks "details requested, result not read yet".
            self.current_ids = []
        else:
            self.current_ids = ids

        return True

    def get_batch(self):
        """ Get the next batch of data, previously requested with
            `fetch_next_batch`.

            Returns None when no batch is available.
        """
        if self.current_ids is not None and not self.current_ids:
            # Details were requested: wait for the query and account
            # for the time spent waiting.
            tstart = time.time()
            self.conn.wait()
            self.wait_time += time.time() - tstart
            self.current_ids = self.conn.cursor.fetchall()

        return self.current_ids

    def __enter__(self):
        return self


    def __exit__(self, exc_type, exc_value, traceback):
        """ Wait for a pending request and close the connection.
            Does nothing when the fetcher was already closed.
        """
        if self.conn:
            try:
                self.conn.wait()
            finally:
                # Release the connection even when waiting failed.
                self.close()
Example #8
0
 def _setup_connections(self):
     """ Open the main psycopg2 connection for `self.dsn` and create
         `self.num_threads` DBConnection objects for the same DSN.
     """
     self.conn = psycopg2.connect(self.dsn)

     workers = []
     for _ in range(self.num_threads):
         workers.append(DBConnection(self.dsn))
     self.threads = workers
Example #9
0
def load_data(dsn, data_dir, threads):
    """ Copy data into the word and placex table.

        First executes 'words.sql' from `data_dir`. Then the rows of
        `place` are split by `osm_id` modulo into max(1, threads - 1)
        chunks, each inserted into `placex` over its own DBConnection.
        One more connection fills `location_property_osmline`. The
        function blocks until all statements have been waited for,
        printing a dot per polling round, and then runs ANALYSE.

        The selector and all connections are released again, even when
        one of the statements fails.
    """
    # Pre-calculate the most important terms in the word list.
    db_utils.execute_file(dsn, data_dir / 'words.sql')

    sel = selectors.DefaultSelector()
    # Connections that were opened but not yet closed.
    pending = []

    def _launch(sql):
        # Start `sql` on a fresh connection and watch it for completion.
        conn = DBConnection(dsn)
        conn.connect()
        pending.append(conn)
        conn.perform(sql)
        sel.register(conn, selectors.EVENT_READ, conn)

    try:
        # Then copy data from place to placex in <threads - 1> chunks.
        place_threads = max(1, threads - 1)
        for imod in range(place_threads):
            _launch("""INSERT INTO placex ({0})
                         SELECT {0} FROM place
                         WHERE osm_id % {1} = {2}
                           AND NOT (class='place' and type='houses')
                           AND ST_IsValid(geometry)
                     """.format(_COPY_COLUMNS, place_threads, imod))

        # Address interpolations go into another table.
        _launch("""INSERT INTO location_property_osmline (osm_id, address, linegeo)
                      SELECT osm_id, address, geometry FROM place
                      WHERE class='place' and type='houses' and osm_type='W'
                            and ST_GeometryType(geometry) = 'ST_LineString'
                 """)

        # Now wait for all of them to finish.
        while pending:
            for key, _ in sel.select(1):
                conn = key.data
                sel.unregister(conn)
                try:
                    conn.wait()
                finally:
                    # Close the connection even if its statement failed.
                    pending.remove(conn)
                    conn.close()
            print('.', end='', flush=True)
        print('\n')
    finally:
        sel.close()
        # Only non-empty when an error interrupted the import.
        for conn in pending:
            conn.close()

    with connect(dsn) as conn:
        with conn.cursor() as cur:
            cur.execute('ANALYSE')