Example #1
    def table_track(self, coll_map_cur, coll_map_new):
        """
        coll_map_cur : list
                     : current collection map
        coll_map_new : list
                     : new collection map
        Update the extractor object's collection map and
        starts tracking collections (data transfer).

        TODO: take care of extra props type (JSONB)
        """
        logger.info("%s Adding new collection" %
                    CURR_FILE)
        colls_cur = [x[1] for x in coll_map_cur]
        colls_new = [x[1] for x in coll_map_new]
        colls_to_add = [x for x in colls_new if x not in colls_cur]

        for name_coll in colls_to_add:
            coll_def = [x for x in coll_map_new if x[1] == name_coll][0]
            columns = coll_def[3]
            name_rel = coll_def[2]
            type_extra_prop = 'JSONB'

            meta = {
                ':table': name_rel,
                ':extra_props': type_extra_prop
            }

            self.coll_def[name_coll] = {
                ':columns': columns,
                ':meta': meta,
            }
            self.transfer_coll(name_coll)
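
For reference, table_track assumes each collection-map row matches the layout returned by get_table in Example #16: index 1 holds the collection name, index 2 the relation name, and index 3 the column definitions. A minimal sketch with hypothetical data:

    # Hypothetical collection-map rows: [id, collection_name, relation_name, columns]
    coll_map_cur = [
        [1, "Customer", "customer",
         [{"customer_name": None, ":source": "customerName", ":type": "TEXT"}]],
    ]
    coll_map_new = coll_map_cur + [
        [2, "Region", "region",
         [{"region_name": None, ":source": "regionName", ":type": "TEXT"}]],
    ]
    # extractor.table_track(coll_map_cur, coll_map_new) would register and
    # start transferring the new 'Region' collection.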
Example #2
def drop_type_notification(db, name):
    """
    Drops the function which creates a notification.
    The notification created by this function is sent to channel 'purr'
    and signals a type change.

    Parameters
    ----------
    db  : obj
        : Postgres connection object
    name: string
        : name of the function
    Returns
    -------
    -
    Example
    -------
    drop_type_notification(db, function_name)
    """
    cmd = "DROP FUNCTION IF EXISTS %s();" % name

    try:
        db.execute_cmd(cmd)
        logger.info("Dropping procedure: %s" % name, CURR_FILE)
    except Exception as ex:
        logger.error("Dropping procedure failed: %s" % ex, CURR_FILE)
Example #3
def log_tailed_docs(pg, schema, docs_useful, ids_log, table_name, oper,
                    merged):
    """
    Builds one log row per tailed document and writes the batch to the
    transfer-info log table.
    """
    log_entries = []
    ts = time.time()
    logger.info("IDs: %s" % ids_log)
    if len(ids_log) != len(docs_useful) and oper != DELETE:
        logger.error("n(ids)=%s; n(docs_useful)=%s" %
                     (len(ids_log), len(docs_useful)))
    for i in range(len(docs_useful)):
        doc_id = ids_log[i]
        doc = "no entry"
        try:
            if docs_useful[i] is not None and oper != DELETE:
                doc = str(docs_useful[i])
            else:
                doc = "Doc is NULL"
        except Exception as ex:
            logger.error(
                "%s Converting log entry failed. Details: %s\nDocument:" %
                (CURR_FILE, ex))
            logger.error(docs_useful[i])
        log_entries.append((oper, table_name, doc_id, ts, merged, doc))
    try:
        transfer_info.log_rows(pg, schema, log_entries)
    except Exception as ex:
        logger.error("%s Logging failed. Details: %s" % (CURR_FILE, ex))
Example #4
def delete(db, schema, table_name, ids):
    """
    Deletes a row in a specific table of the PG database.

    Parameters
    ----------
    table_name : string
    object_id : ObjectId
                (will need to get the hex encoded version
                of ObjectId with str(object_id))

    Returns
    -------
    -

    Example
    -------
    delete(db, 'public', 'employee', "5acf593eed101e0c1266e32b")

    """
    oids = "','".join(ids)
    cmd = "DELETE FROM %s.%s WHERE id IN ('%s');" % (schema,
                                                     table_name.lower(), oids)
    logger.info("[ROW] %s" % cmd)
    db.execute_cmd(cmd)
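
Because the ids are spliced into the SQL string, any quote character in an id would break the statement. A safer sketch, assuming db.execute_cmd forwards a values tuple to the underlying psycopg2 cursor (the reconnect logic in Example #23 suggests it accepts one):

    # Hedged sketch: let psycopg2 bind the id list instead of quoting by hand.
    cmd = "DELETE FROM %s.%s WHERE id IN %%s;" % (schema, table_name.lower())
    db.execute_cmd(cmd, (tuple(ids),))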
Example #5
def create_oplog_table(db, schema='public'):
    """
    Creates the table that logs the operation, relation name,
    object id and timestamp for each entry of the oplog.

    Parameters
    ----------
    db: connection obj
    schema: name of the schema in Postgres
    Returns
    -------
    -

    Example
    -------
    create_oplog_table(pg, 'purr')

    """
    table_name = "purr_oplog"
    attrs = table_desc[table_name]["attrs"]
    types = table_desc[table_name]["types"]
    pks = table_desc[table_name]["pks"]

    try:
        table.drop(db, schema, [table_name])
        table.create(db, schema, table_name, attrs, types, pks)
        logger.info("[TRANSFER INFO] Created table %s." % (table_name))
    except Exception as ex:
        logger.error(
            "[TRANSFER_INFO] Failed to create table %s: %s" % (table_name, ex))
Example #6
    def transfer(self, coll_names_in_config):
        """
        Transfers documents or whole collections if the number of fields
        is less than 30 000 (batch_size).
        Types of attributes are determined using the collections.yml file.
        Returns
        -------
        -
        Parameters
        ----------
        coll_names : list
                   : list of collection names
        """

        coll_names = collection.check(self.mdb, coll_names_in_config)
        if len(coll_names) == 0:
            logger.info('%s No collections to transfer.' %
                        CURR_FILE
                        )
            return

        relation_names = []
        for coll in coll_names:
            relation_names.append(tc.snake_case(coll))

        if self.drop:
            table.drop(self.pg, self.schema, relation_names)
        elif self.truncate:
            # truncate the snake_cased relations, matching drop() above
            table.truncate(self.pg, self.schema, relation_names)

        schema.create(self.pg, self.schema)

        for coll in coll_names:
            self.transfer_coll(coll)
Example #7
def create_transfer_stats_table(db, schema='public'):
    """
    Creates the table that logs the document count, relation name
    and timestamp for each collection transfer.

    Parameters
    ----------
    db: connection obj
    schema: name of the schema in Postgres
    Returns
    -------
    -

    Example
    -------
    create_transfer_stats_table(pg, 'purr')

    """
    table_name = "purr_transfer_stats"
    attrs = table_desc[table_name]["attrs"]
    types = table_desc[table_name]["types"]

    try:
        table.create(db, schema, table_name, attrs, types)
        logger.info("[TRANSFER INFO] Created table %s." % (table_name))
    except Exception as ex:
        logger.error(
            "[TRANSFER_INFO] Failed to create table %s: %s" % (table_name, ex))
Example #8
def create_stat_table(db, schema='public'):
    """
    Creates a table that holds the timestamp of the
    latest successfully inserted item.
    Parameters
    ----------
    db: connection obj
    schema: name of the schema in Postgres

    Returns
    -------
    -

    Example
    -------
    create_stat_table(pg, 'purr')

    """
    table_name = "purr_info"
    attrs = table_desc[table_name]["attrs"]
    types = table_desc[table_name]["types"]
    values = [0, None, int(time.time())]
    try:
        table.create(db, schema, table_name, attrs, types)
        ts = get_latest_successful_ts(db, schema)
        if len(ts) == 0:
            row.insert(db, schema, table_name, attrs, values)
        logger.info("[TRANSFER INFO] Created table %s." % (table_name))
    except Exception as ex:
        logger.error(
            "[TRANSFER_INFO] Failed to create table %s: %s" % (table_name, ex))
Example #9
def create_log_error_table(db, schema='public'):
    """
    Creates the table that logs each error's location, message
    and timestamp.

    Parameters
    ----------
    db: connection obj
    schema: name of the schema in Postgres
    Returns
    -------
    -

    Example
    -------
    create_log_error_table(pg, 'purr')

    """
    table_name = "purr_error"
    attrs = table_desc[table_name]["attrs"]
    types = table_desc[table_name]["types"]

    try:
        table.create(db, schema, table_name, attrs, types)
        logger.info("[TRANSFER INFO] Created table %s." % (table_name))
    except Exception as ex:
        logger.error(
            "[TRANSFER_INFO] Failed to create table %s: %s" % (table_name, ex))
Example #10
def drop(db, schema, tables):
    """
    Drop one or more tables in the PG database.

    Parameters
    ----------
    db : obj
         Postgres connection object
    schema : string
    tables : list

    Example
    -------
    drop(pg, 'public', ['my_table'])

    Todo
    ----
    - first check if all tables in the list exist
    """
    tables_cmd = []
    for t in tables:
        tables_cmd.append('%s.%s' % (schema, t.lower()))
    tables_cmd = ', '.join(tables_cmd)

    cmd = "DROP TABLE IF EXISTS %s" % (tables_cmd)
    try:
        db.execute_cmd(cmd)
        logger.info('[TABLE] Dropping table(s) %s.' % (tables_cmd))
    except Exception as ex:
        logger.error('[TABLE] %s when executing command %s.' % (ex, cmd))
Example #11
def drop_trigger_type_notification(db, schema, table, name, proc):
    """
    Drops the trigger that signals type changes on a table.
    (proc is unused here; the signature mirrors
    create_trigger_type_notification.)
    """
    cmd = "DROP TRIGGER IF EXISTS %s ON %s.%s CASCADE" % (name, schema, table)
    try:
        logger.info("[TABLE] Dropping trigger '%s'" % name)
        db.execute_cmd(cmd)
    except Exception as ex:
        logger.error("[TABLE] Dropping trigger '%s' failed: %s" % (name, ex))
Example #12
def create_type_notification(db, name):
    """
    Creates a function which will notify channel 'purr' about type changes.

    Parameters
    ----------
    db  : obj
        : Postgres connection object
    name: string
        : name of the function
    Returns
    -------
    -
    Example
    -------
    create_type_notification(db, name_function)
    """
    cmd = """CREATE OR REPLACE FUNCTION %s()
    RETURNS TRIGGER AS $$
    BEGIN
        PERFORM pg_notify('purr', 'type_change');
        RETURN NULL;
    END;
    $$ LANGUAGE plpgsql;
    """ % name

    try:
        logger.info("Creating procedure: %s" % name, CURR_FILE)
        db.execute_cmd(cmd)

    except Exception as ex:
        logger.error("Insert failed: %s" % ex, CURR_FILE)
Example #13
def vacuum(db, schema, table):
    cmd = "VACUUM FULL ANALYZE %s.%s;" % (schema, table)
    try:
        logger.info("[TABLE] Vacuuming table '%s.%s'" % (schema, table))
        db.execute_cmd(cmd)
    except Exception as ex:
        logger.error("[TABLE] Vacuuming table '%s.%s' failed: %s" %
                     (schema, table, ex))
Example #14
def create_trigger_type_notification(db, schema, table, name, proc):
    cmd = """
    CREATE TRIGGER %s AFTER INSERT OR UPDATE OR DELETE ON %s.%s
    FOR EACH ROW EXECUTE PROCEDURE %s()
    """ % (name, schema, table, proc)
    try:
        logger.info("[TABLE] Creating trigger '%s'" % name)
        db.execute_cmd(cmd)
    except Exception as ex:
        logger.error("[TABLE] Creating trigger '%s' failed: %s" % (name, ex))
Example #15
def prepare_docs_for_update(coll_settings, docs):
    docs_useful = []
    docs_id = []
    for doc in docs:
        # It is possible that multiple versions of one document
        # exist among these documents. They must be merged so they
        # can be sent to Postgres together as one entry.
        merge_similar = False
        unset = {}
        doc_useful = {}
        temp = doc["o"]

        if "o2" in doc.keys():
            if "_id" in doc["o2"].keys():
                doc_useful["_id"] = str(doc["o2"]["_id"])
                if (doc_useful["_id"] in docs_id):
                    merge_similar = True
                else:
                    docs_id.append(str(doc_useful["_id"]))

        if "$set" in temp.keys():
            doc_useful.update(temp["$set"])
            for k, v in temp["$set"].items():
                if v is None:
                    unset[k] = "$unset"
        if "$unset" in temp.keys():
            for k, v in temp["$unset"].items():
                unset[k] = '$unset'
        if "$set" not in temp.keys() and "$unset" not in temp.keys():
            # case when the document was updated not through a query
            # but via an IDE, e.g. Studio 3T:
            logger.info("Direct update:")
            doc_useful.update(temp)
            fields = [x[":source"] for x in coll_settings[":columns"]]
            for k in fields:
                if k == '_id':
                    temp[k] = str(temp[k])
                    doc_useful.update(temp)
                if k not in temp.keys():
                    unset[k] = '$unset'
            for k, v in temp.items():
                if v is None:
                    unset[k] = '$unset'
        doc_useful.update(unset)

        # merging values with the same ID because there cannot be
        # multiple updates of the same row in one statement
        if merge_similar:
            for i in range(0, len(docs_useful)):
                if docs_useful[i]["_id"] == doc_useful["_id"]:
                    docs_useful[i] = dict(docs_useful[i], **doc_useful)
                    break
        else:
            docs_useful.append(doc_useful)
    return docs_useful, merge_similar
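
To make the branching above concrete, here is a hypothetical oplog entry of the shape this function expects; 'o' carries the $set/$unset payload and 'o2' identifies the document:

    from bson import ObjectId

    # Hypothetical update entry: 'age' is set to None, so it maps to $unset.
    doc = {
        "op": "u",
        "o": {"$set": {"name": "Felix", "age": None}},
        "o2": {"_id": ObjectId("5acf593eed101e0c1266e32b")},
    }
    # prepare_docs_for_update(coll_settings, [doc]) would yield
    # ([{'_id': '5acf593eed101e0c1266e32b', 'name': 'Felix', 'age': '$unset'}],
    #  False)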
Example #16
def get_table(db, schema='public'):
    cmd = """SELECT id, collection_name, relation_name,
    types FROM %s.purr_collection_map ORDER BY id""" % (schema)
    try:
        coll_map = db.execute_cmd_with_fetch(cmd)
        logger.info("Getting schema from DB.", CURR_FILE)
        return coll_map

    except Exception as ex:
        logger.error("[TRANSFER_INFO] Failed to get collection map table" %
                     (ex))
Example #17
def reset(db, schema='public'):
    """
    Resets the schema: drops it if it exists (with CASCADE) and creates it anew.
    """
    drop = 'DROP SCHEMA IF EXISTS %s CASCADE;' % schema
    create = 'CREATE SCHEMA %s;' % schema
    try:
        db.execute_cmd(drop)
        db.execute_cmd(create)
        logger.info("[SCHEMA] Schema %s is reset." % schema)
    except Exception as ex:
        logger.error("[SCHEMA] Schema reset failed. %s" % ex)
Example #18
def generate_collection_map(settings_mdb):
    """
    TODO:
    - add docs
    - disconnect from Mongo!
    """
    logger.info("Starting Purrito v%s ... =^..^=" % get_version(), CURR_FILE)

    logger.info("PID=%s" % os.getpid(), CURR_FILE)
    mongo = mongodb.MongoConnection(settings_mdb)
    coll_map = cm.create_map(mongo.conn, settings_mdb["db_name"])
    cm.create_file(coll_map)
    mongo.disconnect()
Example #19
def create_file(coll_map):
    """
    Creates the collection map file.
    """
    name_file = "collections.yml"
    operation = "w"
    try:
        logger.info("%s Creating collection map file..." % CURR_FILE)
        with open(name_file, operation) as file_out:
            yaml.dump(coll_map, file_out, default_flow_style=False)
        logger.info("Collection map file created: %s" % name_file, CURR_FILE)
    except Exception as ex:
        logger.error("Failed to create collection map file. Details: %s" % ex,
                     CURR_FILE)
Example #20
    def table_untrack(self, coll_map_cur, coll_map_new):
        """
        Removes dropped collections from the extractor's collection
        definitions so that they are no longer synced.
        """
        tables_cur = [x[2] for x in coll_map_cur]
        tables_remaining = [x[2] for x in coll_map_new]

        tables_to_drop = [x for x in tables_cur if x not in tables_remaining]
        colls_to_remove = [x[1]
                           for x in coll_map_cur if x[2] in tables_to_drop]

        logger.info("%s Stop syncing collections %s." %
                    (
                        CURR_FILE,
                        ", ".join(colls_to_remove)))

        for coll in colls_to_remove:
            self.coll_def.pop(coll, None)
Example #21
    def handle_multiple(self, docs, updated_at):
        # group documents by namespace (db.coll)
        docs_grouped = {}
        for doc in docs:
            collection = doc["ns"]["db"] + "." + doc["ns"]["coll"]
            if collection not in docs_grouped.keys():
                docs_grouped[collection] = []

            useful_info_update = {}
            if doc["operationType"] == UPDATE:
                if "updateDescription" in doc.keys():
                    useful_info_update = doc["updateDescription"]
            elif doc["operationType"] == INSERT:
                if "fullDocument" in doc.keys():
                    useful_info_update = doc["fullDocument"]
                else:
                    useful_info_update = doc["o2"]

            set_dict = {}
            d = {
                "op": doc["operationType"],
                "db_name": doc["ns"]["db"],
                "coll_name": doc["ns"]["coll"],
                "o": doc["documentKey"],
                "_id": doc["documentKey"]["_id"],
                "o2": useful_info_update
            }
            if doc["operationType"] == UPDATE:
                d["o"]["$set"] = useful_info_update['updatedFields']
            d["o"]["_id"] = doc["documentKey"]["_id"]
            if doc["operationType"] == INSERT:
                d["o"] = useful_info_update
            d["o2"]["_id"] = doc["documentKey"]["_id"]

            docs_grouped[collection].append(d)

        for coll, docs_details in docs_grouped.items():
            self.transform_and_load_many(docs_details)
            # every 5 minutes update the timestamp because we need to continue
            # tailing in case of disconnecting from the PGDB
            diff = datetime.utcnow() - updated_at
            minutes_between_update = (diff.seconds // 60) % 60
            if minutes_between_update > 5:
                t = int(datetime.utcnow().timestamp())
                transfer_info.update_latest_successful_ts(
                    self.pg, self.schema, t)
                logger.info("%s Updated latest_successful_ts: %d" %
                            (CURR_FILE, t))
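
For reference, a hypothetical change stream event of the shape handle_multiple expects; operationType, ns, documentKey and updateDescription follow the MongoDB change stream format:

    from bson import ObjectId

    # Hypothetical change stream event for an update
    # (assuming UPDATE == "update").
    event = {
        "operationType": "update",
        "ns": {"db": "booster", "coll": "Customer"},
        "documentKey": {"_id": ObjectId("5acf593eed101e0c1266e32b")},
        "updateDescription": {
            "updatedFields": {"name": "Felix"},
            "removedFields": []
        }
    }
    # handle_multiple([event], datetime.utcnow()) groups it under
    # "booster.Customer" and forwards it to transform_and_load_many.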
Example #22
def create_map(mongo, name_db):
    coll_map = {name_db: {}}
    colls = collection.get_all(mongo)

    for coll in sorted(colls):
        logger.info('Determining types for collection %s...' % coll, CURR_FILE)

        # TODO: move snake_case to another file, e.g. util
        name_relation = tc.snake_case(coll)

        coll_map[name_db][coll] = {
            ":columns": [],
            ':meta': {
                ':table': name_relation,
                ':extra_props': 'JSONB'
            }
        }
        docs = collection.get_docs_for_type_check(mongo, coll)
        logger.info("Reading samples...", CURR_FILE)

        types = get_types(docs)

        # TODO: handle None
        for field, value in types.items():
            type_chosen = "text"
            if len(value) > 1:
                # multiple candidate types: pick the most frequent one
                total = docs.count()
                max_perc = 0
                for k, v in value.items():
                    if k is None:
                        continue
                    curr_perc = v / total
                    if curr_perc > 0 and curr_perc > max_perc:
                        max_perc = curr_perc
                        type_chosen = k
            else:
                # there is exactly one key which will be the
                # chosen type
                type_chosen = list(value.keys())[0]
            name_column = tc.snake_case(field)
            def_column = {
                name_column: None,
                ":source": field,
                ":type": type_chosen.upper()
            }
            coll_map[name_db][coll][":columns"].append(def_column)
    return coll_map
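
The returned map, once dumped to collections.yml by create_file (Example #19), has roughly this shape (hypothetical database, collection and field names):

    # Hypothetical result of create_map for a single collection:
    coll_map = {
        "my_db": {
            "Customer": {
                ":columns": [
                    {"customer_name": None,
                     ":source": "customerName",
                     ":type": "TEXT"}
                ],
                ":meta": {":table": "customer", ":extra_props": "JSONB"}
            }
        }
    }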
Example #23
    def __init__(self, conn_details, ttw=1):
        logger.info("Connecting to %s" % conn_details, CURR_FILE)

        # time to wait before attempt to reconnect
        self.ttw = ttw
        self.conn_details = conn_details
        self.cmd_latest = None
        self.values_latest = None
        self.function_latest = None
        self.query_failed = False

        if ttw == 1:
            self.attempt_to_reconnect = False
        try:
            self.conn = psycopg2.connect(self.conn_details)
            self.conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)

            self.cur = self.conn.cursor()
            logger.info("Connected to Postgres.", CURR_FILE)
            self.ttw = 1
            if self.query_failed is True:
                """
                the latest command should be repeated because Postgres
                was disconnected and it is likely that it was during a
                command
                """
                self.query_failed = False
                if self.function_latest is FunctionLatest.EXECUTE:
                    self.execute_cmd(self.cmd_latest, self.values_latest)
                elif self.function_latest is FunctionLatest.EXECUTE_WITH_FETCH:
                    self.execute_cmd_with_fetch(self.cmd_latest,
                                                self.values_latest)
                elif self.function_latest is FunctionLatest.EXECUTE_MANY:
                    self.execute_many_cmd(self.cmd_latest, self.values_latest)

        except Exception as ex:
            self.attempt_to_reconnect = True
            msg = """
                Could not connect to Postgres.
                Reconnecting in %s seconds...
                Details: %s
                """ % (self.ttw, ex)
            self.log_error_in_pg(msg)
            time.sleep(self.ttw)

            self.__init__(self.conn_details, self.ttw * 2)
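
Because a failed attempt re-enters __init__ with ttw doubled, the retry delays grow exponentially (1, 2, 4, 8, ... seconds). A hypothetical construction (the class name and DSN below are assumptions):

    # Hedged usage sketch: psycopg2-style DSN; reconnects back off exponentially.
    pg = PgConnection("host=localhost dbname=purr user=purr password=secret")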
Example #24
def check(db, colls_requested):
    """
    Checks if requested collections exist in the database.
    Gets all collection names from MongoDB (colls_name) and creates
    a new list which contains only the existing collection names.

    Parameters
    ----------
    db : pymongo.database.Database
      Database connection and name
    colls_requested : list
      Contains the list of requested collection names.

    Returns
    -------
    colls_existing : list
      Contains only existing collection names.

    Raises
    ------
    ValueError
      when a requested collection does not exist in the database (MongoDB)

    Example
    -------
    check(db, ['Car'])
      []
    check(db, ['Region', 'Customer'])
      ['Region', 'Customer']
    """
    colls_name = db.collection_names(include_system_collections=False)
    colls_existing = []
    logger.info('[COLLECTION] Checking collection names...')
    try:
        for coll in colls_requested:
            try:
                colls_name.index(coll)
                colls_existing.append(coll)
            except ValueError:
                logger.warn("""
                    [COLLECTION] '%s' is not in the Mongo database.
                    Skipping data transfer""" % coll)
    except Exception as ex:
        logger.error("[COLLECTION] Checking collection names failed: %s" % ex)
    return colls_existing
Example #25
def get_by_name_reduced(db, name, fields, size=20000):
    """
    Gets data from collection limited by batch size containing
    only specific fields.

    Parameters
    ----------
    db : pymongo.database.Database
      Database connection and name
    name : string
      Name of collection.
    fields : list
      Names of fields to include in the query.

    Returns
    -------
    docs : pymongo.cursor.Cursor

    Raises
    ------

    Example
    -------
    get_by_name_reduced(db, 'Car', ['_id', 'type', 'nfOfSeats'])

    TODO
    ----
    - let the user decide batch size
    """
    docs = []
    try:
        logger.info('[COLLECTION] Loading data from collection %s...' % name)
        c = db[name]
        # create the document given to a query that specifies which
        # fields MongoDB returns in the result set
        projection = {}
        for field in fields:
            projection[field] = 1
        bz = c.find({}, projection).sort('$natural', pymongo.DESCENDING)
        docs = bz.batch_size(size)
    except Exception as ex:
        logger.error("""[COLLECTION] Loading data from collection %s failed.
            Details: %s""" % (name, ex))
    return docs
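
A hypothetical caller iterates the returned cursor, which fetches documents lazily in batches of size:

    # Hedged usage sketch: the projection limits each doc to the listed fields.
    docs = get_by_name_reduced(db, 'Car', ['_id', 'type', 'nfOfSeats'])
    for doc in docs:
        print(doc['_id'])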
Example #26
def run_collection(conn, coll, stop):
    cursor = conn["booster"][coll].watch([{
        '$match': {
            'operationType': {
                '$in': [UPDATE, INSERT, DELETE]
            }
        }
    }])

    for doc in cursor:
        op = doc["operationType"]

        if op in [INSERT, UPDATE, DELETE]:
            DATA_QUEUE.put(doc)

        if stop():
            break

    logger.info("%s Thread %s stopped." % (CURR_FILE, coll))
    cursor.close()
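
run_collection is meant to run on a worker thread per collection, with stop as a callable flag checked after each event. A minimal sketch of the assumed wiring (thread and flag names are hypothetical):

    import threading

    stop_flag = False
    t = threading.Thread(target=run_collection,
                         args=(conn, "Customer", lambda: stop_flag))
    t.start()
    # ... later:
    stop_flag = True   # the thread exits after the next change event
    t.join()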
Example #27
def get_column_names_and_types(db, schema, table):
    """
    Get column names and column types of a specific table.
    Parameters
    ----------
    db : obj
         Postgres connection object
    schema : string
    table : string

    Returns
    -------
    List of column names and corresponding types.
    """
    cmd = """
    SELECT column_name, data_type FROM information_schema.columns
    WHERE table_schema='%s' AND table_name = '%s';
    """ % (schema, table.lower())
    logger.info("[TABLE] Checking columns and types for table %s.%s" %
                (schema, table))
    try:
        rows = db.execute_cmd_with_fetch(cmd)
        return rows
    except Exception as ex:
        logger.error('[TABLE] %s when executing command %s.' % (ex, cmd))
Example #28
    def convert_columns(self, name_table, source, fields_cur, fields_new):
        """
        (1) Tries to convert the column.
        (2) TODO: If (1) was not successful (PG could not
        convert the column), just rename it and add
        the column again so Purr can take care of it.
        """
        for i in range(0, len(fields_new)):
            field = fields_new[i]
            if field[":source"] in source:
                for column, v in field.items():
                    if v is None:
                        type_old = fields_cur[i][":type"]
                        type_new = field[":type"]
                        if tc.is_convertable(type_old, type_new):
                            logger.info(
                                """%s table %s, column %s:
                                Type [%s] is convertable to [%s]""" % (
                                    CURR_FILE,
                                    name_table,
                                    column,
                                    type_old,
                                    type_new
                                ))
                            table.column_change_type(
                                self.pg,
                                self.schema,
                                name_table,
                                column,
                                type_new)
                        else:
                            logger.error("""
                                %s In table %s, column %s:
                                Type [%s] is NOT convertable to [%s]
                                """ % (
                                CURR_FILE,
                                name_table,
                                column,
                                type_old,
                                type_new))
Example #29
def get_docs_for_type_check(db, name, nr_of_docs=100):
    """
    Gets a limited number of documents from a collection.

    Parameters
    ----------
    db : pymongo.database.Database
      Database connection
    name : string
      Name of collection.
    nr_of_docs : integer
      Number of documents to return

    Returns
    -------
    docs : pymongo.cursor.Cursor

    Raises
    ------

    Example
    -------
    get_docs_for_type_check(db, 'Car')

    TODO
    ----
    - let the user decide batch size
    """
    docs = []
    try:
        logger.info('[COLLECTION] Loading data from collection %s...' % name)
        c = db[name]
        docs = c.find().sort('$natural',
                             pymongo.DESCENDING).skip(0).limit(nr_of_docs)
    except Exception as ex:
        logger.error("""
          [COLLECTION] Loading data from collection %s failed.
          Details: %s""" % (name, ex))
    return docs
Example #30
def create_table(db, coll_map, schema='public'):
    """
  Adds primary key to a PostgreSQL table.
  Parameters
  ----------
  Returns
  -------
  -
  Example
  -------
  create_table(pg, 'purr')
  """
    table_name = "purr_collection_map"
    attrs = [
        "id", "collection_name", "relation_name", "types", "updated_at",
        "query_update"
    ]
    types = ["integer", "text", "text", "jsonb[]", "timestamp", "text"]

    try:
        # TODO: make this function accept both a string and a list
        table.drop(db, schema, [table_name])
        table.create(db, schema, table_name, attrs, types)
        logger.info("Created table %s.%s." % (schema, table_name), CURR_FILE)
    except Exception as ex:
        logger.error(
            """
            Failed to create table %s.%s: %s
            """ % (schema, table_name, ex), CURR_FILE)

    populate_table(db, coll_map, table_name, attrs, schema)
    procedure_name = 'notify_type'
    procedure.drop_type_notification(db, procedure_name)
    procedure.create_type_notification(db, procedure_name)
    table.drop_trigger_type_notification(db, 'public', 'purr_collection_map',
                                         'notify', procedure_name)
    table.create_trigger_type_notification(db, 'public', 'purr_collection_map',
                                           'notify', procedure_name)