def _bulk_upsert(self, documents, namespace):
    """Insert *documents* into *namespace* in batches of ``self.chunk_size``.

    Opens one cursor for the whole run; each full batch is flushed with
    ``sql_bulk_insert`` and committed, then a final (possibly partial)
    batch is flushed after the loop.
    """
    with self.pgsql.cursor() as cursor:
        pending = []
        copied = 0
        for copied, document in enumerate(documents, start=1):
            pending.append(document)
            # Flush and commit every full chunk to bound memory usage.
            if copied % self.chunk_size == 0:
                sql_bulk_insert(cursor, self.mappings, namespace,
                                pending, quiet=self.quiet)
                self.commit()
                pending = []
                LOG.info('%s %s copied...', copied, namespace)
        # Flush whatever remains after the last full chunk.
        sql_bulk_insert(cursor, self.mappings, namespace,
                        pending, quiet=self.quiet)
        self.commit()
def upsert(self, doc, namespace, timestamp):
    """Upsert a single *doc* into *namespace*.

    Does nothing when the namespace has no mapping. Database errors are
    logged rather than propagated; the full traceback is only emitted
    when ``self.quiet`` is false, matching the other handlers in this
    class.
    """
    if not is_mapped(self.mappings, namespace):
        return
    try:
        with self.pgsql.cursor() as cursor:
            self._upsert(namespace, doc, cursor, timestamp)
            self.commit()
    # Catch only database errors (was a broad `except Exception as e`
    # with an unused `e`); keep behavior consistent with the sibling
    # upsert implementation that respects the quiet flag.
    except psycopg2.Error:
        LOG.error(u"Impossible to upsert %s to %s", doc, namespace)
        if not self.quiet:
            LOG.error(u"Traceback:\n%s", traceback.format_exc())
def upsert(self, doc, namespace, timestamp):
    """Upsert one document into *namespace*, ignoring unmapped namespaces.

    Database errors are swallowed after logging; the traceback is only
    logged when ``self.quiet`` is false.
    """
    if not is_mapped(self.mappings, namespace):
        return

    try:
        with self.pgsql.cursor() as cursor:
            self._upsert(namespace, doc, cursor, timestamp)
            self.commit()
    except psycopg2.Error:
        # Best-effort: report and continue rather than abort the stream.
        LOG.error(u"Impossible to upsert %s to %s", doc, namespace)
        if self.quiet:
            return
        LOG.error(u"Traceback:\n%s", traceback.format_exc())
def bulk_upsert(self, documents, namespace, timestamp):
    """Replace the contents of *namespace* with *documents*.

    Deletes all rows of the target table and of its linked (child)
    tables first, then bulk-inserts the new documents. No-op when the
    namespace has no mapping.

    Fixes over the previous version: the cursor is opened with a
    ``with`` block instead of leaking raw ``self.pgsql.cursor()``
    objects, and database errors are logged (with traceback unless
    ``self.quiet``) instead of propagating, consistent with the other
    handlers in this class.
    """
    LOG.info('Inspecting %s...', namespace)
    if not is_mapped(self.mappings, namespace):
        return
    try:
        LOG.info('Mapping found for %s !...', namespace)
        LOG.info('Deleting all rows before update %s !...', namespace)
        db, collection = db_and_collection(namespace)
        with self.pgsql.cursor() as cursor:
            # Child tables first so foreign-key constraints are not violated.
            for linked_table in self.get_linked_tables(db, collection):
                sql_delete_rows(cursor, linked_table)
            sql_delete_rows(cursor, collection)
            self.commit()
        self._bulk_upsert(documents, namespace)
        LOG.info('%s done.', namespace)
    except psycopg2.Error:
        LOG.error(
            "Impossible to bulk insert documents in namespace %s: %s",
            namespace, documents)
        if not self.quiet:
            LOG.error("Traceback:\n%s", traceback.format_exc())
def _init_schema(self):
    """(Re)create the PostgreSQL schema described by ``self.mappings``.

    For each mapped collection: drops the table if it exists, creates it
    with a ``_creationdate`` column plus every mapped scalar column,
    creates the declared indices, and finally adds the collected foreign
    keys for the database. Any ``psycopg2.Error`` aborts schema creation
    and is logged (traceback only when not ``self.quiet``).
    """
    self.prepare_mappings()
    try:
        # Mappings are organised as mappings[database][collection][column].
        for database in self.mappings:
            # Foreign keys are accumulated per database and applied once
            # all tables of that database exist.
            foreign_keys = []
            with self.pgsql.cursor() as cursor:
                for collection in self.mappings[database]:
                    self.insert_accumulator[collection] = 0
                    pk_found = False
                    pk_name = self.mappings[database][collection]['pk']
                    # Every table gets a _creationdate column and a
                    # descending index on it, plus any user-declared indices.
                    columns = ['_creationdate TIMESTAMP']
                    indices = [u"INDEX idx_{0}__creation_date ON {0} (_creationdate DESC)".format(collection)] + \
                        self.mappings[database][collection].get('indices', [])
                    for column in self.mappings[database][collection]:
                        column_mapping = self.mappings[database][
                            collection][column]
                        # Only entries with a 'dest' are real column
                        # mappings (others are options such as 'pk').
                        if 'dest' in column_mapping:
                            name = column_mapping['dest']
                            column_type = column_mapping['type']
                            nullable = column_mapping.get('nullable', True)
                            constraints = ''
                            if name == pk_name:
                                constraints = "CONSTRAINT {0}_PK PRIMARY KEY".format(
                                    collection.upper())
                                pk_found = True
                            if not nullable:
                                constraints = '{} NOT NULL'.format(
                                    constraints)
                            # Array-typed fields map to separate linked
                            # tables, not to columns of this table.
                            if column_type != ARRAY_TYPE and column_type != ARRAY_OF_SCALARS_TYPE:
                                columns.append(name + ' ' + column_type + ' ' + constraints)
                                if 'index' in column_mapping:
                                    indices.append(
                                        u"INDEX idx_{2}_{0} ON {1} ({0})".
                                        format(name, collection,
                                               collection.replace('.', '_')))
                            if 'fk' in column_mapping:
                                foreign_keys.append({
                                    'table': column_mapping['dest'],
                                    'ref': collection,
                                    'fk': column_mapping['fk'],
                                    'pk': pk_name
                                })
                    # No mapped column was the declared pk: synthesise a
                    # SERIAL primary key column.
                    if not pk_found:
                        columns.append(pk_name + ' SERIAL CONSTRAINT ' +
                                       collection.upper() + '_PK PRIMARY KEY')
                    # Destructive: existing table (and its data) is dropped.
                    if sql_table_exists(cursor, collection):
                        sql_drop_table(cursor, collection)
                    sql_create_table(cursor, collection, columns)
                    for index in indices:
                        cursor.execute("CREATE " + index)
                sql_add_foreign_keys(cursor, foreign_keys)
                self.commit()
    except psycopg2.Error:
        LOG.error(u"A fatal error occured during tables creation")
        if not self.quiet:
            LOG.error(u"Traceback:\n%s", traceback.format_exc())
def bulk_upsert(self, documents, namespace, timestamp):
    """Replace the contents of *namespace* with *documents*.

    Deletes all rows of the target table and of its linked (child)
    tables, then bulk-inserts *documents* via ``_bulk_upsert``. No-op
    when the namespace has no mapping. Database errors are logged
    (traceback only when not ``self.quiet``) instead of propagating.

    Fix: the delete phase now uses a ``with`` cursor instead of leaking
    raw ``self.pgsql.cursor()`` objects that were never closed.
    """
    LOG.info('Inspecting %s...', namespace)
    if is_mapped(self.mappings, namespace):
        try:
            LOG.info('Mapping found for %s !...', namespace)
            LOG.info('Deleting all rows before update %s !...', namespace)
            db, collection = db_and_collection(namespace)
            with self.pgsql.cursor() as cursor:
                # Child tables first so foreign-key constraints hold.
                for linked_table in self.get_linked_tables(db, collection):
                    sql_delete_rows(cursor, linked_table)
                sql_delete_rows(cursor, collection)
                self.commit()
            self._bulk_upsert(documents, namespace)
            LOG.info('%s done.', namespace)
        except psycopg2.Error:
            LOG.error(
                "Impossible to bulk insert documents in namespace %s: %s",
                namespace, documents)
            if not self.quiet:
                LOG.error("Traceback:\n%s", traceback.format_exc())
def sql_bulk_insert(cursor, mappings, namespace, documents, quiet=False):
    """Build and execute one WITH/INSERT statement per document tree.

    ``_sql_bulk_insert`` turns *documents* into query trees; each tree is
    flattened to an ordered list of subqueries (parents before children)
    and compiled into a single SQL statement of the form
    ``WITH <data CTEs>, <insert...returning CTEs> <final INSERT>``.
    Execution errors are logged per document; the traceback is only
    logged when *quiet* is false.
    """
    queries = []
    _sql_bulk_insert(queries, mappings, namespace, documents)
    for querytree in queries:
        query = flatten_query_tree([querytree])
        with_stmts = []
        final_stmt = ''
        for subquery in query:
            keyvals = dict(zip(subquery['keys'], subquery['values']))
            # Split plain values from ForeignKey placeholders, which are
            # resolved from the parent subquery's RETURNING alias below.
            foreign_keys = {}
            values = {}
            for key in keyvals:
                val = keyvals[key]
                if isinstance(val, ForeignKey):
                    # keep only the column part of "table.column"
                    foreign_keys[key] = val.split('.')[1]
                else:
                    values[key] = val
            # Sorted key order keeps column lists and VALUES aligned.
            foreign_keys_sorted = sorted(foreign_keys.keys())
            values_sorted = sorted(values.keys())
            data_alias = '{0}_data_{1}'.format(subquery['collection'],
                                               subquery['idx'])
            rows_alias = '{0}_rows_{1}'.format(subquery['collection'],
                                               subquery['idx'])
            # Stored on the subquery so child subqueries can reference
            # this one's RETURNING rows as their parent alias.
            subquery['alias'] = {'data': data_alias, 'rows': rows_alias}
            # CTE holding this subquery's literal values.
            with_stmts.append(
                '{alias} ({columns}) AS (VALUES ({values}))'.format(
                    alias=data_alias,
                    columns=', '.join(values_sorted),
                    values=', '.join([values[key] for key in values_sorted])))
            keys = ', '.join(values_sorted + foreign_keys_sorted)
            projection = [
                '{0}.{1} AS {1}'.format(data_alias, key)
                for key in values_sorted
            ]
            aliases = [data_alias]
            if 'parent' in subquery:
                # Pull foreign-key values from the parent's RETURNING CTE.
                psubquery = query[subquery['parent']]
                parent_rows_alias = psubquery['alias']['rows']
                projection += [
                    '{0}.{1} AS {2}'.format(parent_rows_alias,
                                            foreign_keys[key], key)
                    for key in foreign_keys_sorted
                ]
                aliases.append(parent_rows_alias)
            projection = ', '.join(projection)
            aliases = ', '.join(aliases)
            if not subquery['last']:
                # Intermediate inserts become CTEs whose RETURNING pk
                # feeds the children.
                with_stmts.append(
                    '{alias} AS (INSERT INTO {table} ({columns}) SELECT {projection} FROM {aliases} RETURNING {pk})'
                    .format(alias=rows_alias,
                            table=subquery['collection'],
                            columns=keys,
                            projection=projection,
                            aliases=aliases,
                            pk=subquery['pk']))
            else:
                # The last subquery is the statement's top-level INSERT.
                final_stmt = 'INSERT INTO {table} ({columns}) SELECT {projection} FROM {aliases}'.format(
                    table=subquery['collection'],
                    columns=keys,
                    projection=projection,
                    aliases=aliases)
        sql = 'WITH {0} {1}'.format(', '.join(with_stmts), final_stmt)
        try:
            cursor.execute(sql)
        except psycopg2.Error as e:
            # Best-effort bulk load: report the failing document and
            # continue with the next query tree.
            LOG.error(
                u"Impossible to upsert document %s in namespace %s: %s\n%s",
                querytree['document']['mapped'][querytree['pk']],
                querytree['collection'], e, sql)
            if not quiet:
                LOG.error(u"Traceback:\n%s", traceback.format_exc())