def generate_tsvfile_output_query(cls, query, output_filename, modify_headers=None):
    """Wrap ``query`` in a PostgreSQL ``COPY ... TO`` statement producing a TSV file.

    Args:
        cls: Object exposing ``output_encoding``; that value is interpolated
            into the ``ENCODING`` clause.
        query: SQL text of the query whose result is exported.
        output_filename: Path placed verbatim between single quotes after
            ``TO``. It is not escaped here, so it must not contain an
            unescaped single quote.
        modify_headers: Optional mapping that selects/renames output columns.
            When given (and non-empty), ``query`` is wrapped in a sub-select
            aliased ``wrapped`` and only the columns described here are
            selected:

            * ``str`` key with ``str`` value: ``wrapped.<key> AS <value>``;
              a ``None`` value leaves the column out.
            * ``tuple`` key with ``(func, alias)`` value: ``func`` is called
              with one ``wrapped.<col>::text`` expression per column in the
              key and its return value is selected ``AS alias``.

            Keys of any other type are ignored.

    Returns:
        The complete COPY statement as a string.
    """
    if modify_headers:
        select_lines = []
        for incols, outcols in modify_headers.items():
            if isinstance(incols, str):
                # simple column renaming; a None alias drops the column
                if outcols is not None:
                    select_lines.append("wrapped.%s AS %s" % (incols, outcols))
            elif isinstance(incols, tuple):
                # merging columns
                infunc, outname = outcols
                merged = infunc(*["wrapped.%s::text" % c for c in incols])
                select_lines.append("%s AS %s" % (merged, outname))

        # Explicit "\n" keeps every SQL clause on its own line, so a
        # trailing "--" comment in `query` cannot swallow ") AS wrapped".
        query = (
            "\n"
            "SELECT\n"
            "%(fields)s\n"
            "FROM (\n"
            "%(query)s\n"
            ") AS wrapped\n"
        ) % dict(
            query=indent(query, ' '),
            fields=indent(",\n".join(select_lines), ' '),
        )

    return (
        "\n"
        "COPY(\n"
        "%(query)s\n"
        ")\n"
        "TO '%(filename)s'\n"
        "CSV HEADER DELIMITER E'\\t' ENCODING '%(encoding)s';\n"
    ) % dict(
        query=indent(query, ' '),
        filename=output_filename,
        encoding=cls.output_encoding,
    )
def generate_tsvfile_output_query(cls, query, output_filename, modify_headers=None):
    """Wrap ``query`` in a PostgreSQL ``COPY ... TO`` statement producing a TSV file.

    Args:
        cls: Unused by this implementation.
        query: SQL text of the query whose result is exported.
        output_filename: Path placed verbatim between single quotes after
            ``TO``. It is not escaped here, so it must not contain an
            unescaped single quote.
        modify_headers: Optional mapping ``{input_column: output_alias}``.
            When given (and non-empty), ``query`` is wrapped in a sub-select
            aliased ``wrapped`` and exactly the listed columns are selected
            as ``wrapped.<input_column> AS <output_alias>``.

    Returns:
        The complete COPY statement as a string.
    """
    if modify_headers:
        # .items() exists on both Python 2 and 3; .iteritems() is Python 2 only.
        select_lines = ",\n".join(
            "wrapped.%s AS %s" % (incol, outcol)
            for incol, outcol in modify_headers.items()
        )
        # Explicit "\n" keeps every SQL clause on its own line, so a
        # trailing "--" comment in `query` cannot swallow ") AS wrapped".
        query = (
            "\n"
            "SELECT\n"
            "%(fields)s\n"
            "FROM (\n"
            "%(query)s\n"
            ") AS wrapped\n"
        ) % dict(
            query=indent(query, ' '),
            fields=indent(select_lines, ' '),
        )

    return (
        "\n"
        "COPY(\n"
        "%(query)s\n"
        ")\n"
        "TO '%(filename)s'\n"
        "CSV HEADER DELIMITER E'\\t';\n"
    ) % dict(
        query=indent(query, ' '),
        filename=output_filename,
    )
def generate_tsvfile_output_query(cls, query, output_filename, modify_headers=None):
    """Build a PostgreSQL ``COPY ... TO`` statement exporting ``query`` as TSV.

    Args:
        cls: Object exposing ``output_encoding``; that value is interpolated
            into the ``ENCODING`` clause.
        query: SQL text of the query whose result is exported.
        output_filename: Path placed verbatim between single quotes after
            ``TO``. It is not escaped here, so it must not contain an
            unescaped single quote.
        modify_headers: Optional mapping that selects/renames output columns.
            When given (and non-empty), ``query`` is wrapped in a sub-select
            aliased ``wrapped`` and only the columns described here are
            selected:

            * ``str`` key with ``str`` value: ``wrapped.<key> AS <value>``;
              a ``None`` value leaves the column out.
            * ``tuple`` key with ``(func, alias)`` value: ``func`` is called
              with one ``wrapped.<col>::text`` expression per column in the
              key and its return value is selected ``AS alias``.

            Keys of any other type are ignored.

    Returns:
        The complete COPY statement as a string.
    """
    if modify_headers:
        fields = []
        for incols, outcols in modify_headers.items():
            if isinstance(incols, str):
                # simple column renaming; a None alias drops the column
                if outcols is not None:
                    fields.append("wrapped.%s AS %s" % (incols, outcols))
            elif isinstance(incols, tuple):
                # merging columns
                infunc, outname = outcols
                expression = infunc(*["wrapped.%s::text" % c for c in incols])
                fields.append("%s AS %s" % (expression, outname))

        # Explicit "\n" keeps every SQL clause on its own line, so a
        # trailing "--" comment in `query` cannot swallow ") AS wrapped".
        query = (
            "\n"
            "SELECT\n"
            "%(fields)s\n"
            "FROM (\n"
            "%(query)s\n"
            ") AS wrapped\n"
        ) % dict(
            query=indent(query, ' '),
            fields=indent(",\n".join(fields), ' '),
        )

    return (
        "\n"
        "COPY(\n"
        "%(query)s\n"
        ")\n"
        "TO '%(filename)s'\n"
        "CSV HEADER DELIMITER E'\\t' ENCODING '%(encoding)s';\n"
    ) % dict(
        query=indent(query, ' '),
        filename=output_filename,
        encoding=cls.output_encoding,
    )
def create_mapping_table_query(self, multiple=False):
    """Build the SQL script that creates the temporary ``entity_mapping`` table.

    The table maps ``(entity, pk)`` pairs to incrementing node IDs. As a
    side effect, a short SQL comment header is printed to stdout.

    Args:
        multiple: When true, the table is created explicitly (with a
            ``node_id SERIAL`` column) and one ``INSERT ... ORDER BY pk``
            statement is emitted per node query. When false, a single
            ``CREATE TEMPORARY TABLE ... AS`` is emitted over the union of
            all node queries, numbering rows with ``row_number()`` ordered
            by ``(kind, pk)``.

    Returns:
        The SQL script as a string.
    """
    # A single parenthesised string prints the same under the Python 2
    # print statement and the Python 3 print function.
    print(
        "\n"
        "-- Create the mapping table\n"
        "-- between (entity, pk) tuples and incrementing node IDs\n"
    )

    # Only the 'kind' and 'pk' properties are needed to build the mapping.
    key_properties = [(n, t) for n, t in self.all_properties if n in ('kind', 'pk')]
    node_queries = [
        generate_iter_query(columns, joins, limit=self.entity_limit)
        for columns, joins in self.schema.fetch_all(self.cfg, self.db, key_properties)
        if columns and joins
    ]

    # The "--" comment must end with a newline, otherwise it would comment
    # out the CREATE INDEX / ANALYZE statements that follow it.
    index_statements = (
        "-- create index to speedup lookups\n"
        "CREATE INDEX ON entity_mapping (entity, pk);\n"
        "ANALYZE entity_mapping;\n"
    )

    if multiple:
        create_table = (
            "\n"
            "CREATE TEMPORARY TABLE entity_mapping\n"
            "(\n"
            "    node_id SERIAL,\n"
            "    entity TEXT,\n"
            "    pk BIGINT\n"
            ");\n"
        )
        insert_entity_query = (
            "\n"
            "INSERT INTO entity_mapping (entity, pk)\n"
            "%s\n"
            "ORDER BY pk;\n"
        )
        inserts = "".join(
            insert_entity_query % indent(q, ' ') for q in node_queries
        )
        return create_table + inserts + index_statements

    mapping_query = (
        "\n"
        "SELECT\n"
        "    kind AS entity,\n"
        "    pk,\n"
        "    row_number() OVER (ORDER BY kind, pk) as node_id\n"
        "FROM\n"
        "(\n"
        "%s\n"
        ") AS entity_union \n"
    ) % indent(generate_union_query(node_queries), ' ')

    return (
        "\n"
        "DROP TABLE IF EXISTS entity_mapping;\n"
        "CREATE TEMPORARY TABLE entity_mapping AS\n"
        "(\n"
        "%s\n"
        ");\n"
        + index_statements
    ) % indent(mapping_query, ' ')
def create_mapping_table_query(self, multiple=False):
    """Build the SQL script that creates the temporary ``entity_mapping`` table.

    The table maps ``(entity, pk)`` pairs to incrementing node IDs. As a
    side effect, a short SQL comment header is printed to stdout.

    Args:
        multiple: When true, the table is created explicitly (with a
            ``node_id SERIAL`` column) and one ``INSERT ... ORDER BY pk``
            statement is emitted per node query. When false, a single
            ``CREATE TEMPORARY TABLE ... AS`` is emitted over the union of
            all node queries, numbering rows with ``row_number()`` ordered
            by ``(kind, pk)``.

    Returns:
        The SQL script as a string.
    """
    print(
        "\n"
        "-- Create the mapping table\n"
        "-- between (entity, pk) tuples and incrementing node IDs\n"
    )

    # Only the 'kind' and 'pk' properties are needed to build the mapping.
    key_properties = [(n, t) for n, t in self.all_properties if n in ('kind', 'pk')]
    # NOTE(review): each node query is requested with order_by='pk' while the
    # INSERT template below appends its own "ORDER BY pk" - confirm against
    # generate_iter_query that this cannot yield a duplicated ORDER BY clause.
    node_queries = [
        generate_iter_query(columns, joins, limit=self.entity_limit, order_by='pk')
        for columns, joins in self.schema.fetch_all(self.cfg, self.db, key_properties)
        if columns and joins
    ]

    # The "--" comment must end with a newline, otherwise it would comment
    # out the CREATE INDEX / ANALYZE statements that follow it.
    index_statements = (
        "-- create index to speedup lookups\n"
        "CREATE INDEX ON entity_mapping (entity, pk);\n"
        "ANALYZE entity_mapping;\n"
    )

    if multiple:
        create_table = (
            "\n"
            "CREATE TEMPORARY TABLE entity_mapping\n"
            "(\n"
            "    node_id SERIAL,\n"
            "    entity TEXT,\n"
            "    pk BIGINT\n"
            ");\n"
        )
        insert_entity_query = (
            "\n"
            "INSERT INTO entity_mapping (entity, pk)\n"
            "%s\n"
            "ORDER BY pk;\n"
        )
        inserts = "".join(
            insert_entity_query % indent(q, ' ') for q in node_queries
        )
        return create_table + inserts + index_statements

    mapping_query = (
        "\n"
        "SELECT\n"
        "    kind AS entity,\n"
        "    pk,\n"
        "    row_number() OVER (ORDER BY kind, pk) as node_id\n"
        "FROM\n"
        "(\n"
        "%s\n"
        ") AS entity_union \n"
    ) % indent(generate_union_query(node_queries), ' ')

    return (
        "\n"
        "DROP TABLE IF EXISTS entity_mapping;\n"
        "CREATE TEMPORARY TABLE entity_mapping AS\n"
        "(\n"
        "%s\n"
        ");\n"
        + index_statements
    ) % indent(mapping_query, ' ')