Ejemplo n.º 1
0
    def create_relationships_query(self, multiple=False):
        """Assemble the relationships export query text.

        One query is generated with ``generate_iter_query`` for every
        ``(columns, joins)`` pair found in the relation groups yielded by
        ``self.schema.fetch_all_relations``; empty groups are skipped.

        :param multiple: when true, every generated query is wrapped on its
            own by ``self.generate_tsvfile_output_query`` and targets a
            numbered file name derived from ``self.relations_filename``
            (each ``.csv`` replaced by ``.0001.csv``, ``.0002.csv``, ...),
            and the wrapped queries are joined with newlines. When false,
            the queries are combined with ``generate_union_query`` and
            wrapped once, targeting ``self.relations_filename``.
        :returns: the newline-joined wrapped queries (``multiple``) or the
            single value returned by ``self.generate_tsvfile_output_query``.
        """
        # Only the single-file export passes the relation properties along.
        extra_args = () if multiple else (self.all_relations_properties,)

        rels_queries = [
            generate_iter_query(columns, joins)
            for relations in self.schema.fetch_all_relations(
                self.cfg, self.db, *extra_args)
            if relations
            for columns, joins in relations
        ]

        if not multiple:
            return self.generate_tsvfile_output_query(
                generate_union_query(rels_queries),
                self.relations_filename)

        numbered_outputs = []
        for index, query in enumerate(rels_queries, start=1):
            # Files are numbered from 1, zero-padded to four digits.
            target = self.relations_filename.replace(
                '.csv', '.%04d.csv' % index)
            numbered_outputs.append(
                self.generate_tsvfile_output_query(query, target))
        return "\n".join(numbered_outputs)
Ejemplo n.º 2
0
    def create_nodes_query(self, multiple=False):
        """Assemble the nodes export query text.

        One query is generated with ``generate_iter_query`` (limited by
        ``self.entity_limit``) for every ``(columns, joins)`` pair yielded by
        ``self.schema.fetch_all`` where both members are truthy.

        :param multiple: when true, ``fetch_all`` is given an empty property
            list, and each query gets ``ORDER BY pk`` appended and is wrapped
            separately, targeting a numbered file name derived from
            ``self.nodes_filename``; the results are joined with newlines.
            When false, ``self.all_properties`` is passed to ``fetch_all``
            and the union of all queries, ordered by ``kind, pk``, is
            wrapped once, targeting ``self.nodes_filename``.
        :returns: the newline-joined wrapped queries (``multiple``) or the
            single value returned by ``self.generate_tsvfile_output_query``.
        """
        requested_properties = [] if multiple else self.all_properties

        node_queries = [
            generate_iter_query(columns, joins, limit=self.entity_limit)
            for columns, joins in self.schema.fetch_all(
                self.cfg, self.db, requested_properties)
            if columns and joins
        ]

        headers = None
        if self.nodes_header_override:
            # Identity mapping for every property name, then apply overrides.
            headers = {name: name for name, _maptype in self.all_properties}
            headers.update(self.nodes_header_override)

        if not multiple:
            union_sql = generate_union_query(node_queries)
            ordered_union_query = """\n%s\nORDER BY kind, pk\n""" % union_sql
            return self.generate_tsvfile_output_query(
                ordered_union_query, self.nodes_filename, headers)

        numbered_outputs = []
        for index, query in enumerate(node_queries, start=1):
            ordered_query = """\n%s\nORDER BY pk\n""" % query
            # Files are numbered from 1, zero-padded to four digits.
            target = self.nodes_filename.replace('.csv', '.%04d.csv' % index)
            numbered_outputs.append(
                self.generate_tsvfile_output_query(
                    ordered_query, target, headers))
        return "\n".join(numbered_outputs)
Ejemplo n.º 3
0
    def create_relationships_query(self, multiple=False):
        """Return the query text used to export relationships.

        Relation groups come from ``self.schema.fetch_all_relations``; each
        ``(columns, joins)`` pair in a non-empty group is turned into a query
        by ``generate_iter_query``.

        :param multiple: if true, the relation properties are not passed to
            ``fetch_all_relations`` and every query is wrapped individually
            by ``self.generate_tsvfile_output_query`` with a numbered variant
            of ``self.relations_filename`` (``.csv`` -> ``.NNNN.csv``,
            counting from 1); the wrapped queries are joined with newlines.
            If false, ``self.all_relations_properties`` is passed along and
            the union of all queries is wrapped once.
        :returns: the joined string (``multiple``) or the single wrapped
            union query.
        """
        if multiple:
            relation_groups = self.schema.fetch_all_relations(
                self.cfg, self.db)
        else:
            relation_groups = self.schema.fetch_all_relations(
                self.cfg, self.db, self.all_relations_properties)

        rels_queries = []
        for group in relation_groups:
            # A falsy group contributes nothing.
            for columns, joins in group or ():
                rels_queries.append(generate_iter_query(columns, joins))

        if multiple:
            wrapped = [
                self.generate_tsvfile_output_query(
                    query,
                    self.relations_filename.replace(
                        '.csv', '.%04d.csv' % number))
                for number, query in enumerate(rels_queries, start=1)
            ]
            return "\n".join(wrapped)

        return self.generate_tsvfile_output_query(
            generate_union_query(rels_queries), self.relations_filename)
Ejemplo n.º 4
0
    def create_nodes_query(self, multiple=False):
        """Return the query text used to export nodes.

        Each ``(columns, joins)`` pair from ``self.schema.fetch_all`` with
        both members truthy becomes a query via ``generate_iter_query``,
        called with ``limit=self.entity_limit`` and ``order_by='pk'``.

        :param multiple: if true, ``fetch_all`` receives an empty property
            list and each query is wrapped separately (with ``ORDER BY pk``
            appended) for a numbered variant of ``self.nodes_filename``; the
            wrapped queries are joined with newlines. If false,
            ``self.all_properties`` is requested and the union of all
            queries, ordered by ``kind, pk``, is wrapped once.
        :returns: the joined string (``multiple``) or the single wrapped
            union query.
        """
        wanted_properties = [] if multiple else self.all_properties

        node_queries = []
        for columns, joins in self.schema.fetch_all(
                self.cfg, self.db, wanted_properties):
            if not (columns and joins):
                continue
            node_queries.append(
                generate_iter_query(
                    columns, joins, limit=self.entity_limit, order_by='pk'))

        headers = None
        if self.nodes_header_override:
            # Start from a 1-to-1 name map, then apply the overrides.
            headers = dict(
                (name, name) for name, _maptype in self.all_properties)
            headers.update(self.nodes_header_override)

        if not multiple:
            union_sql = generate_union_query(node_queries)
            ordered_union_query = """\n%s\nORDER BY kind, pk\n""" % union_sql
            return self.generate_tsvfile_output_query(
                ordered_union_query, self.nodes_filename, headers)

        # NOTE(review): the queries are already built with order_by='pk' and
        # an ORDER BY pk clause is appended here as well -- confirm that
        # generate_iter_query's output composes with the extra clause.
        outputs = []
        for number, node_query in enumerate(node_queries, start=1):
            numbered_name = self.nodes_filename.replace(
                '.csv', '.%04d.csv' % number)
            outputs.append(
                self.generate_tsvfile_output_query(
                    """\n%s\nORDER BY pk\n""" % node_query,
                    numbered_name,
                    headers))
        return "\n".join(outputs)
Ejemplo n.º 5
0
    def create_mapping_table_query(self, multiple=False):
        print """
-- Create the mapping table
-- between (entity, pk) tuples and incrementing node IDs
"""
        node_queries = []
        for columns, joins in self.schema.fetch_all(self.cfg, self.db,
                            [(n,t) for n, t in self.all_properties if n in ('kind', 'pk')]):
            if columns and joins:
                node_queries.append(generate_iter_query(columns, joins,
                    limit=self.entity_limit))

        if multiple:

            query = """
CREATE TEMPORARY TABLE entity_mapping
(
    node_id             SERIAL,
    entity              TEXT,
    pk                  BIGINT
);
"""

            insert_entity_query = """
INSERT INTO entity_mapping
    (entity, pk)
%s
ORDER BY pk;\n"""
            for q in node_queries:
                query += insert_entity_query % indent(q, '    ')

            query += """-- create index to speedup lookups
CREATE INDEX ON entity_mapping (entity, pk);

ANALYZE entity_mapping;
"""
            return query

        else:

            mapping_query = """
SELECT
    kind AS entity,
    pk,
    row_number() OVER (ORDER BY kind, pk) as node_id
FROM
(
%s
)
AS entity_union \n""" % indent(generate_union_query(node_queries), '    ')

            temp_mapping_table = """
DROP TABLE IF EXISTS entity_mapping;

CREATE TEMPORARY TABLE entity_mapping AS
(
%s
);

-- create index to speedup lookups
CREATE INDEX ON entity_mapping (entity, pk);

ANALYZE entity_mapping;

""" % indent(mapping_query, '    ')

            return temp_mapping_table
Ejemplo n.º 6
0
    def create_mapping_table_query(self, multiple=False):
        print("""
-- Create the mapping table
-- between (entity, pk) tuples and incrementing node IDs
""")
        node_queries = []
        for columns, joins in self.schema.fetch_all(
                self.cfg, self.db,
            [(n, t) for n, t in self.all_properties if n in ('kind', 'pk')]):
            if columns and joins:
                node_queries.append(
                    generate_iter_query(columns,
                                        joins,
                                        limit=self.entity_limit,
                                        order_by='pk'))

        if multiple:

            query = """
CREATE TEMPORARY TABLE entity_mapping
(
    node_id             SERIAL,
    entity              TEXT,
    pk                  BIGINT
);
"""

            insert_entity_query = """
INSERT INTO entity_mapping
    (entity, pk)
%s
ORDER BY pk;\n"""
            for q in node_queries:
                query += insert_entity_query % indent(q, '    ')

            query += """-- create index to speedup lookups
CREATE INDEX ON entity_mapping (entity, pk);

ANALYZE entity_mapping;
"""
            return query

        else:

            mapping_query = """
SELECT
    kind AS entity,
    pk,
    row_number() OVER (ORDER BY kind, pk) as node_id
FROM
(
%s
)
AS entity_union \n""" % indent(generate_union_query(node_queries), '    ')

            temp_mapping_table = """
DROP TABLE IF EXISTS entity_mapping;

CREATE TEMPORARY TABLE entity_mapping AS
(
%s
);

-- create index to speedup lookups
CREATE INDEX ON entity_mapping (entity, pk);

ANALYZE entity_mapping;

""" % indent(mapping_query, '    ')

            return temp_mapping_table