Beispiel #1
0
 def importTableCsv(self, tableObj):
     '''Imports a table to Neo4j.

     Runs one "LOAD CSV ... CREATE" Cypher statement through
     self.cypher_exec for every file in tableObj.filesWritten. Tables for
     which isManyToMany() is true are skipped (only logged) when
     MANY_TO_MANY_AS_RELATION is set.
     '''
     if tableObj.isManyToMany() and MANY_TO_MANY_AS_RELATION:
         #Not necessary to import many-to-many tables. So don't.
         LOG.info("Skipping many-to-many table %s..." % tableObj.labelName)
         return
     #Standard table import
     LOG.info("Importing %s..." % tableObj.labelName)
     #Match column names with their respective import expression. The
     #string returned by impFunc is %-formatted with the column name.
     cols = ["%s: %s" % (name, col.impFunc("csvLine.%s") % name)
             for name, col in tableObj.importCols.items()]
     #str.join instead of string.join: the latter is gone on Python 3
     createClause = "CREATE (n:%s { %s})" % (tableObj.labelName,
                                             ",".join(cols))
     #Identical for every file, so it is built once outside the loop
     periodicCommitClause = "USING PERIODIC COMMIT %s " % self.periodicCommit
     for f in tableObj.filesWritten:
         importClause = "LOAD CSV WITH HEADERS FROM 'file:%s' AS csvLine " \
                        % getTargetFilename(f)
         self.cypher_exec(periodicCommitClause + importClause +
                          createClause)
Beispiel #2
0
 def createRelationsFk(self, fKey):
     '''Create relations on Neo4j, based on an sql foreign key.

     For every file in fKey.table.filesWritten, self.relStatementPat is
     filled in and the resulting statement is logged at debug level and
     run through self.cypher_exec.
     '''
     fkLabel = fKey.table.labelName
     pkLabel = fKey.refTable.labelName
     #Both nodes will be matched against their own primary keys
     #The pattern is like:
     # MATCH (referencing:{referencing primary key}),
     #       (referenced:{referenced primary key})
     #str.join is used instead of string.join, which is gone on Python 3.
     fkCols = ",".join(
         ["%s: %s" % (name, col.impFunc("csvLine.%s") % name)
          for name, col in fKey.table.pkCols.items()])
     #We need to match the name of the field on the primary key table
     #(taken from refCols) with the name of the field on the foreign key
     #table (taken from consCols); both are paired by position.
     pkCols = ",".join(
         ["%s: %s" % (refColName, consCol.impFunc("csvLine.%s") % consName)
          for (consName, consCol), refColName
          in zip(fKey.consCols.items(), fKey.refCols.keys())])
     relType = fKey.relType
     LOG.info("Foreign key to table %s..." % pkLabel)
     #Emit one statement per file written
     for filename in fKey.table.filesWritten:
         #The trailing "" fills the last slot of the pattern, which
         #manyToManyRelations-style callers use for extra columns
         #(presumably relationship properties; confirm in relStatementPat)
         statement = self.relStatementPat % (
             self.periodicCommit, getTargetFilename(filename), pkLabel,
             pkCols, fkLabel, fkCols, relType, "")
         LOG.debug(statement)
         self.cypher_exec(statement)
Beispiel #3
0
    def _setIndexedCols(self):
        """Work out which columns get a unique constraint or a plain index.

        Reads the unique constraints and indexes of self.tableName from
        self.sqlDb.inspector, adds the primary key columns, and stores the
        resulting column objects in self.uniqCols and self.idxCols.
        """
        uniqueConstraints = self.sqlDb.inspector.get_unique_constraints(
            self.tableName)
        indexes = self.sqlDb.inspector.get_indexes(self.tableName)
        # Only single-field constraints will be carried over as such;
        # multi-field ones end up among the columns to be indexed
        singleUnique = []
        compositeUnique = []
        for constraint in uniqueConstraints:
            names = constraint["column_names"]
            if len(names) == 1:
                singleUnique.append(names)
            else:
                compositeUnique.append(names)
        indexed = [index["column_names"] for index in indexes]
        indexed.extend(compositeUnique)
        # Don't forget primary key constraint: it follows the same
        # single-field / multi-field rule
        pkBucket = singleUnique if len(self.pkCols) == 1 else indexed
        pkBucket.append(self.pkCols.keys())

        uniqColNames = listUnique(listFlatten(singleUnique))
        # Remove unique constrained columns from columns to be indexed
        idxColNames = listSubtract(listUnique(listFlatten(indexed)),
                                   uniqColNames)
        self.uniqCols = [self.cols[name] for name in uniqColNames]
        idxColumns = [self.cols[name] for name in idxColNames]
        if REMOVE_REDUNDANT_FIELDS:
            # Redundant fields are excluded
            idxColumns = [col for col in idxColumns if not col.isRedundant()]
        self.idxCols = idxColumns
        uniqNames = str([col.name for col in self.uniqCols])
        LOG.debug("Unique constraints on table %s, columns %s" %
                  (self.tableName, uniqNames))
        idxNames = str([col.name for col in self.idxCols])
        LOG.debug("Indexes on table %s, columns %s" %
                  (self.tableName, idxNames))
Beispiel #4
0
    def _setIndexedCols(self):
        '''Work out which columns get a unique constraint or a plain index.

        Reads the unique constraints and indexes of self.tableName from
        self.sqlDb.inspector, adds the primary key columns, and stores the
        resulting column objects in self.uniqCols and self.idxCols.
        '''
        uniqueConstraints = self.sqlDb.inspector.get_unique_constraints(
            self.tableName)
        indexes = self.sqlDb.inspector.get_indexes(self.tableName)
        #Only single-field constraints will be carried over as such;
        #multi-field ones end up among the columns to be indexed
        singleUnique = []
        compositeUnique = []
        for constraint in uniqueConstraints:
            names = constraint['column_names']
            if len(names) == 1:
                singleUnique.append(names)
            else:
                compositeUnique.append(names)
        indexed = [index['column_names'] for index in indexes]
        indexed.extend(compositeUnique)
        #Don't forget primary key constraint: it follows the same
        #single-field / multi-field rule
        pkBucket = singleUnique if len(self.pkCols) == 1 else indexed
        pkBucket.append(self.pkCols.keys())

        uniqColNames = listUnique(listFlatten(singleUnique))
        #Remove unique constrained columns from columns to be indexed
        idxColNames = listSubtract(listUnique(listFlatten(indexed)),
                                   uniqColNames)
        self.uniqCols = [self.cols[name] for name in uniqColNames]
        idxColumns = [self.cols[name] for name in idxColNames]
        if REMOVE_REDUNDANT_FIELDS:
            #Redundant fields are excluded
            idxColumns = [col for col in idxColumns if not col.isRedundant()]
        self.idxCols = idxColumns
        uniqNames = str([col.name for col in self.uniqCols])
        LOG.debug("Unique constraints on table %s, columns %s" %
                  (self.tableName, uniqNames))
        idxNames = str([col.name for col in self.idxCols])
        LOG.debug("Indexes on table %s, columns %s" %
                  (self.tableName, idxNames))
Beispiel #5
0
 def importTableCsv(self, tableObj):
     '''Imports a table to Neo4j.

     Runs one "LOAD CSV ... CREATE" Cypher statement through
     self.cypher_exec for every file in tableObj.filesWritten. Tables for
     which isManyToMany() is true are skipped (only logged) when
     MANY_TO_MANY_AS_RELATION is set.
     '''
     if tableObj.isManyToMany() and MANY_TO_MANY_AS_RELATION:
         #Not necessary to import many-to-many tables. So don't.
         LOG.info("Skipping many-to-many table %s..." % tableObj.labelName)
         return
     #Standard table import
     LOG.info("Importing %s..." % tableObj.labelName)
     #Match column names with their respective import expression. The
     #string returned by impFunc is %-formatted with the column name.
     cols = ["%s: %s" % (name, col.impFunc("csvLine.%s") % name)
             for name, col in tableObj.importCols.items()]
     #str.join instead of string.join: the latter is gone on Python 3
     createClause = "CREATE (n:%s { %s})" % (tableObj.labelName,
                                             ",".join(cols))
     #Identical for every file, so it is built once outside the loop
     periodicCommitClause = "USING PERIODIC COMMIT %s " % self.periodicCommit
     for f in tableObj.filesWritten:
         importClause = "LOAD CSV WITH HEADERS FROM 'file:%s' AS csvLine " \
                        % getTargetFilename(f)
         self.cypher_exec(periodicCommitClause + importClause +
                          createClause)
Beispiel #6
0
 def createRelationsFk(self, fKey):
     '''Create relations on Neo4j, based on an sql foreign key.

     For every file in fKey.table.filesWritten, self.relStatementPat is
     filled in and the resulting statement is logged at debug level and
     run through self.cypher_exec.
     '''
     fkLabel = fKey.table.labelName
     pkLabel = fKey.refTable.labelName
     #Both nodes will be matched against their own primary keys
     #The pattern is like:
     # MATCH (referencing:{referencing primary key}),
     #       (referenced:{referenced primary key})
     #str.join is used instead of string.join, which is gone on Python 3.
     fkCols = ",".join(
         ["%s: %s" % (name, col.impFunc("csvLine.%s") % name)
          for name, col in fKey.table.pkCols.items()])
     #We need to match the name of the field on the primary key table
     #(taken from refCols) with the name of the field on the foreign key
     #table (taken from consCols); both are paired by position.
     pkCols = ",".join(
         ["%s: %s" % (refColName, consCol.impFunc("csvLine.%s") % consName)
          for (consName, consCol), refColName
          in zip(fKey.consCols.items(), fKey.refCols.keys())])
     relType = fKey.relType
     LOG.info("Foreign key to table %s..." % pkLabel)
     #Emit one statement per file written
     for filename in fKey.table.filesWritten:
         #The trailing "" fills the last slot of the pattern, which
         #manyToManyRelations-style callers use for extra columns
         #(presumably relationship properties; confirm in relStatementPat)
         statement = self.relStatementPat % (
             self.periodicCommit, getTargetFilename(filename), pkLabel,
             pkCols, fkLabel, fkCols, relType, "")
         LOG.debug(statement)
         self.cypher_exec(statement)
Beispiel #7
0
 def createIndexes(self, tableObj):
     '''Creates indexes on Neo4j.

     One "create index" statement is logged and executed per column in
     tableObj.idxCols.
     '''
     labelName = tableObj.labelName
     LOG.info("Creating indexes on %s..." % labelName)
     indexPat = "create index on :%s(%s)"
     for idxCol in tableObj.idxCols:
         cypher = indexPat % (labelName, idxCol.name)
         LOG.debug(cypher)
         self.cypher_exec(cypher)
Beispiel #8
0
 def createIndexes(self, tableObj):
     '''Creates indexes on Neo4j.

     One "create index" statement is logged and executed per column in
     tableObj.idxCols.
     '''
     labelName = tableObj.labelName
     LOG.info("Creating indexes on %s..." % labelName)
     indexPat = "create index on :%s(%s)"
     for idxCol in tableObj.idxCols:
         cypher = indexPat % (labelName, idxCol.name)
         LOG.debug(cypher)
         self.cypher_exec(cypher)
Beispiel #9
0
 def createConstraints(self, tableObj):
     '''Creates unique constraints on Neo4j.

     One "create constraint" statement is logged and executed per column
     in tableObj.uniqCols.
     '''
     labelName = tableObj.labelName
     LOG.info("Creating constraint on %s..." % labelName)
     #The line break and indentation inside the literal are part of the
     #statement text that gets logged and executed, so they stay as they are
     constraintPat = """create constraint on (n:%s)
         assert n.%s is unique"""
     for uniqCol in tableObj.uniqCols:
         cypher = constraintPat % (labelName, uniqCol.name)
         LOG.debug(cypher)
         self.cypher_exec(cypher)
Beispiel #10
0
 def createConstraints(self, tableObj):
     '''Creates unique constraints on Neo4j.

     One "create constraint" statement is logged and executed per column
     in tableObj.uniqCols.
     '''
     labelName = tableObj.labelName
     LOG.info("Creating constraint on %s..." % labelName)
     #The line break and indentation inside the literal are part of the
     #statement text that gets logged and executed, so they stay as they are
     constraintPat = """create constraint on (n:%s)
         assert n.%s is unique"""
     for uniqCol in tableObj.uniqCols:
         cypher = constraintPat % (labelName, uniqCol.name)
         LOG.debug(cypher)
         self.cypher_exec(cypher)
Beispiel #11
0
 def manyToManyRelations(self, tableObj):
     '''Transfers a many-to-many table as relationships in Neo4j.

     For every file in tableObj.filesWritten, self.relStatementPat is
     filled in and the resulting statement is logged at debug level and
     run through self.cypher_exec.

     Raises AssertionError if tableObj does not have exactly two foreign
     keys or has no relType attribute.
     '''
     def matchProps(fKey):
         #Builds the "name: expr" list for the node referenced by fKey:
         #names come from refCols, values import the CSV field named after
         #the consCols column at the same position.
         #str.join replaces string.join, which is gone on Python 3.
         return ",".join(
             ["%s: %s" % (refColName, col.impFunc("csvLine.%s") % colName)
              for (colName, col), refColName
              in zip(fKey.consCols.items(), fKey.refCols.keys())])

     #One can never know what's going to go wrong...
     assert len(tableObj.fKeys) == 2, \
         "many-to-many table must have exactly two foreign keys"
     #We need to know names of fields on two foreign key tables
     src = tableObj.fKeys[0]
     dest = tableObj.fKeys[1]
     pk1Label = src.refTable.labelName
     pk2Label = dest.refTable.labelName
     pk1Cols = matchProps(src)
     pk2Cols = matchProps(dest)
     assert hasattr(tableObj, 'relType'), \
         "many-to-many table has no relType"
     relType = tableObj.relType
     LOG.info("Importing many-to-many table %s as relationships..." %
              tableObj.tableName)
     #The table's own import columns go into the last slot of the pattern
     #(presumably as relationship properties; confirm in relStatementPat)
     cols = ["%s: %s" % (name, col.impFunc("csvLine.%s") % name)
             for name, col in tableObj.importCols.items()]
     colClause = ("{%s}" % ",".join(cols)) if cols else ""
     for filename in tableObj.filesWritten:
         #Note the order: the second foreign key's node comes first
         statement = self.relStatementPat % (
             self.periodicCommit, getTargetFilename(filename), pk2Label,
             pk2Cols, pk1Label, pk1Cols, relType, colClause)
         LOG.debug(statement)
         self.cypher_exec(statement)
Beispiel #12
0
 def manyToManyRelations(self, tableObj):
     '''Transfers a many-to-many table as relationships in Neo4j.

     For every file in tableObj.filesWritten, self.relStatementPat is
     filled in and the resulting statement is logged at debug level and
     run through self.cypher_exec.

     Raises AssertionError if tableObj does not have exactly two foreign
     keys or has no relType attribute.
     '''
     def matchProps(fKey):
         #Builds the "name: expr" list for the node referenced by fKey:
         #names come from refCols, values import the CSV field named after
         #the consCols column at the same position.
         #str.join replaces string.join, which is gone on Python 3.
         return ",".join(
             ["%s: %s" % (refColName, col.impFunc("csvLine.%s") % colName)
              for (colName, col), refColName
              in zip(fKey.consCols.items(), fKey.refCols.keys())])

     #One can never know what's going to go wrong...
     assert len(tableObj.fKeys) == 2, \
         "many-to-many table must have exactly two foreign keys"
     #We need to know names of fields on two foreign key tables
     src = tableObj.fKeys[0]
     dest = tableObj.fKeys[1]
     pk1Label = src.refTable.labelName
     pk2Label = dest.refTable.labelName
     pk1Cols = matchProps(src)
     pk2Cols = matchProps(dest)
     assert hasattr(tableObj, 'relType'), \
         "many-to-many table has no relType"
     relType = tableObj.relType
     LOG.info("Importing many-to-many table %s as relationships..." %
              tableObj.tableName)
     #The table's own import columns go into the last slot of the pattern
     #(presumably as relationship properties; confirm in relStatementPat)
     cols = ["%s: %s" % (name, col.impFunc("csvLine.%s") % name)
             for name, col in tableObj.importCols.items()]
     colClause = ("{%s}" % ",".join(cols)) if cols else ""
     for filename in tableObj.filesWritten:
         #Note the order: the second foreign key's node comes first
         statement = self.relStatementPat % (
             self.periodicCommit, getTargetFilename(filename), pk2Label,
             pk2Cols, pk1Label, pk1Cols, relType, colClause)
         LOG.debug(statement)
         self.cypher_exec(statement)
Beispiel #13
0
 def export(self):
     '''Export all tables.

     Walks self.tables, logging each table name before calling export()
     on the matching table object.
     '''
     for name, table in self.tables.items():
         message = "Exporting %s..." % name
         LOG.info(message)
         table.export()
Beispiel #14
0
 def export(self):
     """Export all tables.

     Walks self.tables, logging each table name before calling export()
     on the matching table object.
     """
     for name, table in self.tables.items():
         message = "Exporting %s..." % name
         LOG.info(message)
         table.export()