def importTableCsv(self, tableObj):
    '''Import a table into Neo4j using LOAD CSV, one query per CSV file.

    Builds a CREATE clause from the table's import columns, then issues a
    periodic-commit LOAD CSV query for each file the table wrote.
    Many-to-many tables are skipped when MANY_TO_MANY_AS_RELATION is set,
    because they are transferred as relationships instead.'''
    if not (tableObj.isManyToMany() and MANY_TO_MANY_AS_RELATION):
        # Standard table import
        LOG.info("Importing %s..." % tableObj.labelName)
        # Match column names with their respective import expression
        colnames = [x for x in tableObj.importCols.keys()]
        colImpExpr = [col.impFunc("csvLine.%s") % name
                      for name, col in tableObj.importCols.items()]
        cols = ["%s: %s" % x for x in zip(colnames, colImpExpr)]
        # str.join replaces the deprecated string.join (gone in Python 3)
        colClause = ','.join(cols)
        createClause = "CREATE (n:%s { %s})" % (tableObj.labelName,
                                                colClause)
        for f in tableObj.filesWritten:
            periodicCommitClause = "USING PERIODIC COMMIT %s " \
                % self.periodicCommit
            targetFileName = getTargetFilename(f)
            importClause = "LOAD CSV WITH HEADERS " + \
                "FROM 'file:%s' AS csvLine " % targetFileName
            cypherQuery = periodicCommitClause + importClause + \
                createClause
            self.cypher_exec(cypherQuery)
    else:
        # Not necessary to import many-to-many tables. So don't.
        LOG.info("Skipping many-to-many table %s..." % tableObj.labelName)
def createRelationsFk(self, fKey):
    '''Create relations on Neo4j, based on an sql foreign key.

    Matches referencing and referenced nodes by their key fields and
    emits one relationship-creation statement per CSV file written for
    the referencing table.'''
    fkLabel = fKey.table.labelName
    pkLabel = fKey.refTable.labelName
    # Both nodes will be matched against their own primary keys
    # The pattern is like:
    #   MATCH (referencing:{referencing primary key}),
    #         (referenced:{referenced primary key})
    fkColsImportExpr = [(name, col.impFunc("csvLine.%s") % name)
                        for name, col in fKey.table.pkCols.items()]
    # str.join replaces the deprecated string.join (gone in Python 3)
    fkCols = ",".join(["%s: %s" % tup for tup in fkColsImportExpr])
    # We need to match the name of the field on the primary key table
    # with the name of the field on the foreign key table
    pkColsImportExpr = [(fkColName, pkCol.impFunc("csvLine.%s") % pkName)
                        for (pkName, pkCol), fkColName in
                        zip(fKey.consCols.items(), fKey.refCols.keys())]
    pkCols = ",".join(["%s: %s" % tup for tup in pkColsImportExpr])
    relType = fKey.relType
    LOG.info("Foreign key to table %s..." % pkLabel)
    # Emit one statement per file written
    for filename in fKey.table.filesWritten:
        targetFileName = getTargetFilename(filename)
        statement = self.relStatementPat % (self.periodicCommit,
                                            targetFileName,
                                            pkLabel, pkCols,
                                            fkLabel, fkCols,
                                            relType, "")
        LOG.debug(statement)
        self.cypher_exec(statement)
def _setIndexedCols(self):
    """Decides which fields must be indexed, based on key and index
    information.

    Populates self.uniqCols (single-column unique constraints, carried
    over as uniqueness constraints) and self.idxCols (everything else
    that deserves a plain index)."""
    # Find all columns that are indexed or unique indexed
    uniq = self.sqlDb.inspector.get_unique_constraints(self.tableName)
    idx = self.sqlDb.inspector.get_indexes(self.tableName)
    # Only single-field constraints will be carried over as such
    uniqCols = [x["column_names"] for x in uniq
                if len(x["column_names"]) == 1]
    idxCols = [x["column_names"] for x in idx]
    idxCols.extend([x["column_names"] for x in uniq
                    if len(x["column_names"]) != 1])
    # Don't forget primary key constraint.  list() guards against
    # Python 3, where dict.keys() returns a view rather than a list;
    # under Python 2 it is just a cheap copy.
    if len(self.pkCols) == 1:
        uniqCols.append(list(self.pkCols.keys()))
    else:
        idxCols.append(list(self.pkCols.keys()))
    uniqColNames = listUnique(listFlatten(uniqCols))
    # Remove unique constrained columns from columns to be indexed
    idxColNames = listSubtract(listUnique(listFlatten(idxCols)),
                               uniqColNames)
    self.uniqCols = [self.cols[x] for x in uniqColNames]
    # Redundant fields are excluded
    if REMOVE_REDUNDANT_FIELDS:
        self.idxCols = [self.cols[x] for x in idxColNames
                        if not self.cols[x].isRedundant()]
    else:
        self.idxCols = [self.cols[x] for x in idxColNames]
    LOG.debug("Unique constraints on table %s, columns %s" %
              (self.tableName, str([x.name for x in self.uniqCols])))
    LOG.debug("Indexes on table %s, columns %s" %
              (self.tableName, str([x.name for x in self.idxCols])))
def _setIndexedCols(self):
    '''Decides which fields must be indexed, based on key and index
    information.

    Fills in self.uniqCols (single-column unique constraints) and
    self.idxCols (remaining indexed columns).'''
    # Find all columns that are indexed or unique indexed
    uniq = self.sqlDb.inspector.get_unique_constraints(self.tableName)
    idx = self.sqlDb.inspector.get_indexes(self.tableName)
    # Only single-field constraints will be carried over as such
    uniqCols = [x['column_names'] for x in uniq
                if len(x['column_names']) == 1]
    idxCols = [x['column_names'] for x in idx]
    idxCols.extend([x['column_names'] for x in uniq
                    if len(x['column_names']) != 1])
    # Don't forget primary key constraint.  Wrap keys() in list() so a
    # Python 3 dict view is not appended where a list is expected
    # (no-op copy under Python 2).
    if len(self.pkCols) == 1:
        uniqCols.append(list(self.pkCols.keys()))
    else:
        idxCols.append(list(self.pkCols.keys()))
    uniqColNames = listUnique(listFlatten(uniqCols))
    # Remove unique constrained columns from columns to be indexed
    idxColNames = listSubtract(listUnique(listFlatten(idxCols)),
                               uniqColNames)
    self.uniqCols = [self.cols[x] for x in uniqColNames]
    # Redundant fields are excluded
    if REMOVE_REDUNDANT_FIELDS:
        self.idxCols = [self.cols[x] for x in idxColNames
                        if not self.cols[x].isRedundant()]
    else:
        self.idxCols = [self.cols[x] for x in idxColNames]
    LOG.debug("Unique constraints on table %s, columns %s" %
              (self.tableName, str([x.name for x in self.uniqCols])))
    LOG.debug("Indexes on table %s, columns %s" %
              (self.tableName, str([x.name for x in self.idxCols])))
def importTableCsv(self, tableObj):
    '''Import a table into Neo4j via LOAD CSV.

    One periodic-commit LOAD CSV query is issued per CSV file the table
    wrote.  Many-to-many tables are skipped when MANY_TO_MANY_AS_RELATION
    is set, since those become relationships instead of nodes.'''
    if not (tableObj.isManyToMany() and MANY_TO_MANY_AS_RELATION):
        # Standard table import
        LOG.info("Importing %s..." % tableObj.labelName)
        # Match column names with their respective import expression
        colnames = [x for x in tableObj.importCols.keys()]
        colImpExpr = [col.impFunc("csvLine.%s") % name
                      for name, col in tableObj.importCols.items()]
        cols = ["%s: %s" % x for x in zip(colnames, colImpExpr)]
        # str.join replaces the deprecated string.join (gone in Python 3)
        colClause = ','.join(cols)
        createClause = "CREATE (n:%s { %s})" % (tableObj.labelName,
                                                colClause)
        for f in tableObj.filesWritten:
            periodicCommitClause = "USING PERIODIC COMMIT %s " \
                % self.periodicCommit
            targetFileName = getTargetFilename(f)
            importClause = "LOAD CSV WITH HEADERS " + \
                "FROM 'file:%s' AS csvLine " % targetFileName
            cypherQuery = periodicCommitClause + importClause + \
                createClause
            self.cypher_exec(cypherQuery)
    else:
        # Not necessary to import many-to-many tables. So don't.
        LOG.info("Skipping many-to-many table %s..." % tableObj.labelName)
def createRelationsFk(self, fKey):
    '''Create relations on Neo4j, based on an sql foreign key.

    One relationship-creation statement is emitted per CSV file written
    for the referencing table.'''
    fkLabel = fKey.table.labelName
    pkLabel = fKey.refTable.labelName
    # Both nodes will be matched against their own primary keys
    # The pattern is like:
    #   MATCH (referencing:{referencing primary key}),
    #         (referenced:{referenced primary key})
    fkColsImportExpr = [(name, col.impFunc("csvLine.%s") % name)
                        for name, col in fKey.table.pkCols.items()]
    # str.join replaces the deprecated string.join (gone in Python 3)
    fkCols = ",".join(["%s: %s" % tup for tup in fkColsImportExpr])
    # We need to match the name of the field on the primary key table
    # with the name of the field on the foreign key table
    pkColsImportExpr = [(fkColName, pkCol.impFunc("csvLine.%s") % pkName)
                        for (pkName, pkCol), fkColName in
                        zip(fKey.consCols.items(), fKey.refCols.keys())]
    pkCols = ",".join(["%s: %s" % tup for tup in pkColsImportExpr])
    relType = fKey.relType
    LOG.info("Foreign key to table %s..." % pkLabel)
    # Emit one statement per file written
    for filename in fKey.table.filesWritten:
        targetFileName = getTargetFilename(filename)
        statement = self.relStatementPat % (self.periodicCommit,
                                            targetFileName,
                                            pkLabel, pkCols,
                                            fkLabel, fkCols,
                                            relType, "")
        LOG.debug(statement)
        self.cypher_exec(statement)
def createIndexes(self, tableObj):
    '''Creates indexes on Neo4j.

    Issues one index-creation statement per indexed column of the
    given table object.'''
    labelName = tableObj.labelName
    LOG.info("Creating indexes on %s..." % labelName)
    for column in tableObj.idxCols:
        stmt = "create index on :%s(%s)" % (labelName, column.name)
        LOG.debug(stmt)
        self.cypher_exec(stmt)
def createConstraints(self, tableObj):
    '''Creates unique constraints on Neo4j.

    Issues one uniqueness-constraint statement per unique column of the
    given table object.'''
    label = tableObj.labelName
    # Use the cached local consistently; the original re-read
    # tableObj.labelName here.
    LOG.info("Creating constraint on %s..." % label)
    for col in tableObj.uniqCols:
        statement = """create constraint on (n:%s) assert n.%s is unique""" % (label, col.name)
        LOG.debug(statement)
        self.cypher_exec(statement)
def manyToManyRelations(self, tableObj):
    '''Transfers a many-to-many table as relationships in Neo4j.

    Requires exactly two foreign keys on the table; the remaining
    import columns (if any) become properties on the relationship.'''
    # One can never know what's going to go wrong...
    assert len(tableObj.fKeys) == 2
    # We need to know names of fields on two foreign key tables
    src = tableObj.fKeys[0]
    dest = tableObj.fKeys[1]
    pk1Label = src.refTable.labelName
    pk2Label = dest.refTable.labelName
    pk1ColsImportExpr = [(refColName, pkCol.impFunc("csvLine.%s") % pkName)
                         for (pkName, pkCol), refColName in
                         zip(src.consCols.items(), src.refCols.keys())]
    # str.join replaces the deprecated string.join (gone in Python 3)
    pk1Cols = ",".join(["%s: %s" % tup for tup in pk1ColsImportExpr])
    pk2ColsImportExpr = [(refColName, pkCol.impFunc("csvLine.%s") % pkName)
                         for (pkName, pkCol), refColName in
                         zip(dest.consCols.items(), dest.refCols.keys())]
    pk2Cols = ",".join(["%s: %s" % tup for tup in pk2ColsImportExpr])
    assert hasattr(tableObj, 'relType')
    relType = tableObj.relType
    LOG.info("Importing many-to-many table %s as relationships..."
             % tableObj.tableName)
    colnames = [x for x in tableObj.importCols.keys()]
    colImpExpr = [col.impFunc("csvLine.%s") % name
                  for name, col in tableObj.importCols.items()]
    cols = ["%s: %s" % x for x in zip(colnames, colImpExpr)]
    colClause = "{%s}" % ','.join(cols) if cols else ""
    for filename in tableObj.filesWritten:
        targetFileName = getTargetFilename(filename)
        statement = self.relStatementPat % (self.periodicCommit,
                                            targetFileName,
                                            pk2Label, pk2Cols,
                                            pk1Label, pk1Cols,
                                            relType, colClause)
        LOG.debug(statement)
        self.cypher_exec(statement)
def manyToManyRelations(self, tableObj):
    '''Transfers a many-to-many table as relationships in Neo4j.

    Assumes exactly two foreign keys; extra import columns become
    relationship properties.'''
    # One can never know what's going to go wrong...
    assert len(tableObj.fKeys) == 2
    # We need to know names of fields on two foreign key tables
    src = tableObj.fKeys[0]
    dest = tableObj.fKeys[1]
    pk1Label = src.refTable.labelName
    pk2Label = dest.refTable.labelName
    pk1ColsImportExpr = [(refColName, pkCol.impFunc("csvLine.%s") % pkName)
                         for (pkName, pkCol), refColName in
                         zip(src.consCols.items(), src.refCols.keys())]
    # str.join replaces the deprecated string.join (gone in Python 3)
    pk1Cols = ",".join(["%s: %s" % tup for tup in pk1ColsImportExpr])
    pk2ColsImportExpr = [(refColName, pkCol.impFunc("csvLine.%s") % pkName)
                         for (pkName, pkCol), refColName in
                         zip(dest.consCols.items(), dest.refCols.keys())]
    pk2Cols = ",".join(["%s: %s" % tup for tup in pk2ColsImportExpr])
    assert hasattr(tableObj, 'relType')
    relType = tableObj.relType
    LOG.info("Importing many-to-many table %s as relationships..."
             % tableObj.tableName)
    colnames = [x for x in tableObj.importCols.keys()]
    colImpExpr = [col.impFunc("csvLine.%s") % name
                  for name, col in tableObj.importCols.items()]
    cols = ["%s: %s" % x for x in zip(colnames, colImpExpr)]
    colClause = "{%s}" % ','.join(cols) if cols else ""
    for filename in tableObj.filesWritten:
        targetFileName = getTargetFilename(filename)
        statement = self.relStatementPat % (self.periodicCommit,
                                            targetFileName,
                                            pk2Label, pk2Cols,
                                            pk1Label, pk1Cols,
                                            relType, colClause)
        LOG.debug(statement)
        self.cypher_exec(statement)
def export(self):
    '''Export all tables'''
    for name, table in self.tables.items():
        LOG.info("Exporting %s..." % name)
        table.export()
def export(self):
    """Export all tables"""
    # Walk every registered table and delegate the actual work to it.
    for tblName in self.tables:
        tblObject = self.tables[tblName]
        LOG.info("Exporting %s..." % tblName)
        tblObject.export()