def __createFileDbDataImport(self, importTitle, importInfo):
    '''
    Builds a SQLDataImport that reads from a file-based database.

    :param importTitle: name of the import; also used as the destination
        table name when "destination_table" is not configured
    :param importInfo: configuration mapping for this import; "file" is
        required, all other keys are optional
    :returns: the configured SQLDataImport
    :raises DataImportError: if the configured file does not exist
    '''
    dbFile = importInfo["file"]
    if not os.path.exists(dbFile):
        message = "File not found: {0}".format(os.path.realpath(dbFile))
        raise DataImportError(message)

    # Resolve the query first so a missing "sql"/"table" setting is reported
    # before a connection to the file is opened.
    query = self.__getQuery(importInfo.get("sql"), importInfo.get("table"))

    settings = {
        "name": importTitle,
        "destination": importInfo.get("destination_table") or importTitle,
        "overwrite": self.__system.getConfig().isOverwrite(),
        "query": query,
        "titleColumn": importInfo.get("import_title_column"),
        "nullValues": importInfo.get("null_values"),
        "dbNull": importInfo.get("db_null"),
        "types": importInfo.get("types"),
    }
    settings["db"] = self.__system.getConnectionManager().open(path=dbFile)
    return SQLDataImport(**settings)
def __initializeTable(self, db, tableName, columnNames, columnTypes):
    '''
    Creates the destination table, replacing any existing table of the same
    name. Tables already listed in ``self.__importedTables`` are left alone.

    :param db: the open database object providing ``TableDefs`` and
        ``CreateTableDef``
    :param tableName: name of the table to (re)create
    :param columnNames: names of the columns to create, in order
    :param columnTypes: type names matching `columnNames` one-to-one; each is
        looked up in ``AccessDatabase.pyToSqlTypes``
    :raises DataImportError: if the database rejects the new table definition
    '''
    if tableName in self.__importedTables:
        return

    for tableDef in db.TableDefs:
        if tableDef.Name == tableName:
            db.TableDefs.Delete(tableName)
            # The collection was just modified; stop enumerating it rather
            # than continuing to iterate over a changed collection.
            break

    table = db.CreateTableDef(tableName)
    for name, pyType in zip(columnNames, columnTypes):
        table.Fields.Append(
            table.CreateField(name, AccessDatabase.pyToSqlTypes[pyType]))

    try:
        db.TableDefs.Append(table)
    except pywintypes.com_error as e:
        # Dedent the template before formatting so that multi-line error
        # text cannot change the common indentation that dedent removes.
        template = textwrap.dedent("""
            Error adding table '{table}': {error}
            Column names: {columns}
            Column types: {types}
            """)
        raise DataImportError(
            template.format(table=tableName,
                            error=e.excepinfo[2],
                            columns=", ".join(columnNames),
                            types=", ".join(columnTypes)))

    self.__importedTables.append(tableName)
def __getQuery(self, sqlFile=None, table=None):
    '''
    Determines the query text to run for an import.

    :param sqlFile: file containing the query; takes precedence over `table`
    :param table: table to select every column and row from when no
        `sqlFile` is given
    :returns: the query text
    :raises DataImportError: if neither `sqlFile` nor `table` is given
    '''
    if not sqlFile:
        if not table:
            raise DataImportError("No query or table specified.")
        return "SELECT * FROM {0}".format(table)

    return self.__configHelper.readFile(sqlFile)
def importData(self, dataImport):
    '''
    Imports data into the database.

    The destination table is initialized and the rows are then loaded through
    the cursor's ``copy_from`` using a comma separator and the empty string
    as the null marker.

    :param dataImport: the DataImport to store data from
    :type dataImport: `.DataImport`
    :raises DataImportError: if the data import provides no column names
    '''
    tableName = dataImport.getDestination()
    titleColumn = dataImport.getTitleColumn()

    # Copy so that adding the title column never alters the import's own list.
    columnNames = list(dataImport.getColumns())
    if not columnNames:
        raise DataImportError(
            "Error processing data import '{0}': No column names "
            "specified. Either configure the column names explicitly or "
            "ensure that the input data contains a header row.".format(
                dataImport.getName()))

    columnTypes = dataImport.getTypes()
    if titleColumn:
        columnNames.insert(0, titleColumn)
        # Build a new list instead of inserting in place: the list returned
        # by getTypes() may be owned by the data import.
        columnTypes = ["str"] + list(columnTypes)

    self.__initializeTable(tableName, columnNames, columnTypes)

    # The import name is only written when a title column is configured.
    title = dataImport.getName() if titleColumn else None
    with self.__getCursor() as cur:
        cur.copy_from(
            file=self.__FileProtocolWrapper(dataImport.getRows(), title=title),
            table=tableName,
            sep=",",
            columns=columnNames,
            null="")
def __extractFields(self, recordSet, columnNames):
    '''
    Looks up the recordset field for each of the given column names.

    :param recordSet: the recordset whose ``Fields`` collection is searched
    :param columnNames: names of the fields to fetch, in order
    :returns: list of field objects in the same order as `columnNames`
    :raises DataImportError: if a field cannot be retrieved by name
    '''
    fields = []
    for name in columnNames:
        try:
            field = recordSet.Fields[name]
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must not be
            # reported as a missing field.
            raise DataImportError(
                "No field named '{0}' found in input.".format(name))
        fields.append(field)

    return fields
def __extractFields(self, recordSet, columnNames):
    '''
    Looks up the recordset field for each of the given column names.

    :param recordSet: the recordset whose ``Fields`` collection is searched
    :param columnNames: names of the fields to fetch, in order
    :returns: list of field objects in the same order as `columnNames`
    :raises DataImportError: if a field cannot be retrieved by name; the
        message lists the field names that are available
    '''
    fields = []
    for name in columnNames:
        try:
            field = recordSet.Fields[name]
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must not be
            # reported as a missing field.
            # Dedent the template before formatting so that the substituted
            # names cannot affect the indentation that dedent removes.
            template = textwrap.dedent("""
                No field named '{0}' found in input data.
                Input data fields: {1}""")
            available = ", ".join(f.Name for f in recordSet.Fields)
            raise DataImportError(template.format(name, available))
        fields.append(field)

    return fields
def __createDbfDataImport(self, importTitle, importInfo):
    '''
    Builds a DBFDataImport for the file named in the import configuration.

    :param importTitle: name of the import; also used as the destination
        table name when "destination_table" is not configured
    :param importInfo: configuration mapping for this import; "file" is
        required and is resolved through the config helper
    :returns: the configured DBFDataImport
    :raises DataImportError: if the resolved file does not exist
    '''
    dbfPath = self.__configHelper.getAbsolutePath(importInfo["file"])
    if os.path.exists(dbfPath):
        return DBFDataImport(
            name=importTitle,
            path=dbfPath,
            destination=importInfo.get("destination_table") or importTitle,
            titleColumn=importInfo.get("import_title_column"))

    message = "File not found: {0}".format(os.path.realpath(dbfPath))
    raise DataImportError(message)
def __createDbfDataImport(self, importTitle, importInfo):
    '''
    Builds a DBFDataImport for the file named in the import configuration.

    :param importTitle: name of the import; also used as the destination
        table name when "destination_table" is not configured
    :param importInfo: configuration mapping for this import; "file" is
        required, "columns", "types" and "import_title_column" are optional
    :returns: the configured DBFDataImport
    :raises DataImportError: if the configured file does not exist
    '''
    dbfPath = importInfo["file"]
    if not os.path.exists(dbfPath):
        message = "File not found: {0}".format(os.path.realpath(dbfPath))
        raise DataImportError(message)

    settings = {
        "name": importTitle,
        "path": dbfPath,
        "destination": importInfo.get("destination_table") or importTitle,
        "overwrite": self.__system.getConfig().isOverwrite(),
        "columns": importInfo.get("columns"),
        "types": importInfo.get("types"),
        "titleColumn": importInfo.get("import_title_column"),
    }
    return DBFDataImport(**settings)
def importData(self, dataImport):
    '''
    Imports data into the database.

    Rows are written with a parameterized INSERT in batches of 10000.
    Values for which ``self.__isSpace`` is true are stored as None.

    :param dataImport: the DataImport to store data from
    :type dataImport: `.DataImport`
    :raises DataImportError: if the data import provides no column names, or
        if a row holds more values than there are columns
    '''
    tableName = dataImport.getDestination()
    titleColumn = dataImport.getTitleColumn()

    # Copy so that adding the title column never alters the import's own list.
    columnNames = list(dataImport.getColumns())
    if not columnNames:
        raise DataImportError(
            "Error processing data import '{0}': No column names "
            "specified. Either configure the column names explicitly or "
            "ensure that the input data contains a header row.".format(
                dataImport.getName()))

    if titleColumn:
        columnNames.insert(0, titleColumn)

    columnNames = self.__cleanColumnNames(columnNames)
    columnTypes = dataImport.getTypes()
    if titleColumn:
        # Build a new list instead of inserting in place: the list returned
        # by getTypes() may be owned by the data import.
        columnTypes = ["str"] + list(columnTypes)

    self.__initializeTable(tableName, columnNames, columnTypes)

    insertSql = "INSERT INTO {table} ({columns}) VALUES ({placeholders})" \
        .format(table=tableName,
                columns=",".join(columnNames),
                placeholders=",".join("?" * len(columnNames)))

    batch = []
    count = 0  # stays 0 when the import yields no rows at all
    for count, row in enumerate(dataImport.getRows(), 1):
        if titleColumn:
            row.insert(0, dataImport.getName())

        if len(row) > len(columnNames):
            raise DataImportError(
                "Error processing data import '{0}': {1} columns "
                "expected, but data contains {2}. Check configured "
                "column names.".format(dataImport.getName(),
                                       len(columnNames),
                                       len(row)))

        # Exactly one entry per input value: None replaces blank values
        # rather than being added alongside them.
        batch.append(
            [None if self.__isSpace(value) else value for value in row])

        if count % 10000 == 0:
            self.executeMany(insertSql, batch)
            self.__log.info("Imported %d rows." % count)
            batch = []

    # Flush the final partial batch, if any.
    if batch:
        self.executeMany(insertSql, batch)

    self.__log.info("Imported %d rows." % count)
def importData(self, dataImport):
    '''
    Imports data into the database.

    The database at ``self.__dbPath`` is opened through DAO, the destination
    table is initialized, and each row is added through a recordset. Values
    for which ``self.__isSpace`` is true are skipped, leaving that field
    unset on the new record.

    :param dataImport: the DataImport to store data from
    :type dataImport: `.DataImport`
    :raises DataImportError: if the data import provides no column names, or
        if a row holds more values than there are fields
    '''
    tableName = dataImport.getDestination()
    titleColumn = dataImport.getTitleColumn()

    # Copy so that adding the title column never alters the import's own list.
    columnNames = list(dataImport.getColumns())
    if not columnNames:
        raise DataImportError(
            "Error processing data import '{0}': No column names "
            "specified. Either configure the column names explicitly or "
            "ensure that the input data contains a header row.".format(
                dataImport.getName()))

    if titleColumn:
        columnNames.insert(0, titleColumn)

    columnNames = self.__cleanColumnNames(columnNames)
    columnTypes = dataImport.getTypes()
    if titleColumn:
        # Build a new list instead of inserting in place: the list returned
        # by getTypes() may be owned by the data import.
        columnTypes = ["str"] + list(columnTypes)

    dbEngine = win32com.client.Dispatch("DAO.DBEngine.120")
    db = dbEngine.OpenDatabase(self.__dbPath)
    recordSet = None
    try:
        self.__initializeTable(db, tableName, columnNames, columnTypes)
        recordSet = db.OpenRecordset(tableName)
        fields = self.__extractFields(recordSet, columnNames)

        count = 0  # stays 0 when the import yields no rows at all
        for count, row in enumerate(dataImport.getRows(), 1):
            if titleColumn:
                row.insert(0, dataImport.getName())

            if len(row) > len(fields):
                raise DataImportError(
                    "Error processing data import '{0}': {1} columns "
                    "expected, but data contains {2}. Check configured "
                    "column names.".format(dataImport.getName(),
                                           len(fields),
                                           len(row)))

            recordSet.AddNew()
            for i, value in enumerate(row):
                if self.__isSpace(value):
                    continue

                fields[i].Value = value

            recordSet.Update()
            if count % 10000 == 0:
                self.__log.info("Imported {} rows.".format(count))

        self.__log.info("Imported {} rows.".format(count))
    finally:
        # Nested so the database is closed even if closing the recordset
        # itself raises.
        try:
            if recordSet is not None:
                recordSet.Close()
        finally:
            db.Close()