def getDBStructure(self, tableName, tableSchema):
    tableName = self.setTable(tableName=tableName)
    ret = OrderedDict()
    try:
        collection = self.isExists(tableName=tableName, tableSchema=tableSchema)
        if collection:
            cntRows = self.cntRows()
            ## There are rows - use the current structure, sampled from one document
            if cntRows > 0:
                schemaObj = self.cursor[tableName].find_one()
                if schemaObj and len(schemaObj) > 0:
                    for col in schemaObj:
                        colName = uniocdeStr(col)
                        colType = type(schemaObj[col])  # type of the sampled value, not of the key
                        ret[colName] = {eJson.jSttValues.TYPE: colType,
                                        eJson.jSttValues.ALIACE: None}
            ## Empty collection - fall back to the validator's $jsonSchema, if one is defined
            else:
                collectionInfo = self.cursor.command({'listCollections': 1, 'filter': {'name': collection}})
                #collectionInfo = self.cursor.get_collection_infos( filter=[collectionsL[tableName.lower()]] )
                if 'cursor' in collectionInfo:
                    cursorObj = collectionInfo['cursor']
                    if 'firstBatch' in cursorObj:
                        for batch in cursorObj['firstBatch']:
                            if 'options' in batch:
                                validator = batch['options']['validator']
                                collectionProperties = validator['$jsonSchema']['properties']
                                for col in collectionProperties:
                                    colType = collectionProperties[col]['bsonType']
                                    ret[uniocdeStr(col)] = {eJson.jSttValues.TYPE: colType,
                                                            eJson.jSttValues.ALIACE: None}
    except Exception as e:
        p("MONGODB-> %s ERROR:\n %s " % (tableName, str(e)), "e")
    return ret
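
# A minimal, hedged sketch of the sampling heuristic used above: fetch one
# document from a collection and derive a column -> python-type mapping from it.
# Assumes pymongo is installed and a mongod is reachable; the URI, database, and
# collection names are hypothetical placeholders.
def _sketchInferMongoStructure(uri="mongodb://localhost:27017", db="testDb", coll="users"):
    from collections import OrderedDict
    from pymongo import MongoClient

    doc = MongoClient(uri)[db][coll].find_one()   # sample a single document
    # Map each field to the type of its sampled value, the same idea as the
    # cntRows > 0 branch of getDBStructure
    return OrderedDict((col, type(val)) for col, val in (doc or {}).items())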
def __updateSTTBySourceOrTarget(self, srcStructure, pre="[", pos="]"):
    # Check whether the STT references source columns that are not defined in the source structure
    srcStructureL = []
    srcColumns = {}
    for col in srcStructure:
        srcStructureL.append(col.replace(pre, "").replace(pos, "").lower())
        if eJson.stt.SOURCE in srcStructure[col] and srcStructure[col][eJson.stt.SOURCE]:
            srcName = srcStructure[col][eJson.stt.SOURCE].replace(pre, "").replace(pos, "")
            srcStructureL.append(uniocdeStr(srcName))
            srcColumns[srcName] = None

    removeColumnsSrc = []
    if self.stt:
        for col in self.stt:
            if eJson.stt.SOURCE in self.stt[col] and self.stt[col][eJson.stt.SOURCE] not in srcColumns:
                if self.stt[col][eJson.stt.SOURCE].replace(pre, "").replace(pos, "").lower() not in srcStructureL:
                    removeColumnsSrc.append(col)
        for col in removeColumnsSrc:
            p("STT TARGET %s HAS AN INVALID SOURCE %s --> ignoring COLUMN" % (col, self.stt[col][eJson.stt.SOURCE]), "w")
            del self.stt[col]
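
# A hedged, self-contained illustration of the STT pruning above, using a plain
# 'source' string key in place of the eJson.stt constants; the column names are
# hypothetical. Entries whose source column is absent from the source structure
# are dropped, exactly as __updateSTTBySourceOrTarget warns and deletes.
def _sketchPruneStt(stt, srcStructure, pre="[", pos="]"):
    known = {c.replace(pre, "").replace(pos, "").lower() for c in srcStructure}
    for col in list(stt):                      # list() so we can delete while iterating
        src = stt[col].get('source')
        if src and src.replace(pre, "").replace(pos, "").lower() not in known:
            del stt[col]                       # invalid source -> ignore column
    return stt

# _sketchPruneStt({'Name': {'source': '[FullName]'}, 'Age': {'source': '[Missing]'}},
#                 {'[FullName]': {}})          # -> keeps only 'Name'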
def __execEachLine(connObj, sqlTxt):
    sqlQuery = __split_sql_expressions(sqlTxt)
    isParam = len(locParams) > 0   # locParams is defined at module level
    for sql in sqlQuery:
        sql = re.sub(r"\s+", " ", sql)
        if isParam:
            sql = connObj.setQueryWithParams(query=sql, queryParams=locParams)
        # Echo T-SQL PRINT statements to the log
        if 'PRINT' in sql:
            disp = sql.split("'")[1]
            p('SQL PRINT: ' + disp, "i")
        if len(sql) > 1:
            sql = str(sql) if connObj.isExtractSqlIsOnlySTR else uniocdeStr(sql)
            connObj.exeSQL(sql=sql)
            p(u"FINISH EXEC: %s" % uniocdeStr(sql), "i")
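
# A hedged sketch of the same execute-each-statement loop against sqlite3, used
# here as a stand-in for the connObj wrapper; the statements are hypothetical.
def _sketchExecEachLine(statements):
    import re
    import sqlite3

    conn = sqlite3.connect(":memory:")
    for sql in statements:
        sql = re.sub(r"\s+", " ", sql).strip()   # collapse whitespace, as above
        if sql:
            conn.execute(sql)                    # exeSQL equivalent
    conn.commit()
    return conn

# _sketchExecEachLine(["CREATE TABLE t (id INT)", "INSERT INTO t VALUES (1)"])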
def OLAP_Process(serverName, dbName, cubes=[], dims=[], fullProcess=True):
    import sys, os
    localPath = os.path.abspath(os.path.dirname(__file__))
    sys.path.append(os.path.join(localPath, r'../dll/clrmodule.dll'))
    import clr
    clr.AddReference(os.path.join(localPath, r'../dll/Microsoft.AnalysisServices.DLL'))
    from Microsoft.AnalysisServices import Server
    from Microsoft.AnalysisServices import ProcessType
    processType = ProcessType.ProcessFull if fullProcess else 0

    # Connect to server
    amoServer = Server()
    amoServer.Connect(serverName)

    # Connect to database
    amoDb = None
    for d in amoServer.Databases:
        if str(d).lower() == dbName.lower():
            amoDb = amoServer.Databases.GetByName(str(d))
            break
    if not amoDb:
        p("OLAP: CANNOT FIND %s DB IN %s SERVER" % (dbName, serverName), "e")
        return

    for dim in amoDb.Dimensions:
        if len(dims) == 0 or str(dim) in dims:   # compare by name, dims holds strings
            try:
                dim.Process(processType)
                p(u"OLAP DB: %s, processing DIM %s finished successfully ... " % (uniocdeStr(dbName, decode=True), uniocdeStr(dim, decode=True)), "i")
            except Exception as e:
                p(u"OLAP DB: %s, ERROR processing DIM %s ... " % (uniocdeStr(dbName, decode=True), uniocdeStr(dim, decode=True)), "e")
                p(e, "e")

    for cube in amoDb.Cubes:
        if len(cubes) == 0 or str(cube) in cubes:
            try:
                cube.Process(processType)
                p(u"OLAP DB: %s, CUBE %s finished successfully ... " % (uniocdeStr(dbName, decode=True), uniocdeStr(cube, decode=True)), "i")
            except Exception as e:
                p(u"OLAP DB: %s, ERROR processing CUBE %s ... " % (uniocdeStr(dbName, decode=True), uniocdeStr(cube, decode=True)), "e")
                p(e, "e")
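
# Hedged usage sketch: process every dimension and cube of an Analysis Services
# database with a full process, or only named objects with an incremental one.
# The server and database names are hypothetical placeholders.
#
# OLAP_Process(serverName='localhost', dbName='MyOlapDb', fullProcess=True)
# OLAP_Process(serverName='localhost', dbName='MyOlapDb',
#              cubes=['Sales'], dims=['DimDate'], fullProcess=False)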
def dataTransform(self, data, functionDict=None, execDict=None):
    if isinstance(data, tuple):
        data = list(data)
    functionDict = functionDict or {}   # avoid iterating over None
    execDict = execDict or {}
    regex = r"(\{.*?\})"
    if len(functionDict) > 0 or len(execDict) > 0:
        for num, dataRow in enumerate(data):
            row = list(dataRow)
            # Column functions: pipe each value through its handler chain
            for ind in functionDict:
                newVal = row[ind]
                for fn in functionDict[ind]:
                    newVal = fn.handler(newVal, ind)
                row[ind] = newVal
            # Exec expressions: substitute every {N} placeholder with column N's value
            for ind in execDict:
                newVal = execDict[ind]
                matches = re.finditer(regex, execDict[ind], re.MULTILINE | re.DOTALL)
                for matchNum, match in enumerate(matches):
                    for groupNum in range(0, len(match.groups())):
                        colNum = match.group(1).replace('{', '').replace('}', '')
                        colVal = row[int(colNum)]
                        colVal = uniocdeStr(colVal, decode=True) if colVal else ''
                        newVal = replaceStr(sString=str(newVal), findStr=match.group(1),
                                            repStr=colVal, ignoreCase=False, addQuotes=None)
                row[ind] = newVal
            data[num] = row

    ## SQL Server (ODBC): convert empty strings to None
    if self.connType == eConn.types.SQLSERVER:
        for num, row in enumerate(data):
            data[num] = [i if i != '' else None for i in row]
    return data
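
# A self-contained, hedged sketch of the "{N}" placeholder substitution that
# dataTransform applies for execDict entries: each {N} token is replaced by the
# value of column N of the current row. The expression and row are hypothetical.
def _sketchExecPlaceholders(expression, row):
    import re
    for match in re.finditer(r"\{(\d+)\}", expression):
        expression = expression.replace(match.group(0), str(row[int(match.group(1))]))
    return expression

# _sketchExecPlaceholders("{0} + '-' + {2}", ['A', 'B', 'C'])  # -> "A + '-' + C"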
def __split_sql_expressions(text):
    # A plain string is treated as a single expression; only comments are stripped
    if isinstance(text, str):
        text = re.sub(r"/\*.*?\*/|--.*?\n", "", text,
                      flags=re.MULTILINE | re.UNICODE | re.DOTALL)
        #text = re.sub(r"\s+", " ", text)
        return [text]

    # A file-like object is read fully, stripped of block and line comments,
    # then split into statements on "GO\n" batch separators and ";\n" terminators
    results = []
    text = uniocdeStr(sObj=text.read(), decode=False)
    text = re.sub(re.compile(r"/\*.*?\*/", re.MULTILINE | re.UNICODE | re.DOTALL), "", text)
    text = re.sub(re.compile(r"--.*?\n", re.MULTILINE | re.UNICODE), "", text)
    for l in text.split("GO\n"):
        for ll in l.split(";\n"):
            results.append(ll)
    return results if len(results) > 0 else None
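
# A hedged, runnable sketch of the same splitting rules on an in-memory script:
# strip comments, then split on "GO\n" batches and ";\n" terminators. The script
# text in the usage line is hypothetical.
def _sketchSplitSql(scriptText):
    import re
    text = re.sub(r"/\*.*?\*/", "", scriptText, flags=re.DOTALL)   # block comments
    text = re.sub(r"--.*?\n", "", text)                            # line comments
    stmts = []
    for batch in text.split("GO\n"):
        stmts.extend(s for s in (x.strip() for x in batch.split(";\n")) if s)
    return stmts

# _sketchSplitSql("CREATE TABLE t (id INT);\nGO\nINSERT INTO t VALUES (1);\n")
# -> ['CREATE TABLE t (id INT)', 'INSERT INTO t VALUES (1)']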
def mappingLoadingSourceToTarget(self, srcDictStructure, src, tar):
    if not srcDictStructure:
        return None
    tarToSrc = OrderedDict()
    srcPre, srcPos, tarPre, tarPos = '', '', '', ''
    if hasattr(src, 'columnFrame'):
        srcPre, srcPos = src.columnFrame[0], src.columnFrame[1]
    if hasattr(tar, 'columnFrame'):
        tarPre, tarPos = tar.columnFrame[0], tar.columnFrame[1]
    if src.isSingleObject:
        srcDictStructure = {'': srcDictStructure}

    # Remove from the STT any column that exists in neither the target object nor the source object
    for srcName in srcDictStructure:
        srcStructure = srcDictStructure[srcName]
        if srcName and len(srcName) > 0:
            tarStructure = tar.getStructure(objects=srcName)
        else:
            tarStructure = tar.getStructure()
        self.__updateSTTBySourceOrTarget(srcStructure=srcStructure, pre=srcPre, pos=srcPos)

        srcColumns = OrderedDict()
        if tarStructure and len(tarStructure) > 0:
            tarColumns = OrderedDict({x.replace(tarPre, "").replace(tarPos, "").lower(): x for x in tarStructure})
        else:
            tarColumns = []

        sttColumns = OrderedDict()
        if self.stt:
            for x in self.stt:
                sttColumns[x.replace(tarPre, "").replace(tarPos, "").lower()] = x

        tarToSrc[srcName] = OrderedDict()

        ## {column name in target: source column}
        for col in srcStructure:
            colAlias = col.replace(srcPre, "").replace(srcPos, "").lower()
            colName = srcStructure[col][eJson.stt.SOURCE] if eJson.stt.SOURCE in srcStructure[col] else col
            srcColumns[colAlias] = colName

        # There is no target schema --> use all source and STT columns
        if self.addSourceColumn:
            for col in srcColumns:
                tarToSrc[srcName][col] = {eJson.stt.SOURCE: srcColumns[col]}
        else:
            for col in tarColumns:
                if col in srcColumns:
                    tarToSrc[srcName][tarColumns[col]] = {eJson.stt.SOURCE: srcColumns[col]}

        # Merge STT directives into the mapping, adding STT-only columns as new entries
        tarToSrcColumns = {x.replace(tarPre, "").replace(tarPos, "").lower(): x for x in tarToSrc[srcName]}
        for col in sttColumns:
            if col in tarToSrcColumns:
                tarToSrc[srcName][tarToSrcColumns[col]].update(self.stt[sttColumns[col]])
            else:
                tarToSrc[srcName][sttColumns[col]] = self.stt[sttColumns[col]]

        #### Warn on columns that exist in the source or target but are missing from the mapping
        existsTarColumns = {}
        existsSrcColumns = {}
        for col in tarToSrc[srcName]:
            colL = col.replace(tarPre, "").replace(tarPos, "").lower()
            existsTarColumns[colL] = col
            if eJson.stt.SOURCE in tarToSrc[srcName][col] and tarToSrc[srcName][col][eJson.stt.SOURCE]:
                srcL = tarToSrc[srcName][col][eJson.stt.SOURCE].replace(srcPre, "").replace(srcPos, "").lower()
                existsSrcColumns[srcL] = tarToSrc[srcName][col][eJson.stt.SOURCE]

        columnNotMapped = u""
        for col in tarColumns:
            if col not in existsTarColumns:
                columnNotMapped += uniocdeStr(tarColumns[col]) + u" ; "
        if len(columnNotMapped) > 0:
            p(u"TARGET COLUMN NOT MAPPED: %s" % (columnNotMapped), "w")

        columnNotMapped = u""
        for col in srcColumns:
            if srcColumns[col].replace(srcPre, "").replace(srcPos, "").lower() not in existsSrcColumns:
                columnNotMapped += uniocdeStr(srcColumns[col]) + u" ; "
        if len(columnNotMapped) > 0:
            p(u"SOURCE COLUMN NOT MAPPED: %s" % (columnNotMapped), "w")

    return tarToSrc
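
# Hedged sketch of the structure this mapper returns, with hypothetical file and
# column names and plain string keys shown in place of the eJson.stt constants:
# one OrderedDict per source object, keyed by target column, each value holding
# at least the source column plus any merged STT directives.
#
# tarToSrc = {
#     'users.csv': OrderedDict([
#         ('FullName', {'source': 'name'}),
#         ('Age',      {'source': 'age', 'function': 'fDCast()'}),
#     ])
# }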
def extract(self, tar, tarToSrcDict, batchRows=None):
    batchRows = batchRows if batchRows else self.batchSize
    startFromRow = 0 if not self.header else self.header
    fileStructureDict = self.getStructure()

    if self.isSingleObject:
        if len(self.objNames) > 0:
            fName = list(self.objNames.keys())[0]
            fileStructureDict = {fName: fileStructureDict}
            tarToSrcDict[fName] = tarToSrcDict['']
            del tarToSrcDict['']
        else:
            p("UNABLE TO EXTRACT FILE !!!", "e")

    for fileName in fileStructureDict:
        fileStructure = fileStructureDict[fileName]
        fileFullPath = self.objNames[fileName][eObj.FILE_FULL_PATH]
        fileStructureL = OrderedDict()
        listOfColumnsH = {}
        targetColumnList = []
        fnOnRowsDic = {}
        execOnRowsDic = {}
        listOfColumnsL = []

        for i, col in enumerate(fileStructure):
            fileStructureL[col.lower()] = i
            listOfColumnsH[i] = col

        ## File with header and a target-to-source mapping exists
        if tarToSrcDict and fileName in tarToSrcDict:
            tarToSrc = tarToSrcDict[fileName]
            mappingSourceColumnNotExists = u""
            fileSourceColumnNotExists = u""

            for i, col in enumerate(tarToSrc):
                targetColumnList.append(col)
                if eJson.stt.SOURCE in tarToSrc[col] and tarToSrc[col][eJson.stt.SOURCE]:
                    srcColumnName = tarToSrc[col][eJson.stt.SOURCE]
                    if srcColumnName.lower() in fileStructureL:
                        listOfColumnsL.append(fileStructureL[srcColumnName.lower()])
                    else:
                        mappingSourceColumnNotExists += uniocdeStr(srcColumnName) + u" ; "
                else:
                    listOfColumnsL.append(-1)

                ### ADD FUNCTION
                if eJson.stt.FUNCTION in tarToSrc[col] and tarToSrc[col][eJson.stt.FUNCTION]:
                    fnc = eval(tarToSrc[col][eJson.stt.FUNCTION])
                    fnOnRowsDic[i] = fnc if isinstance(fnc, (list, tuple)) else [fnc]

                ### ADD EXECUTION FUNCTIONS
                if eJson.stt.EXECFUNC in tarToSrc[col] and len(tarToSrc[col][eJson.stt.EXECFUNC]) > 0:
                    newExecFunction = tarToSrc[col][eJson.stt.EXECFUNC]
                    regex = r"(\{.*?\})"
                    matches = re.finditer(regex, tarToSrc[col][eJson.stt.EXECFUNC], re.MULTILINE | re.DOTALL)
                    for matchNum, match in enumerate(matches):
                        for groupNum in range(0, len(match.groups())):
                            colName = match.group(1)
                            if colName and len(colName) > 0:
                                colName = colName.replace("{", "").replace("}", "")
                                if colName.lower() in fileStructureL:
                                    # Replace the column name with its positional index in the file
                                    newExecFunction = newExecFunction.replace(colName, str(fileStructureL[colName.lower()]))
                    execOnRowsDic[i] = newExecFunction

            for colNum in listOfColumnsH:
                if colNum not in listOfColumnsL:
                    fileSourceColumnNotExists += uniocdeStr(listOfColumnsH[colNum]) + u" ; "

            if len(mappingSourceColumnNotExists) > 0:
                p("SOURCE COLUMN EXISTS IN SOURCE-TO-TARGET MAPPING BUT WAS NOT FOUND IN SOURCE FILE: %s" % (mappingSourceColumnNotExists), "w")
            if len(fileSourceColumnNotExists) > 0:
                p("FILE COLUMN NOT FOUND IN MAPPING: %s" % (fileSourceColumnNotExists), "w")

        ## There is no target-to-source mapping: load the file as is
        else:
            for colNum in listOfColumnsH:
                listOfColumnsL.append(colNum)

        """ EXECUTING LOADING SOURCE FILE DATA """
        rows = []
        try:
            with codecs.open(fileFullPath, 'r', encoding=self.encode) as textFile:  # errors=self.withCharErr
                if self.isCsv:
                    fFile = csv.reader(textFile, delimiter=self.delimiter)
                    for i, split_line in enumerate(fFile):
                        if i >= startFromRow:
                            if self.replaceToNone:
                                rows.append([
                                    re.sub(self.replaceToNone, "", split_line[x],
                                           flags=re.IGNORECASE | re.MULTILINE | re.UNICODE)
                                    if x > -1 and len(split_line[x]) > 0 else None
                                    for x in listOfColumnsL])
                            else:
                                rows.append([split_line[x] if x > -1 and len(split_line[x]) > 0 else None
                                             for x in listOfColumnsL])
                        if self.maxLinesParse and i > startFromRow and i % self.maxLinesParse == 0:
                            rows = self.dataTransform(data=rows, functionDict=fnOnRowsDic, execDict=execOnRowsDic)
                            tar.load(rows=rows, targetColumn=targetColumnList, objectName=fileName)
                            rows = list([])
                else:
                    for i, line in enumerate(textFile):
                        line = re.sub(self.replaceToNone, "", line,
                                      flags=re.IGNORECASE | re.MULTILINE | re.UNICODE) if self.replaceToNone else line
                        line = line.strip(self.endOfLine)
                        split_line = line.split(self.delimiter)
                        # Add headers structure
                        if i >= startFromRow:
                            rows.append([split_line[x] if x > -1 and len(split_line[x]) > 0 else None
                                         for x in listOfColumnsL])
                        if self.maxLinesParse and i > startFromRow and i % self.maxLinesParse == 0:
                            rows = self.dataTransform(data=rows, functionDict=fnOnRowsDic, execDict=execOnRowsDic)
                            tar.load(rows=rows, targetColumn=targetColumnList, objectName=fileName)
                            rows = list([])

            if len(rows) > 0:
                rows = self.dataTransform(data=rows, functionDict=fnOnRowsDic, execDict=execOnRowsDic)
                tar.load(rows=rows, targetColumn=targetColumnList, objectName=fileName)
                rows = list([])
        except Exception as e:
            p("ERROR LOADING FILE %s >>>>>>" % (fileFullPath), "e")
            p(str(e), "e")
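
# A hedged, self-contained miniature of the CSV branch above: read a delimited
# file, skip header rows, project the mapped column indexes (with -1 meaning
# "no source column"), and turn empty strings into None. The file path and
# index mapping in the usage line are hypothetical.
def _sketchCsvExtract(path, columnIndexes, startFromRow=1, delimiter=','):
    import codecs
    import csv

    rows = []
    with codecs.open(path, 'r', encoding='utf8') as f:
        for i, line in enumerate(csv.reader(f, delimiter=delimiter)):
            if i >= startFromRow:
                rows.append([line[x] if x > -1 and len(line[x]) > 0 else None
                             for x in columnIndexes])
    return rows

# _sketchCsvExtract('users.csv', columnIndexes=[0, 2, -1])  # third target column unmapped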