def extractAndReadZIP(directory, zipToRead, rfc_solicitante):
    """Dump every XML document found in a ZIP file into an Excel workbook.

    The workbook is saved in ``directory`` under the ZIP's base name with an
    ``.xlsx`` extension and contains four sheets: ``Emisor``, ``Receptor``,
    ``Pago_Emisor`` and ``Pago_Receptor``.  Each XML becomes one row; the
    destination sheet depends on the document's ``TipoDeComprobante`` and on
    whether ``rfc_solicitante`` matches the Emisor or the Receptor RFC.

    Args:
        directory: Folder that holds the ZIP; the workbook is written here.
        zipToRead: File name of the ZIP inside ``directory``.
        rfc_solicitante: RFC compared against the Emisor/Receptor ``Rfc``
            attribute to choose the destination sheet.

    Returns:
        None. The result is the saved workbook plus progress messages printed
        to stdout.
    """
    objControl = cInternalControl()
    # testingMode selects a backslash separator for the ZIP path.
    separationFolder = '\\' if objControl.testingMode else '/'

    contDocs = 0
    # The context manager guarantees the archive is closed even when a
    # document fails to parse.
    with zipfile.ZipFile(directory + separationFolder + zipToRead, 'r') as myZip:
        # The workbook ("database") is named after the ZIP file.
        excel_fileName = os.path.splitext(
            os.path.split(myZip.filename)[1])[0] + '.xlsx'
        excelPath = directory + '/' + excel_fileName

        # Sheets are created in this order: Emisor, Receptor, Pago_Emisor,
        # Pago_Receptor. The default sheet ("Sheet") is renamed to "Emisor".
        wb = excelpy.Workbook()
        wb['Sheet'].title = 'Emisor'
        wb.create_sheet('Receptor')
        wb.create_sheet('Pago_Emisor')
        wb.create_sheet('Pago_Receptor')
        wb.save(excelPath)

        # ---- First pass: collect every column of every table -------------
        # dicTableFields maps table (XML node name) -> list of "Table_Attr".
        dicTableFields = {}
        for xml in myZip.namelist():
            with myZip.open(xml) as doc_xml:
                root = ET.parse(doc_xml).getroot()
            for node in root.iter():
                # Tags come as "{namespace}Node"; only the local name is
                # needed. [-1] also copes with tags that have no namespace.
                tableName = str(node.tag).split('}')[-1]
                # Only nodes with 0 or 1 children contribute columns (one row
                # per document); the root "Comprobante" is always included.
                if len(node) < 2 or tableName == 'Comprobante':
                    for attr in node.attrib:
                        # Prefix with the table name because different tables
                        # share attribute names (e.g. Emisor/Receptor "Rfc").
                        fieldName = tableName + '_' + attr
                        tableFields = dicTableFields.setdefault(tableName, [])
                        if fieldName not in tableFields:
                            tableFields.append(fieldName)

        # ---- Second pass: build the header row ---------------------------
        if not objControl.lsCustomFields:
            # Flatten the per-table lists. Iterating the dict itself would
            # yield table names, which are not valid "Table_Attr" fields.
            lsSource = [
                fieldName
                for tableFields in dicTableFields.values()
                for fieldName in tableFields
            ]
        else:
            lsSource = objControl.lsCustomFields

        # Extra leading columns first, then the data columns.
        lsFields = ['nombreArchivo', 'mes']
        lsFields.extend(lsSource)
        for field in objControl.lsRemove:
            if field in lsFields:
                lsFields.remove(field)
        # Extra trailing columns.
        lsFields.append('Estatus')
        lsFields.append('Fecha/Hora de Consulta')

        for sheet in wb.sheetnames:
            wb[sheet].append(lsFields)
        wb.save(excelPath)

        # ---- Third pass: one row per XML ---------------------------------
        ingresoEgresoValues = ('Ingreso', 'ingreso', 'I', 'i',
                               'E', 'Egreso', 'egreso', 'e')
        pagoValues = ('P', 'p', 'Pago', 'pago')

        for xml in myZip.namelist():
            # Values captured while building the row; they feed the status
            # validation when the 'Estatus' column is reached.
            vEmisorRfc = None
            vReceptorRfc = None
            vTimbreFiscal = None
            vComprobanteTotal = None

            with myZip.open(xml) as doc_xml:
                root = ET.parse(doc_xml).getroot()

            # Decide the destination sheet.
            sheetPrint = 'Nada'
            for item in ['Emisor', 'Receptor']:
                node = returnFoundNode(root, item)
                if len(node) > 0:
                    # NOTE(review): 'look' mode presumably returns the value
                    # of the first attribute found among the given names --
                    # confirm against addColumnIfFound.
                    rfc_value = addColumnIfFound(node[0], None, None, 'look',
                                                 ['Rfc', 'rfc'])
                    if rfc_value == rfc_solicitante:
                        tipoComprobante = addColumnIfFound(
                            root, None, None, 'look',
                            ['TipoDeComprobante', 'tipoDeComprobante'])
                        if tipoComprobante in ingresoEgresoValues:
                            sheetPrint = item
                        elif tipoComprobante in pagoValues or sheetPrint == 'Nada':
                            # Payments, and any type not recognised while no
                            # sheet has been chosen yet, go to the "Pago"
                            # sheet of the matching side.
                            sheetPrint = 'Pago_' + item

            if sheetPrint == 'Nada':
                # The requester is neither Emisor nor Receptor. No sheet is
                # named 'Nada', so skip the document instead of failing on
                # wb['Nada'].
                print('File skipped (RFC not found as Emisor/Receptor):', xml)
                continue

            # Each entry of lsFields is "Table_Field", e.g.
            # "Comprobante_Version". The field list drives the row layout.
            lsRow = []
            for field in lsFields:
                if field == 'nombreArchivo':
                    lsRow.append(xml)
                    continue
                if field == 'mes':
                    fechaFactura = addColumnIfFound(root, None, None, 'look',
                                                    ['Fecha', 'fecha'])
                    # The month is the second '-'-separated chunk of Fecha.
                    monthWord = returnMonthWord(int(fechaFactura.split('-')[1]))
                    lsRow.append(monthWord)
                    continue
                if field == 'Estatus':
                    # Uses Emisor_Rfc, Receptor_Rfc, TimbreFiscalDigital_UUID
                    # and Comprobante_Total captured from earlier columns.
                    print(f'Obteniendo estado para {xml}')
                    strStatus = validaEstadoDocumento(vEmisorRfc, vReceptorRfc,
                                                      vTimbreFiscal,
                                                      vComprobanteTotal)
                    if strStatus:
                        lsRow.append(strStatus['estado'])
                    else:
                        lsRow.append('No hubo respuesta')
                    continue
                if field == 'Fecha/Hora de Consulta':
                    lsRow.append(
                        datetime.datetime.now().strftime(formatDateTime))
                    continue

                # Regular "Table_Column" field.
                chunks = field.split('_')
                table = chunks[0]
                column = chunks[1]
                if table == 'Comprobante':
                    # Comprobante is the root element itself.
                    # NOTE(review): 'add' mode presumably appends the column
                    # value to lsRow -- confirm against addColumnIfFound.
                    addColumnIfFound(root, column, lsRow, 'add', None)
                else:
                    lsNode = returnFoundNode(root, table)
                    if len(lsNode) == 1:
                        addColumnIfFound(lsNode[0], column, lsRow, 'add', None)
                    elif len(lsNode) > 1:
                        # Several nodes share this name. Read only the ones
                        # that carry attributes; attribute-less ones just
                        # hold children.
                        bTableWithField = False
                        for node in root.findall(
                                './/' + objControl.prefixCFDI + table):
                            if len(node.attrib) > 0:
                                addColumnIfFound(node, column, lsRow, 'add',
                                                 None)
                                bTableWithField = True
                        if not bTableWithField:
                            lsRow.append(0)
                    else:
                        # No node with this table name in the document.
                        lsRow.append(0)

                # Remember the values needed later to validate the CFDI.
                if field == 'Emisor_Rfc':
                    vEmisorRfc = str(lsRow[-1])
                elif field == 'Receptor_Rfc':
                    vReceptorRfc = str(lsRow[-1])
                elif field == 'TimbreFiscalDigital_UUID':
                    vTimbreFiscal = str(lsRow[-1])
                elif field == 'Comprobante_Total':
                    vComprobanteTotal = str(lsRow[-1])

            # The whole XML goes into a single row.
            wb[sheetPrint].append(lsRow)
            contDocs += 1
            print('File done:', xml, '...', str(contDocs))

    # All XML documents processed at this point.
    wb.save(excelPath)
    print('Files processed in ZIP file:', str(contDocs))
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from cassandra.query import SimpleStatement
from openpyxl import Workbook
from openpyxl import load_workbook
from InternalControl import cInternalControl

# Shared control object; its `db` attribute selects the target database.
objControl = cInternalControl()

# Cloud configuration handed to the Cassandra driver: the secure-connect
# bundle file name is derived from the configured database name.
cloud_config = {
    'secure_connect_bundle':
    'secure-connect-' + objControl.db + '_serverless.zip'
}


def getCluster():
    """Build and return a Cassandra ``Cluster`` for the configured database.

    Credentials are read from a ``CassandraConnection`` object: the
    ``dbquart`` pair when ``objControl.db`` is ``'dbquart'``, the test pair
    otherwise.

    NOTE(review): ``CassandraConnection`` is not imported in the visible
    lines -- presumably it is defined or imported elsewhere in the file.
    """
    credentials = CassandraConnection()
    if objControl.db == 'dbquart':
        user = credentials.cc_user_dbquart
        password = credentials.cc_pwd_dbquart
    else:
        user = credentials.cc_user_test
        password = credentials.cc_pwd_test
    return Cluster(cloud=cloud_config,
                   auth_provider=PlainTextAuthProvider(user, password))
def extractAndReadZIP_SQL(directory, zipToRead, rfc_solicitante):
    """Insert every XML document found in a ZIP file into SQL tables.

    Each XML becomes one ``insert`` statement. The target table is
    ``Emisor`` or ``Receptor`` (whichever side matches ``rfc_solicitante``)
    for Ingreso/Egreso documents, and ``Pago`` for every other document type.
    After all documents are processed the current ``solicitud`` row is
    flagged with ``conteo=1``.

    Args:
        directory: Folder that holds the ZIP.
        zipToRead: File name of the ZIP inside ``directory``.
        rfc_solicitante: RFC compared against the Emisor/Receptor ``Rfc``
            attribute to choose the target table.

    Returns:
        None. Statements are executed through
        ``bd.getQueryOrExecuteTransaction_NoReturning``.
    """
    objControl = cInternalControl()
    # testingMode selects a backslash separator for the ZIP path.
    separationFolder = '\\' if objControl.testingMode else '/'

    contDocs = 0
    # The context manager guarantees the archive is closed even when a
    # document fails to parse.
    with zipfile.ZipFile(directory + separationFolder + zipToRead, 'r') as myZip:
        # ---- First pass: collect every column of every table -------------
        # dicTableFields maps table (XML node name) -> list of "Table_Attr".
        dicTableFields = {}
        for xml in myZip.namelist():
            with myZip.open(xml) as doc_xml:
                root = ET.parse(doc_xml).getroot()
            for node in root.iter():
                # Tags come as "{namespace}Node"; only the local name is
                # needed. [-1] also copes with tags that have no namespace.
                tableName = str(node.tag).split('}')[-1]
                # len(node) counts direct children; Element.getchildren()
                # no longer exists since Python 3.9.
                # Only nodes with 0 or 1 children contribute columns (one row
                # per document); the root "Comprobante" is always included.
                if len(node) < 2 or tableName == 'Comprobante':
                    for attr in node.attrib:
                        # Prefix with the table name because different tables
                        # share attribute names (e.g. Emisor/Receptor "Rfc").
                        fieldName = tableName + '_' + attr
                        tableFields = dicTableFields.setdefault(tableName, [])
                        if fieldName not in tableFields:
                            tableFields.append(fieldName)

        # ---- Second pass: build the column list --------------------------
        if not objControl.lsCustomFields:
            # Flatten the per-table lists. Iterating the dict itself would
            # yield table names, which are not valid "Table_Attr" fields.
            lsSource = [
                fieldName
                for tableFields in dicTableFields.values()
                for fieldName in tableFields
            ]
        else:
            lsSource = objControl.lsCustomFields

        # Extra leading columns first, then the data columns.
        lsFields = ['nombreArchivo', 'mes']
        lsFields.extend(lsSource)
        for field in objControl.lsRemove:
            if field in lsFields:
                lsFields.remove(field)
        # id_solicitud goes last.
        lsFields.append('id_solicitud')

        # Database column names are the field names in lower case, e.g.
        # "Comprobante_Fecha" -> "comprobante_fecha".
        lsFieldsSQL = [x.lower() for x in lsFields]
        # Column list as it appears in the statement: (field1,field2,...)
        fieldsInStatement = '(' + ','.join(lsFieldsSQL) + ')'

        # Columns that are NOT passed through transforValuesToSQLFormat.
        lsFieldsNotToMatch = [
            'comprobante_subtotal', 'impuestos_totalimpuestosretenidos',
            'impuestos_totalimpuestostrasladados', 'comprobante_total',
            'comprobante_descuento', 'id_solicitud'
        ]
        ingresoEgresoValues = ('I', 'Ingreso', 'ingreso',
                               'E', 'Egreso', 'egreso')

        # ---- Third pass: one insert per XML ------------------------------
        for xml in myZip.namelist():
            with myZip.open(xml) as doc_xml:
                root = ET.parse(doc_xml).getroot()

            # Reset per document so a non-matching XML can never reuse the
            # table chosen for the previous one.
            tableSQL = None
            for item in ['Emisor', 'Receptor']:
                node = returnFoundNode(root, item)
                if len(node) > 0:
                    rfc_value = node[0].get('Rfc')
                    if rfc_value == rfc_solicitante:
                        if root.get('TipoDeComprobante') in ingresoEgresoValues:
                            tableSQL = item
                        else:
                            # Payments and any other type go to "Pago".
                            tableSQL = 'Pago'
                        break

            if tableSQL is None:
                # The requester is neither Emisor nor Receptor, so there is
                # no table to insert into.
                print('File skipped (RFC not found as Emisor/Receptor):', xml)
                continue

            # Each entry of lsFields is "Table_Field", e.g.
            # "Comprobante_Version". The field list drives the row layout.
            lsRow = []
            for field in lsFields:
                if field == 'nombreArchivo':
                    lsRow.append(xml)
                    continue
                if field == 'mes':
                    fechaFactura = root.get('Fecha')
                    # The month is the second '-'-separated chunk of Fecha.
                    monthWord = returnMonthWord(int(fechaFactura.split('-')[1]))
                    lsRow.append(monthWord)
                    continue
                if field == 'id_solicitud':
                    lsRow.append(ID_CURRENT_SOLICITUD)
                    continue

                # Regular "Table_Column" field.
                chunks = field.split('_')
                table = chunks[0]
                column = chunks[1]
                if table == 'Comprobante':
                    # Comprobante is the root element itself.
                    # NOTE(review): addColumnIfFound_SQL presumably appends
                    # the value (or the '0' default) to lsRow -- confirm.
                    addColumnIfFound_SQL(root, column, lsRow, '0')
                else:
                    lsNode = returnFoundNode(root, table)
                    if len(lsNode) == 1:
                        addColumnIfFound_SQL(lsNode[0], column, lsRow, '0')
                    elif len(lsNode) > 1:
                        # Several nodes share this name. Read only the ones
                        # that carry attributes; attribute-less ones just
                        # hold children.
                        bTableWithField = False
                        for node in root.findall(
                                './/' + objControl.prefixCFDI + table):
                            if len(node.attrib) > 0:
                                addColumnIfFound_SQL(node, column, lsRow, '0')
                                bTableWithField = True
                        if not bTableWithField:
                            # The table exists but has no attributes to read.
                            lsRow.append('0')
                    else:
                        # No node with this table name in the document.
                        lsRow.append('0')

            # Convert the row values into SQL literals.
            # NOTE(review): the return value is discarded, so
            # transforValuesToSQLFormat presumably rewrites lsRow in place.
            lsFieldsToMatch = [
                field for field in lsFieldsSQL
                if field not in lsFieldsNotToMatch
            ]
            for field in lsFieldsToMatch:
                transforValuesToSQLFormat(field, lsFieldsSQL, lsRow)

            valuesInSatement = ",".join(lsRow)
            # NOTE(review): the statement is assembled by string
            # concatenation from XML content. If `bd` supports bound
            # parameters, prefer them to rule out SQL injection.
            finalCmd = ("insert into " + tableSQL + " " + fieldsInStatement +
                        " values (" + valuesInSatement + ") ;")
            bd.getQueryOrExecuteTransaction_NoReturning(finalCmd)
            contDocs += 1

    # All XML documents processed at this point: flag the request.
    cmd = "update solicitud set conteo=1 where id=" + ID_CURRENT_SOLICITUD + ";"
    bd.getQueryOrExecuteTransaction_NoReturning(cmd)
    print('Files processed in ZIP file:', str(contDocs))