Esempio n. 1
0
def extractAndReadZIP(directory, zipToRead, rfc_solicitante):
    """Export the CFDI XML documents stored in a ZIP file to an Excel workbook.

    The workbook is saved in ``directory`` under the ZIP's base name with an
    ``.xlsx`` extension and holds four sheets: ``Emisor``, ``Receptor``,
    ``Pago_Emisor`` and ``Pago_Receptor``. The work is done in three passes:

    1. Scan every XML and collect ``<table>_<attribute>`` field names from
       the ``Comprobante`` node and from every node with fewer than two
       children (skipped for the header when ``lsCustomFields`` is set).
    2. Write the header row, identical on every sheet.
    3. Write one row per XML into the sheet selected from the role that
       ``rfc_solicitante`` plays in the document (Emisor/Receptor) and the
       document's ``TipoDeComprobante``.

    Args:
        directory: Folder containing the ZIP; the workbook is written here.
        zipToRead: File name of the ZIP inside ``directory``.
        rfc_solicitante: RFC compared against the Emisor/Receptor RFC of each
            document to choose the destination sheet.

    Returns:
        None. Results are written to the workbook and progress is printed.
    """
    objControl = cInternalControl()
    #Change / to \\ if necessary
    separationFolder = '\\' if objControl.testingMode else '/'

    #Accepted spellings of TipoDeComprobante for each family of documents
    lsIngresoEgreso = ('Ingreso', 'ingreso', 'I', 'i', 'E', 'Egreso',
                       'egreso', 'e')
    lsPago = ('P', 'p', 'Pago', 'pago')

    contDocs = 0
    #The context manager guarantees the ZIP is closed even if a document fails
    with zipfile.ZipFile(directory + separationFolder + zipToRead,
                         'r') as myZip:
        #The zip's file name will be the name of the excel file (the "Database")
        excel_fileName = os.path.splitext(os.path.split(
            myZip.filename)[1])[0] + '.xlsx'
        excelPath = directory + '/' + excel_fileName

        #Creating the workbook (database) with the sheets in this order:
        #Emisor, Receptor, Pago_Emisor, Pago_Receptor
        wb = excelpy.Workbook()
        #"Sheet" is the default first sheet, so rename it.
        emisor_sheet = wb['Sheet']
        emisor_sheet.title = 'Emisor'
        wb.create_sheet('Receptor')
        wb.create_sheet('Pago_Emisor')
        wb.create_sheet('Pago_Receptor')
        wb.save(excelPath)

        #dicTableFields has the structure key: table, value: list of fields
        dicTableFields = {}
        #First, get all the columns for all the tables
        for xml in myZip.namelist():
            with myZip.open(xml) as doc_xml:
                root = ET.parse(doc_xml).getroot()
            for node in root.iter():
                #Column = attribute, Node = Table
                #Tags come as "{namespace}Node"; keep only the local name.
                #rsplit(...)[-1] also copes with tags that carry no namespace.
                tableName = str(node.tag).rsplit('}', 1)[-1]
                numOfNodes = len(node)
                #Only nodes with 0 or 1 children are read (we only want 1 row),
                #except "Comprobante", which is always read
                if numOfNodes < 2 or tableName == 'Comprobante':
                    #All the fields live in one single sheet, so two fields
                    #may share a name (ex: Rfc of Emisor and Receptor); the
                    #tableName prefix keeps them apart
                    for attr in node.attrib:
                        fieldName = tableName + '_' + attr
                        tableFields = dicTableFields.setdefault(tableName, [])
                        if fieldName not in tableFields:
                            tableFields.append(fieldName)

        #Second, when got all fields from all xml, print them in spread sheet
        #Extra fields in the beginning
        lsFields = ['nombreArchivo', 'mes']
        if len(objControl.lsCustomFields) == 0:
            #Flatten the per-table lists. Iterating the dictionary itself
            #would yield table names, which have no "_column" part and would
            #break the field parsing below.
            for tableFields in dicTableFields.values():
                lsFields.extend(tableFields)
        else:
            lsFields.extend(objControl.lsCustomFields)

        for field in objControl.lsRemove:
            if field in lsFields:
                lsFields.remove(field)

        #Extra fields at the end
        lsFields.append('Estatus')
        lsFields.append('Fecha/Hora de Consulta')

        #Print all lsFields on excel workbook
        for sheet in wb.sheetnames:
            wb[sheet].append(lsFields)
        wb.save(excelPath)

        #Third, read information and insert where belongs
        # getroot() : gets the root of the xml, which is "Comprobante"
        # root.find(.//...): gets any node inside the root
        for xml in myZip.namelist():
            #Values needed to ask for the status of every xml (each cfdi):
            #Emisor_Rfc,Receptor_Rfc,TimbreFiscalDigital_UUID,Comprobante_Total
            vEmisorRfc = None
            vReceptorRfc = None
            vTimbreFiscal = None
            vComprobanteTotal = None

            with myZip.open(xml) as doc_xml:
                root = ET.parse(doc_xml).getroot()

            #Choose the sheet from the role of rfc_solicitante and from
            #TipoDeComprobante. None means "no role matched".
            sheetPrint = None
            for item in ('Emisor', 'Receptor'):
                node = returnFoundNode(root, item)
                if len(node) == 0:
                    continue
                rfc_value = addColumnIfFound(node[0], None, None, 'look',
                                             ['Rfc', 'rfc'])
                if rfc_value != rfc_solicitante:
                    continue
                tipoComprobante = addColumnIfFound(
                    root, None, None, 'look',
                    ['TipoDeComprobante', 'tipoDeComprobante'])
                if tipoComprobante in lsIngresoEgreso:
                    sheetPrint = item
                if tipoComprobante in lsPago or sheetPrint is None:
                    #"Pago", or any other type while nothing has been chosen
                    #yet, goes to the Pago sheet of the matching role
                    sheetPrint = 'Pago_' + item

            if sheetPrint is None:
                #rfc_solicitante is neither Emisor nor Receptor: there is no
                #sheet for this document, so skip it instead of failing on a
                #missing sheet after doing the status lookup.
                print('File skipped (RFC is neither Emisor nor Receptor):',
                      xml)
                continue

            #Start to read the fields from lsFields
            #Example of a field: "Comprobante_Version" -> "tableName_Field"
            #One row per xml; the field list drives the insertion
            lsRow = []
            for field in lsFields:
                if field == 'nombreArchivo':
                    lsRow.append(xml)
                    continue
                if field == 'mes':
                    fechaFactura = addColumnIfFound(root, None, None, 'look',
                                                    ['Fecha', 'fecha'])
                    #The second "-" separated chunk of the date is the month
                    monthWord = returnMonthWord(
                        int(fechaFactura.split('-')[1]))
                    lsRow.append(monthWord)
                    continue
                if field == 'Estatus':
                    #Uses the values captured below while reading Emisor_Rfc,
                    #Receptor_Rfc,TimbreFiscalDigital_UUID,Comprobante_Total
                    print(f'Obteniendo estado para {xml}')
                    strStatus = validaEstadoDocumento(vEmisorRfc, vReceptorRfc,
                                                      vTimbreFiscal,
                                                      vComprobanteTotal)
                    if strStatus:
                        lsRow.append(strStatus['estado'])
                    else:
                        lsRow.append('No hubo respuesta')
                    continue
                if field == 'Fecha/Hora de Consulta':
                    lsRow.append(
                        datetime.datetime.now().strftime(formatDateTime))
                    continue

                #Rest of cases. Split only on the first "_" so attribute
                #names that contain "_" themselves are kept whole.
                chunks = field.split('_', 1)
                table = chunks[0]
                column = chunks[1]
                if table == 'Comprobante':
                    addColumnIfFound(root, column, lsRow, 'add', None)
                else:
                    #Find the right prefix for table
                    lsNode = returnFoundNode(root, table)
                    if len(lsNode) == 1:
                        addColumnIfFound(lsNode[0], column, lsRow, 'add', None)
                    elif len(lsNode) > 1:
                        #More than 1 table found with the same name in XML
                        #NOTE(review): every node with attributes triggers an
                        #addColumnIfFound call; confirm this cannot add more
                        #than one cell for a single field.
                        bTableWithField = False
                        for node in root.findall('.//' +
                                                 objControl.prefixCFDI + table):
                            if len(node.attrib) > 0:
                                #Nodes without attributes only hold children,
                                #so they are skipped
                                addColumnIfFound(node, column, lsRow, 'add',
                                                 None)
                                bTableWithField = True
                        if not bTableWithField:
                            lsRow.append(0)
                    else:
                        #No table name found
                        lsRow.append(0)

                #Keep the values needed later to validate the CFDI status
                if field == 'Emisor_Rfc':
                    vEmisorRfc = str(lsRow[-1])
                elif field == 'Receptor_Rfc':
                    vReceptorRfc = str(lsRow[-1])
                elif field == 'TimbreFiscalDigital_UUID':
                    vTimbreFiscal = str(lsRow[-1])
                elif field == 'Comprobante_Total':
                    vComprobanteTotal = str(lsRow[-1])
                #End of field iteration

            #Append the whole xml in a single row
            wb[sheetPrint].append(lsRow)
            contDocs += 1
            print('File done:', xml, '...', str(contDocs))
            #The workbook is saved again after every document
            wb.save(excelPath)
            #End of each document (xml) iteration in a zip

    #All xml processed at this point
    print('Files processed in ZIP file:', str(contDocs))
Esempio n. 2
0
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from cassandra.query import SimpleStatement
from openpyxl import Workbook
from openpyxl import load_workbook
from InternalControl import cInternalControl

# Shared settings object; its ``db`` attribute is used below to build the
# secure-connect bundle file name and in getCluster() to pick credentials.
objControl = cInternalControl()
# Options passed as ``cloud=`` when the Cassandra Cluster is created.
cloud_config = dict(
    secure_connect_bundle='secure-connect-' + objControl.db +
    '_serverless.zip')


def getCluster():
    """Return a Cassandra ``Cluster`` built from the module configuration.

    Credentials are read from a ``CassandraConnection`` object: the
    ``dbquart`` pair when ``objControl.db`` is ``'dbquart'``, the test pair
    for any other value. The cluster is created with ``cloud_config`` and a
    ``PlainTextAuthProvider`` holding those credentials.
    """
    credentials = CassandraConnection()
    if objControl.db == 'dbquart':
        user, password = (credentials.cc_user_dbquart,
                          credentials.cc_pwd_dbquart)
    else:
        user, password = credentials.cc_user_test, credentials.cc_pwd_test

    return Cluster(cloud=cloud_config,
                   auth_provider=PlainTextAuthProvider(user, password))
Esempio n. 3
0
def extractAndReadZIP_SQL(directory, zipToRead, rfc_solicitante):
    """Insert the CFDI XML documents stored in a ZIP file into SQL tables.

    The work is done in three passes:

    1. Scan every XML and collect ``<table>_<attribute>`` field names from
       the ``Comprobante`` node and from every node with fewer than two
       children (skipped for the column list when ``lsCustomFields`` is set).
    2. Build the column list of the ``insert`` statement (field names in
       lower case, plus ``id_solicitud`` at the end).
    3. Build one row per XML and insert it into ``Emisor``, ``Receptor`` or
       ``Pago``, chosen from the role that ``rfc_solicitante`` plays in the
       document and the document's ``TipoDeComprobante``.

    After all documents, ``solicitud.conteo`` is set to 1 for
    ``ID_CURRENT_SOLICITUD``.

    Args:
        directory: Folder containing the ZIP.
        zipToRead: File name of the ZIP inside ``directory``.
        rfc_solicitante: RFC compared against the Emisor/Receptor RFC of each
            document to choose the destination table.

    Returns:
        None. Rows are written through ``bd`` and a summary is printed.
    """
    objControl = cInternalControl()
    #Change / to \\ if necessary
    separationFolder = '\\' if objControl.testingMode else '/'

    #Accepted spellings of TipoDeComprobante that go to the Emisor/Receptor
    #tables; every other value goes to "Pago"
    lsIngresoEgreso = ('I', 'Ingreso', 'ingreso', 'E', 'Egreso', 'egreso')
    #Fields that must not go through transforValuesToSQLFormat
    lsFieldsNotToMatch = [
        'comprobante_subtotal', 'impuestos_totalimpuestosretenidos',
        'impuestos_totalimpuestostrasladados', 'comprobante_total',
        'comprobante_descuento', 'id_solicitud'
    ]

    contDocs = 0
    #The context manager guarantees the ZIP is closed even if a document fails
    with zipfile.ZipFile(directory + separationFolder + zipToRead,
                         'r') as myZip:
        #dicTableFields has the structure key: table, value: list of fields
        dicTableFields = {}
        #First, get all the columns for all the tables
        for xml in myZip.namelist():
            with myZip.open(xml) as doc_xml:
                root = ET.parse(doc_xml).getroot()
            for node in root.iter():
                #Column = attribute, Node = Table
                #Tags come as "{namespace}Node"; keep only the local name.
                #rsplit(...)[-1] also copes with tags that carry no namespace.
                tableName = str(node.tag).rsplit('}', 1)[-1]
                #len(node) counts the direct children; Element.getchildren()
                #no longer exists since Python 3.9
                numOfNodes = len(node)
                #Only nodes with 0 or 1 children are read (we only want 1 row),
                #except "Comprobante", which is always read
                if numOfNodes < 2 or tableName == 'Comprobante':
                    #All the fields live in one single row, so two fields
                    #may share a name (ex: Rfc of Emisor and Receptor); the
                    #tableName prefix keeps them apart
                    for attr in node.attrib:
                        fieldName = tableName + '_' + attr
                        tableFields = dicTableFields.setdefault(tableName, [])
                        if fieldName not in tableFields:
                            tableFields.append(fieldName)

        #Second, build the list of fields
        #Extra fields in the beginning
        lsFields = ['nombreArchivo', 'mes']
        if len(objControl.lsCustomFields) == 0:
            #Flatten the per-table lists. Iterating the dictionary itself
            #would yield table names, which have no "_column" part and would
            #break the field parsing below.
            for tableFields in dicTableFields.values():
                lsFields.extend(tableFields)
        else:
            lsFields.extend(objControl.lsCustomFields)

        for field in objControl.lsRemove:
            if field in lsFields:
                lsFields.remove(field)

        #Add id_solicitud at the end
        lsFields.append('id_solicitud')
        #lsFieldsSQL holds the field names as used in the statement: the same
        #names in lower case ("Comprobante_Fecha" -> "comprobante_fecha")
        lsFieldsSQL = [x.lower() for x in lsFields]
        #The way they appear in the statement: insert into ... (field1,field2)
        fieldsInStatement = '(' + ','.join(lsFieldsSQL) + ')'
        #Fields whose values are passed to transforValuesToSQLFormat; the
        #list is the same for every document, so it is built once
        lsFieldsToMatch = [
            field for field in lsFieldsSQL if field not in lsFieldsNotToMatch
        ]

        #Third, read information and insert where belongs
        # getroot() : gets the root of the xml, which is "Comprobante"
        # root.find(.//...): gets any node inside the root
        for xml in myZip.namelist():
            with myZip.open(xml) as doc_xml:
                root = ET.parse(doc_xml).getroot()

            #Choose the table from the role of rfc_solicitante and from
            #TipoDeComprobante. It is reset for every document so a value
            #left over from the previous document is never reused.
            tableSQL = None
            for item in ('Emisor', 'Receptor'):
                node = returnFoundNode(root, item)
                if len(node) > 0 and node[0].get('Rfc') == rfc_solicitante:
                    if root.get('TipoDeComprobante') in lsIngresoEgreso:
                        tableSQL = item
                    else:
                        #"Pago" and any other type of document
                        tableSQL = 'Pago'
                    break

            if tableSQL is None:
                #rfc_solicitante is neither Emisor nor Receptor: there is no
                #table for this document, so skip it.
                print('File skipped (RFC is neither Emisor nor Receptor):',
                      xml)
                continue

            #Start to read the fields from lsFields
            #Example of a field: "Comprobante_Version" -> "tableName_Field"
            #One row per xml; the field list drives the insertion
            lsRow = []
            for field in lsFields:
                if field == 'nombreArchivo':
                    lsRow.append(xml)
                    continue
                if field == 'mes':
                    fechaFactura = root.get('Fecha')
                    #The second "-" separated chunk of the date is the month
                    monthWord = returnMonthWord(
                        int(fechaFactura.split('-')[1]))
                    lsRow.append(monthWord)
                    continue
                if field == 'id_solicitud':
                    lsRow.append(ID_CURRENT_SOLICITUD)
                    continue

                #Rest of cases. Split only on the first "_" so attribute
                #names that contain "_" themselves are kept whole.
                chunks = field.split('_', 1)
                table = chunks[0]
                column = chunks[1]
                if table == 'Comprobante':
                    addColumnIfFound_SQL(root, column, lsRow, '0')
                else:
                    #Find the right prefix for table
                    lsNode = returnFoundNode(root, table)
                    if len(lsNode) == 1:
                        addColumnIfFound_SQL(lsNode[0], column, lsRow, '0')
                    elif len(lsNode) > 1:
                        #More than 1 table (node) found with the same name
                        #NOTE(review): every node with attributes triggers an
                        #addColumnIfFound_SQL call; confirm this cannot add
                        #more than one value for a single field.
                        bTableWithField = False
                        for node in root.findall('.//' +
                                                 objControl.prefixCFDI + table):
                            if len(node.attrib) > 0:
                                #Nodes without attributes only hold children,
                                #so they are skipped
                                addColumnIfFound_SQL(node, column, lsRow, '0')
                                bTableWithField = True
                        if not bTableWithField:
                            #The table exists, but no node had attributes
                            lsRow.append('0')
                    else:
                        #No table name found
                        lsRow.append('0')
                #End of field iteration

            #Convert lsRow into a correct value list for SQL
            for field in lsFieldsToMatch:
                transforValuesToSQLFormat(field, lsFieldsSQL, lsRow)
            valuesInSatement = ",".join(lsRow)
            #NOTE(review): the statement is assembled by string concatenation
            #from values read out of the XML files. If bd offers parameterized
            #execution, it should be used here to rule out SQL injection.
            finalCmd = "insert into " + tableSQL + " " + fieldsInStatement + " values (" + valuesInSatement + ") ;"
            bd.getQueryOrExecuteTransaction_NoReturning(finalCmd)
            contDocs += 1
            #End of each document (xml) iteration in a zip

    #All xml processed at this point
    cmd = "update solicitud set conteo=1 where id=" + ID_CURRENT_SOLICITUD + ";"
    bd.getQueryOrExecuteTransaction_NoReturning(cmd)
    print('Files processed in ZIP file:', str(contDocs))