Ejemplos de XML_PDFParser en Python

Lenguaje de programación: Python

Clase / Tipo: XML_PDFParser

Ejemplos en hotexamples.com: 2

Python XML_PDFParser - 2 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de XML_PDFParser, extraídos de proyectos de código abierto. Puedes valorar los ejemplos para ayudarnos a mejorar su calidad.

Métodos usados con frecuencia

Mostrar / Ocultar

buscar_fichero(2)

eliminar_simbolos_html(1)

formato_web(1)

fusionar_ficheros(1)

obten_grupo_xml(1)

obten_grupo_xml_multiple(1)

procesa_titulos_pdfs(1)

Ejemplo n.º 1

Mostrar archivo

    def procesar_dblp_local(self):
        """Filter the local ``dblp.xml`` file and extract the selected records.

        Reads the source/destination combo boxes, the search term, the field
        check boxes and the record-type radio buttons from ``self.ventana``.
        Processing only happens when both combo boxes are at index 0 (source
        index 0 is the DBLP Computer Science Bibliography); any other
        combination is silently ignored.

        For every checked field, the values returned by
        ``XML_PDFParser.buscar_fichero`` for that tag are accumulated in one
        list, which is sorted and passed to ``XML_PDFParser.obten_grupo_xml``
        together with the opening/closing tags of the first checked record
        type. ``progressBar_2`` is updated after each step and is set to 100
        once processing has run.

        Returns:
            Always ``1``, whether or not anything was processed.
        """
        ventana = self.ventana
        barra = ventana.progressBar_2
        barra.setValue(0)

        # Selected source and destination of the data.
        origen = ventana.ComboBoxOrigen.currentIndex()
        destino = ventana.ComboBoxDestino.currentIndex()

        # Term used to filter the records.
        termino_busqueda = str(ventana.TerminoBusqueda.toPlainText())

        fichero_origen = "dblp.xml"

        # Only the DBLP source (index 0) with destination index 0 is handled.
        if origen != 0 or destino != 0:
            return 1

        fichero_destino = ventana.NombreFichero.toPlainText() + ".xml"
        print(fichero_destino)

        # (check box, log message, tag to search under, progress value)
        campos = (
            (ventana.CheckBoxAutor, "Calculo Indices de Autor", "<author", 5),
            (ventana.CheckBoxEditor, "Calculo Indices de Editor", "<editor", 10),
            (ventana.CheckBoxPublicador, "Calculo Indices de Publicador", "<publisher", 15),
            (ventana.CheckBoxTitulo, "Calculo Indices de Titulo", "<title", 20),
        )

        indices = []
        for casilla, mensaje, etiqueta, progreso in campos:
            if not casilla.isChecked():
                continue
            print(mensaje)
            indices.extend(
                XML_PDFParser.buscar_fichero(fichero_origen, termino_busqueda, etiqueta)
            )
            barra.setValue(progreso)
            print(indices)

        indices.sort()

        # (radio button, element names to extract, progress value)
        # Opening and closing tags are both derived from the element name so
        # they can never disagree; the original paired "<inproceedings" with
        # "</www".
        tipos = (
            (ventana.RadioButtonArticulo, ("article",), 40),
            (ventana.RadioButtonColeccion, ("incollection",), 60),
            (ventana.RadioButtonDebate, ("proceedings",), 70),
            (ventana.RadioButtonTesis, ("phdthesis", "mastersthesis"), 90),
            (ventana.RadioButtonLibro, ("book",), 95),
            (ventana.RadioButtonOtros, ("www", "inproceedings"), 100),
        )

        # Only the first checked radio button is honoured.
        for boton, nombres, progreso in tipos:
            if not boton.isChecked():
                continue
            # NOTE(review): when several element names are extracted they all
            # go to the same destination file -- confirm that obten_grupo_xml
            # appends rather than overwrites.
            for nombre in nombres:
                XML_PDFParser.obten_grupo_xml(
                    fichero_origen,
                    fichero_destino,
                    indices,
                    "<" + nombre,
                    "</" + nombre,
                )
            barra.setValue(progreso)
            break

        print("PROCESADO FINALIZADO")
        barra.setValue(100)

        return 1

Ejemplo n.º 2

Mostrar archivo

# Command-line dispatch: either start the graphical interface, or download the
# DBLP dump and optionally filter, clean, format and merge it.
if args.interfaz_activa == 1:
    Interfaz_Usuario()

elif args.descarga_activa == 1:

    # Download the DBLP dump (the downloader is always called in this branch).
    URL = "http://dblp.uni-trier.de/xml/dblp.xml"
    URLInteract.descargar_fichero(URL)

    # Process the data only when requested and only for the "dblp" source.
    if args.procesar_activa == 1 and args.origen_datos == "dblp":

        # Opening/closing tags of every record type to extract.
        etiquetas_apertura = [
            "<article",
            "<incollection",
            "<proceedings",
            "<phdthesis",
            "<mastersthesis",
            "<book",
            "<www",
            "<inproceedings",
        ]
        etiquetas_cierre = [
            "</article>",
            "</incollection>",
            "</proceedings>",
            "</phdthesis>",
            "</mastersthesis>",
            "</book>",
            "</www>",
            "</inproceedings>",
        ]

        # Search with an empty tag argument, unlike the per-field GUI search.
        indices = XML_PDFParser.buscar_fichero(
            "dblp.xml", args.termino_busqueda, ""
        )
        print(indices)

        # Paths of the intermediate and final output files.
        destino_base = args.fichero_destino
        temporal_bruto = destino_base + "_temp.txt"
        temporal_limpio = destino_base + "_temp2.txt"
        salida_final = destino_base + ".txt"

        # Pipeline: extract the records, remove HTML symbols, apply the web
        # format.
        XML_PDFParser.obten_grupo_xml_multiple(
            "dblp.xml",
            temporal_bruto,
            indices,
            etiquetas_apertura,
            etiquetas_cierre,
        )
        XML_PDFParser.eliminar_simbolos_html(temporal_bruto, temporal_limpio)
        XML_PDFParser.formato_web(temporal_limpio, salida_final)

        # Optionally merge the result with another file.
        if args.fusionar_activa == 1:
            XML_PDFParser.fusionar_ficheros(
                args.fichero_fusion,
                salida_final,
                destino_base + "_fusionado.txt",
            )