Ejemplos de Extraction en Python

Lenguaje de programación: Python

Namespace/Package Name: app.extraction.models

Clase / Tipo: Extraction

Ejemplos en hotexamples.com: 3

Python Extraction: 3 ejemplos encontrados. Estos son los ejemplos reales de Python mejor valorados de app.extraction.models.Extraction, extraídos de proyectos de código abierto. Puedes valorar los ejemplos para ayudarnos a mejorar su calidad.

Métodos usados con frecuencia

Mostrar / Ocultar

get_extraction_by_page_id(2)

add_extraction(1)

get_extraction(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: tasks.py Proyecto: prabuuce/corpusbuilder

def retrieve_extraction(page_id,add_if_not_found=True):
    print "retrieving Extraction for ....%s" % (page_id)
    with app.test_request_context('/'): # this is to adjust for the fact that we are in celery content and not Flask context 
        app.preprocess_request()
    extraction = Extraction.get_extraction_by_page_id(page_id)
    if extraction is None:
        if add_if_not_found: # add a page
            extraction = Extraction.add_extraction(page_id)
        else:
            return extractionnotfound
    else:
        pass # do nothing
    #<-->We will not do boilerpipe extraction here... We are going to simply put an extraction page
    # But we will run a separate process that activates boilerpipe taking page.id and extraction.id.
    #boilerpipe_extract_and_populate.delay(page_id,extraction.id)
    
    #Using Rest API
    '''rExt = requests.get("http://127.0.0.1:5000/extractions", params={"page_id":page_id})

Ejemplo n.º 2

Mostrar archivo

Archivo: boilerpipe_wrapper.py Proyecto: prabuuce/corpusbuilder

 def extract_content(page_id, ext_id, htmlReturn=False):
     """Run boilerpipe on a stored page and save the result on its extraction.

     Args:
         page_id: Identifier passed to ``Page.get_page``.
         ext_id: Identifier passed to ``Extraction.get_extraction``.
         htmlReturn: When False (the default) the extractor's text output is
             stored; when True its HTML output is stored instead.

     Returns:
         ``badrequest()`` if either id is None or an empty string,
         ``documentnotfound()`` if the page or the extraction lookup returns
         None, ``nocontent()`` if the page has no content or the extractor
         yields None, and ``success()`` once the extraction has been updated.
     """
     # ``x is None or ""`` never catches an empty string (the literal is just
     # a falsy operand), so compare against it explicitly.
     if page_id is None or page_id == "" or ext_id is None or ext_id == "":
         return badrequest()

     page = Page.get_page(page_id)
     if page is None:
         return documentnotfound()

     extraction = Extraction.get_extraction(ext_id)
     if extraction is None:
         return documentnotfound()

     original_content = page.content
     # Equality, not identity: ``is ""`` depends on string interning.
     if original_content is None or original_content == "":
         return nocontent()

     # The current thread must be attached to the JVM before the extractor is
     # used (presumably because Extractor is JVM-backed -- confirm).
     if not jpype.isThreadAttachedToJVM():
         jpype.attachThreadToJVM()

     extractor = Extractor(extractor='DefaultExtractor', html=original_content)
     if htmlReturn:
         bp_content = extractor.getHTML()
     else:
         bp_content = extractor.getText()

     # Return here; otherwise None would be stored and success reported.
     if bp_content is None:
         return nocontent()

     extraction.update(bp_content=bp_content)
     return success()

Ejemplo n.º 3

Mostrar archivo

Archivo: tasks.py Proyecto: prabuuce/corpusbuilder

def boilerpipe_extract_and_populate(page_id=None, ext_id=None):
    """Ask the REST API to run a boilerpipe extraction for every stored page.

    Iterates over ``Page.get_all_pages()``, looks up each page's extraction
    and issues a GET to the local ``/extractions/bp/<page_id>,<ext_id>``
    endpoint. Pages that are None or have no extraction are skipped.

    Args:
        page_id: Not used by this implementation; kept so existing callers
            keep working.
        ext_id: Not used by this implementation; kept so existing callers
            keep working.

    Returns:
        None.
    """
    print("extracting using boilerpipe...")

    # Calling BoilerpipeExtraction.extract_content directly did not work from
    # here, so the extraction is triggered through the REST API instead.

    # Run the app's request preprocessing inside a test request context, since
    # this code runs under Celery rather than inside a Flask request.
    # NOTE(review): the context is exited before the queries below run --
    # confirm that is intended.
    with app.test_request_context('/'):
        app.preprocess_request()

    for page in Page.get_all_pages():
        if page is None:
            continue
        extraction = Extraction.get_extraction_by_page_id(page.id)
        if extraction is None:
            # Without this guard ``extraction.id`` raises AttributeError and
            # aborts the remaining pages.
            print("no extraction found for page %s, skipping" % (page.id))
            continue
        requests.get("http://127.0.0.1:5000/extractions/bp/%s,%s" % (page.id, extraction.id))
    return