Python Delete Beispiele

Programmiersprache: Python

Namespace / Paketname: etl_delete

Klasse / Typ: Delete

Beispiele auf hotexamples.com: 4

Python Delete - 4 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Klasse etl_delete.Delete, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um deren Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

Delete(3)

delete(2)

verbose(1)

Häufig verwendete Methoden

Delete (3)

delete (2)

verbose (1)

Beispiel #1

Datei anzeigen

Datei: test_etl_file.py Projekt: vmsv/open-semantic-etl

    def test_pdf(self):
        """Index test/test.pdf with the PDF OCR plugin and check the extracted fields.

        The file is deleted from the search index right after indexing; all
        assertions run against the ``data`` returned by ``index_file``.
        """

        etl_file = Connector_File()

        # the same path string is used for indexing and for the delete below
        filename = os.path.dirname(
            os.path.realpath(__file__)) + '/test/test.pdf'

        # run ETL of test.pdf with configured plugins and PDF OCR (result of etl_file.py)
        # (the returned parameters are not needed by this test)
        _parameters, data = etl_file.index_file(
            filename=filename, additional_plugins=['enhance_pdf_ocr'])

        # delete from search index
        etl_delete = Delete()
        etl_delete.delete(filename)

        # check extracted content type: either the plain PDF type or the PDF
        # type followed by the types of the embedded images
        self.assertIn(
            data['content_type_ss'],
            ('application/pdf',
             ['application/pdf', 'image/jpeg', 'image/png']))

        # check content type group which is mapped to this content type (result of plugin enhance_contenttype_group.py)
        self.assertIn(
            data['content_type_group_ss'],
            (['Text document'],
             ['Text document', 'Image', 'Image']))

        # check extracted title (result of plugin enhance_extract_text_tika_server.py)
        self.assertEqual(data['title_txt'], 'TestPDFtitle')

        # check extracted content of PDF text (result of plugin enhance_extract_text_tika_server.py)
        self.assertIn(
            'TestPDFContent1 on TestPDFPage1', data['content_txt'])
        self.assertIn(
            'TestPDFContent2 on TestPDFPage2', data['content_txt'])

        # check OCR of embedded images in PDF (result of plugin enhance_pdf_ocr.py)
        self.assertIn('TestPDFOCRImage1Content1', data['ocr_t'])
        self.assertIn('TestPDFOCRImage1Content2', data['ocr_t'])
        self.assertIn('TestPDFOCRImage2Content1', data['ocr_t'])
        self.assertIn('TestPDFOCRImage2Content2', data['ocr_t'])

        # check if a plugin threw an exception
        self.assertEqual(len(data['etl_error_plugins_ss']), 0)

Beispiel #2

Datei anzeigen

    def test_warc(self):
        """Index test/example.warc and check a document contained in the archive.

        The contained document is fetched through ``export_solr`` before both
        the archive file and the contained document are deleted from the index.
        """

        etl_file = Connector_File()
        exporter = export_solr()

        filename = os.path.dirname(
            os.path.realpath(__file__)) + '/test/example.warc'

        # run ETL of example.warc with configured plugins and warc extractor;
        # the return value is not used, the contained document is fetched
        # through the exporter below instead
        etl_file.index_file(filename=filename)

        contained_doc_id = 'http://example.com/<urn:uuid:a9c51e3e-0221-11e7-bf66-0242ac120005>'
        fields = ['id', 'title_txt', 'content_type_ss', 'content_txt']

        data = exporter.get_data(contained_doc_id, fields)

        # delete from search index
        etl_delete = Delete()
        etl_delete.delete(filename)
        etl_delete.delete(contained_doc_id)

        self.assertEqual(data['title_txt'], ['Example Domain'])

        self.assertEqual(data['content_type_ss'], ['text/html; charset=UTF-8'])

        self.assertIn(
            'This domain is established to be used for illustrative examples in documents.',
            data['content_txt'][0])

Beispiel #3

Datei anzeigen

    broker = os.getenv('OPEN_SEMANTIC_ETL_MQ_BROKER')

# Celery application for the ETL tasks; `broker` is assigned earlier in
# the module.
app = Celery('etl.tasks', broker=broker)

# Declare a single queue 'tasks' bound to the exchange 'tasks'.
# NOTE(review): 'x-max-priority' is presumably the RabbitMQ queue argument
# that enables message priorities (here up to 10) - confirm against the
# broker in use.
app.conf.CELERY_QUEUES = [
    Queue('tasks',
          Exchange('tasks'),
          routing_key='tasks',
          queue_arguments={'x-max-priority': 10})
]

# Worker settings, using the old-style uppercase Celery setting names.
# NOTE(review): per the Celery configuration docs these should mean
# "replace a worker child process after one task", "prefetch one message
# per worker process" and "acknowledge a message after the task has run" -
# confirm for the Celery version in use.
app.conf.CELERYD_MAX_TASKS_PER_CHILD = 1
app.conf.CELERYD_PREFETCH_MULTIPLIER = 1
app.conf.CELERY_ACKS_LATE = True

# Module-level instances created once at import time. etl_delete is used by
# the delete task below; etl_web and etl_rss are presumably used by tasks
# further down the file.
etl_delete = Delete()
etl_web = Connector_Web()
etl_rss = Connector_RSS()

#
# Delete document with URI from index
#


@app.task(name='etl.delete')
def delete(uri):
    """Celery task 'etl.delete': delete the document with the given URI from the index.

    Forwards ``uri`` to the module-level ``etl_delete`` instance and
    returns nothing.
    """
    etl_delete.delete(uri=uri)


#
# Index a file

Beispiel #4

Datei anzeigen

Datei: tasks.py Projekt: opensemanticsearch/open-semantic-etl

# ETL connectors
from etl import ETL
from etl_delete import Delete
from etl_file import Connector_File
from etl_web import Connector_Web
from etl_rss import Connector_RSS


# Module-level output flags.
# NOTE(review): their consumers are not visible in this excerpt.
verbose = True
quiet = False

# Celery application for the ETL tasks (no broker is passed explicitly here).
app = Celery('etl.tasks')
# NOTE(review): per the Celery configuration docs this should replace a
# worker child process after a single task - confirm for the Celery
# version in use.
app.conf.CELERYD_MAX_TASKS_PER_CHILD = 1

# Module-level instances created once at import time. etl_delete is used by
# the delete task below; etl_web and etl_rss are presumably used by tasks
# further down the file.
etl_delete = Delete()
etl_web = Connector_Web()
etl_rss = Connector_RSS()


#
# Delete document with URI from index
#

@app.task(name='etl.delete')
def delete(uri):
	"""Celery task 'etl.delete': delete the document with the given URI from the index.

	Forwards ``uri`` to the module-level ``etl_delete`` instance and
	returns nothing.
	"""
	etl_delete.delete(uri=uri)


#
# Index a file