Example #1
    def process_bulk_docs(self, docs):
        if len(docs) == 0:
            return True

        pillow_logging.info("Processing batch of %s docs", len((docs)))

        changes = [self._doc_to_change(doc) for doc in docs]
        error_collector = ErrorCollector()

        bulk_changes = build_bulk_payload(self.index_info, changes, self.doc_transform, error_collector)

        for change, exception in error_collector.errors:
            pillow_logging.error("Error procesing doc %s: %s", change.id, exception)

        payloads = prepare_bulk_payloads(bulk_changes, MAX_PAYLOAD_SIZE)
        if len(payloads) > 1:
            pillow_logging.info("Payload split into %s parts" % len(payloads))

        for payload in payloads:
            success = self._send_payload_with_retries(payload)
            if not success:
                # stop the reindexer if we're unable to send a payload to ES
                return False

        return True
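
Note: _send_payload_with_retries is not shown in any of these examples; the calling code only reveals its (self, payload) signature and that it returns a success boolean. A minimal sketch of what such a helper might look like, assuming an elasticsearch-py client on self.es and exponential backoff (both assumptions, not part of the original code):

    import time

    def _send_payload_with_retries(self, payload, retries=3):
        # Hypothetical helper: only the (self, payload) signature and the
        # boolean return are visible above. self.es is an assumed
        # elasticsearch-py client.
        for attempt in range(retries):
            try:
                self.es.bulk(body=payload)  # send the newline-delimited bulk body
                return True
            except Exception:
                time.sleep(2 ** attempt)  # back off before the next attempt
        return False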
Example #2
    def process_bulk_docs(self, docs):
        if len(docs) == 0:
            return True

        pillow_logging.info("Processing batch of %s docs", len((docs)))

        changes = [self._doc_to_change(doc) for doc in docs]
        error_collector = ErrorCollector()

        bulk_changes = build_bulk_payload(self.index_info, changes, self.doc_transform, error_collector)

        for change, exception in error_collector.errors:
            pillow_logging.error("Error procesing doc %s: %s", change.id, exception)

        max_payload_size = pow(10, 8)  # ~100 MB
        payloads = prepare_bulk_payloads(bulk_changes, max_payload_size)
        if len(payloads) > 1:
            pillow_logging.info("Payload split into %s parts" % len(payloads))

        for payload in payloads:
            success = self._send_payload_with_retries(payload)
            if not success:
                # stop the reindexer if we're unable to send a payload to ES
                return False

        return True
Example #3
    def test_prepare_bulk_payloads_unicode(self):
        unicode_domain = u'हिंदी'
        bulk_changes = [
            {'id': 'doc1'},
            {'id': 'doc2', 'domain': unicode_domain},
        ]
        payloads = prepare_bulk_payloads(bulk_changes, max_size=10, chunk_size=1)
        self.assertEqual(2, len(payloads))
        self.assertEqual(unicode_domain, json.loads(payloads[1])['domain'])
Example #4
def test_prepare_bulk_payloads2(self, max_size, chunk_size, expected_payloads):
    bulk_changes = [{'id': 'doc%s' % i} for i in range(10)]
    payloads = prepare_bulk_payloads(bulk_changes, max_size=max_size, chunk_size=chunk_size)
    self.assertEqual(expected_payloads, len(payloads))
    self.assertTrue(all(payloads))

    # check that we can re-form the original list of changes
    json_docs = ''.join(payloads).strip().split('\n')
    reformed_changes = [json.loads(doc) for doc in json_docs]
    self.assertEqual(bulk_changes, reformed_changes)
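
The two tests above pin down the contract of prepare_bulk_payloads: each change is serialized as one JSON document per line, changes are grouped into chunks of chunk_size, and a new payload is started once the current one would exceed max_size. A minimal sketch consistent with that behavior (a reconstruction for illustration, not the actual implementation; the default chunk_size is a guess):

    import json

    def prepare_bulk_payloads(bulk_changes, max_size, chunk_size=100):
        # Serialize each change as newline-terminated JSON, pack chunks of
        # chunk_size docs into payloads, and start a new payload once the
        # current one would grow past max_size.
        # NOTE: len() counts characters here; the real function presumably
        # measures encoded byte size.
        payloads = []
        current = ''
        for start in range(0, len(bulk_changes), chunk_size):
            chunk = bulk_changes[start:start + chunk_size]
            serialized = ''.join(json.dumps(doc) + '\n' for doc in chunk)
            if current and len(current) + len(serialized) > max_size:
                payloads.append(current)
                current = ''
            current += serialized
        if current:
            payloads.append(current)
        return payloads

This reproduces the observed behavior: with max_size=10 and chunk_size=1 the two documents in the Unicode test land in separate payloads, and joining all payloads and splitting on newlines re-forms the original list of changes.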
Example #5
    def process_bulk(self, changes):
        if not changes:
            return
        self.allow_updates = False
        self.bulk = True
        bstart = datetime.utcnow()
        bulk_changes = self.bulk_builder(changes)

        max_payload_size = pow(10, 8)  # ~100 MB
        payloads = prepare_bulk_payloads(bulk_changes, max_payload_size)
        if len(payloads) > 1:
            pillow_logging.info("%s,payload split into %s parts" % (self.get_name(), len(payloads)))

        pillow_logging.info(
            "%s,prepare_bulk,%s", self.get_name(), ms_from_timedelta(datetime.utcnow() - bstart) / 1000.0)
        send_start = datetime.utcnow()
        for payload in payloads:
            self.send_bulk(payload)
        pillow_logging.info(
            "%s,send_bulk,%s", self.get_name(), ms_from_timedelta(datetime.utcnow() - send_start) / 1000.0)