def process_bulk_docs(self, docs):
    if len(docs) == 0:
        return True

    pillow_logging.info("Processing batch of %s docs", len(docs))

    changes = [self._doc_to_change(doc) for doc in docs]
    error_collector = ErrorCollector()

    bulk_changes = build_bulk_payload(self.index_info, changes, self.doc_transform, error_collector)

    for change, exception in error_collector.errors:
        pillow_logging.error("Error processing doc %s: %s", change.id, exception)

    payloads = prepare_bulk_payloads(bulk_changes, MAX_PAYLOAD_SIZE)
    if len(payloads) > 1:
        pillow_logging.info("Payload split into %s parts", len(payloads))

    for payload in payloads:
        success = self._send_payload_with_retries(payload)
        if not success:
            # stop the reindexer if we're unable to send a payload to ES
            return False

    return True

def process_bulk_docs(self, docs):
    if len(docs) == 0:
        return True

    pillow_logging.info("Processing batch of %s docs", len(docs))

    changes = [self._doc_to_change(doc) for doc in docs]
    error_collector = ErrorCollector()

    bulk_changes = build_bulk_payload(self.index_info, changes, self.doc_transform, error_collector)

    for change, exception in error_collector.errors:
        pillow_logging.error("Error processing doc %s: %s", change.id, exception)

    max_payload_size = pow(10, 8)  # ~100 MB
    payloads = prepare_bulk_payloads(bulk_changes, max_payload_size)
    if len(payloads) > 1:
        pillow_logging.info("Payload split into %s parts", len(payloads))

    for payload in payloads:
        success = self._send_payload_with_retries(payload)
        if not success:
            # stop the reindexer if we're unable to send a payload to ES
            return False

    return True

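# Note: ErrorCollector is not shown in this excerpt. Below is a hypothetical
# minimal sketch inferred from its usage above (instantiated empty, passed to
# build_bulk_payload, then iterated via its `errors` attribute); the actual
# class may differ.
class ErrorCollector(object):

    def __init__(self):
        # (change, exception) pairs recorded while building the bulk payload
        self.errors = []

    def add_error(self, change, exception):
        self.errors.append((change, exception))
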
def test_prepare_bulk_payloads_unicode(self):
    unicode_domain = u'हिंदी'
    bulk_changes = [
        {'id': 'doc1'},
        {'id': 'doc2', 'domain': unicode_domain},
    ]
    payloads = prepare_bulk_payloads(bulk_changes, max_size=10, chunk_size=1)
    self.assertEqual(2, len(payloads))
    self.assertEqual(unicode_domain, json.loads(payloads[1])['domain'])

def test_prepare_bulk_payloads2(self, max_size, chunk_size, expected_payloads):
    bulk_changes = [{'id': 'doc%s' % i} for i in range(10)]
    payloads = prepare_bulk_payloads(bulk_changes, max_size=max_size, chunk_size=chunk_size)
    self.assertEqual(expected_payloads, len(payloads))
    self.assertTrue(all(payloads))

    # check that we can reform the original list of changes
    json_docs = ''.join(payloads).strip().split('\n')
    reformed_changes = [json.loads(doc) for doc in json_docs]
    self.assertEqual(bulk_changes, reformed_changes)

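# For reference, a minimal sketch of what prepare_bulk_payloads could look
# like, inferred from the tests above: changes are serialized to
# newline-delimited JSON in chunks of `chunk_size` docs, and a new payload is
# started whenever adding a chunk would push the current one past `max_size`
# bytes. This is an assumption based on the test behavior, not the actual
# implementation.
import json


def prepare_bulk_payloads(bulk_changes, max_size, chunk_size=100):
    payloads = []
    current, current_size = [], 0
    for i in range(0, len(bulk_changes), chunk_size):
        chunk = ''.join('%s\n' % json.dumps(doc) for doc in bulk_changes[i:i + chunk_size])
        # flush the current payload if this chunk would exceed the size cap
        if current and current_size + len(chunk) > max_size:
            payloads.append(''.join(current))
            current, current_size = [], 0
        current.append(chunk)
        current_size += len(chunk)
    if current:
        payloads.append(''.join(current))
    return payloads
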
def process_bulk(self, changes):
    if not changes:
        return
    self.allow_updates = False
    self.bulk = True
    bstart = datetime.utcnow()
    bulk_changes = self.bulk_builder(changes)

    max_payload_size = pow(10, 8)  # ~100 MB
    payloads = prepare_bulk_payloads(bulk_changes, max_payload_size)
    if len(payloads) > 1:
        pillow_logging.info("%s,payload split into %s parts" % (self.get_name(), len(payloads)))

    pillow_logging.info(
        "%s,prepare_bulk,%s" % (self.get_name(), str(ms_from_timedelta(datetime.utcnow() - bstart) / 1000.0)))

    send_start = datetime.utcnow()
    for payload in payloads:
        self.send_bulk(payload)

    pillow_logging.info(
        "%s,send_bulk,%s" % (self.get_name(), str(ms_from_timedelta(datetime.utcnow() - send_start) / 1000.0)))

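# The timing logs above divide ms_from_timedelta(...) by 1000.0 to report
# seconds, which suggests the helper returns milliseconds. A minimal sketch
# under that assumption (the real helper may differ):
def ms_from_timedelta(td):
    # Convert a timedelta to fractional milliseconds.
    return td.total_seconds() * 1000.0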