def test_documents_in_sequence(self):
    """Check that a sequence keeps its documents alive until it is cleaned up.

    Nine documents (sequence numbers 1 to 9) are added to one sequence while
    only weak references to them are kept locally.  After a garbage
    collection the sequence and every document must still be reachable;
    after ``cleanup()`` and dropping the sequence none of them may be.
    """
    doc_refs = []

    doc1 = self._generate_document(sequence_number=1)
    doc_refs.append(weakref.ref(doc1))
    sequence = EBUTT3DocumentSequence.create_from_document(doc1)
    sequence.add_document(doc1)
    seq_ref = weakref.ref(sequence)
    # Drop the local strong reference so only the sequence can keep doc1 alive.
    del doc1

    # ``range`` rather than ``xrange``: the latter only exists on Python 2,
    # while ``range`` iterates the same numbers on both Python 2 and 3.
    for number in range(2, 10):
        doc = self._generate_document(
            sequence_number=number,
            offset=timedelta(seconds=5 * number)
        )
        doc.validate()
        doc.get_xml()
        doc_refs.append(weakref.ref(doc))
        sequence.add_document(doc)
        # No local name may outlive the loop body holding a document.
        del doc

    gc.collect()

    # Everything is still referenced through the sequence.
    self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
    for item in doc_refs:
        self.assertIsInstance(item(), EBUTT3Document)

    sequence.cleanup()
    del sequence
    gc.collect()

    # With the sequence gone nothing may keep the documents alive.
    self.assertIsNone(seq_ref())
    for item in doc_refs:
        self.assertIsNone(item())
def process_document(self, document, **kwargs):
    """Add an incoming document to the consumer's sequence.

    Input for which ``is_document`` is false is ignored.  The sequence is
    created from the first document that arrives; the reference clock is
    taken from that sequence unless one is already set.  A document that
    arrives without an availability time is stamped with the reference
    clock's current time.

    :param document: the incoming object to process
    :param kwargs: accepted for interface compatibility; not used here
    """
    if not self.is_document(document):
        return

    self.limit_sequence_to_one(document)

    if self._sequence is None:
        # First document seen: build the sequence from it.
        creation_notice = 'Creating document sequence from first document {}'.format(
            document
        )
        log.info(creation_notice)
        self._sequence = EBUTT3DocumentSequence.create_from_document(
            document, verbose=self._verbose
        )
        if self._reference_clock is None:
            self._reference_clock = self._sequence.reference_clock

    if document.availability_time is None:
        document.availability_time = self._reference_clock.get_time()

    received_notice = DOC_RECEIVED.format(
        sequence_number=document.sequence_number,
        sequence_identifier=document.sequence_identifier,
        computed_begin_time=document.computed_begin_time,
        computed_end_time=document.computed_end_time
    )
    document_logger.info(received_notice)

    try:
        self._sequence.add_document(document)
    except SequenceNumberCollisionError:
        # A repeated sequence number is logged and otherwise ignored.
        duplicate_notice = 'Consumer ignoring duplicate seq number: {}'.format(
            document.sequence_number
        )
        log.info(duplicate_notice)
def test_discard_partial_sequence(self):
    """Check that ``discard_before`` releases only the earlier documents.

    Three documents are added to a sequence and tracked through weak
    references.  After ``discard_before`` is called with the second
    document, the first must be collectable while the second and third
    must still be alive.
    """
    first = self._generate_document(sequence_number=1, offset=timedelta(seconds=0))
    second = self._generate_document(sequence_number=2, offset=timedelta(seconds=5))
    third = self._generate_document(sequence_number=3, offset=timedelta(seconds=10))

    doc_refs = [weakref.ref(first), weakref.ref(second), weakref.ref(third)]
    discarded_ref, pivot_ref, later_ref = doc_refs

    sequence = EBUTT3DocumentSequence.create_from_document(first)
    seq_ref = weakref.ref(sequence)

    sequence.add_document(first)
    sequence.add_document(second)
    sequence.add_document(third)

    # From here on only the sequence holds strong references to the documents.
    del first, second, third
    gc.collect()

    self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
    for ref in doc_refs:
        self.assertIsInstance(ref(), EBUTT3Document)

    sequence.discard_before(pivot_ref())
    gc.collect()

    self.assertIsNone(discarded_ref())
    self.assertIsInstance(pivot_ref(), EBUTT3Document)
    self.assertIsInstance(later_ref(), EBUTT3Document)
def test_documents_in_sequence(self):
    """Verify document lifetime is tied to the owning sequence.

    Documents with sequence numbers 1 to 9 are added to a single sequence
    and tracked only through weak references.  The test asserts that they
    survive a garbage collection while the sequence exists, and that they
    (and the sequence) are gone once ``cleanup()`` has been called and the
    sequence has been deleted.
    """
    doc_refs = []

    doc1 = self._generate_document(sequence_number=1)
    doc_refs.append(weakref.ref(doc1))
    sequence = EBUTT3DocumentSequence.create_from_document(doc1)
    sequence.add_document(doc1)
    seq_ref = weakref.ref(sequence)
    # The local name must not keep the first document alive.
    del doc1

    # ``xrange`` is Python 2-only; ``range`` yields the same values on
    # Python 2 and Python 3.
    for number in range(2, 10):
        doc = self._generate_document(
            sequence_number=number,
            offset=timedelta(seconds=5 * number),
        )
        doc.validate()
        doc.get_xml()
        doc_refs.append(weakref.ref(doc))
        sequence.add_document(doc)
        # Release the loop-local strong reference before collecting.
        del doc

    gc.collect()

    # Still alive: the sequence references every document.
    self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
    for item in doc_refs:
        self.assertIsInstance(item(), EBUTT3Document)

    sequence.cleanup()
    del sequence
    gc.collect()

    # Nothing may remain reachable once the sequence has been released.
    self.assertIsNone(seq_ref())
    for item in doc_refs:
        self.assertIsNone(item())
def process_document(self, document, **kwargs):
    """
    Decide whether an incoming document is re-emitted on the output sequence.

    The specified functionality is met by keeping the following priorities
    in mind when processing an incoming document:

      - If this is a new sequence_identifier+sequence_number pair and
        authorsGroupIdentifier+authorsGroupControlToken are defined and
        authorsGroupIdentifier matches the configuration
        - If sequenceIdentifier matches the one selected the document
          should be emitted
        - If sequenceIdentifier does not match the one selected or one is
          not selected
          - If the token is higher than our current one or one is not set
            the document should be emitted
          - If the token is lower than our current one or missing the
            document should be ignored
      - If the document should be emitted
        - Set/update current sequenceIdentifier in the node from the one
          in the document
        - Set/update current authorsGroupControlToken in the node
        - Reassign document to output sequenceIdentifier
        - Assign new sequenceNumber to document
        - Emit document

    Input for which ``is_document`` is false is forwarded as is, with only
    its ``sequence_identifier`` rewritten to the output sequence identifier.

    :param document: the incoming object to process
    :param kwargs: passed through unchanged to ``producer_carriage.emit_data``
    """
    if not self.is_document(document):
        document.sequence_identifier = self._sequence_identifier
        self.producer_carriage.emit_data(data=document, **kwargs)
        return

    # This step is necessary to ensure that the authors group does not
    # change in a sequence.  The sequence object is only built the first
    # time an identifier is seen; ``dict.setdefault`` would construct (and
    # throw away) a new one for every single document.
    input_sequence_id = document.sequence_identifier
    if input_sequence_id not in self._known_sequences:
        self._known_sequences[input_sequence_id] = \
            EBUTT3DocumentSequence.create_from_document(document=document)
    self._known_sequences[input_sequence_id].is_compatible(document=document)

    emit = False

    # NOTE(review): per the description above this branch is meant for
    # sequence_identifier+sequence_number pairs not seen before, so
    # check_if_document_seen presumably returns True for new pairs - confirm.
    if self.check_if_document_seen(document=document) is True \
            and document.authors_group_identifier == self._authors_group_identifier \
            and document.authors_group_control_token is not None:
        if self._current_token is None \
                or self._current_token < document.authors_group_control_token:
            # Switch input
            self._current_selected_input_sequence_id = document.sequence_identifier
            emit = True
        elif self._current_selected_input_sequence_id == document.sequence_identifier:
            emit = True

    if not emit:
        return

    # Update token
    self._current_token = document.authors_group_control_token
    # NOTE(review): the original author left an open question at this point
    # ("So are we going to error here?"); no error handling exists here.
    self._last_sequence_number += 1
    # Remember the input sequence before the identifier is overwritten.
    document.authors_group_selected_sequence_identifier = document.sequence_identifier
    document.sequence_identifier = self._sequence_identifier
    document.sequence_number = self._last_sequence_number
    self.producer_carriage.emit_data(data=document, **kwargs)
def process_document(self, document):
    """Add ``document`` to the node's sequence, creating the sequence if needed.

    The first document received is used to build the sequence, and the
    node's reference clock is taken from that sequence.  A document without
    an availability time is stamped with the reference clock's current time.

    :param document: the incoming document
    """
    if self._sequence is None:
        # Nothing received so far: this document defines the sequence.
        creation_notice = "Creating document sequence from first document {}".format(document)
        log.info(creation_notice)
        self._sequence = EBUTT3DocumentSequence.create_from_document(document)
        self._reference_clock = self._sequence.reference_clock

    if document.availability_time is None:
        document.availability_time = self._reference_clock.get_time()

    received_notice = DOC_RECEIVED.format(
        sequence_number=document.sequence_number,
        sequence_identifier=document.sequence_identifier,
        computed_begin_time=document.computed_begin_time,
        computed_end_time=document.computed_end_time,
    )
    document_logger.info(received_notice)

    self._sequence.add_document(document)
def process_document(self, document):
    """Record an incoming document in the node's document sequence.

    When no sequence exists yet it is created from ``document`` and its
    reference clock becomes the node's reference clock.  If the document
    has no availability time, the reference clock's current time is used.

    :param document: the incoming document
    """
    if self._sequence is None:
        # Create sequence from document
        first_document_notice = 'Creating document sequence from first document {}'.format(
            document
        )
        log.info(first_document_notice)
        self._sequence = EBUTT3DocumentSequence.create_from_document(document)
        self._reference_clock = self._sequence.reference_clock

    if document.availability_time is None:
        document.availability_time = self._reference_clock.get_time()

    log_fields = dict(
        sequence_number=document.sequence_number,
        sequence_identifier=document.sequence_identifier,
        computed_begin_time=document.computed_begin_time,
        computed_end_time=document.computed_end_time
    )
    document_logger.info(DOC_RECEIVED.format(**log_fields))

    self._sequence.add_document(document)
def test_discard_partial_sequence(self):
    """Ensure ``discard_before`` drops only documents ahead of the given one.

    Three documents are held solely by the sequence (the test keeps weak
    references).  Discarding before the second document must let the first
    be garbage collected and leave the second and third alive.
    """
    doc_a = self._generate_document(sequence_number=1, offset=timedelta(seconds=0))
    doc_b = self._generate_document(sequence_number=2, offset=timedelta(seconds=5))
    doc_c = self._generate_document(sequence_number=3, offset=timedelta(seconds=10))

    ref_a = weakref.ref(doc_a)
    ref_b = weakref.ref(doc_b)
    ref_c = weakref.ref(doc_c)
    doc_refs = [ref_a, ref_b, ref_c]

    sequence = EBUTT3DocumentSequence.create_from_document(doc_a)
    seq_ref = weakref.ref(sequence)

    sequence.add_document(doc_a)
    sequence.add_document(doc_b)
    sequence.add_document(doc_c)

    # Remove every local strong reference to the documents.
    del doc_a, doc_b, doc_c
    gc.collect()

    self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
    for ref in doc_refs:
        self.assertIsInstance(ref(), EBUTT3Document)

    sequence.discard_before(ref_b())
    gc.collect()

    self.assertIsNone(ref_a())
    self.assertIsInstance(ref_b(), EBUTT3Document)
    self.assertIsInstance(ref_c(), EBUTT3Document)
def then_adding_doc2_success(doc_list, template_file, template_dict):
    """Render a second document and add it to a sequence built from the first.

    The step passes when ``add_document`` returns without raising.

    :param doc_list: documents created earlier; the first seeds the sequence
    :param template_file: template whose ``render`` yields the document XML
    :param template_dict: values handed to ``template_file.render``
    """
    rendered_xml = template_file.render(template_dict)
    second_document = EBUTT3Document.create_from_xml(rendered_xml)
    first_document = doc_list[0]
    EBUTT3DocumentSequence.create_from_document(first_document).add_document(second_document)
def then_adding_doc2_error(doc_list, template_file, template_dict):
    """Render a second document and expect adding it to the sequence to fail.

    The sequence is built from the first document in ``doc_list``; only the
    ``add_document`` call is expected to raise.

    :param doc_list: documents created earlier; the first seeds the sequence
    :param template_file: template whose ``render`` yields the document XML
    :param template_dict: values handed to ``template_file.render``
    """
    rendered_xml = template_file.render(template_dict)
    second_document = EBUTT3Document.create_from_xml(rendered_xml)
    first_document = doc_list[0]
    sequence = EBUTT3DocumentSequence.create_from_document(first_document)

    # Any exception type satisfies this step.
    with pytest.raises(Exception):
        sequence.add_document(second_document)
def then_adding_doc2_success(doc_list, template_file, template_dict):
    """Check that a freshly rendered document can join the first document's sequence.

    No explicit assertion is made: the step fails only if one of the calls
    raises.

    :param doc_list: documents created earlier; index 0 seeds the sequence
    :param template_file: template object providing ``render``
    :param template_dict: rendering context for ``template_file``
    """
    new_document = EBUTT3Document.create_from_xml(
        template_file.render(template_dict)
    )
    target_sequence = EBUTT3DocumentSequence.create_from_document(doc_list[0])
    target_sequence.add_document(new_document)
def then_adding_doc2_error(doc_list, template_file, template_dict):
    """Check that a freshly rendered document is rejected by the first document's sequence.

    Rendering, parsing and sequence creation happen outside the
    ``pytest.raises`` context, so only ``add_document`` may raise.

    :param doc_list: documents created earlier; index 0 seeds the sequence
    :param template_file: template object providing ``render``
    :param template_dict: rendering context for ``template_file``
    """
    new_document = EBUTT3Document.create_from_xml(
        template_file.render(template_dict)
    )
    target_sequence = EBUTT3DocumentSequence.create_from_document(doc_list[0])

    with pytest.raises(Exception):
        target_sequence.add_document(new_document)
def create_sequence_from_document(self, document):
    """Replace the node's sequence with one created from ``document``.

    The node's verbosity setting is forwarded to the new sequence.  An
    already configured reference clock is kept; otherwise the new
    sequence's reference clock is adopted.

    :param document: the document the new sequence is created from
    """
    self._sequence = EBUTT3DocumentSequence.create_from_document(
        document,
        verbose=self._verbose
    )

    if self._reference_clock is not None:
        # A clock is already in place; leave it untouched.
        return

    self._reference_clock = self._sequence.reference_clock