def test_documents_in_sequence(self):
    """
    Check that a sequence keeps its documents alive until it is cleaned up.

    Nine documents (sequence numbers 1-9) are added to a sequence and every
    local name pointing at them is deleted, leaving only weak references in
    the test. After a garbage collection the sequence and all documents must
    still be reachable; after ``sequence.cleanup()`` and deleting the
    sequence itself, all of the weak references must be dead.
    """
    doc_refs = []
    doc1 = self._generate_document(
        sequence_number=1
    )
    doc_refs.append(weakref.ref(doc1))
    sequence = EBUTT3DocumentSequence.create_from_document(doc1)
    sequence.add_document(doc1)
    seq_ref = weakref.ref(sequence)
    # Drop the local strong reference so only the sequence can keep doc1 alive.
    del doc1
    # ``range`` instead of ``xrange``: identical iteration here, and the name
    # also exists on Python 3.
    for number in range(2, 10):
        doc = self._generate_document(
            sequence_number=number,
            offset=timedelta(seconds=5*number)
        )
        doc.validate()
        doc.get_xml()
        doc_refs.append(weakref.ref(doc))
        sequence.add_document(doc)
        # The loop variable would otherwise pin the last document after the loop.
        del doc
    gc.collect()
    self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
    for item in doc_refs:
        self.assertIsInstance(item(), EBUTT3Document)
    sequence.cleanup()
    del sequence
    gc.collect()
    self.assertIsNone(seq_ref())
    for item in doc_refs:
        self.assertIsNone(item())
Exemple #2
0
    def process_document(self, document, **kwargs):
        """
        Add an incoming document to this node's sequence.

        Anything for which ``self.is_document`` is false is ignored. For a
        document, the sequence is created from it if none exists yet (the
        sequence's reference clock is adopted when the node has no clock of
        its own), a missing availability time is filled in from the reference
        clock, the reception is logged, and the document is added to the
        sequence. A sequence number collision is logged and otherwise ignored.

        :param document: the incoming object to process
        :param kwargs: accepted but not used by this method
        """
        if not self.is_document(document):
            return

        self.limit_sequence_to_one(document)

        if self._sequence is None:
            # First document seen: it defines the sequence.
            creation_message = 'Creating document sequence from first document {}'.format(
                document
            )
            log.info(creation_message)
            self._sequence = EBUTT3DocumentSequence.create_from_document(document, verbose=self._verbose)
            if self._reference_clock is None:
                self._reference_clock = self._sequence.reference_clock

        if document.availability_time is None:
            document.availability_time = self._reference_clock.get_time()

        received_message = DOC_RECEIVED.format(
            sequence_number=document.sequence_number,
            sequence_identifier=document.sequence_identifier,
            computed_begin_time=document.computed_begin_time,
            computed_end_time=document.computed_end_time
        )
        document_logger.info(received_message)

        try:
            self._sequence.add_document(document)
        except SequenceNumberCollisionError:
            duplicate_message = 'Consumer ignoring duplicate seq number: {}'.format(
                document.sequence_number
            )
            log.info(duplicate_message)
Exemple #3
0
    def test_discard_partial_sequence(self):
        """
        Check that ``discard_before`` releases only the earlier documents.

        Three documents are added to a sequence and the local names are
        deleted. After ``discard_before`` is called with the second document,
        the first one must be garbage collected while the second and third
        stay alive.
        """
        first = self._generate_document(offset=timedelta(seconds=0),
                                        sequence_number=1)
        second = self._generate_document(offset=timedelta(seconds=5),
                                         sequence_number=2)
        third = self._generate_document(offset=timedelta(seconds=10),
                                        sequence_number=3)

        # map() is used rather than a comprehension so that no loop variable
        # is left holding a strong reference to a document.
        weak_docs = list(map(weakref.ref, (first, second, third)))

        sequence = EBUTT3DocumentSequence.create_from_document(first)
        weak_sequence = weakref.ref(sequence)

        sequence.add_document(first)
        sequence.add_document(second)
        sequence.add_document(third)

        # From here on only the sequence holds the documents.
        del first, second, third
        gc.collect()

        self.assertIsInstance(weak_sequence(), EBUTT3DocumentSequence)
        for weak_doc in weak_docs:
            self.assertIsInstance(weak_doc(), EBUTT3Document)

        sequence.discard_before(weak_docs[1]())
        gc.collect()

        released_ref, pivot_ref, trailing_ref = weak_docs
        self.assertIsNone(released_ref())
        self.assertIsInstance(pivot_ref(), EBUTT3Document)
        self.assertIsInstance(trailing_ref(), EBUTT3Document)
Exemple #4
0
    def test_documents_in_sequence(self):
        """
        Check that documents live exactly as long as their sequence holds them.

        Documents with sequence numbers 1-9 are added to a sequence while the
        test keeps only weak references. They must survive a garbage
        collection while the sequence exists, and must all be collected
        (together with the sequence) once ``cleanup()`` has been called and
        the sequence has been deleted.
        """
        doc_refs = []
        doc1 = self._generate_document(sequence_number=1)
        doc_refs.append(weakref.ref(doc1))
        sequence = EBUTT3DocumentSequence.create_from_document(doc1)
        sequence.add_document(doc1)
        seq_ref = weakref.ref(sequence)
        # Remove the local strong reference; the sequence is now the only owner.
        del doc1
        # ``range`` replaces ``xrange`` so the test also runs on Python 3;
        # the iterated values are unchanged.
        for number in range(2, 10):
            doc = self._generate_document(sequence_number=number,
                                          offset=timedelta(seconds=5 * number))
            doc.validate()
            doc.get_xml()
            doc_refs.append(weakref.ref(doc))
            sequence.add_document(doc)
            # Avoid the loop variable keeping the last document alive.
            del doc
        gc.collect()
        self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
        for item in doc_refs:
            self.assertIsInstance(item(), EBUTT3Document)
        sequence.cleanup()
        del sequence
        gc.collect()
        self.assertIsNone(seq_ref())
        for item in doc_refs:
            self.assertIsNone(item())
    def process_document(self, document, **kwargs):
        """
        The specified functionality is met by keeping the following priorities in mind when processing an
        incoming document:

          - If this is a new sequence_identifier+sequence_number pair and
            authorsGroupIdentifier+authorsGroupControlToken are defined and authorsGroupIdentifier matches the
            configuration

            - If sequenceIdentifier matches the one selected the document should be emitted
            - If sequenceIdentifier does not match the one selected or one is not selected

              - If the token is higher than our current one or one is not set the document should be emitted
              - If the token is lower than our current one or missing the document should be ignored

            - If the document should be emitted

              - Set/update current sequenceIdentifier in the node from the one in the document
              - Set/update current authorsGroupControlToken in the node
              - Reassign document to output sequenceIdentifier
              - Assign new sequenceNumber to document
              - Emit document

        Anything for which ``self.is_document`` is false is not filtered: it is given the output
        sequence identifier and emitted unconditionally.

        :param document: the incoming document (or other object) to process
        :param kwargs: forwarded unchanged to ``self.producer_carriage.emit_data``

        """
        emit = False

        if self.is_document(document):
            # This step is necessary to ensure that the authors group does not change in a sequence.
            # The sequence is looked up before one is built: passing a freshly created sequence as the
            # default of dict.setdefault() would construct and throw away a new EBUTT3DocumentSequence
            # for every incoming document, even when the identifier is already known.
            input_sequence_id = document.sequence_identifier
            if input_sequence_id not in self._known_sequences:
                self._known_sequences[input_sequence_id] = EBUTT3DocumentSequence.create_from_document(
                    document=document)
            # The return value is not used here.
            # NOTE(review): presumably is_compatible raises on a mismatch - confirm.
            self._known_sequences[input_sequence_id].is_compatible(document=document)

            # NOTE(review): presumably check_if_document_seen returns True when the pair has NOT been
            # seen before, matching "new ... pair" in the docstring - confirm against its definition.
            if self.check_if_document_seen(document=document) is True \
                    and document.authors_group_identifier == self._authors_group_identifier \
                    and document.authors_group_control_token is not None:
                if self._current_token is None or self._current_token < document.authors_group_control_token:
                    # Switch input
                    self._current_selected_input_sequence_id = document.sequence_identifier
                    emit = True
                elif self._current_selected_input_sequence_id == document.sequence_identifier:
                    emit = True

            if emit:
                # Update token. When the document was accepted only because it belongs to the currently
                # selected sequence, its token is not higher than the current one, so this assignment
                # can leave the current token unchanged or lower it.
                self._current_token = document.authors_group_control_token
                self._last_sequence_number += 1

                # Remember the input sequence before the identifier is overwritten with the output one.
                document.authors_group_selected_sequence_identifier = document.sequence_identifier
                document.sequence_identifier = self._sequence_identifier
                document.sequence_number = self._last_sequence_number
                self.producer_carriage.emit_data(data=document, **kwargs)
        else:
            document.sequence_identifier = self._sequence_identifier
            self.producer_carriage.emit_data(data=document, **kwargs)
Exemple #6
0
    def process_document(self, document):
        """
        Add an incoming document to this node's sequence.

        The first document creates the sequence, and the sequence's reference
        clock becomes the node's reference clock. Any document that arrives
        without an availability time is stamped with the current reference
        clock time. The reception is logged and the document is added to the
        sequence.

        :param document: the incoming document
        """
        if self._sequence is None:
            # Create sequence from document
            log.info("Creating document sequence from first document {}".format(document))
            self._sequence = EBUTT3DocumentSequence.create_from_document(document)
            self._reference_clock = self._sequence.reference_clock

        # This check used to live inside the branch above, so only the very
        # first document could receive a default availability time. It now
        # applies to every document.
        if document.availability_time is None:
            document.availability_time = self._reference_clock.get_time()

        document_logger.info(
            DOC_RECEIVED.format(
                sequence_number=document.sequence_number,
                sequence_identifier=document.sequence_identifier,
                computed_begin_time=document.computed_begin_time,
                computed_end_time=document.computed_end_time,
            )
        )
        self._sequence.add_document(document)
    def process_document(self, document):
        """
        Receive a document and store it in the node's sequence.

        When no sequence exists yet, one is created from the document and the
        node takes over that sequence's reference clock. A document whose
        availability time is unset gets the current time of the reference
        clock. The document is then logged as received and added to the
        sequence.

        :param document: the incoming document
        """
        if self._sequence is None:
            # Create sequence from document
            log.info('Creating document sequence from first document {}'.format(
                document
            ))
            self._sequence = EBUTT3DocumentSequence.create_from_document(document)
            self._reference_clock = self._sequence.reference_clock

        # Applied to every document rather than only to the one that creates
        # the sequence; later documents without an availability time were
        # previously left unset.
        if document.availability_time is None:
            document.availability_time = self._reference_clock.get_time()

        document_logger.info(DOC_RECEIVED.format(
            sequence_number=document.sequence_number,
            sequence_identifier=document.sequence_identifier,
            computed_begin_time=document.computed_begin_time,
            computed_end_time=document.computed_end_time
        ))
        self._sequence.add_document(document)
    def test_discard_partial_sequence(self):
        """
        Verify partial discarding of a sequence through weak references.

        After three documents are handed to the sequence and their local names
        removed, all of them must still be alive. Calling ``discard_before``
        with the second document must then let the first one be collected
        while the second and third remain.
        """
        doc_a = self._generate_document(
            sequence_number=1, offset=timedelta(seconds=0)
        )
        doc_b = self._generate_document(
            sequence_number=2, offset=timedelta(seconds=5)
        )
        doc_c = self._generate_document(
            sequence_number=3, offset=timedelta(seconds=10)
        )

        doc_refs = [weakref.ref(doc_a), weakref.ref(doc_b), weakref.ref(doc_c)]

        sequence = EBUTT3DocumentSequence.create_from_document(doc_a)
        seq_ref = weakref.ref(sequence)

        sequence.add_document(doc_a)
        sequence.add_document(doc_b)
        sequence.add_document(doc_c)

        # Only the sequence may keep the documents alive from now on.
        del doc_a, doc_b, doc_c

        gc.collect()

        self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
        for doc_ref in doc_refs:
            self.assertIsInstance(doc_ref(), EBUTT3Document)

        pivot_ref = doc_refs[1]
        sequence.discard_before(pivot_ref())

        gc.collect()

        # The first document is gone, the pivot and its successor survive.
        self.assertIsNone(doc_refs[0]())
        for survivor_ref in doc_refs[1:]:
            self.assertIsInstance(survivor_ref(), EBUTT3Document)
def then_adding_doc2_success(doc_list, template_file, template_dict):
    """
    Render a second document and add it to a sequence built from the first.

    The template is rendered with ``template_dict``, parsed into an
    ``EBUTT3Document`` and added to a new sequence created from
    ``doc_list[0]``. The step passes when no exception is raised.
    """
    rendered_xml = template_file.render(template_dict)
    second_document = EBUTT3Document.create_from_xml(rendered_xml)
    EBUTT3DocumentSequence.create_from_document(doc_list[0]).add_document(second_document)
def then_adding_doc2_error(doc_list, template_file, template_dict):
    """
    Render a second document and expect the sequence to reject it.

    The template is rendered with ``template_dict`` and parsed into an
    ``EBUTT3Document``. Adding it to a new sequence created from
    ``doc_list[0]`` must raise an exception of any type.
    """
    rendered_xml = template_file.render(template_dict)
    second_document = EBUTT3Document.create_from_xml(rendered_xml)
    first_document = doc_list[0]
    sequence = EBUTT3DocumentSequence.create_from_document(first_document)
    # Only add_document is inside the context: rendering, parsing and
    # sequence creation are expected to succeed.
    with pytest.raises(Exception):
        sequence.add_document(second_document)
def then_adding_doc2_success(doc_list, template_file, template_dict):
    """
    Add a freshly rendered document to a sequence and expect no error.

    ``template_file`` is rendered with ``template_dict``, the result is parsed
    into an ``EBUTT3Document``, and that document is added to a sequence
    created from the first entry of ``doc_list``.
    """
    xml_text = template_file.render(template_dict)
    new_document = EBUTT3Document.create_from_xml(xml_text)
    first_document = doc_list[0]
    target_sequence = EBUTT3DocumentSequence.create_from_document(first_document)
    target_sequence.add_document(new_document)
def then_adding_doc2_error(doc_list, template_file, template_dict):
    """
    Add a freshly rendered document to a sequence and expect a failure.

    ``template_file`` is rendered with ``template_dict`` and parsed into an
    ``EBUTT3Document``. A sequence is created from the first entry of
    ``doc_list``; adding the new document to it must raise.
    """
    xml_text = template_file.render(template_dict)
    rejected_document = EBUTT3Document.create_from_xml(xml_text)
    target_sequence = EBUTT3DocumentSequence.create_from_document(doc_list[0])
    # Setup stays outside the context so that only add_document may raise.
    with pytest.raises(Exception):
        target_sequence.add_document(rejected_document)
Exemple #13
0
 def create_sequence_from_document(self, document):
     """
     Create this node's sequence from the given document.

     The new sequence is stored on ``self._sequence``. If the node has no
     reference clock yet, the sequence's reference clock is adopted; an
     existing reference clock is left untouched.

     :param document: the document the sequence is created from
     """
     self._sequence = EBUTT3DocumentSequence.create_from_document(
         document,
         verbose=self._verbose,
     )
     if self._reference_clock is not None:
         # The node already has a clock; keep it.
         return
     self._reference_clock = self._sequence.reference_clock