Exemple #1
0
    def test_discard_partial_sequence(self):
        """
        Check that ``discard_before`` releases only the earlier documents.

        Three documents are added to a sequence and the local strong
        references are dropped. The test first asserts that all three are
        still reachable through their weak references, then discards
        everything before the second document and asserts that only the
        first one has been garbage collected.
        """
        first = self._generate_document(sequence_number=1,
                                        offset=timedelta(seconds=0))
        second = self._generate_document(sequence_number=2,
                                         offset=timedelta(seconds=5))
        third = self._generate_document(sequence_number=3,
                                        offset=timedelta(seconds=10))

        # Weak references let us observe collection without keeping the
        # documents alive ourselves.
        ref_first = weakref.ref(first)
        ref_second = weakref.ref(second)
        ref_third = weakref.ref(third)

        sequence = EBUTT3DocumentSequence.create_from_document(first)
        seq_ref = weakref.ref(sequence)

        sequence.add_document(first)
        sequence.add_document(second)
        sequence.add_document(third)

        # Drop every local strong reference to the documents.
        del first, second, third
        gc.collect()

        self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
        self.assertIsInstance(ref_first(), EBUTT3Document)
        self.assertIsInstance(ref_second(), EBUTT3Document)
        self.assertIsInstance(ref_third(), EBUTT3Document)

        sequence.discard_before(ref_second())
        gc.collect()

        self.assertIsNone(ref_first())
        self.assertIsInstance(ref_second(), EBUTT3Document)
        self.assertIsInstance(ref_third(), EBUTT3Document)
    def test_documents_in_sequence(self):
        """
        Check that documents live exactly as long as their sequence.

        Nine documents are added to a sequence while only weak references
        are kept locally. The test asserts that all documents and the
        sequence are still alive after a collection, then calls
        ``cleanup()``, deletes the sequence, and asserts that the sequence
        and every document have been collected.
        """
        doc_refs = []
        doc1 = self._generate_document(
            sequence_number=1
        )
        doc_refs.append(weakref.ref(doc1))
        sequence = EBUTT3DocumentSequence.create_from_document(doc1)
        sequence.add_document(doc1)
        seq_ref = weakref.ref(sequence)
        del doc1
        # ``range`` instead of ``xrange``: the latter does not exist on
        # Python 3, while ``range`` iterates identically on both versions.
        for number in range(2, 10):
            doc = self._generate_document(
                sequence_number=number,
                offset=timedelta(seconds=5*number)
            )
            doc.validate()
            doc.get_xml()
            doc_refs.append(weakref.ref(doc))
            sequence.add_document(doc)
            # Remove the local strong reference so it cannot mask a leak.
            del doc
        gc.collect()
        self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
        for item in doc_refs:
            self.assertIsInstance(item(), EBUTT3Document)
        sequence.cleanup()
        del sequence
        gc.collect()
        self.assertIsNone(seq_ref())
        for item in doc_refs:
            self.assertIsNone(item())
Exemple #3
0
    def process_document(self, document, **kwargs):
        """
        Add an incoming document to this node's sequence.

        Data for which ``is_document`` is false is ignored. The first
        document creates the sequence and, when no reference clock is set,
        supplies it. A document without an availability time is stamped
        with the reference clock's current time before being logged and
        added. A ``SequenceNumberCollisionError`` raised while adding is
        logged and swallowed.

        :param document: the incoming data
        :param kwargs: accepted but not used by this method
        """
        if not self.is_document(document):
            return

        self.limit_sequence_to_one(document)

        if self._sequence is None:
            # Create sequence from document
            creation_message = 'Creating document sequence from first document {}'.format(
                document
            )
            log.info(creation_message)
            self._sequence = EBUTT3DocumentSequence.create_from_document(
                document,
                verbose=self._verbose
            )
            if self._reference_clock is None:
                self._reference_clock = self._sequence.reference_clock

        if document.availability_time is None:
            document.availability_time = self._reference_clock.get_time()

        received_message = DOC_RECEIVED.format(
            sequence_number=document.sequence_number,
            sequence_identifier=document.sequence_identifier,
            computed_begin_time=document.computed_begin_time,
            computed_end_time=document.computed_end_time
        )
        document_logger.info(received_message)

        try:
            self._sequence.add_document(document)
        except SequenceNumberCollisionError:
            duplicate_message = 'Consumer ignoring duplicate seq number: {}'.format(
                document.sequence_number
            )
            log.info(duplicate_message)
Exemple #4
0
    def test_documents_in_sequence(self):
        """
        Check that a sequence owns its documents until it is cleaned up.

        Documents 1 to 9 are added to a sequence and tracked only through
        weak references. After a collection they must all still be alive;
        after ``cleanup()`` and deletion of the sequence, the sequence and
        all documents must have been collected.
        """
        doc_refs = []
        doc1 = self._generate_document(sequence_number=1)
        doc_refs.append(weakref.ref(doc1))
        sequence = EBUTT3DocumentSequence.create_from_document(doc1)
        sequence.add_document(doc1)
        seq_ref = weakref.ref(sequence)
        del doc1
        # ``xrange`` is not defined on Python 3; ``range`` behaves the same
        # for this loop on both Python 2 and Python 3.
        for number in range(2, 10):
            doc = self._generate_document(sequence_number=number,
                                          offset=timedelta(seconds=5 * number))
            doc.validate()
            doc.get_xml()
            doc_refs.append(weakref.ref(doc))
            sequence.add_document(doc)
            # Drop the local strong reference before the next iteration.
            del doc
        gc.collect()
        self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
        for item in doc_refs:
            self.assertIsInstance(item(), EBUTT3Document)
        sequence.cleanup()
        del sequence
        gc.collect()
        self.assertIsNone(seq_ref())
        for item in doc_refs:
            self.assertIsNone(item())
    def process_document(self, document, **kwargs):
        """
        Decide whether an incoming document is emitted on the output sequence and, if so, re-label and emit it.

        The specified functionality is met by keeping the following priorities in mind when processing an
        incoming document:

          - If this is a new sequence_identifier+sequence_number pair and
            authorsGroupIdentifier+authorsGroupControlToken are defined and authorsGroupIdentifier matches the
            configuration

            - If sequenceIdentifier matches the one selected the document should be emitted
            - If sequenceIdentifier does not match the one selected or one is not selected

              - If the token is higher than our current one or one is not set the document should be emitted
              - If the token is lower than our current one or missing the document should be ignored

            - If the document should be emitted

              - Set/update current sequenceIdentifier in the node from the one in the document
              - Set/update current authorsGroupControlToken in the node
              - Reassign document to output sequenceIdentifier
              - Assign new sequenceNumber to document
              - Emit document

        Data for which ``is_document`` is false is not filtered: it gets the output sequence identifier
        assigned and is emitted unconditionally.

        :param document: the incoming data; its ``sequence_identifier`` (and, when emitted as a document,
            ``sequence_number`` and ``authors_group_selected_sequence_identifier``) is modified in place
        :param kwargs: forwarded unchanged to ``producer_carriage.emit_data``

        """
        emit = False

        if self.is_document(document):
            # This step is necessary to ensure that the authors group does not change in a sequence
            # NOTE(review): the second argument to setdefault is evaluated on every call, so a new
            # sequence object is built from each document even when one is already stored for this
            # sequence identifier.
            # NOTE(review): the return value of is_compatible is discarded; presumably it raises on
            # incompatibility -- confirm.
            self._known_sequences.setdefault(
                document.sequence_identifier,
                EBUTT3DocumentSequence.create_from_document(
                    document=document)).is_compatible(document=document)
            # NOTE(review): per the docstring this branch is for documents not seen before, so
            # check_if_document_seen presumably returns True for a NEW document -- confirm.
            if self.check_if_document_seen(document=document) is True \
                    and document.authors_group_identifier == self._authors_group_identifier \
                    and document.authors_group_control_token is not None:
                if self._current_token is None or self._current_token < document.authors_group_control_token:
                    # Switch input
                    self._current_selected_input_sequence_id = document.sequence_identifier
                    emit = True
                elif self._current_selected_input_sequence_id == document.sequence_identifier:
                    # Same input as currently selected: emit even though the token is not higher
                    emit = True

            if emit is True:
                # Update token
                # NOTE(review): when emission came from the elif branch above, the document's token is
                # not greater than the current one, so this assignment can lower the stored token.
                self._current_token = document.authors_group_control_token  # So are we going to error here?
                self._last_sequence_number += 1

                # Remember the originating sequence on the document before re-labelling it
                document.authors_group_selected_sequence_identifier = document.sequence_identifier
                document.sequence_identifier = self._sequence_identifier
                document.sequence_number = self._last_sequence_number
                self.producer_carriage.emit_data(data=document, **kwargs)
        else:
            # Non-document data: only re-label with the output sequence identifier and pass it on
            document.sequence_identifier = self._sequence_identifier
            self.producer_carriage.emit_data(data=document, **kwargs)
    def setUp(self):
        """
        Build the fixtures shared by the tests.

        Creates a local machine reference clock, a sequence identified as
        'sequenceTesting' that uses that clock, and three documents built
        by ``_create_document`` with the argument pairs (1, 2), (3, 4) and
        (5, 6).
        """
        clock = LocalMachineClock()
        self.reference_clock = clock
        self.sequence = EBUTT3DocumentSequence(
            sequence_identifier='sequenceTesting',
            reference_clock=clock,
            lang='en-GB'
        )

        # Created in order so that any state kept by the helper advances
        # exactly as before.
        self.document1 = self._create_document(1, 2)
        self.document2 = self._create_document(3, 4)
        self.document3 = self._create_document(5, 6)
def sequence(sequence_identifier, time_base):
    """
    Create a verbose 'en-GB' document sequence for the given time base.

    :param sequence_identifier: identifier passed to the new sequence
    :param time_base: 'clock' selects a LocalMachineClock and 'media' a
        MediaClock; any other value except 'smpte' leaves the reference
        clock as None
    :return: the newly created EBUTT3DocumentSequence
    :raises NotImplementedError: when ``time_base`` is 'smpte'
    """
    if time_base == 'smpte':
        raise NotImplementedError()

    if time_base == 'clock':
        clock = LocalMachineClock()
    elif time_base == 'media':
        clock = MediaClock()
    else:
        clock = None

    return EBUTT3DocumentSequence(sequence_identifier, clock, 'en-GB', verbose=True)
Exemple #8
0
    def process_document(self, document):
        """
        Log an incoming document and add it to this node's sequence.

        The first document creates the sequence and the node adopts that
        sequence's reference clock. Every document that arrives without an
        availability time is stamped with the reference clock's current
        time; previously this only happened for the very first document
        because the check was nested in the sequence-creation branch.

        :param document: the incoming document; its ``availability_time``
            is set in place when it is None
        """
        if self._sequence is None:
            # Create sequence from document
            log.info("Creating document sequence from first document {}".format(document))
            self._sequence = EBUTT3DocumentSequence.create_from_document(document)
            self._reference_clock = self._sequence.reference_clock

        # Applies to every document, not only the one that created the sequence.
        if document.availability_time is None:
            document.availability_time = self._reference_clock.get_time()

        document_logger.info(
            DOC_RECEIVED.format(
                sequence_number=document.sequence_number,
                sequence_identifier=document.sequence_identifier,
                computed_begin_time=document.computed_begin_time,
                computed_end_time=document.computed_end_time,
            )
        )
        self._sequence.add_document(document)
    def process_document(self, document):
        """
        Log an incoming document and add it to this node's sequence.

        The sequence is created from the first document received, and the
        node takes over that sequence's reference clock. A missing
        availability time is filled in from the reference clock for every
        document; the check used to sit inside the sequence-creation branch
        and therefore only ever ran for the first document.

        :param document: the incoming document; its ``availability_time``
            is set in place when it is None
        """
        if self._sequence is None:
            # Create sequence from document
            log.info('Creating document sequence from first document {}'.format(
                document
            ))
            self._sequence = EBUTT3DocumentSequence.create_from_document(document)
            self._reference_clock = self._sequence.reference_clock

        # Stamp any document lacking an availability time, not just the first.
        if document.availability_time is None:
            document.availability_time = self._reference_clock.get_time()

        document_logger.info(DOC_RECEIVED.format(
            sequence_number=document.sequence_number,
            sequence_identifier=document.sequence_identifier,
            computed_begin_time=document.computed_begin_time,
            computed_end_time=document.computed_end_time
        ))
        self._sequence.add_document(document)
    def test_discard_partial_sequence(self):
        """
        Verify that discarding part of a sequence frees only that part.

        With three documents in the sequence and no local strong
        references left, all documents must survive a collection. After
        ``discard_before`` is called with the second document, the first
        must be collected while the second and third stay alive.
        """
        earliest = self._generate_document(
            sequence_number=1, offset=timedelta(seconds=0)
        )
        middle = self._generate_document(
            sequence_number=2, offset=timedelta(seconds=5)
        )
        latest = self._generate_document(
            sequence_number=3, offset=timedelta(seconds=10)
        )

        # Only weak references are kept so collection can be observed.
        doc_refs = (weakref.ref(earliest), weakref.ref(middle), weakref.ref(latest))

        sequence = EBUTT3DocumentSequence.create_from_document(earliest)
        seq_ref = weakref.ref(sequence)

        sequence.add_document(earliest)
        sequence.add_document(middle)
        sequence.add_document(latest)

        # Remove the local strong references in one statement.
        del earliest, middle, latest

        gc.collect()

        self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
        for live_ref in doc_refs:
            self.assertIsInstance(live_ref(), EBUTT3Document)

        sequence.discard_before(doc_refs[1]())

        gc.collect()

        self.assertIsNone(doc_refs[0]())
        for live_ref in doc_refs[1:]:
            self.assertIsInstance(live_ref(), EBUTT3Document)
def main():
    """
    Run the simple producer example.

    Parses the command line, builds a document sequence driven by a local
    machine clock and wires a SimpleProducer to either a filesystem
    carriage (when a folder export is requested) or a websocket server on
    ws://127.0.0.1:9000 that is pulled every two seconds.
    """
    create_loggers()

    args = parser.parse_args()
    export_to_folder = bool(args.folder_export)

    clock = LocalMachineClock()
    clock.clock_mode = 'local'

    document_sequence = EBUTT3DocumentSequence(
        sequence_identifier='TestSequence1',
        lang='en-GB',
        reference_clock=clock
    )

    if args.reference_clock:
        # Instead of text we provide the availability time as content.
        subtitle_tokens = None
    else:
        # Let's read our example conversation
        tokenized = tokenize_english_document(get_example_data('simple_producer.txt'))
        # Exporting consumes the text once; streaming cycles it infinitely.
        subtitle_tokens = iter(tokenized) if export_to_folder else cycle(tokenized)

    # This object is used as flexible binding to the carriage mechanism and twisted integrated as dependency injection
    if export_to_folder:
        carriage = FilesystemProducerImpl(args.folder_export)
    else:
        carriage = TwistedProducerImpl()

    # The name is kept bound so the producer stays referenced while the
    # carriage or the reactor is running.
    simple_producer = SimpleProducer(
        node_id='simple-producer',
        carriage_impl=carriage,
        document_sequence=document_sequence,
        input_blocks=subtitle_tokens
    )

    if export_to_folder:
        carriage.resume_producing()
        return

    factory = wsFactory(u"ws://127.0.0.1:9000")
    factory.protocol = StreamingServerProtocol
    factory.listen()

    # We are using a pull producer because it is the looping_task timer that triggers the production from the websocket
    # level. Every time the factory gets a pull signal from the timer it tells the producer to generate data.
    TwistedPullProducer(consumer=factory, custom_producer=carriage)

    pull_timer = task.LoopingCall(factory.pull)
    pull_timer.start(2.0)

    reactor.run()
def then_adding_doc2_success(doc_list, template_file, template_dict):
    """
    Render a second document and add it to a sequence built from the first.

    :param doc_list: documents created earlier; element 0 seeds the sequence
    :param template_file: template object whose ``render`` produces the XML
    :param template_dict: values passed to ``render``
    """
    rendered_xml = template_file.render(template_dict)
    second_document = EBUTT3Document.create_from_xml(rendered_xml)
    first_document = doc_list[0]
    EBUTT3DocumentSequence.create_from_document(first_document).add_document(second_document)
def then_adding_doc2_error(doc_list, template_file, template_dict):
    """
    Render a second document and expect adding it to the sequence to fail.

    The sequence is built from the first element of ``doc_list``; only the
    ``add_document`` call is expected to raise.

    :param doc_list: documents created earlier; element 0 seeds the sequence
    :param template_file: template object whose ``render`` produces the XML
    :param template_dict: values passed to ``render``
    """
    rendered_xml = template_file.render(template_dict)
    second_document = EBUTT3Document.create_from_xml(rendered_xml)
    first_document = doc_list[0]
    target_sequence = EBUTT3DocumentSequence.create_from_document(first_document)
    with pytest.raises(Exception):
        target_sequence.add_document(second_document)
def then_adding_doc2_success(doc_list, template_file, template_dict):
    """
    Add a freshly rendered document to a sequence seeded by ``doc_list[0]``.

    The step passes when ``add_document`` completes without raising.

    :param doc_list: previously created documents
    :param template_file: template object providing ``render``
    :param template_dict: rendering context for the template
    """
    new_doc = EBUTT3Document.create_from_xml(
        template_file.render(template_dict)
    )
    seeded_sequence = EBUTT3DocumentSequence.create_from_document(doc_list[0])
    seeded_sequence.add_document(new_doc)
def then_adding_doc2_error(doc_list, template_file, template_dict):
    """
    Assert that a freshly rendered document is rejected by the sequence.

    The sequence is seeded by ``doc_list[0]`` outside the ``pytest.raises``
    context, so only ``add_document`` may raise for the step to pass.

    :param doc_list: previously created documents
    :param template_file: template object providing ``render``
    :param template_dict: rendering context for the template
    """
    new_doc = EBUTT3Document.create_from_xml(
        template_file.render(template_dict)
    )
    seeded_sequence = EBUTT3DocumentSequence.create_from_document(doc_list[0])
    with pytest.raises(Exception):
        seeded_sequence.add_document(new_doc)
Exemple #16
0
 def create_sequence_from_document(self, document):
     """
     Create this node's sequence from ``document``.

     The node's verbosity setting is passed on to the new sequence. If the
     node has no reference clock yet, it adopts the one of the new
     sequence; an existing reference clock is left untouched.

     :param document: document the sequence is created from
     """
     self._sequence = EBUTT3DocumentSequence.create_from_document(
         document,
         verbose=self._verbose
     )
     if self._reference_clock is not None:
         return
     self._reference_clock = self._sequence.reference_clock
def main():
    """
    Run the simple producer example with a carriage adapter.

    Parses the command line, builds the 'TestSequence1' document sequence
    on a local machine clock and connects a SimpleProducer through a
    ProducerNodeCarriageAdapter to either a filesystem carriage (folder
    export) or a websocket broadcast server on ws://127.0.0.1:9000. In the
    websocket case the producer is triggered every two seconds.
    """
    create_loggers()

    args = parser.parse_args()
    sequence_identifier = 'TestSequence1'
    export_to_folder = bool(args.folder_export)

    clock = LocalMachineClock()
    clock.clock_mode = 'local'

    document_sequence = EBUTT3DocumentSequence(
        sequence_identifier=sequence_identifier,
        lang='en-GB',
        reference_clock=clock
    )

    if args.reference_clock:
        # Instead of text we provide the availability time as content.
        subtitle_tokens = None
    else:
        # Let's read our example conversation
        tokenized = tokenize_english_document(get_example_data('simple_producer.txt'))
        # Exporting consumes the text once; streaming cycles it infinitely.
        subtitle_tokens = iter(tokenized) if export_to_folder else cycle(tokenized)

    # This object is used as flexible binding to the carriage mechanism and twisted integrated as dependency injection
    if export_to_folder:
        carriage = FilesystemProducerImpl(args.folder_export, clock)
    else:
        carriage = WebsocketProducerCarriage()
        carriage.sequence_identifier = sequence_identifier

    simple_producer = SimpleProducer(
        node_id='simple-producer',
        producer_carriage=None,
        document_sequence=document_sequence,
        input_blocks=subtitle_tokens
    )

    # Chaining a converter
    ProducerNodeCarriageAdapter(
        producer_carriage=carriage,
        producer_node=simple_producer
    )

    if export_to_folder:
        carriage.resume_producing()
        return

    push_producer = TwistedWSPushProducer(custom_producer=carriage)

    factory = BroadcastServerFactory(
        url=u"ws://127.0.0.1:9000",
        producer=push_producer
    )
    factory.protocol = BroadcastServerProtocol
    factory.listen()

    # Here we schedule in the simple producer to create content responding to a periodic interval timer.
    production_timer = task.LoopingCall(simple_producer.process_document)
    production_timer.start(2.0)

    reactor.run()