def test_discard_partial_sequence(self):
    """
    Discarding up to the second document must release only the first one.

    Three documents are added to a sequence and the local names are dropped,
    so the sequence is the only thing the test leaves holding them. After
    ``discard_before`` is called with the second document, the first must be
    garbage collected while the second and third stay alive.
    """
    doc1 = self._generate_document(sequence_number=1, offset=timedelta(seconds=0))
    doc2 = self._generate_document(sequence_number=2, offset=timedelta(seconds=5))
    doc3 = self._generate_document(sequence_number=3, offset=timedelta(seconds=10))

    # Only weak references are kept by the test from here on.
    first_ref = weakref.ref(doc1)
    second_ref = weakref.ref(doc2)
    third_ref = weakref.ref(doc3)

    sequence = EBUTT3DocumentSequence.create_from_document(doc1)
    seq_ref = weakref.ref(sequence)
    sequence.add_document(doc1)
    sequence.add_document(doc2)
    sequence.add_document(doc3)

    # Drop the strong local references before collecting.
    del doc1, doc2, doc3
    gc.collect()

    self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
    for doc_ref in (first_ref, second_ref, third_ref):
        self.assertIsInstance(doc_ref(), EBUTT3Document)

    sequence.discard_before(second_ref())
    gc.collect()

    self.assertIsNone(first_ref())
    self.assertIsInstance(second_ref(), EBUTT3Document)
    self.assertIsInstance(third_ref(), EBUTT3Document)
def test_documents_in_sequence(self):
    """
    A sequence keeps its documents alive until it is cleaned up and dropped.

    Nine documents (sequence numbers 1-9) are added while the test itself
    holds only weak references. They must all survive a collection while the
    sequence exists, and both the sequence and every document must be
    collected after ``cleanup()`` and deletion of the sequence.
    """
    doc_refs = []
    doc1 = self._generate_document(
        sequence_number=1
    )
    doc_refs.append(weakref.ref(doc1))
    sequence = EBUTT3DocumentSequence.create_from_document(doc1)
    sequence.add_document(doc1)
    seq_ref = weakref.ref(sequence)
    del doc1

    # ``range`` rather than ``xrange``: the latter does not exist on Python 3,
    # and for this small span both iterate identically on Python 2.
    for number in range(2, 10):
        doc = self._generate_document(
            sequence_number=number,
            offset=timedelta(seconds=5*number)
        )
        doc.validate()
        doc.get_xml()
        doc_refs.append(weakref.ref(doc))
        sequence.add_document(doc)
    # The loop variable still holds the last document; release it.
    del doc
    gc.collect()

    self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
    for item in doc_refs:
        self.assertIsInstance(item(), EBUTT3Document)

    sequence.cleanup()
    del sequence
    gc.collect()

    self.assertIsNone(seq_ref())
    for item in doc_refs:
        self.assertIsNone(item())
def process_document(self, document, **kwargs):
    """
    Add an incoming document to this node's sequence.

    Input for which ``self.is_document`` is false is ignored. The first
    document creates the sequence, and also supplies the reference clock when
    the node has none yet. A document without an availability time is stamped
    with the reference clock's current time. A
    ``SequenceNumberCollisionError`` raised while adding is logged and
    swallowed.

    :param document: the incoming item
    :param kwargs: accepted but not used in this method
    """
    if not self.is_document(document):
        return

    self.limit_sequence_to_one(document)

    if self._sequence is None:
        # Create sequence from document
        creation_notice = 'Creating document sequence from first document {}'.format(document)
        log.info(creation_notice)
        self._sequence = EBUTT3DocumentSequence.create_from_document(
            document, verbose=self._verbose
        )
        if self._reference_clock is None:
            self._reference_clock = self._sequence.reference_clock

    if document.availability_time is None:
        document.availability_time = self._reference_clock.get_time()

    received_notice = DOC_RECEIVED.format(
        sequence_number=document.sequence_number,
        sequence_identifier=document.sequence_identifier,
        computed_begin_time=document.computed_begin_time,
        computed_end_time=document.computed_end_time
    )
    document_logger.info(received_notice)

    try:
        self._sequence.add_document(document)
    except SequenceNumberCollisionError:
        duplicate_notice = 'Consumer ignoring duplicate seq number: {}'.format(
            document.sequence_number
        )
        log.info(duplicate_notice)
def test_documents_in_sequence(self):
    """
    Check that documents live exactly as long as their sequence.

    The test holds only weak references to the sequence and to the nine
    documents added to it. All of them must still be alive after a garbage
    collection while the sequence exists, and all must be gone once the
    sequence has been cleaned up and deleted.
    """
    doc_refs = []
    doc1 = self._generate_document(sequence_number=1)
    doc_refs.append(weakref.ref(doc1))
    sequence = EBUTT3DocumentSequence.create_from_document(doc1)
    sequence.add_document(doc1)
    seq_ref = weakref.ref(sequence)
    del doc1

    # ``xrange`` is Python 2 only; ``range`` works on both major versions.
    for number in range(2, 10):
        doc = self._generate_document(sequence_number=number, offset=timedelta(seconds=5 * number))
        doc.validate()
        doc.get_xml()
        doc_refs.append(weakref.ref(doc))
        sequence.add_document(doc)
    # Release the strong reference left behind by the loop variable.
    del doc
    gc.collect()

    self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
    for item in doc_refs:
        self.assertIsInstance(item(), EBUTT3Document)

    sequence.cleanup()
    del sequence
    gc.collect()

    self.assertIsNone(seq_ref())
    for item in doc_refs:
        self.assertIsNone(item())
def process_document(self, document, **kwargs):
    """
    Decide whether an incoming document is forwarded, and forward it under
    this node's own sequence identifier.

    The specified functionality is met by keeping the following priorities in mind when processing an
    incoming document:

      - If this is a new sequence_identifier+sequence_number pair and authorsGroupIdentifier+authorsGroupControlToken
        are defined and authorsGroupIdentifier matches the configuration
        - If the token is higher than our current one or one is not set the document should be emitted
          and its sequenceIdentifier becomes the selected input
        - Otherwise, if sequenceIdentifier matches the one selected the document should be emitted
        - Otherwise (token not higher than the current one and sequence not selected) the document
          should be ignored
      - If the document should be emitted
        - Set/update current sequenceIdentifier in the node from the one in the document
        - Set/update current authorsGroupControlToken in the node
        - Reassign document to output sequenceIdentifier
        - Assign new sequenceNumber to document
        - Emit document

    Input for which ``self.is_document`` is false is always emitted, with only its
    sequence identifier replaced by the node's.

    :param document: the incoming item
    :param kwargs: passed through unchanged to ``producer_carriage.emit_data``
    """
    emit = False

    if self.is_document(document):
        # This step is necessary to ensure that the authors group does not change in a sequence
        # NOTE(review): the second argument of setdefault is evaluated on every call, so a new
        # sequence object is built for each document even when the identifier is already known.
        # NOTE(review): the return value of is_compatible is discarded; presumably it raises on
        # an incompatible document - confirm.
        self._known_sequences.setdefault(
            document.sequence_identifier,
            EBUTT3DocumentSequence.create_from_document(
                document=document)).is_compatible(document=document)
        # NOTE(review): per the docstring this branch is for a *new* identifier/number pair, so
        # check_if_document_seen presumably returns True for documents not seen before - confirm.
        if self.check_if_document_seen(document=document) is True \
                and document.authors_group_identifier == self._authors_group_identifier \
                and document.authors_group_control_token is not None:
            if self._current_token is None or self._current_token < document.authors_group_control_token:
                # Switch input
                self._current_selected_input_sequence_id = document.sequence_identifier
                emit = True
            elif self._current_selected_input_sequence_id == document.sequence_identifier:
                emit = True

        if emit is True:
            # Update token
            self._current_token = document.authors_group_control_token
            # So are we going to error here?
            self._last_sequence_number += 1
            # Remember the original input sequence before overwriting the identifier below.
            document.authors_group_selected_sequence_identifier = document.sequence_identifier
            document.sequence_identifier = self._sequence_identifier
            document.sequence_number = self._last_sequence_number
            self.producer_carriage.emit_data(data=document, **kwargs)
    else:
        # Not a document: re-label it with the output sequence identifier and pass it on.
        document.sequence_identifier = self._sequence_identifier
        self.producer_carriage.emit_data(data=document, **kwargs)
def setUp(self):
    """
    Build the fixture shared by the tests: a local machine clock, an empty
    'sequenceTesting' sequence bound to that clock, and three documents
    produced by ``self._create_document``.
    """
    clock = LocalMachineClock()
    self.reference_clock = clock
    self.sequence = EBUTT3DocumentSequence(
        sequence_identifier='sequenceTesting',
        reference_clock=clock,
        lang='en-GB'
    )

    make_document = self._create_document
    self.document1 = make_document(1, 2)
    self.document2 = make_document(3, 4)
    self.document3 = make_document(5, 6)
def sequence(sequence_identifier, time_base):
    """
    Create a verbose 'en-GB' document sequence for the given time base.

    :param sequence_identifier: identifier handed to the new sequence
    :param time_base: ``'clock'`` selects a ``LocalMachineClock``, ``'media'``
        a ``MediaClock``; any other value except ``'smpte'`` leaves the
        reference clock as ``None``
    :return: the new ``EBUTT3DocumentSequence``
    :raises NotImplementedError: when ``time_base`` is ``'smpte'``
    """
    if time_base == 'smpte':
        raise NotImplementedError()

    if time_base == 'clock':
        ref_clock = LocalMachineClock()
    elif time_base == 'media':
        ref_clock = MediaClock()
    else:
        ref_clock = None

    return EBUTT3DocumentSequence(sequence_identifier, ref_clock, 'en-GB', verbose=True)
def process_document(self, document):
    """
    Add an incoming document to this node's sequence.

    The first document creates the sequence and supplies the reference clock.
    A document without an availability time is stamped with the reference
    clock's current time before it is logged and added.

    :param document: the incoming document
    """
    if self._sequence is None:
        # Create sequence from document
        creation_notice = "Creating document sequence from first document {}".format(document)
        log.info(creation_notice)
        self._sequence = EBUTT3DocumentSequence.create_from_document(document)
        self._reference_clock = self._sequence.reference_clock

    if document.availability_time is None:
        document.availability_time = self._reference_clock.get_time()

    received_notice = DOC_RECEIVED.format(
        sequence_number=document.sequence_number,
        sequence_identifier=document.sequence_identifier,
        computed_begin_time=document.computed_begin_time,
        computed_end_time=document.computed_end_time,
    )
    document_logger.info(received_notice)

    self._sequence.add_document(document)
def process_document(self, document):
    """
    Register a received document with the sequence held by this node.

    When no sequence exists yet, one is created from the document and its
    reference clock is adopted. A missing availability time is filled in from
    the reference clock; the document's numbering and computed times are then
    logged and the document is added to the sequence.

    :param document: the received document
    """
    if self._sequence is None:
        # Create sequence from document
        log.info(
            'Creating document sequence from first document {}'.format(document)
        )
        self._sequence = EBUTT3DocumentSequence.create_from_document(document)
        self._reference_clock = self._sequence.reference_clock

    if document.availability_time is None:
        document.availability_time = self._reference_clock.get_time()

    log_fields = dict(
        sequence_number=document.sequence_number,
        sequence_identifier=document.sequence_identifier,
        computed_begin_time=document.computed_begin_time,
        computed_end_time=document.computed_end_time
    )
    document_logger.info(DOC_RECEIVED.format(**log_fields))

    self._sequence.add_document(document)
def test_discard_partial_sequence(self):
    """
    Check that ``discard_before`` releases only the documents preceding the
    one passed in.

    Three documents are added to a sequence and the local names are deleted.
    After ``discard_before`` is called with the second document, the first
    must be garbage collected while the second and third remain alive.
    """
    doc1 = self._generate_document(sequence_number=1, offset=timedelta(seconds=0))
    doc2 = self._generate_document(sequence_number=2, offset=timedelta(seconds=5))
    doc3 = self._generate_document(sequence_number=3, offset=timedelta(seconds=10))
    # Weak references let the test observe collection without keeping the documents alive.
    doc_refs = [
        weakref.ref(doc1),
        weakref.ref(doc2),
        weakref.ref(doc3)
    ]
    sequence = EBUTT3DocumentSequence.create_from_document(doc1)
    seq_ref = weakref.ref(sequence)
    sequence.add_document(doc1)
    sequence.add_document(doc2)
    sequence.add_document(doc3)
    # Drop the test's own strong references before collecting.
    del doc1
    del doc2
    del doc3
    gc.collect()
    # Everything must still be alive at this point.
    self.assertIsInstance(seq_ref(), EBUTT3DocumentSequence)
    for item in doc_refs:
        self.assertIsInstance(item(), EBUTT3Document)
    # The strong reference produced by doc_refs[1]() is temporary and ends with the call.
    sequence.discard_before(doc_refs[1]())
    gc.collect()
    self.assertIsNone(doc_refs[0]())
    self.assertIsInstance(doc_refs[1](), EBUTT3Document)
    self.assertIsInstance(doc_refs[2](), EBUTT3Document)
def main():
    """
    Entry point of the example producer script.

    Depending on the parsed command-line arguments the producer either writes
    its output through a ``FilesystemProducerImpl`` (when ``folder_export`` is
    given) or serves it over a websocket at ws://127.0.0.1:9000, pulling new
    data every two seconds from inside the Twisted reactor.
    """
    create_loggers()
    parsed_args = parser.parse_args()

    do_export = bool(parsed_args.folder_export)

    reference_clock = LocalMachineClock()
    reference_clock.clock_mode = 'local'

    document_sequence = EBUTT3DocumentSequence(
        sequence_identifier='TestSequence1',
        lang='en-GB',
        reference_clock=reference_clock
    )

    if parsed_args.reference_clock:
        # Instead of text we provide the availability time as content.
        subtitle_tokens = None
    else:
        # Let's read our example conversation
        full_text = get_example_data('simple_producer.txt')
        tokens = tokenize_english_document(full_text)
        # Exporting consumes the text once; otherwise the source cycles infinitely.
        subtitle_tokens = iter(tokens) if do_export else cycle(tokens)

    # This object is used as flexible binding to the carriage mechanism and twisted integrated as dependency injection
    prod_impl = (
        FilesystemProducerImpl(parsed_args.folder_export)
        if do_export
        else TwistedProducerImpl()
    )

    # The name is kept so the producer node stays referenced for the whole run.
    simple_producer = SimpleProducer(
        node_id='simple-producer',
        carriage_impl=prod_impl,
        document_sequence=document_sequence,
        input_blocks=subtitle_tokens
    )

    if do_export:
        prod_impl.resume_producing()
        return

    factory = wsFactory(u"ws://127.0.0.1:9000")
    factory.protocol = StreamingServerProtocol
    factory.listen()

    # We are using a pull producer because it is the looping_task timer that triggers the production from the websocket
    # level. Every time the factory gets a pull signal from the timer it tells the producer to generate data.
    TwistedPullProducer(
        consumer=factory,
        custom_producer=prod_impl
    )

    looping_task = task.LoopingCall(factory.pull)
    looping_task.start(2.0)

    reactor.run()
def then_adding_doc2_success(doc_list, template_file, template_dict):
    """
    Render a second document and add it to a sequence created from the first
    document in ``doc_list``; the step passes when nothing raises.

    :param doc_list: documents created earlier; only element 0 is used
    :param template_file: template object exposing ``render``
    :param template_dict: values passed to ``render``
    """
    rendered_xml = template_file.render(template_dict)
    second_document = EBUTT3Document.create_from_xml(rendered_xml)
    target_sequence = EBUTT3DocumentSequence.create_from_document(doc_list[0])
    target_sequence.add_document(second_document)
def then_adding_doc2_error(doc_list, template_file, template_dict):
    """
    Render a second document and assert that adding it to a sequence created
    from the first document in ``doc_list`` raises an exception.

    Only the ``add_document`` call is expected to raise; failures while
    rendering, parsing or creating the sequence propagate normally.

    :param doc_list: documents created earlier; only element 0 is used
    :param template_file: template object exposing ``render``
    :param template_dict: values passed to ``render``
    """
    rendered_xml = template_file.render(template_dict)
    second_document = EBUTT3Document.create_from_xml(rendered_xml)
    target_sequence = EBUTT3DocumentSequence.create_from_document(doc_list[0])

    with pytest.raises(Exception):
        target_sequence.add_document(second_document)
def create_sequence_from_document(self, document):
    """
    Create this node's sequence from ``document``.

    The node's reference clock is taken from the new sequence only when the
    node does not already have one.

    :param document: document used to create the sequence
    """
    self._sequence = EBUTT3DocumentSequence.create_from_document(
        document,
        verbose=self._verbose
    )

    if self._reference_clock is not None:
        # A clock is already set on the node; keep it.
        return

    self._reference_clock = self._sequence.reference_clock
def main():
    """
    Entry point of the example producer script.

    Builds a ``SimpleProducer`` node for sequence 'TestSequence1' and connects
    it to a producer carriage through a ``ProducerNodeCarriageAdapter``. When
    the ``folder_export`` argument is given the carriage is a
    ``FilesystemProducerImpl``; otherwise a ``WebsocketProducerCarriage`` is
    served at ws://127.0.0.1:9000 and the node's ``process_document`` is
    invoked every two seconds from the Twisted reactor.
    """
    create_loggers()

    parsed_args = parser.parse_args()

    sequence_identifier = 'TestSequence1'

    do_export = False
    if parsed_args.folder_export:
        do_export = True

    reference_clock = LocalMachineClock()
    reference_clock.clock_mode = 'local'

    document_sequence = EBUTT3DocumentSequence(
        sequence_identifier=sequence_identifier,
        lang='en-GB',
        reference_clock=reference_clock
    )

    if parsed_args.reference_clock:
        subtitle_tokens = None  # Instead of text we provide the availability time as content.
    else:
        # Let's read our example conversation
        full_text = get_example_data('simple_producer.txt')
        if do_export:
            # A plain iterator: the export consumes the text once.
            subtitle_tokens = iter(tokenize_english_document(full_text))
        else:
            # This makes the source cycle infinitely.
            subtitle_tokens = cycle(tokenize_english_document(full_text))

    # This object is used as flexible binding to the carriage mechanism and twisted integrated as dependency injection
    prod_impl = None
    if do_export:
        prod_impl = FilesystemProducerImpl(parsed_args.folder_export, reference_clock)
    else:
        prod_impl = WebsocketProducerCarriage()
        prod_impl.sequence_identifier = sequence_identifier

    # The node is created without a carriage; the adapter below connects the two.
    simple_producer = SimpleProducer(
        node_id='simple-producer',
        producer_carriage=None,
        document_sequence=document_sequence,
        input_blocks=subtitle_tokens
    )

    # Chaining a converter
    # The adapter instance is not kept; it is constructed for its wiring side effect.
    ProducerNodeCarriageAdapter(
        producer_carriage=prod_impl,
        producer_node=simple_producer
    )

    if do_export:
        prod_impl.resume_producing()
    else:
        twisted_producer = TwistedWSPushProducer(
            custom_producer=prod_impl
        )

        factory = BroadcastServerFactory(
            url=u"ws://127.0.0.1:9000",
            producer=twisted_producer
        )

        factory.protocol = BroadcastServerProtocol

        factory.listen()

        # Here we schedule in the simple producer to create content responding to a periodic interval timer.
        looping_task = task.LoopingCall(simple_producer.process_document)

        # Interval passed to LoopingCall.start, in seconds.
        looping_task.start(2.0)

        reactor.run()