def run(self):
    """
    Endlessly poll the queue of presentations waiting for recognition.

    On every iteration one queued presentation is extracted; when the queue is empty the loop
    sleeps for 10 seconds. The presentation file is recognized with the recognizer registered
    in ``self.presentation_recognizers`` for its file type ('pdf' when no presentation info is
    stored), the result is saved and the training is queued for processing.
    A missing file or a recognition error marks the training as RECOGNITION_FAILED with a
    verdict and a zero score. Any other exception is logged and the loop continues.
    """

    def mark_failed(failed_training_id, verdict):
        # Bookkeeping shared by both failure paths below.
        TrainingsDBManager().change_presentation_status(
            failed_training_id, PresentationStatus.RECOGNITION_FAILED)
        TrainingsDBManager().append_verdict(failed_training_id, verdict)
        TrainingsDBManager().set_score(failed_training_id, 0)
        logger.warning(verdict)

    while True:
        try:
            queued = PresentationsToRecognizeDBManager().extract_presentation_to_recognize()
            if not queued:
                sleep(10)
                continue

            training_id = queued.training_id
            presentation_file_id = queued.file_id
            file_info = PresentationFilesDBManager().get_presentation_file(presentation_file_id)
            logger.info(
                'Extracted presentation to recognize with presentation_file_id = {}, training_id = {}.'
                .format(presentation_file_id, training_id))
            TrainingsDBManager().change_presentation_status(
                training_id, PresentationStatus.RECOGNIZING)

            if file_info is None:
                mark_failed(
                    training_id,
                    'Presentation file with presentation_file_id = {} was not found.'
                    .format(presentation_file_id))
                continue

            try:
                info = file_info.presentation_info
                if info:
                    pres_extension = info.filetype
                    nonconverted_file_id = info.nonconverted_file_id
                else:
                    pres_extension = 'pdf'
                    nonconverted_file_id = None
                # Prefer the original (non-converted) upload when one is stored.
                source_file_id = nonconverted_file_id if nonconverted_file_id else presentation_file_id
                presentation_file = DBManager().get_file(source_file_id)
                recognized_presentation = self.presentation_recognizers[pres_extension].recognize(
                    presentation_file)
            except Exception as e:
                mark_failed(
                    training_id,
                    'Recognition of presentation file with presentation_file_id = {} has failed.\n{}'
                    .format(presentation_file_id, e))
                continue

            recognized_presentation_id = DBManager().add_file(repr(recognized_presentation))
            TrainingsDBManager().add_recognized_presentation_id(
                training_id, recognized_presentation_id)
            TrainingsDBManager().change_presentation_status(
                training_id, PresentationStatus.RECOGNIZED)
            RecognizedPresentationsToProcessDBManager().add_recognized_presentation_to_process(
                recognized_presentation_id, training_id)
            TrainingsDBManager().change_presentation_status(
                training_id, PresentationStatus.SENT_FOR_PROCESSING)
        except Exception as e:
            logger.error('Unknown exception.\n{}'.format(e))
def _try_extract_and_process(self):
    """
    Take one audio record from the recognition queue and recognize it.

    Returns immediately when the queue is empty. On success the recognized audio is stored,
    attached to the training and queued for processing. A missing record file or a recognizer
    failure is reported through ``_hangle_error``. Any other exception is logged and swallowed.
    """
    try:
        queued = AudioToRecognizeDBManager().extract_audio_to_recognize()
        if not queued:
            return

        training_id = queued.training_id
        record_file_id = queued.file_id
        logger.info('Extracted audio to recognize with presentation_record_file_id = {}, training_id = {}.'
                    .format(record_file_id, training_id))
        TrainingsDBManager().change_audio_status(training_id, AudioStatus.RECOGNIZING)

        record_file = DBManager().get_file(record_file_id)
        if record_file is None:
            self._hangle_error(
                training_id,
                'Presentation record file with presentation_record_file_id = {} was not found.'
                .format(record_file_id),
            )
            return

        try:
            recognized_audio = self._audio_recognizer.recognize(record_file)
        except Exception as e:
            self._hangle_error(
                training_id,
                'Recognition of a presentation record file with presentation_record_file_id = {} '
                'has failed.\n{}'.format(record_file_id, e),
            )
            return

        recognized_audio_id = DBManager().add_file(repr(recognized_audio))
        TrainingsDBManager().add_recognized_audio_id(training_id, recognized_audio_id)
        TrainingsDBManager().change_audio_status(training_id, AudioStatus.RECOGNIZED)
        RecognizedAudioToProcessDBManager().add_recognized_audio_to_process(
            recognized_audio_id, training_id)
        TrainingsDBManager().change_audio_status(training_id, AudioStatus.SENT_FOR_PROCESSING)
    except Exception as e:
        logger.error('Unknown exception.\n{}: {}.'.format(e.__class__, e))
def _hangle_error(
        self,
        training_id: ObjectId,
        verdict: str,
        score=0,
        audio_status=AudioStatus.RECOGNITION_FAILED,
):
    """
    Record a failure on a training.

    :param training_id: Identifier of the training to update.
    :param verdict: Explanation that is appended to the training and logged as a warning.
    :param score: Score to store for the training, 0 by default.
    :param audio_status: Audio status to set, RECOGNITION_FAILED by default.
    """
    TrainingsDBManager().change_audio_status(training_id, audio_status)
    TrainingsDBManager().append_verdict(training_id, verdict)
    TrainingsDBManager().set_score(training_id, score)
    logger.warning(verdict)
def handle_presentation_upload():
    """
    Route to handle presentation upload.

    Uploads the presentation, creates a training for it, switches the training to IN_PROGRESS
    and redirects to the 'view_training' page.

    :return: Redirection to the 'view_training' page,
        or the response of the failed step (upload or training creation) with its HTTP code,
        or an empty dictionary with 404 HTTP code if access was denied.
    """
    if not check_auth():
        return {}, 404

    upload_response, upload_code = upload_presentation()
    if upload_response.get('message') != 'OK':
        return upload_response, upload_code

    presentation_file_id = upload_response['presentation_file_id']
    logger.info('Uploaded file with presentation_file_id = {}.'.format(presentation_file_id))

    training_response, training_code = add_training(presentation_file_id)
    if training_response.get('message') != 'OK':
        return training_response, training_code

    training_id = training_response['training_id']
    TrainingsDBManager().change_training_status_by_training_id(
        training_id, TrainingStatus.IN_PROGRESS)
    target = url_for('routes_trainings.view_training', training_id=training_id)
    return redirect(target)
def get_audio_transcription(training_id: str) -> (dict, int):
    """
    Endpoint to get an audio transcription by a training identifier.

    :param training_id: Training identifier.
    :return: Dictionary with per-slide audio transcription array, 'OK' message,
        or a dictionary with an explanation and 404 HTTP return code if the training
        or its audio file was not found,
        or an empty dictionary with 404 HTTP return code if access was denied.
    """
    if not check_access({'_id': ObjectId(training_id)}):
        return {}, 404
    training_db = TrainingsDBManager().get_training(training_id)
    # An admin passes check_access even for an unknown identifier, so the training may be missing.
    if training_db is None:
        return {'message': 'No training with training_id = {}.'.format(training_id)}, 404
    audio_id = training_db.audio_id
    audio_as_json = DBManager().get_file(audio_id)
    if audio_as_json is None:
        return {'message': 'No audio file with audio_id = {}.'.format(audio_id)}, 404
    try:
        audio = Audio.from_json_file(audio_as_json)
    finally:
        # Release the stored file handle even when parsing fails.
        audio_as_json.close()
    audio_transcription = [
        ' '.join(word.word.value for word in audio_slide.recognized_words)
        for audio_slide in audio.audio_slides
    ]
    return {'audio_transcription': audio_transcription, 'message': 'OK'}, 200
def get_presentation_file_by_training_id(training_id: str):
    """
    Endpoint to get a presentation file by a training identifier.

    The optional 'as_attachment' query argument controls whether the file is sent as an
    attachment; it defaults to False and unparsable values are treated as False.

    :param training_id: Training identifier.
    :return: Presentation file that belongs to the training with the given training identifier,
        or a dictionary with an explanation and 404 HTTP return code if the training
        or its presentation file was not found,
        or an empty dictionary with 404 HTTP return code if access was denied.
    """
    if not check_access({'_id': ObjectId(training_id)}):
        return {}, 404
    training_db = TrainingsDBManager().get_training(training_id)
    # An admin passes check_access even for an unknown identifier, so the training may be missing.
    if training_db is None:
        return {'message': 'No training with training_id = {}.'.format(training_id)}, 404
    presentation_file_id = training_db.presentation_file_id
    presentation_file = DBManager().get_file(presentation_file_id)
    if not presentation_file:
        return {
            'message': 'No presentation file with presentation_file_id = {}.'.format(presentation_file_id)
        }, 404
    logger.debug('Got presentation file with presentation_file_id = {}.'.format(presentation_file_id))
    as_attachment = safe_strtobool(
        request.args.get('as_attachment', default=False), on_error=False)
    return send_file(
        presentation_file, mimetype='application/pdf', as_attachment=as_attachment), 200
def get_criterion_parameter_value(training_id: str, criterion_name, parameter_name) -> (dict, int):
    """
    Endpoint to retrieve criterion parameter value.

    :param training_id: Training identifier.
    :param criterion_name: Criterion name.
    :param parameter_name: Parameter name.
    :return: Dictionary with parameter name, parameter value, and 'OK' message,
        or a dictionary with an explanation and 404 HTTP return code
        if a training, criterion, or parameter was not found,
        or an empty dictionary with 404 HTTP return code if access was denied.
    """

    def not_found(message):
        return {'message': message}, 404

    # NOTE(review): the result of this call is discarded; presumably the helper is meant to
    # reject malformed identifiers on its own. Confirm, otherwise ObjectId() below may raise.
    check_argument_is_convertible_to_object_id(training_id)
    if not check_access({'_id': ObjectId(training_id)}):
        return {}, 404

    training_db = TrainingsDBManager().get_training(training_id)
    if not training_db:
        return not_found('No training with trainingId = {}.'.format(training_id))

    criteria_pack = CriteriaPackFactory().get_criteria_pack(training_db.criteria_pack_id)
    criterion = criteria_pack.get_criterion_by_name(criterion_name)
    if criterion is None:
        return not_found('No criterion with name = {}.'.format(criterion_name))

    parameter_value = criterion.parameters.get(parameter_name)
    if parameter_value is None:
        return not_found('No parameter with name = {}.'.format(parameter_name))

    response = {
        'parameterName': parameter_name,
        'parameterValue': parameter_value,
        'message': 'OK',
    }
    return response, 200
def add_training(self, training_id):
    """
    Queue the presentation and the audio record of a training for recognition.

    Both files are added to their recognition queues with the matching SENT_FOR_RECOGNITION
    statuses, the processing start timestamp is set to the current time and the training
    status becomes PREPARING.

    :param training_id: Training identifier.
    """
    training = TrainingsDBManager().get_training(training_id)
    presentation_file_id, record_file_id = (
        training.presentation_file_id,
        training.presentation_record_file_id,
    )

    # Presentation goes first, then the audio record.
    PresentationsToRecognizeDBManager().add_presentation_to_recognize(
        presentation_file_id, training_id)
    TrainingsDBManager().change_presentation_status(
        training_id, PresentationStatus.SENT_FOR_RECOGNITION)

    AudioToRecognizeDBManager().add_audio_to_recognize(record_file_id, training_id)
    TrainingsDBManager().change_audio_status(training_id, AudioStatus.SENT_FOR_RECOGNITION)

    TrainingsDBManager().set_processing_start_timestamp(training_id, datetime.now())
    TrainingsDBManager().change_training_status_by_training_id(
        training_id, TrainingStatus.PREPARING)
def apply(self, audio, presentation, training_id, criteria_results):
    """
    Compare the audio record of a training with the records of all other trainings.

    The current record and every other record are converted from mp3 to wav, aligned,
    downsampled and turned into MFCC features; ``self.common_length`` is then compared
    with the 'common_ratio_threshold' parameter.

    :param audio: Not used by this criterion.
    :param presentation: Not used by this criterion.
    :param training_id: Identifier of the training whose record is checked.
    :param criteria_results: Not used by this criterion.
    :return: CriterionResult with result 0 if the current record cannot be converted or the
        ratio exceeds the threshold for some other record, otherwise CriterionResult with result 1.
    """
    # The method names were mangled ('get__raining', 'get__rainings'); get_training is the
    # accessor used everywhere else in this code base.
    # NOTE(review): get_trainings is restored by analogy - confirm it exists on TrainingsDBManager.
    current_audio_id = TrainingsDBManager().get_training(training_id).presentation_record_file_id
    current_audio_file = DBManager().get_file(current_audio_id)
    try:
        current_audio_file = convert_from_mp3_to_wav(
            current_audio_file, frame_rate=self.parameters['sample_rate'])
    except Exception:
        # Was a bare except, which also swallowed KeyboardInterrupt and SystemExit.
        return CriterionResult(result=0, verdict='Cannot convert from mp3 to wav')
    current_audio_file, _ = librosa.load(current_audio_file.name)

    sample_rate = self.parameters['sample_rate']
    db_audio_ids = [
        training.presentation_record_file_id
        for training in TrainingsDBManager().get_trainings()
    ]
    for db_audio_id in db_audio_ids:
        # Skip the record under check and trainings that have no record at all.
        if db_audio_id is None or db_audio_id == current_audio_id:
            continue
        db_audio_mp3 = DBManager().get_file(db_audio_id)
        try:
            db_audio = convert_from_mp3_to_wav(db_audio_mp3, frame_rate=sample_rate)
        except Exception:
            # Best effort: a record that cannot be converted is ignored.
            continue
        db_audio, _ = librosa.load(db_audio.name)
        aligned_audio = self.align(
            current_audio_file, sample_rate,
            db_audio, sample_rate,
            self.parameters['window_size'], self.parameters['window_step'])
        aligned_audio = self.downsample(aligned_audio)
        db_audio = self.downsample(db_audio, aligned_audio)
        audio_mfcc = librosa.feature.mfcc(y=aligned_audio, sr=sample_rate)
        db_audio_mfcc = librosa.feature.mfcc(y=db_audio, sr=sample_rate)
        common_length_ratio = self.common_length(audio_mfcc, db_audio_mfcc)
        if common_length_ratio > self.parameters['common_ratio_threshold']:
            return CriterionResult(result=0)
    return CriterionResult(result=1)
def _resend(self, training_db):
    """
    Put the audio record of a training back into the recognition queue.

    :param training_db: Training document; its ``pk`` and ``presentation_record_file_id`` are used.
    """
    training_id = training_db.pk
    record_file_id = training_db.presentation_record_file_id
    # The message is logged before the record is actually queued.
    logger.info('Resent audio to recognize with presentation_record_file_id = {}, training_id = {}.'
                .format(record_file_id, training_id))
    AudioToRecognizeDBManager().add_audio_to_recognize(
        file_id=record_file_id,
        training_id=training_db.pk,
    )
    TrainingsDBManager().change_audio_status(training_id, AudioStatus.SENT_FOR_RECOGNITION)
def append_slide_switch_timestamp(training_id: str) -> (dict, int):
    """
    Endpoint to append a slide switch timestamp.

    The timestamp is read from the 'timestamp' query argument as a float; the current time
    is used when the argument is absent.

    :param training_id: Training identifier.
    :return: {'message': 'OK'},
        or an empty dictionary with 404 HTTP return code if access was denied
        or the requester is not an admin and the training status is not IN_PROGRESS.
    """
    if not check_access({'_id': ObjectId(training_id)}):
        return {}, 404
    if not is_admin() and \
            TrainingsDBManager().get_training(training_id).status != TrainingStatus.IN_PROGRESS:
        return {}, 404
    timestamp = request.args.get('timestamp', default=time.time(), type=float)
    TrainingsDBManager().append_timestamp(training_id, timestamp)
    logger.debug('Slide switch: training_id = {}, timestamp = {}, time.time() = {}.'.format(
        training_id, timestamp, time.time()))
    return {'message': 'OK'}, 200
def _resend_stuck_audio(self):
    """
    Resend every training whose audio is in RECOGNIZING status and is considered stuck.

    Whether a training is stuck is decided by ``self._is_stuck_predicate``; trainings that are
    not stuck are only logged. Any exception is logged and swallowed.
    """
    try:
        recognizing = TrainingsDBManager().get_trainings_filtered(
            {'audio_status': AudioStatus.RECOGNIZING})
        for training_db in recognizing:
            if self._is_stuck_predicate(training_db):
                self._resend(training_db)
            else:
                logger.info('Training with training_id = {} has audio_status = RECOGNIZING and it\'s fresh enough.'
                            .format(training_db.pk))
    except Exception as e:
        logger.error('Unknown exception.\n{}: {}.'.format(e.__class__, e))
def _check_access(filters: dict) -> bool:
    """
    Check whether the current session may access the trainings matching the filters.

    :param filters: Filters passed to TrainingsDBManager.get_trainings_filtered.
    :return: False if there is no stored session, True for an admin session,
        otherwise True only if at least one matching training belongs to the session's user.
    """
    username = session.get('session_id', default=None)
    consumer_key = session.get('consumer_key', default=None)
    user_session = SessionsDBManager().get_session(username, consumer_key)
    if not user_session:
        return False
    if user_session.is_admin:
        return True
    matching = TrainingsDBManager().get_trainings_filtered(filters=filters)
    for candidate in matching:
        if candidate.username == username:
            return True
    return False
def delete_training_by_training_id(training_id: str) -> (dict, int):
    """
    Endpoint to delete a training by its identifier. Available to admins only.

    :param training_id: Training identifier.
    :return: {'message': 'OK'},
        or an empty dictionary with 404 HTTP return code if the requester is not an admin.
    """
    if is_admin():
        TrainingsDBManager().delete_training(training_id)
        return {'message': 'OK'}, 200
    return {}, 404
def add_presentation_record(training_id: str) -> (dict, int):
    """
    Endpoint to add presentation record to a training by its identifier.

    Expects the 'presentationRecord' file and the 'presentationRecordDuration' form field.
    The training status is switched to SENT_FOR_PREPARATION before the record is stored.

    :param training_id: Training identifier
    :return: {'message': 'OK'},
        or a dictionary with an explanation and 404 HTTP return code if the record file
        or its duration is missing from the request,
        or an empty dictionary with 404 HTTP return code if access was denied,
        record duration is not convertible to float,
        or the requester is not an admin and a presentation record has already been added.

    TODO: check that presentation record is mp3
    TODO: check that duration is consistent
    """
    if not check_access({'_id': ObjectId(training_id)}):
        return {}, 404

    if 'presentationRecord' not in request.files:
        return {'message': 'No presentation record file.'}, 404
    record_file = request.files['presentationRecord']

    if 'presentationRecordDuration' not in request.form:
        return {'message': 'No presentation record duration.'}, 404
    # A value that is not convertible to float yields the default, None.
    record_duration = request.form.get('presentationRecordDuration', default=None, type=float)
    if record_duration is None:
        return {}, 404

    # Only admins may replace an already uploaded record.
    if not is_admin() and \
            TrainingsDBManager().get_training(training_id).presentation_record_file_id is not None:
        return {}, 404

    TrainingsDBManager().change_training_status_by_training_id(
        training_id, TrainingStatus.SENT_FOR_PREPARATION)
    record_file_id = DBManager().add_file(record_file)
    TrainingsDBManager().add_presentation_record(training_id, record_file_id, record_duration)
    return {'message': 'OK'}, 200
def get_training(training_id) -> (dict, int):
    """
    Endpoint to get information about a training by its identifier.

    :param training_id: Training identifier
    :return: Result of get_training_information for the training,
        or a dictionary with an explanation and 404 HTTP return code if a training was not found,
        or an empty dictionary with 404 HTTP return code if access was denied.
    """
    if not check_access({'_id': ObjectId(training_id)}):
        return {}, 404
    training_db = TrainingsDBManager().get_training(training_id)
    if training_db is not None:
        return get_training_information(training_db)
    return {'message': 'No training with training_id = {}.'.format(training_id)}, 404
def resubmit_failed_trainings():
    """
    Resubmit every training whose status is PREPARATION_FAILED or PROCESSING_FAILED.

    The feedback of each such training is reset to an empty dictionary and saved, then the
    training is passed to TrainingManager.add_training.
    """
    failed_statuses = (
        TrainingStatus.PREPARATION_FAILED,
        TrainingStatus.PROCESSING_FAILED,
    )
    # A filter on {'processing_start_timestamp': None} used to be listed here, commented out.
    query = {'$or': [{'status': status} for status in failed_statuses]}
    for failed in TrainingsDBManager().get_trainings_filtered(filters=query):
        logger.info('Resubmitting training with training_id = {}'.format(failed.pk))
        failed.feedback = {}
        failed.save()
        TrainingManager().add_training(failed.pk)
def apply(self, audio, presentation, training_id):
    """
    Apply every criterion in ``self.criteria`` to a training.

    Each result is recorded with ``self.add_criterion_result`` and stored on the training.
    A criterion that raises is logged as a warning and skipped, so the rest still run.

    :param audio: Audio passed to each criterion.
    :param presentation: Presentation passed to each criterion.
    :param training_id: Training identifier.
    :return: ``self.criteria_results``.
    """
    logger.info('Called {}.apply for a training with training_id = {}'.format(
        self.name, training_id))
    for criterion in self.criteria:
        try:
            outcome = criterion.apply(audio, presentation, training_id, self.criteria_results)
            self.add_criterion_result(criterion.name, outcome)
            TrainingsDBManager().add_criterion_result(training_id, criterion.name, outcome)
            logger.info('Attached {} {} to a training with training_id = {}'.format(
                criterion.name, outcome, training_id))
        except Exception as e:
            logger.warning(
                'Exception while applying {} for a training with training_id = {}.\n{}: {}'
                .format(criterion.name, training_id, e.__class__, e))
    return self.criteria_results
def add_training(presentation_file_id) -> (dict, int):
    """
    Endpoint to add a training based on the presentation file with the given identifier.

    User, task attempt, task and feedback evaluator are taken from the session; the criteria
    pack comes from the task.

    :param presentation_file_id: Presentation file identifier.
    :return: Dictionary with training identifier and 'OK' message,
        or a dictionary with an explanation and 404 HTTP return code if a task attempt or a task was not found,
        or an empty dictionary with 404 HTTP return code if access was denied.

    #TODO check a file was uploaded by the current user???
    """
    if not check_auth():
        return {}, 404

    username = session.get('session_id')
    full_name = session.get('full_name')
    task_attempt_id = session.get('task_attempt_id')
    if TaskAttemptsDBManager().get_task_attempt(task_attempt_id) is None:
        return {
            'message': 'No task attempt with task_attempt_id = {}.'.format(task_attempt_id)
        }, 404

    task_id = session.get('task_id')
    task_db = TasksDBManager().get_task(task_id)
    if task_db is None:
        return {'message': 'No task with task_id = {}.'.format(task_id)}, 404

    new_training = TrainingsDBManager().add_training(
        task_attempt_id=task_attempt_id,
        username=username,
        full_name=full_name,
        presentation_file_id=presentation_file_id,
        criteria_pack_id=task_db.criteria_pack_id,
        feedback_evaluator_id=session.get('feedback_evaluator_id'),
    )
    training_id = new_training.pk
    TaskAttemptsDBManager().add_training(task_attempt_id, training_id)
    return {'training_id': str(training_id), 'message': 'OK'}, 200
def get_count_page() -> (dict, int):
    """
    Endpoint to get the page count for the trainings list.

    Query arguments: 'username' and 'full_name' are optional filters, 'count' is the number
    of items per page. A missing, empty or non-numeric 'count' falls back to 10 instead of
    raising.

    :return: Dictionary with the 'count' key holding the result of TrainingsDBManager.get_count_page,
        or an empty dictionary with 404 HTTP code if the requester is neither an admin
        nor the user named in 'username'.
    """
    username = request.args.get('username', None)
    full_name = request.args.get('full_name', None)
    # type=int makes the lookup return the default when the value cannot be converted.
    count_items = request.args.get('count', default=10, type=int)

    # NOTE(review): other endpoints test check_auth() for truthiness; confirm that comparing
    # with None is intended here.
    authorized = check_auth() is not None
    if not (check_admin() or (authorized and session.get('session_id') == username)):
        return {}, 404

    filters = remove_blank_and_none({'username': username, 'full_name': full_name})
    count = TrainingsDBManager().get_count_page(filters, count_items)
    return {"count": count}, 200
def start_training_processing(training_id: str) -> (dict, int):
    """
    Endpoint to start training processing of a training by its identifier.

    :param training_id: Training identifier.
    :return: {'message': 'OK'},
        or an empty dictionary with 404 HTTP return code if access was denied
        or the requester is not an admin and the training status is not SENT_FOR_PREPARATION.
    """
    logger.info(f'start_training_processing. training_id = {training_id}')
    if not check_access({'_id': ObjectId(training_id)}):
        logger.info(
            f'start_training_processing. not access to training_id = {training_id}')
        return {}, 404
    if not is_admin():
        training_db = TrainingsDBManager().get_training(training_id)
        if training_db.status != TrainingStatus.SENT_FOR_PREPARATION:
            # The message used to name IN_PROGRESS, which is not the status checked above.
            logger.info(
                "start_training_processing. user not admin AND training_db.status != "
                f"TrainingStatus.SENT_FOR_PREPARATION (it's {training_db.status})")
            return {}, 404
    TrainingManager().add_training(training_id)
    return {'message': 'OK'}, 200
def get_all_trainings() -> (dict, int):
    """
    Endpoint to get information about all trainings. Can be optionally filtered by username or full name.

    Query arguments: 'username' and 'full_name' are optional filters, 'page' is the page
    number (default 0) and 'count' is the number of items per page (default 10). Missing,
    empty or non-numeric 'page' / 'count' values fall back to their defaults instead of raising.

    :return: Dictionary with information about all trainings and 'OK' message,
        or an empty dictionary with 404 HTTP code if access was denied.
    """
    username = request.args.get('username', None)
    full_name = request.args.get('full_name', None)
    # type=int makes the lookup return the default when the value cannot be converted.
    number_page = request.args.get('page', default=0, type=int)
    count_items = request.args.get('count', default=10, type=int)
    logger.debug('get_all_trainings: page = {}, count = {}.'.format(number_page, count_items))

    # NOTE(review): other endpoints test check_auth() for truthiness; confirm that comparing
    # with None is intended here.
    authorized = check_auth() is not None
    if not (check_admin() or (authorized and session.get('session_id') == username)):
        return {}, 404

    filters = remove_blank_and_none({'username': username, 'full_name': full_name})
    trainings = TrainingsDBManager().get_trainings_filtered(filters, number_page, count_items)

    trainings_json = {'trainings': {}}
    for current_training in trainings:
        trainings_json['trainings'][str(current_training.pk)] = \
            get_training_information(current_training)
    trainings_json['message'] = 'OK'
    return trainings_json, 200
def run(self):
    """
    Endlessly poll the queue of recognized presentations waiting for processing.

    On every iteration one queued item is extracted; when the queue is empty the loop sleeps
    for 10 seconds. The recognized presentation is loaded from its stored JSON file, combined
    with the slide switch timestamps of the training into a Presentation, stored, and the
    presentation status becomes PROCESSED.
    A missing file marks the presentation as PROCESSING_FAILED with a verdict and a zero score.
    Any other exception is logged and the loop continues.
    """
    while True:
        try:
            recognized_presentation_db = RecognizedPresentationsToProcessDBManager() \
                .extract_recognized_presentation_to_process()
            if not recognized_presentation_db:
                sleep(10)
                continue
            training_id = recognized_presentation_db.training_id
            recognized_presentation_id = recognized_presentation_db.file_id
            logger.info(
                'Extracted recognized presentation with recognized_presentation_id = {}, training_id = {}.'
                .format(recognized_presentation_id, training_id))
            TrainingsDBManager().change_presentation_status(
                training_id, PresentationStatus.PROCESSING)
            json_file = DBManager().get_file(recognized_presentation_id)
            if json_file is None:
                TrainingsDBManager().change_presentation_status(
                    training_id, PresentationStatus.PROCESSING_FAILED)
                verdict = 'Recognized presentation file with recognized_presentation_id = {} was not found.'\
                    .format(recognized_presentation_id)
                TrainingsDBManager().append_verdict(training_id, verdict)
                TrainingsDBManager().set_score(training_id, 0)
                logger.warning(verdict)
                continue
            try:
                recognized_presentation = RecognizedPresentation.from_json_file(json_file)
            finally:
                # Close the file even if parsing raises; previously it leaked in that case.
                json_file.close()
            slide_switch_timestamps = TrainingsDBManager().get_slide_switch_timestamps(training_id)
            presentation = Presentation(recognized_presentation, slide_switch_timestamps)
            presentation_id = DBManager().add_file(repr(presentation))
            TrainingsDBManager().add_presentation_id(training_id, presentation_id)
            TrainingsDBManager().change_presentation_status(
                training_id, PresentationStatus.PROCESSED)
        except Exception as e:
            logger.error('Unknown exception.\n{}'.format(e))
def apply(self, audio, presentation, training_id, criteria_results):
    """
    Check the non-converted presentation file of a training through ``self.send_file`` /
    ``self.try_get_result``.

    :param audio: Not used by this criterion.
    :param presentation: Not used by this criterion.
    :param training_id: Training identifier.
    :param criteria_results: Not used by this criterion.
    :return: CriterionResult with result 0 and an explanatory verdict if the training or the
        presentation file is missing, the presentation has no non-converted file or is a pdf,
        ``self.check_alive`` fails, or no result could be obtained;
        otherwise CriterionResult with the obtained score and a link to the check result.
    """
    training = TrainingsDBManager().get_training(training_id)
    if not training:
        return CriterionResult(0, t('Тренировка отсутствует в БД'))
    pres_file = PresentationFilesDBManager().get_presentation_file(
        training.presentation_file_id)
    if not pres_file:
        return CriterionResult(0, t('Файл презентации отсутствует в БД'))
    # presentation_info may be absent (the presentation recognizer guards for that as well);
    # such a file has no non-converted original, so it is reported as unsupported.
    pres_info = pres_file.presentation_info
    if not pres_info or not pres_info.nonconverted_file_id or pres_info.filetype == 'pdf':
        return CriterionResult(
            0, t('Презентация не имеет поддерживаемого формата (odp, ppt, pptx)'))
    if not self.check_alive(training.username):
        return CriterionResult(0, t('Система проверки недоступна'))
    file = DBManager().get_file(pres_info.nonconverted_file_id)
    check_id = self.send_file(file)
    flag, result = self.try_get_result(check_id)
    if not flag:
        return CriterionResult(
            result=0,
            verdict=f"Проблемы с проверкой (на стороне инстурмента): {result}")
    return CriterionResult(
        result=result['score'],
        verdict=f"С результатом проверки можно ознакомиться по ссылке: {self.parameters['result_url']}{result['check_id']}")
def get_training_statistics(training_id: str) -> (dict, int):
    """
    Endpoint to get statistics of a training by its identifier

    :param training_id: Training identifier
    :return: Dictionary with statistics of the training with the given identifier,
        or a dictionary with an explanation and 404 HTTP return code if the training
        or its presentation file was not found,
        or the failed response of get_remaining_processing_time_by_training_id,
        or an empty dictionary with 404 HTTP return code if access was denied.
    """
    if not check_access({'_id': ObjectId(training_id)}):
        return {}, 404
    training_db = TrainingsDBManager().get_training(training_id)
    # An admin passes check_access even for an unknown identifier, so the training may be missing.
    if training_db is None:
        return {'message': 'No training with training_id = {}.'.format(training_id)}, 404
    presentation_file_id = training_db.presentation_file_id
    presentation_file_name = DBManager().get_file_name(presentation_file_id)
    if presentation_file_name is None:
        return {
            'message': 'No presentation file with presentation_file_id = {}.'.format(presentation_file_id)
        }, 404
    presentation_record_file_id = training_db.presentation_record_file_id
    training_status = training_db.status
    audio_status = training_db.audio_status
    presentation_status = training_db.presentation_status
    slides_time = []
    if audio_status == AudioStatus.PROCESSED:
        # Per-slide information is available only once the audio has been processed.
        audio = Audio.from_json_file(DBManager().get_file(training_db.audio_id))
        slides_time = proccess_training_slides_info(audio)
    feedback = training_db.feedback
    criteria_pack_id = training_db.criteria_pack_id
    feedback_evaluator_id = training_db.feedback_evaluator_id
    remaining_processing_time_estimation, remaining_processing_time_estimation_code = \
        get_remaining_processing_time_by_training_id(training_id)
    if remaining_processing_time_estimation['message'] != 'OK':
        return remaining_processing_time_estimation, remaining_processing_time_estimation_code
    return {
        'message': 'OK',
        'presentation_file_id': str(presentation_file_id),
        'presentation_file_name': presentation_file_name,
        'presentation_record_file_id': str(presentation_record_file_id),
        'feedback': feedback,
        'training_status': training_status,
        'audio_status': audio_status,
        'presentation_status': presentation_status,
        'slides_time': slides_time,
        'remaining_processing_time_estimation':
            remaining_processing_time_estimation['processing_time_remaining'],
        'criteria_pack_id': criteria_pack_id,
        'feedback_evaluator_id': feedback_evaluator_id,
    }, 200
def run(self):
    """
    Endlessly poll the queue of trainings waiting for processing.

    On every iteration one training identifier is extracted; when the queue is empty the loop
    sleeps for 10 seconds. The processed audio and presentation of the training are loaded,
    feedback is evaluated with the training's criteria pack and feedback evaluator, and the
    score is stored on the training and on its task attempt.
    A missing training, audio file or presentation file, or a failed feedback evaluation,
    marks the training as PROCESSING_FAILED with a verdict and a zero score.
    Any other exception is logged and the loop continues.
    """

    def mark_failed(failed_training_id, verdict):
        # Bookkeeping shared by every failure path below.
        TrainingsDBManager().change_training_status_by_training_id(
            failed_training_id, TrainingStatus.PROCESSING_FAILED
        )
        TrainingsDBManager().append_verdict(failed_training_id, verdict)
        TrainingsDBManager().set_score(failed_training_id, 0)
        logger.warning(verdict)

    while True:
        try:
            training_id = TrainingsToProcessDBManager().extract_training_id_to_process()
            if not training_id:
                sleep(10)
                continue
            logger.info('Extracted training with training_id = {}.'.format(training_id))
            training_db = TrainingsDBManager().get_training(training_id)
            if training_db is None:
                mark_failed(
                    training_id,
                    'Training with training_id = {} was not found.'.format(training_id))
                continue
            TrainingsDBManager().change_training_status_by_training_id(
                training_id, TrainingStatus.PROCESSING)

            audio_file = DBManager().get_file(training_db.audio_id)
            if audio_file is None:
                mark_failed(
                    training_id,
                    'Audio file with audio_id = {}, training_id = {} was not found.'
                    .format(training_db.audio_id, training_id))
                continue
            try:
                audio = Audio.from_json_file(audio_file)
            finally:
                # Close the file even if parsing raises; previously it leaked in that case.
                audio_file.close()

            presentation_file = DBManager().get_file(training_db.presentation_id)
            if presentation_file is None:
                mark_failed(
                    training_id,
                    'Presentation file with presentation_id = {}, training_id = {} was not found.'
                    .format(training_db.presentation_id, training_id))
                continue
            try:
                presentation = Presentation.from_json_file(presentation_file)
            finally:
                presentation_file.close()

            criteria_pack_id = training_db.criteria_pack_id
            criteria_pack = CriteriaPackFactory().get_criteria_pack(criteria_pack_id)
            criteria_pack_db = CriterionPackDBManager().get_criterion_pack_by_name(criteria_pack.name)
            feedback_evaluator_id = training_db.feedback_evaluator_id
            # The factory returns a callable that is instantiated with the pack's criterion weights.
            feedback_evaluator = FeedbackEvaluatorFactory().get_feedback_evaluator(
                feedback_evaluator_id)(criteria_pack_db.criterion_weights)
            training = Training(training_id, audio, presentation, criteria_pack, feedback_evaluator)
            try:
                feedback = training.evaluate_feedback()
            except Exception as e:
                mark_failed(
                    training_id,
                    'Feedback evaluation for a training with training_id = {} has failed.\n{}'
                    .format(training_id, e))
                continue

            TrainingsDBManager().set_score(training_id, feedback.score)
            # NOTE(review): a PresentationStatus constant is passed as a *training* status here;
            # presumably TrainingStatus.PROCESSED was intended - confirm it exists before changing.
            TrainingsDBManager().change_training_status_by_training_id(
                training_id, PresentationStatus.PROCESSED)
            task_attempt_id = training_db.task_attempt_id
            TaskAttemptsDBManager().update_scores(task_attempt_id, training_id, feedback.score)
        except Exception as e:
            logger.error('Unknown exception.\n{}'.format(e))
def get_remaining_processing_time_by_training_id(training_id: str) -> (dict, int):
    """
    Endpoint to get estimated time until the training with the provided training identifier will be processed.

    The estimation is the sum of three parts:
    - for every training whose audio is being recognized: half of its record duration minus
      the time since its audio status was last updated (not less than 0);
    - for every training whose audio is NEW or SENT_FOR_RECOGNITION and whose record file was
      generated no later than the current training's record file: half of its record duration;
    - 20 seconds for every training in PREPARED, SENT_FOR_PROCESSING or PROCESSING status whose
      recognized audio was generated no later than the current training's recognized audio.
    An estimation equal to 0 is replaced with 20 seconds.

    :param training_id: Training identifier.
    :return: Dictionary with estimated processing time and 'OK' message
        (the time is 0 when the training status is terminal),
        or a dictionary with an explanation and 404 HTTP return code if a training was not found,
        or an empty dictionary with 404 HTTP return code if access was denied.
    """
    if not check_access({'_id': ObjectId(training_id)}):
        return {}, 404
    logger.debug(
        'Estimating processing time of a training with training_id = {}.'.format(training_id))
    current_training_db = TrainingsDBManager().get_training(training_id)
    if not current_training_db:
        return {'message': 'No training with training_id = {}.'.format(training_id)}, 404
    current_training_status = current_training_db.status
    if TrainingStatus.is_terminal(current_training_status):
        logger.debug(
            'Current training status is {} and is terminal, training_id = {}.'.format(
                current_training_status, training_id))
        return {'processing_time_remaining': 0, 'message': 'OK'}, 200

    time_estimation = 0

    # Part 1: records that are being recognized right now.
    trainings_with_recognizing_audio_status = \
        TrainingsDBManager().get_trainings_filtered({'audio_status': AudioStatus.RECOGNIZING})
    for training in trainings_with_recognizing_audio_status:
        time_since_audio_status_last_update = \
            datetime.now().timestamp() - training.audio_status_last_update.time
        estimated_remaining_recognition_time = \
            training.presentation_record_duration / 2 - time_since_audio_status_last_update
        message = 'Audio status is RECOGNIZING, training_id = {}, status last update = {}, {} seconds ago, '\
                  'presentation record duration = {}.\nEstimated remaining recognition time = {}.'\
            .format(training.pk, training.audio_status_last_update, time_since_audio_status_last_update,
                    training.presentation_record_duration, estimated_remaining_recognition_time)
        if estimated_remaining_recognition_time < 0:
            message += ' Setting to 0.'
        logger.debug(message)
        time_estimation += max(0, estimated_remaining_recognition_time)

    # Part 2: records queued for recognition before (or together with) the current one.
    current_presentation_record_file_id = current_training_db.presentation_record_file_id
    current_presentation_record_file_generation_time = \
        current_presentation_record_file_id.generation_time if current_presentation_record_file_id else None
    if current_presentation_record_file_generation_time:
        # Skipped entirely when the current training has no presentation record yet.
        trainings_with_audio_status_before_recognizing = TrainingsDBManager().get_trainings_filtered(
            filters={
                '$or': [{
                    'audio_status': {
                        '$in': [AudioStatus.NEW, AudioStatus.SENT_FOR_RECOGNITION]
                    }
                }]
            },
        )
        for training in trainings_with_audio_status_before_recognizing:
            if not training.presentation_record_file_id:
                continue
            presentation_record_file_generation_time = training.presentation_record_file_id.generation_time
            try:
                time_estimation_add = training.presentation_record_duration / 2
            except (AttributeError, TypeError):
                continue
            if presentation_record_file_generation_time > current_presentation_record_file_generation_time:
                continue
            # training.pk is logged directly: rebinding training_id here used to overwrite the
            # endpoint argument, so both identifiers in the message were the same.
            logger.debug(
                'Presentation record file generation time for a training with training_id = {} is {}. '
                'It is earlier than or equals to generation time for the current training with training_id = {} '
                'that is {}. Adding {} seconds.'.format(
                    training.pk,
                    presentation_record_file_generation_time,
                    training_id,
                    current_presentation_record_file_generation_time,
                    time_estimation_add,
                ))
            time_estimation += time_estimation_add

    # Part 3: trainings that are ahead of the current one in the processing stage.
    trainings_with_sent_for_processing_or_processing_status = TrainingsDBManager().get_trainings_filtered(
        filters={
            '$or': [{
                'status': {
                    '$in': [
                        TrainingStatus.PREPARED,
                        TrainingStatus.SENT_FOR_PROCESSING,
                        TrainingStatus.PROCESSING
                    ]
                }
            }]
        },
    )
    current_recognized_audio_generation_time = None
    if current_training_status not in \
            [TrainingStatus.NEW, TrainingStatus.IN_PROGRESS,
             TrainingStatus.SENT_FOR_PREPARATION, TrainingStatus.PREPARING]:
        current_recognized_audio_id = current_training_db.recognized_audio_id
        # Guard against a training that has no recognized audio stored.
        if current_recognized_audio_id:
            current_recognized_audio_generation_time = current_recognized_audio_id.generation_time
    if current_recognized_audio_generation_time:
        for training in trainings_with_sent_for_processing_or_processing_status:
            recognized_audio_id = training.recognized_audio_id
            if not recognized_audio_id or \
                    recognized_audio_id.generation_time > current_recognized_audio_generation_time:
                continue
            time_estimation_add = 20
            logger.debug(
                'Current audio status is {}, training_id = {}. Adding {} seconds.'.format(
                    training.status, training.pk, time_estimation_add))
            time_estimation += time_estimation_add

    if time_estimation == 0:
        time_estimation = 20
    return {
        'processing_time_remaining': round(time_estimation),
        'message': 'OK'
    }, 200