def _get_url_by_page_id(self, page_id): try: response = self.session.get(action='query', prop='info', pageids=page_id, inprop='url') except (APIError, RequestException) as e: raise CreateSynopsisError(str(e)) url = response['query']['pages'][str(page_id)]['fullurl'] return url
def _extract_url_from_response(self, response): if response['edit']['result'] == 'Success': page_id = response['edit']['pageid'] url = self._get_url_by_page_id(page_id) return url else: raise CreateSynopsisError("Cant extract url from response, response = {}" .format(response))
def __init__(self, login, password):
    """Open a wiki API session, log in and fetch a CSRF token.

    The session is created from ``settings.WIKI_BASE_URL`` and
    ``settings.WIKI_API_PATH``; the token is stored on ``self.token``.

    Raises:
        CreateSynopsisError: if logging in or requesting the token raises
            LoginError, APIError or RequestException (the failure is logged first).
    """
    self.session = mwapi.Session(host=settings.WIKI_BASE_URL,
                                 api_path=settings.WIKI_API_PATH)
    try:
        self.session.login(login, password)
        tokens_reply = self.session.get(action='query', meta='tokens')
        self.token = tokens_reply['query']['tokens']['csrftoken']
    except (LoginError, APIError, RequestException) as err:
        msg = 'cant initialize WikiClient'
        logger.exception(msg)
        raise CreateSynopsisError('msg={}; error={}'.format(msg, err))
def _get_object(self, object_type, object_id):
    """Fetch one object from ``{STEPIK_BASE_URL}/api/{object_type}/{object_id}``.

    Returns the first element of the list stored under ``object_type`` in the
    JSON body of the reply.

    Raises:
        CreateSynopsisError: if the response object is falsy.
    """
    endpoint = '{base_url}/api/{type}/{id}'.format(base_url=settings.STEPIK_BASE_URL,
                                                   type=object_type,
                                                   id=object_id)
    response = self.session.get(endpoint)
    if response:
        return response.json()[object_type][0]

    raise CreateSynopsisError('Failed to get {type} page from stepik, status code = {status_code}'
                              .format(type=object_type, status_code=response.status_code))
def __init__(self, video_file_path: str, image_saver: ImageSaverBase = None):
    """Open the video file and remember its frame rate.

    Args:
        video_file_path: path handed to ``cv2.VideoCapture``.
        image_saver: object stored on ``self.image_saver`` for later use.

    Raises:
        CreateSynopsisError: if the capture reports that it is not opened.
    """
    self.image_saver = image_saver
    # noinspection PyArgumentList
    capture = cv2.VideoCapture(video_file_path)
    self.cap = capture
    # The frame rate is truncated to an integer.
    self.fps = int(capture.get(cv2.CAP_PROP_FPS))
    if capture.isOpened():
        return
    raise CreateSynopsisError('VideoRecognition error, wrong video filename "{filename}"'
                              .format(filename=video_file_path))
def get_page_categories(self, page_title):
    """Return the category titles attached to the page named ``page_title``.

    Queries ``prop=categories`` for the title and collects the ``title`` field
    of every category entry on the first page of the reply; a page entry
    without a ``categories`` key yields an empty list.

    Raises:
        CreateSynopsisError: wrapping any exception raised while querying or
            reading the reply.
    """
    try:
        reply = self.session.get(action='query', titles=page_title, prop='categories')
        page_entries = list(reply['query']['pages'].values())
        first_page = page_entries[0]
        return [entry['title'] for entry in first_page.get('categories', [])]
    except Exception as err:
        raise CreateSynopsisError(str(err))
def add_text_to_page(self, page_title, text, summary):
    """Append ``text`` (preceded by a newline) to an existing wiki page.

    Posts an ``edit`` action with ``appendtext`` and ``nocreate=True`` using
    the token stored on ``self.token``.

    Raises:
        CreateSynopsisError: wrapping any exception raised while posting.
    """
    try:
        edit_params = {
            'action': 'edit',
            'title': page_title,
            'summary': summary,
            'appendtext': '\n{}'.format(text),
            'token': self.token,
            'nocreate': True,
        }
        self.session.post(**edit_params)
    except Exception as err:
        raise CreateSynopsisError(str(err))
def _get_url_by_page_title(self, title): try: response = self.session.post(action='query', titles=title) except (APIError, RequestException) as e: raise CreateSynopsisError(str(e)) page_id = int(list(response['query']['pages'])[0]) if page_id < 0: return None return self._get_url_by_page_id(page_id)
def save(self, image: io.BytesIO, position: int) -> str:
    """Upload ``image`` to Uploadcare and return its CDN URL.

    Args:
        image: in-memory image sent as the ``file`` part of the upload.
        position: not used by this implementation.

    Returns:
        ``https://ucarecdn.com/<uuid>/`` where the uuid is the ``file`` field
        of the JSON reply.

    Raises:
        CreateSynopsisError: if the upload response is falsy.
    """
    form_fields = {'UPLOADCARE_PUB_KEY': self.pub_key,
                   'UPLOADCARE_STORE': 1}
    response = self.session.post(url=UPLOADCARE_URL_TO_UPLOAD,
                                 files={'file': image},
                                 data=form_fields)
    if response:
        file_uuid = response.json()['file']
        return 'https://ucarecdn.com/{uuid}/'.format(uuid=file_uuid)

    raise CreateSynopsisError(
        'Failed to upload image, status code: {status_code}'.format(
            status_code=response.status_code))
def make_synopsis_from_video(video):
    """Build synopsis content from a video: download, transcribe, extract keyframes.

    Steps, all performed inside a temporary directory:
      1. stream the file at ``video['urls'][0]['url']`` to ``tmp.mp4``;
      2. extract the audio track to ``tmp_audio.wav`` with the
         ``FFMPEG_EXTRACT_AUDIO`` shell command;
      3. recognize the audio with ``AudioRecognitionYandex``;
      4. collect keyframes with ``VideoRecognitionCells`` (images are stored
         through ``ImageSaverUploadcare``);
      5. merge both results with ``merge_audio_and_video``.

    Args:
        video: mapping that provides ``'urls'`` (a list whose first item has a
            ``'url'`` key) and ``'id'``.

    Returns:
        Whatever ``merge_audio_and_video`` returns for the keyframes and the
        recognized audio.

    Raises:
        CreateSynopsisError: if the download does not answer with status 200,
            the file exceeds ``VIDEOS_DOWNLOAD_MAX_SIZE``, or the ffmpeg
            command fails.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        videofile = os.path.join(tmpdir, 'tmp.mp4')

        response = requests.get(video['urls'][0]['url'], stream=True)
        try:
            if response.status_code != 200:
                # ``video`` is indexed as a mapping everywhere else, so the id is
                # read with ['id'] here too (attribute access fails on a dict).
                raise CreateSynopsisError('Failed to download video, Status code: {status_code}, id = {id}'
                                          .format(status_code=response.status_code, id=video['id']))

            size = 0
            with open(videofile, 'wb') as f:
                for chunk in response.iter_content(VIDEOS_DOWNLOAD_CHUNK_SIZE):
                    size += f.write(chunk)
                    if size > VIDEOS_DOWNLOAD_MAX_SIZE:
                        raise CreateSynopsisError('Failed to download video, too big video file, id = {id}'
                                                  .format(id=video['id']))
        finally:
            # A streamed response keeps its connection until it is closed,
            # including when the download is aborted by one of the errors above.
            response.close()

        # The video file is closed (and therefore flushed) before ffmpeg reads it.
        out_audio = os.path.join(tmpdir, 'tmp_audio.wav')
        command = FFMPEG_EXTRACT_AUDIO.format(input_video=videofile, output_audio=out_audio)
        if not run_shell_command(command):
            raise CreateSynopsisError(command)

        ar = AudioRecognitionYandex(audio_file_path=out_audio,
                                    lang=Language.RUSSIAN,
                                    key=settings.YANDEX_SPEECH_KIT_KEY)
        recognized_audio = ar.recognize()

        uploadcare_saver = ImageSaverUploadcare(pub_key=settings.UPLOAD_CARE_PUB_KEY)
        vr = VideoRecognitionCells(video_file_path=videofile, image_saver=uploadcare_saver)
        keyframes_src_with_timestamp = vr.get_keyframes_src_with_timestamp()

        content = merge_audio_and_video(keyframes_src_with_timestamp, recognized_audio)

    return content
def _create_page(self, title, text, summary):
    """Create a wiki page and return its URL.

    Posts an ``edit`` action with ``createonly=True``. When the API answers
    with an APIError the error is logged and the URL of the page with that
    title is looked up instead.

    Raises:
        CreateSynopsisError: if the request raises RequestException, or if the
            URL cannot be extracted from the reply.
    """
    edit_params = dict(action='edit', title=title, section=0, summary=summary,
                       text=text, token=self.token, createonly=True)
    try:
        response = self.session.post(**edit_params)
    except RequestException as err:
        raise CreateSynopsisError(str(err))
    except APIError:
        # NOTE(review): every APIError is treated as "article already exists";
        # confirm that no other API error code can reach this branch.
        logger.exception('mwapi.errors.APIError: articleexists: - its OK')
        return self._get_url_by_page_title(title)
    else:
        page_url = self._extract_url_from_response(response)
        logger.info('created page with url %s', page_url)
        return page_url
def save_keyframes(self, keyframe_positions: Iterable[int]) -> List[list]:
    """Encode the frames at the given positions as PNG and store them.

    The capture is rewound, then read forward frame by frame; positions are
    processed in ascending order so the video is traversed only once.

    Args:
        keyframe_positions: frame indices to save.

    Returns:
        A list of ``[image_src, timestamp]`` pairs, where ``image_src`` is the
        value returned by ``self.image_saver.save`` and ``timestamp`` is the
        frame index divided by ``self.fps``.

    Raises:
        CreateSynopsisError: if a frame cannot be read before the requested
            position is reached (including the very first frame).
    """
    self.cap.set(cv2.CAP_PROP_POS_AVI_RATIO, 0)
    frame_ptr = 0
    # ``frame`` always holds the frame at index ``frame_ptr``.
    ret, frame = self.cap.read()

    keyframes_src_with_timestamp = []
    for keyframe_position in sorted(keyframe_positions):
        while self.cap.isOpened() and frame_ptr != keyframe_position:
            ret, frame = self.cap.read()
            frame_ptr += 1
            if not ret:
                raise CreateSynopsisError('Wrong keyframe_position = {}'.format(keyframe_position))
        if not ret:
            # Only reachable when the initial read failed and position 0 was
            # requested; fail with the module's error instead of encoding a bad frame.
            raise CreateSynopsisError('Wrong keyframe_position = {}'.format(keyframe_position))

        # tobytes() replaces the deprecated ndarray.tostring() (same bytes).
        image_bytes = io.BytesIO(cv2.imencode('.png', frame)[1].tobytes())
        image_src = self.image_saver.save(image_bytes, keyframe_position)
        keyframes_src_with_timestamp.append([image_src, keyframe_position / self.fps])

    return keyframes_src_with_timestamp
def recognize(self) -> List[RecognizedChunk]:
    """Send every audio chunk to the speech service and collect the results.

    For each ``(start, end, chunk)`` produced by ``self._chunks()`` the chunk
    is posted to the URL built from ``YANDEX_SPEECH_KIT_REQUEST_URL``; the XML
    reply is parsed and the text of its first child is used when the root
    attribute ``success`` equals ``'1'``, otherwise ``AUDIO_IS_NOT_RECOGNIZED``.

    Returns:
        One entry per chunk, produced by ``self._recognize_text_format``.

    Raises:
        CreateSynopsisError: if a response object is falsy.
    """
    # NOTE(review): any other language leaves the code as None, which is then
    # formatted into the URL; 'en-EN' also looks unusual - confirm against the
    # service documentation.
    if self.lang == Language.RUSSIAN:
        lang_code = 'ru-RU'
    elif self.lang == Language.ENGLISH:
        lang_code = 'en-EN'
    else:
        lang_code = None

    results = []
    for start, end, chunk in self._chunks():
        request_url = YANDEX_SPEECH_KIT_REQUEST_URL.format(key=self.key, lang=lang_code)
        response = self.session.post(url=request_url,
                                     data=chunk,
                                     headers={'Content-Type': 'audio/x-mpeg-3'})
        if not response:
            raise CreateSynopsisError('Failed to recognize audio, status code: {status_code}'
                                      .format(status_code=response.status_code))

        root = ElementTree.fromstring(response.text)
        if root.attrib['success'] == '1':
            text = root[0].text
        else:
            text = AUDIO_IS_NOT_RECOGNIZED
        results.append(self._recognize_text_format(start, end, text))

    return results
def __init__(self, video_file_path: str, image_saver: ImageSaverBase = None,
             threshold: float = THRESHOLD_FOR_PEAKS_DETECTION,
             haar_cascade: str = '/home/synopsis/recognition/video/static/HS.xml'):
    """Prepare keyframe detection for a video.

    Args:
        video_file_path: forwarded to the parent initializer.
        image_saver: forwarded to the parent initializer.
        threshold: stored on ``self.threshold``.
        haar_cascade: path of the cascade file loaded into
            ``cv2.CascadeClassifier``. Defaults to the previously hard-coded
            location so existing callers are unaffected.

    Raises:
        CreateSynopsisError: if the cascade classifier is empty after loading
            (or, presumably, from the parent initializer - not visible here).
    """
    super().__init__(video_file_path, image_saver)
    self.threshold = threshold

    self.cascade = cv2.CascadeClassifier(haar_cascade)
    if self.cascade.empty():
        raise CreateSynopsisError('VideoRecognition error, wrong haar cascade filename "{filename}"'
                                  .format(filename=haar_cascade))

    # Video geometry and timing, all truncated to integers.
    self.shape = self._Shape(width=int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                             height=int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    self.num_of_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
    self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
    self.frames_between_keyframes = (TIME_BETWEEN_KEYFRAMES * self.fps) // FRAME_PERIOD

    # Per-video accumulators, filled by other methods of the class.
    self.diffs = []
    self.peaks = []
    self.humans = []