Esempio n. 1
0
 def _get_url_by_page_id(self, page_id):
     """Return the ``fullurl`` value of the wiki page with the given id.

     Sends an ``action=query`` request with ``prop=info`` and ``inprop=url``
     through ``self.session`` and reads the entry keyed by ``str(page_id)``
     from the ``pages`` mapping of the reply.

     Raises:
         CreateSynopsisError: if the request raises ``APIError`` or
             ``RequestException``.
     """
     query = dict(action='query', prop='info', pageids=page_id, inprop='url')
     try:
         reply = self.session.get(**query)
     except (APIError, RequestException) as error:
         raise CreateSynopsisError(str(error))
     # The reply keys pages by the string form of their id.
     page_info = reply['query']['pages'][str(page_id)]
     return page_info['fullurl']
Esempio n. 2
0
 def _extract_url_from_response(self, response):
     """Resolve the URL of the page referenced by an edit response.

     When ``response['edit']['result']`` equals ``'Success'`` the page id
     stored in ``response['edit']['pageid']`` is resolved to a URL through
     ``_get_url_by_page_id``.

     Raises:
         CreateSynopsisError: if the edit result is anything but
             ``'Success'``.
     """
     edit_info = response['edit']
     if edit_info['result'] != 'Success':
         raise CreateSynopsisError("Cant extract url from response, response = {}"
                                   .format(response))
     return self._get_url_by_page_id(response['edit']['pageid'])
Esempio n. 3
0
 def __init__(self, login, password):
     """Open an mwapi session, log in and store a CSRF token.

     The session targets ``settings.WIKI_BASE_URL`` with the API path
     ``settings.WIKI_API_PATH``. After logging in, the ``csrftoken`` from a
     ``meta=tokens`` query is kept in ``self.token``.

     Raises:
         CreateSynopsisError: if login or the token query raises
             ``LoginError``, ``APIError`` or ``RequestException``; the
             failure is logged with its traceback first.
     """
     self.session = mwapi.Session(host=settings.WIKI_BASE_URL, api_path=settings.WIKI_API_PATH)
     try:
         self.session.login(login, password)
         token_reply = self.session.get(action='query', meta='tokens')
         self.token = token_reply['query']['tokens']['csrftoken']
     except (LoginError, APIError, RequestException) as error:
         reason = 'cant initialize WikiClient'
         logger.exception(reason)
         raise CreateSynopsisError('msg={}; error={}'.format(reason, error))
Esempio n. 4
0
    def _get_object(self, object_type, object_id):
        """Fetch one object of ``object_type`` by id from the Stepik API.

        Requests ``<STEPIK_BASE_URL>/api/<object_type>/<object_id>`` through
        ``self.session`` and returns the first element of the list stored
        under the ``object_type`` key of the JSON reply.

        Raises:
            CreateSynopsisError: if the response object is falsy; the message
                carries its status code.
        """
        endpoint = '{base_url}/api/{type}/{id}'.format(base_url=settings.STEPIK_BASE_URL,
                                                       type=object_type,
                                                       id=object_id)
        reply = self.session.get(endpoint)
        if reply:
            return reply.json()[object_type][0]

        raise CreateSynopsisError('Failed to get {type} page from stepik, status code = {status_code}'
                                  .format(type=object_type, status_code=reply.status_code))
Esempio n. 5
0
    def __init__(self, video_file_path: str, image_saver: ImageSaverBase = None):
        """Open the video file and remember its frame rate.

        Args:
            video_file_path: path handed to ``cv2.VideoCapture``.
            image_saver: object stored as ``self.image_saver``.

        Raises:
            CreateSynopsisError: if the capture reports that it is not opened.
        """
        self.image_saver = image_saver
        # noinspection PyArgumentList
        capture = cv2.VideoCapture(video_file_path)
        self.cap = capture
        # Frame rate is truncated to an integer.
        self.fps = int(capture.get(cv2.CAP_PROP_FPS))

        opened = capture.isOpened()
        if not opened:
            message = 'VideoRecognition error, wrong video filename "{filename}"'.format(
                filename=video_file_path)
            raise CreateSynopsisError(message)
Esempio n. 6
0
 def get_page_categories(self, page_title):
     """Return the category titles listed for the given wiki page.

     Queries ``prop=categories`` for ``page_title`` and collects the
     ``title`` of every entry under ``categories`` of the first page in the
     reply. A page without a ``categories`` key yields an empty list.

     Raises:
         CreateSynopsisError: wrapping any exception raised while querying
             or parsing the reply.
     """
     try:
         reply = self.session.get(action='query',
                                  titles=page_title,
                                  prop='categories')
         # Only the first page of the reply is inspected.
         first_page = list(reply['query']['pages'].values())[0]
         return [category['title'] for category in first_page.get('categories', [])]
     except Exception as error:
         raise CreateSynopsisError(str(error))
Esempio n. 7
0
 def add_text_to_page(self, page_title, text, summary):
     """Append ``text`` on a new line to an existing wiki page.

     Posts an ``action=edit`` request with ``appendtext`` set to a newline
     followed by ``text``, using ``self.token`` and ``nocreate=True``.

     Raises:
         CreateSynopsisError: wrapping any exception raised while building
             or sending the request.
     """
     try:
         edit_params = {
             'action': 'edit',
             'title': page_title,
             'summary': summary,
             'appendtext': '\n{}'.format(text),
             'token': self.token,
             'nocreate': True,
         }
         self.session.post(**edit_params)
     except Exception as error:
         raise CreateSynopsisError(str(error))
Esempio n. 8
0
    def _get_url_by_page_title(self, title):
        """Return the URL of the page with the given title, or ``None``.

        Posts an ``action=query`` request for ``title`` and converts the
        first key of the returned ``pages`` mapping to an integer page id.
        A negative id yields ``None`` (presumably the API's marker for a
        missing page; confirm against the MediaWiki API); otherwise the id
        is resolved through ``_get_url_by_page_id``.

        Raises:
            CreateSynopsisError: if the request raises ``APIError`` or
                ``RequestException``.
        """
        try:
            reply = self.session.post(action='query', titles=title)
        except (APIError, RequestException) as error:
            raise CreateSynopsisError(str(error))

        first_key = list(reply['query']['pages'])[0]
        page_id = int(first_key)
        return None if page_id < 0 else self._get_url_by_page_id(page_id)
Esempio n. 9
0
    def save(self, image: io.BytesIO, position: int) -> str:
        """Upload ``image`` and return its ``ucarecdn.com`` URL.

        The image is posted as the ``file`` part of a multipart request to
        ``UPLOADCARE_URL_TO_UPLOAD`` together with the public key and the
        store flag. ``position`` is accepted but not used by this method.

        Raises:
            CreateSynopsisError: if the response object is falsy; the message
                carries its status code.
        """
        payload = {'UPLOADCARE_PUB_KEY': self.pub_key, 'UPLOADCARE_STORE': 1}
        reply = self.session.post(url=UPLOADCARE_URL_TO_UPLOAD,
                                  files={'file': image},
                                  data=payload)

        if reply:
            # The JSON reply carries the uploaded file's identifier under 'file'.
            file_uuid = reply.json()['file']
            return 'https://ucarecdn.com/{uuid}/'.format(uuid=file_uuid)

        raise CreateSynopsisError(
            'Failed to upload image, status code: {status_code}'.format(
                status_code=reply.status_code))
Esempio n. 10
0
def make_synopsis_from_video(video):
    """Download a video and build synopsis content from its audio and frames.

    The video is streamed into a temporary directory, its audio track is
    extracted with the ``FFMPEG_EXTRACT_AUDIO`` shell command, the audio is
    recognized with ``AudioRecognitionYandex`` (Russian), keyframes are
    detected and uploaded through ``VideoRecognitionCells`` with an
    ``ImageSaverUploadcare`` saver, and both results are merged.

    Args:
        video: mapping with an ``'id'`` key and a ``'urls'`` sequence whose
            first item has a ``'url'`` key.

    Returns:
        The value returned by ``merge_audio_and_video`` for the recognized
        keyframes and audio.

    Raises:
        CreateSynopsisError: if the download does not answer with status 200,
            the file grows beyond ``VIDEOS_DOWNLOAD_MAX_SIZE``, or the audio
            extraction command fails.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        videofile = os.path.join(tmpdir, 'tmp.mp4')

        response = requests.get(video['urls'][0]['url'], stream=True)
        try:
            if response.status_code != 200:
                # ``video`` is a mapping, so the id must be read by key.
                raise CreateSynopsisError('Failed to download video, Status code: {status_code}, id = {id}'
                                          .format(status_code=response.status_code, id=video['id']))
            with open(videofile, 'wb') as f:
                size = 0
                for chunk in response.iter_content(VIDEOS_DOWNLOAD_CHUNK_SIZE):
                    size += f.write(chunk)
                    if size > VIDEOS_DOWNLOAD_MAX_SIZE:
                        raise CreateSynopsisError('Failed to download video, too big video file, id = {id}'
                                                  .format(id=video['id']))
        finally:
            # Release the streamed connection on every exit path.
            response.close()

        # The video file is closed (and therefore flushed) at this point, so
        # ffmpeg and OpenCV read the complete download.
        out_audio = os.path.join(tmpdir, 'tmp_audio.wav')
        command = FFMPEG_EXTRACT_AUDIO.format(input_video=videofile,
                                              output_audio=out_audio)
        if not run_shell_command(command):
            raise CreateSynopsisError(command)

        ar = AudioRecognitionYandex(audio_file_path=out_audio,
                                    lang=Language.RUSSIAN,
                                    key=settings.YANDEX_SPEECH_KIT_KEY)
        recognized_audio = ar.recognize()

        uploadcare_saver = ImageSaverUploadcare(pub_key=settings.UPLOAD_CARE_PUB_KEY)
        vr = VideoRecognitionCells(video_file_path=videofile,
                                   image_saver=uploadcare_saver)
        keyframes_src_with_timestamp = vr.get_keyframes_src_with_timestamp()

        return merge_audio_and_video(keyframes_src_with_timestamp,
                                     recognized_audio)
Esempio n. 11
0
    def _create_page(self, title, text, summary):
        """Create a wiki page and return its URL.

        Posts an ``action=edit`` request with ``createonly=True``. When the
        API raises ``APIError`` the error is logged and the URL of the page
        with that title is looked up instead; the code treats every
        ``APIError`` here as "article already exists".

        Raises:
            CreateSynopsisError: if the request raises ``RequestException``,
                or if the URL cannot be extracted from the edit response.
        """
        try:
            reply = self.session.post(action='edit',
                                      title=title,
                                      section=0,
                                      summary=summary,
                                      text=text,
                                      token=self.token,
                                      createonly=True)
        except RequestException as error:
            raise CreateSynopsisError(str(error))
        except APIError:
            logger.exception('mwapi.errors.APIError: articleexists: - its OK')
            return self._get_url_by_page_title(title)
        else:
            created_url = self._extract_url_from_response(reply)
            logger.info('created page with url %s', created_url)
            return created_url
Esempio n. 12
0
    def save_keyframes(self, keyframe_positions: Iterable[int]) -> List[list]:
        """Save the frames at the given positions and return their sources.

        The capture is rewound and read sequentially; positions are visited
        in ascending order so the video is decoded only once. Each selected
        frame is PNG-encoded and handed to ``self.image_saver.save``.

        Args:
            keyframe_positions: frame indices (0-based) to save.

        Returns:
            A list of ``[image_src, timestamp]`` pairs, where ``image_src`` is
            the value returned by the image saver and ``timestamp`` is the
            frame index divided by ``self.fps``.

        Raises:
            CreateSynopsisError: if a requested position cannot be reached
                because the capture has no (more) frames to read.
        """
        self.cap.set(cv2.CAP_PROP_POS_AVI_RATIO, 0)

        frame_ptr = 0
        _, frame = self.cap.read()
        keyframes_src_with_timestamp = []
        for keyframe_position in sorted(keyframe_positions):
            # A failed read (including a closed capture) ends the seek with an
            # error instead of silently encoding a stale frame.
            while frame_ptr != keyframe_position:
                ret, frame = self.cap.read()
                frame_ptr += 1
                if not ret:
                    raise CreateSynopsisError('Wrong keyframe_position = {}'.format(keyframe_position))

            if frame is None:
                # The very first read failed, so there is nothing to encode.
                raise CreateSynopsisError('Wrong keyframe_position = {}'.format(keyframe_position))

            # ndarray.tostring() is deprecated (removed in NumPy 2.0);
            # tobytes() returns the same bytes.
            image_bytes = io.BytesIO(cv2.imencode('.png', frame)[1].tobytes())
            image_src = self.image_saver.save(image_bytes, keyframe_position)
            keyframes_src_with_timestamp.append([image_src, keyframe_position / self.fps])
        return keyframes_src_with_timestamp
Esempio n. 13
0
    def recognize(self) -> List[RecognizedChunk]:
        """Recognize every audio chunk and return the formatted results.

        Each ``(start, end, chunk)`` produced by ``self._chunks()`` is posted
        to the URL built from ``YANDEX_SPEECH_KIT_REQUEST_URL``. The reply is
        parsed as XML: when the root's ``success`` attribute is ``'1'`` the
        text of its first child is used, otherwise
        ``AUDIO_IS_NOT_RECOGNIZED``. Results are formatted with
        ``self._recognize_text_format``.

        Raises:
            CreateSynopsisError: if a response object is falsy; the message
                carries its status code.
        """
        # Any language other than RUSSIAN or ENGLISH leaves the code as None.
        if self.lang == Language.RUSSIAN:
            lang_code = 'ru-RU'
        else:
            lang_code = 'en-EN' if self.lang == Language.ENGLISH else None

        results = []
        for start, end, chunk in self._chunks():
            request_url = YANDEX_SPEECH_KIT_REQUEST_URL.format(key=self.key, lang=lang_code)
            reply = self.session.post(url=request_url,
                                      data=chunk,
                                      headers={'Content-Type': 'audio/x-mpeg-3'})
            if not reply:
                raise CreateSynopsisError('Failed to recognize audio, status code: {status_code}'
                                          .format(status_code=reply.status_code))

            xml_root = ElementTree.fromstring(reply.text)
            if xml_root.attrib['success'] == '1':
                text = xml_root[0].text
            else:
                text = AUDIO_IS_NOT_RECOGNIZED

            results.append(self._recognize_text_format(start, end, text))
        return results
Esempio n. 14
0
    def __init__(self, video_file_path: str,
                 image_saver: ImageSaverBase = None,
                 threshold: float = THRESHOLD_FOR_PEAKS_DETECTION,
                 haar_cascade: str = '/home/synopsis/recognition/video/static/HS.xml'):
        """Open the video, load the Haar cascade and read the video geometry.

        Args:
            video_file_path: path of the video, passed to the parent class.
            image_saver: saver passed to the parent class.
            threshold: value stored as ``self.threshold``.
            haar_cascade: path of the cascade XML file handed to
                ``cv2.CascadeClassifier``. Defaults to the deployment path
                that used to be hard-coded, so existing callers are
                unaffected; other environments can point it elsewhere.

        Raises:
            CreateSynopsisError: if the cascade classifier is empty after
                loading ``haar_cascade`` (in addition to whatever the parent
                initializer raises).
        """
        super().__init__(video_file_path, image_saver)
        self.threshold = threshold

        self.cascade = cv2.CascadeClassifier(haar_cascade)
        if self.cascade.empty():
            raise CreateSynopsisError('VideoRecognition error, wrong haar cascade filename "{filename}"'
                                      .format(filename=haar_cascade))

        self.shape = self._Shape(width=int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                                 height=int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        self.num_of_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
        # Number of sampled frames between keyframes: seconds * fps, divided
        # by the sampling period in frames.
        self.frames_between_keyframes = (TIME_BETWEEN_KEYFRAMES * self.fps) // FRAME_PERIOD

        self.diffs = []
        self.peaks = []
        self.humans = []