Esempio n. 1
0
    def retrieve_transcript(self, **kwargs):
        """Transcribe the saved audio file with Google Cloud Speech-to-Text.

        Keyword Args:
            encoding: ``RecognitionConfig.AudioEncoding`` value
                (default: ENCODING_UNSPECIFIED).
            sample_rate: Sample rate of the audio in hertz (default: 44100).
            language_code: BCP 47 language code (default: ``self.language``).
            automatic_punctuation: Enable automatic punctuation (default: True).

        Returns:
            tuple: ``(transcript, confidence)``; both are also stored on the
            instance and committed to the database.
        """
        encoding = kwargs.get('encoding') or speech.enums.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED
        # BUG FIX: this previously read kwargs['encoding'] again, so a
        # caller-supplied sample_rate was silently ignored.
        sample_rate = kwargs.get('sample_rate') or 44100
        language_code = kwargs.get('language_code') or self.language
        # BUG FIX: `kwargs.get(...) or True` always evaluated to True, making
        # it impossible to disable punctuation. Fall back only when absent.
        automatic_punctuation = kwargs.get('automatic_punctuation')
        if automatic_punctuation is None:
            automatic_punctuation = True

        client = speech.SpeechClient()

        with io.open(self._save_path, 'rb') as f:
            content = f.read()

        audio = speech.types.RecognitionAudio(content=content)
        config = speech.types.RecognitionConfig(encoding=encoding, enable_automatic_punctuation=automatic_punctuation,
                                                sample_rate_hertz=sample_rate, language_code=language_code)
        result = client.recognize(config, audio)

        transcript = ''
        confidence = 0
        for r in result.results:
            # NOTE(review): only the LAST result's confidence is kept —
            # presumably intentional, but confirm an aggregate isn't wanted.
            confidence = r.alternatives[0].confidence
            transcript += r.alternatives[0].transcript

        self.confidence = confidence
        self.transcript = transcript
        db.commit()
        return transcript, confidence
Esempio n. 2
0
def import_article():
    """Flask handler: fetch the page behind an RSS track and store it as an Article.

    Expects a JSON body with ``id`` (an RSSTrack id) and optionally ``language``.

    :return: (payload dict, HTTP status code)
    """
    data = request.get_json()
    logger.info('data: %s', data)

    # BUG FIX (cleanup): `source_url` and `pretty_name` were read from the
    # payload but never used — the Article is built from the track itself.
    language = data.get('language')
    rss_id = data.get('id')
    track = RSSTrack.get(rss_id)
    try:
        resp = requests.get(track.url)
        resp.raise_for_status()
    except HTTPError as e:
        # raise_for_status only fires after `resp` is bound, so it is safe here.
        return {'status': 'error', 'details': str(e)}, resp.status_code

    hostname = urlparse(track.url).hostname

    article = Article(pretty_name=track.name,
                      url=track.url,
                      rss_id=rss_id,
                      language=language)
    # Parsing rules are looked up per source hostname.
    parser = ParsingRules.get(hostname)
    # TODO: Save in a better format!
    article.content = parser.parse(resp.content)
    db.add(article)
    db.commit()
    return {'status': 'ok', 'article': article.to_json()}, 200
Esempio n. 3
0
    def split_file(self, seconds=DEFAULT_SEGMENT_LENGTH):
        """Split the saved audio file into fixed-length chunks with ffmpeg and
        record one Segment row per produced chunk.

        :param seconds: segment length in seconds; falsy values fall back to
            DEFAULT_SEGMENT_LENGTH.
        """
        if not seconds:
            seconds = DEFAULT_SEGMENT_LENGTH

        base_dir = os.path.dirname(self._save_path())
        output_dir = os.path.join(base_dir, str(seconds))
        if os.path.exists(output_dir):
            # The base has already been split at this segment length.
            return
        os.makedirs(output_dir)

        segment_pattern = os.path.join(output_dir, f'%04d_{self.filename}')

        # TODO: Choice between different tools?
        self.ffmpeg([
            'ffmpeg', '-i', self._save_path(),
            '-f', 'segment',
            '-segment_time', str(seconds),
            '-c', 'copy', segment_pattern
        ])

        logger.debug(f'listdir: {output_dir}')
        produced = sorted(os.listdir(output_dir))
        logger.debug('segments: %r' % produced)
        for position, segment_file in enumerate(produced, start=1):
            logger.debug(f'{position - 1} {segment_file}')
            logger.debug(f'SAVING {position - 1}, {segment_file}')
            db.add(Segment(base_id=self.id, position=position,
                           length=seconds, language=self.language))
        db.commit()
Esempio n. 4
0
def add_channel():
    """Flask handler: validate an RSS feed URL with a HEAD request, then
    persist it as a new RSSChannel.

    :return: (payload dict, HTTP status code)
    """
    data = request.get_json()
    try:
        feed_url = data.get('feed_url')
        resp = requests.head(feed_url)
        resp.raise_for_status()
    except HTTPError:
        detail = f'Failed to connect to {feed_url} with status code {resp.status_code}'
        return {'status': 'error', 'details': detail}, resp.status_code
    except ConnectionError:
        # The server never answered, so there is no response status to echo.
        detail = f'Server for url {feed_url} is not available'
        return {'status': 'error', 'details': detail}, 503

    channel = RSSChannel(
        url=feed_url,
        channel_name=data.get('feed_name'),
        channel_description=data.get('feed_description'),
        channel_type=data.get('feed_type'),
    )
    db.add(channel)
    db.commit()
    return {'status': 'ok', 'data': channel.to_json()}, 200
Esempio n. 5
0
def create_user():
    """Initialise the database schema and commit.

    NOTE(review): despite the name, this no longer seeds a default user —
    the flask_security bootstrap (user_datastore.create_user with
    DEFAULT_EMAIL / DEFAULT_PASSWORD from the environment) was commented out
    and has been removed as dead code. Recover it from VCS history if the
    default-user seeding is ever needed again.
    """
    init_db()
    db.commit()
Esempio n. 6
0
def translate_segment(file, position):
    """Flask handler: translate a piece of text belonging to one segment.

    NOTE: This does not necessarily translate a whole segment, merely a part.
    To translate a whole part, we may end up translating some bits twice
    so we'll have to do something a bit more clever.

    :param file: identifier of the BaseAudio record
    :param position: position of the segment within that file
    :return: (payload dict, HTTP status code)
    """
    payload = request.get_json(force=True)
    target = payload.get('target', 'en-GB')
    text = payload.get('text')
    logger.debug(f'translate file {file} position {position} text {text}')
    segment = BaseAudio.get(file).get_segment(position)
    logger.debug(f'translate segment: {segment.to_json()}')

    translation = _translate(text, target, segment)
    db.add(translation)
    db.commit()
    return {'status': 'ok', 'translation': translation.to_json(segment)}, 200
Esempio n. 7
0
def save_file():
    """Downloads the file from the URL specified in POST/PUT body and saves on the filesystem and creates
    a record in the database

    Required fields:
    source_url: The URL to download it from (must be open, no auth will be attempted)
    Optional fields:
    language: BCP 47 language code. See https://cloud.google.com/speech-to-text/docs/languages for supported languages
    Files with more than one language should use string "MULTI"
    pretty_name: The name that the file will be displayed as
    track_id: Optional RSSTrack id; when present, the matching track is flagged as added

    :return: A JSON representation of the file or error message as appropriate
    """
    try:
        data = request.get_json()
        source_url = data.get('source_url')
        pretty_name = data.get('pretty_name')
        language = data.get('language')

        # RSS page field
        track_id = data.get('track_id')

        audio = BaseAudio.get(source_url=source_url)
        if BaseAudio.exists(audio):
            logger.info('Audio file already exists')
            return audio.to_json(), 409

        audio = BaseAudio(source_url=source_url,
                          pretty_name=pretty_name,
                          language=language)
        db.add(audio)
        # Flush so the new row exists (and has an id) before save_file() runs.
        db.flush()
        audio.save_file()
        db.commit()

        if track_id:
            track = db.query(RSSTrack).filter(RSSTrack.id == track_id).first()
            if track:
                track.is_added = True
                db.commit()

    except Exception as e:
        # Top-level handler boundary: log the full traceback instead of
        # silently reducing every failure to its message string.
        logger.exception('save_file failed')
        data = {'status': 'error', 'message': str(e)}
        return data, 500
    return audio.to_json(), 201
Esempio n. 8
0
def parse_feed():
    """Checks the content of an RSS URL and adds the episodes to the website

    Creates the RSSChannel row on first sight of the feed, then walks the
    items newest-first, stopping once it reaches the most recently stored
    track.

    :return: ({'status': 'OK', 'data': [new tracks as JSON]}, 200)
    """
    data = request.get_json()
    feed_url = data.get('url')
    req = requests.get(feed_url)

    soup = BeautifulSoup(req.content, 'xml')

    channel = db.query(RSSChannel).filter(RSSChannel.url == feed_url).first()
    if not channel:
        name = soup.find('title').text
        description = soup.find('description').text
        channel = RSSChannel(url=feed_url,
                             channel_name=name,
                             channel_description=description)
        db.add(channel)
        db.commit()

    latest_track = db.query(RSSTrack)\
        .filter(RSSTrack.channel == channel)\
        .order_by(RSSTrack.published_date.desc())\
        .first()

    ret = []
    for item in soup.find_all('item'):
        try:
            date_text = item.find('pubDate').text
        except AttributeError:
            # Some feeds use the lower-case tag name.
            date_text = item.find('pubdate').text
        # BUG FIX (cleanup): the RFC 2822 date was parsed twice per item with
        # identical expressions; parse once and reuse it for the new track.
        rfc_date = datetime.datetime.fromtimestamp(
            email.utils.mktime_tz(email.utils.parsedate_tz(date_text)),
            pytz.utc)
        if latest_track:
            # NOTE(review): this mutates the ORM object's tzinfo in place and
            # the trailing db.commit() may persist it — confirm intended.
            latest_track.published_date = latest_track.published_date.replace(
                tzinfo=rfc_date.tzinfo)
            if latest_track.published_date >= rfc_date:
                logger.debug(
                    f'Reached {latest_track.name}, which is already in the database'
                )
                break
        if channel.channel_type == 'audio':
            track_url = item.find('enclosure').get('url')
        elif channel.channel_type == 'text':
            track_url = item.find('link').text
        else:
            raise ValueError('Unrecognised channel type')
        name = item.find('title').text
        description = item.find('description').text
        track = RSSTrack(channel_id=channel.id,
                         url=track_url,
                         name=name,
                         description=description,
                         published_date=rfc_date)
        ret.append(track)
        db.add(track)

    db.commit()
    return {'status': 'OK', 'data': [track.to_json() for track in ret]}, 200