def download_subtitle(youtube_id, lang_code, format="srt"):
    """
    Fetch the srt subtitles for a YouTube video, in one language, from Amara.

    The video's Amara id is looked up in the local srt map (the JSON file at
    SRTS_JSON_FILEPATH). That file is only read here; it is never written.

    Note: srt map deals with amara, so uses ietf codes (e.g. en-us)

    Args:
        youtube_id: key of the video in the srt map.
        lang_code: language code; it is lower-cased before going into the URL.
        format: must be "srt", the only supported format.

    Returns:
        - False when the srt map holds no Amara code for youtube_id
          (there is nothing that could be requested).
        - Whatever make_request returned, unchanged, when that value is a
          string or falsy (presumably an error code such as "client-error";
          confirm against make_request).
        - Otherwise the subtitle text, with empty subtitle lines replaced by
          lines of spaces.
        - "client-error" when reading the response text fails.
    """
    assert format == "srt", "We only support srt download at the moment."

    # srt map deals with amara, so uses ietf codes (e.g. en-us)
    with open(SRTS_JSON_FILEPATH, "r") as fp:
        api_info_map = json.load(fp)

    # get amara id; a video that is missing from the map (or mapped without an
    # Amara code) must not crash on None or produce a ".../videos/None/..." URL
    amara_code = (api_info_map.get(youtube_id) or {}).get("amara_code")
    if not amara_code:
        return False

    # make request
    # Please see http://amara.readthedocs.org/en/latest/api.html
    base_url = "https://amara.org/api2/partners/videos"

    resp = make_request(
        AMARA_HEADERS, "%s/%s/languages/%s/subtitles/?format=srt" % (base_url, amara_code, lang_code.lower())
    )
    if isinstance(resp, basestring) or not resp:
        return resp

    # return the subtitle text, replacing empty subtitle lines with
    # spaces to make the FLV player happy
    try:
        resp.encoding = "UTF-8"
        response = (resp.text or u"").replace("\n\n\n", "\n   \n\n").replace("\r\n\r\n\r\n", "\r\n   \r\n\r\n")
    except Exception as e:
        # best effort: any failure while reading the body is reported as a client error
        logging.error(e)
        response = "client-error"
    return response
# Example no. 2
0
def download_subtitle(youtube_id, lang_code, format="srt"):
    """Fetch the srt subtitles for a YouTube video, in one language, from Amara.

    The video's Amara id is looked up in the local srt map (the JSON file at
    settings.SUBTITLES_DATA_ROOT + SRTS_JSON_FILENAME). That file is only read
    here; it is never written.

    Args:
        youtube_id: key of the video in the srt map.
        lang_code: language code, put into the request URL as given.
        format: must be "srt", the only supported format.

    Returns:
        - False when the srt map holds no Amara code for youtube_id
          (there is nothing that could be requested).
        - The string returned by make_request, unchanged, when it returns a
          string (presumably an error code such as "client-error"; confirm
          against make_request).
        - Otherwise the subtitle text, with empty subtitle lines replaced by
          lines of spaces.
        - "client-error" when reading the response text fails.
    """
    assert format == "srt", "We only support srt download at the moment."

    # Use a context manager so the file handle is closed deterministically
    with open(settings.SUBTITLES_DATA_ROOT + SRTS_JSON_FILENAME) as fp:
        api_info_map = json.load(fp)

    # get amara id; a video that is missing from the map (or mapped without an
    # Amara code) must not crash on None or produce a ".../videos/None/..." URL
    amara_code = (api_info_map.get(youtube_id) or {}).get("amara_code")
    if not amara_code:
        return False

    # make request
    # Please see http://amara.readthedocs.org/en/latest/api.html
    base_url = "https://amara.org/api2/partners/videos"

    r = make_request(
        headers, "%s/%s/languages/%s/subtitles/?format=srt" %
        (base_url, amara_code, lang_code))
    if isinstance(r, basestring):
        return r

    # return the subtitle text, replacing empty subtitle lines with
    # spaces to make the FLV player happy
    try:
        r.encoding = "UTF-8"
        response = (r.text or u"") \
            .replace("\n\n\n", "\n   \n\n") \
            .replace("\r\n\r\n\r\n", "\r\n   \r\n\r\n")
    except Exception as e:
        # best effort: any failure while reading the body is reported as a client error
        logging.error(e)
        response = "client-error"
    return response
# Example no. 3
0
def download_subtitle(youtube_id, lang_code, format="srt"):
    """Fetch the srt subtitles for a YouTube video, in one language, from Amara.

    The video's Amara id is looked up in the local srt map (the JSON file at
    settings.SUBTITLES_DATA_ROOT + SRTS_JSON_FILENAME). That file is only read
    here; it is never written.

    Args:
        youtube_id: key of the video in the srt map.
        lang_code: language code, put into the request URL as given.
        format: must be "srt", the only supported format.

    Returns:
        - False when the srt map holds no Amara code for youtube_id
          (there is nothing that could be requested).
        - The string returned by make_request, unchanged, when it returns a
          string (presumably an error code such as "client-error"; confirm
          against make_request).
        - Otherwise the subtitle text, with empty subtitle lines replaced by
          lines of spaces.
        - "client-error" when reading the response text fails.
    """
    assert format == "srt", "We only support srt download at the moment."

    # Use a context manager so the file handle is closed deterministically
    with open(settings.SUBTITLES_DATA_ROOT + SRTS_JSON_FILENAME) as fp:
        api_info_map = json.load(fp)

    # get amara id; a video that is missing from the map (or mapped without an
    # Amara code) must not crash on None or produce a ".../videos/None/..." URL
    amara_code = (api_info_map.get(youtube_id) or {}).get("amara_code")
    if not amara_code:
        return False

    # make request
    # Please see http://amara.readthedocs.org/en/latest/api.html
    base_url = "https://amara.org/api2/partners/videos"

    r = make_request(headers, "%s/%s/languages/%s/subtitles/?format=srt" % (
        base_url, amara_code, lang_code))
    if isinstance(r, basestring):
        return r

    # return the subtitle text, replacing empty subtitle lines with
    # spaces to make the FLV player happy
    try:
        r.encoding = "UTF-8"
        response = (r.text or u"") \
            .replace("\n\n\n", "\n   \n\n") \
            .replace("\r\n\r\n\r\n", "\r\n   \r\n\r\n")
    except Exception as e:
        # best effort: any failure while reading the body is reported as a client error
        logging.error(e)
        response = "client-error"
    return response
def update_video_entry(youtube_id, entry=None):
    """Query Amara for a video's subtitle languages and record them in an entry.

    Return a dictionary to be appended to the current schema:
            youtube_id: {
                            "amara_code": "3x4mp1e",
                            "language_codes": ["en", "es", "etc"],
                            "api_response": "success" OR "client-error" OR the error string returned by make_request,
                            "last_success": "2013-07-06",
                            "last_attempt": "2013-07-06",
                        }
    To update an entry, pass it in; it is modified in place and returned.
    When no entry is passed, a new dictionary is created for this call.

    Note: language_codes are in IETF format (e.g. en-US)

    Args:
        youtube_id: YouTube id of the video to look up.
        entry: existing entry dictionary to update, or None for a new one.

    Returns:
        The entry. "last_attempt" is always set to today's date;
        "api_response" tells whether the lookup succeeded. "language_codes",
        "amara_code" and "last_success" are only refreshed once the response
        has been parsed.
    """
    # A mutable default ({}) would be shared by every call that omits `entry`,
    # leaking one video's data into the next; build a fresh dict instead.
    if entry is None:
        entry = {}

    request_url = "https://www.amara.org/api2/partners/videos/?format=json&video_url=http://www.youtube.com/watch?v=%s" % (
        youtube_id)
    r = make_request(AMARA_HEADERS, request_url)
    # add api response first to prevent empty json on errors
    entry["last_attempt"] = unicode(datetime.datetime.now().date())

    if isinstance(r, basestring):  # string responses mean some type of error
        logging.info("%s at %s" % (r, request_url))
        entry["api_response"] = r
        return entry

    try:
        content = json.loads(r.content)
        # Explicit checks instead of asserts, so they still run under `python -O`.
        if "objects" not in content:
            raise ValueError("no 'objects' key in response")
        if len(content["objects"]) != 1:
            raise ValueError("expected exactly 1 object, got %d" % len(content["objects"]))
        languages = content["objects"][0]["languages"]
    except Exception as e:
        logging.warning("%s: Could not load json response: %s" % (youtube_id, e))
        entry["api_response"] = "client-error"
        return entry

    # Get all the languages
    try:
        prev_languages = entry.get("language_codes") or []

        # Reset first, so that a failure below never leaves stale values behind.
        entry["language_codes"] = []
        entry["amara_code"] = None
        if languages:
            entry["language_codes"] = [language['code'] for language in languages]

            # pull amara video id
            amara_code = languages[0]["subtitles_uri"].split("/")[4]
            if len(amara_code) != 12:  # in case of future API change
                raise ValueError("unexpected amara code %r" % amara_code)
            entry["amara_code"] = amara_code

        added_languages = set(entry["language_codes"]) - set(prev_languages)
        removed_languages = set(prev_languages) - set(entry["language_codes"])
        logging.info("Success for id=%s%s%s" % (
            youtube_id,
            "" if not added_languages else "; added languages=%s" % list(added_languages),
            "" if not removed_languages else "; removed languages=%s" % list(removed_languages),
        ))
        entry["api_response"] = "success"
        entry["last_success"] = unicode(datetime.datetime.now().date())

        return entry
    except Exception as e:
        logging.warning("Failed to grab language / amara codes for %s: %s" % (youtube_id, e))
        entry["api_response"] = "client-error"
        return entry
# Example no. 5
0
def update_video_entry(youtube_id, entry=None):
    """Query Amara for a video's subtitle languages and record them in an entry.

    Return a dictionary to be appended to the current schema:
            youtube_id: {
                            "amara_code": "3x4mp1e",
                            "language_codes": ["en", "es", "etc"],
                            "api_response": "success" OR "client-error" OR the error string returned by make_request,
                            "last_success": "2013-07-06",
                            "last_attempt": "2013-07-06",
                        }
    To update an entry, pass it in; it is modified in place and returned.
    When no entry is passed, a new dictionary is created for this call.

    Args:
        youtube_id: YouTube id of the video to look up.
        entry: existing entry dictionary to update, or None for a new one.

    Returns:
        The entry. "last_attempt" is always set to today's date;
        "api_response" tells whether the lookup succeeded. "language_codes",
        "amara_code" and "last_success" are only refreshed once the response
        has been parsed.
    """
    # A mutable default ({}) would be shared by every call that omits `entry`,
    # leaking one video's data into the next; build a fresh dict instead.
    if entry is None:
        entry = {}

    request_url = "https://www.amara.org/api2/partners/videos/?format=json&video_url=http://www.youtube.com/watch?v=%s" % (
        youtube_id)
    r = make_request(headers, request_url)
    # add api response first to prevent empty json on errors
    entry["last_attempt"] = unicode(datetime.datetime.now().date())

    if isinstance(r, basestring):  # string responses mean some type of error
        logging.info("%s at %s" % (r, request_url))
        entry["api_response"] = r
        return entry

    try:
        content = json.loads(r.content)
        # Explicit checks instead of asserts, so they still run under `python -O`.
        if "objects" not in content:
            raise ValueError("no 'objects' key in response")
        if len(content["objects"]) != 1:
            raise ValueError("expected exactly 1 object, got %d" % len(content["objects"]))
        languages = content["objects"][0]["languages"]
    except Exception as e:
        logging.warning("%s: Could not load json response: %s" % (youtube_id, e))
        entry["api_response"] = "client-error"
        return entry

    # Get all the languages
    try:
        prev_languages = entry.get("language_codes") or []

        # Reset first, so that a failure below never leaves stale values behind.
        entry["language_codes"] = []
        entry["amara_code"] = None
        if languages:
            entry["language_codes"] = [language['code'] for language in languages]

            # pull amara video id
            amara_code = languages[0]["subtitles_uri"].split("/")[4]
            if len(amara_code) != 12:  # in case of future API change
                raise ValueError("unexpected amara code %r" % amara_code)
            entry["amara_code"] = amara_code

        added_languages = set(entry["language_codes"]) - set(prev_languages)
        removed_languages = set(prev_languages) - set(entry["language_codes"])
        logging.info("Success for id=%s%s%s" % (
            youtube_id,
            "" if not added_languages else "; added languages=%s" %
            list(added_languages),
            "" if not removed_languages else "; removed languages=%s" %
            list(removed_languages),
        ))
        entry["api_response"] = "success"
        entry["last_success"] = unicode(datetime.datetime.now().date())

        return entry
    except Exception as e:
        logging.warning("Failed to grab language / amara codes for %s: %s" %
                        (youtube_id, e))
        entry["api_response"] = "client-error"
        return entry