Beispiel #1
0
def download_subtitle(youtube_id, lang_code, format="srt"):
    """
    Return subtitles for YouTube ID in language specified. Return False if they do not exist. Update local JSON accordingly.

    Note: srt map deals with amara, so uses lower-cased ietf codes (e.g. en-us)
    """
    assert format == "srt", "We only support srt download at the moment."

    # srt map deals with amara, so uses ietf codes (e.g. en-us)
    api_info_map = softload_json(SRTS_JSON_FILEPATH, raises=True)

    # get amara id
    amara_code = api_info_map.get(youtube_id, {}).get("amara_code")

    # make request
    # Please see http://amara.readthedocs.org/en/latest/api.html
    base_url = "https://amara.org/api2/partners/videos"

    resp = make_request(
        AMARA_HEADERS, "%s/%s/languages/%s/subtitles/?format=srt" % (
            base_url,
            amara_code,
            lang_code.lower(),
        ))
    if isinstance(resp, basestring):
        return resp
    else:
        # return the subtitle text, replacing empty subtitle lines with
        # spaces to make the FLV player happy
        try:
            resp.encoding = "UTF-8"
            response = (resp.text or u"") \
                .replace("\n\n\n", "\n   \n\n") \
                .replace("\r\n\r\n\r\n", "\r\n   \r\n\r\n")
        except Exception as e:
            logging.error(e)
            response = "client-error"
        return response
def download_subtitle(youtube_id, lang_code, format="srt"):
    """
    Return subtitles for YouTube ID in language specified. Return False if they do not exist. Update local JSON accordingly.

    Note: srt map deals with amara, so uses lower-cased ietf codes (e.g. en-us)
    """
    assert format == "srt", "We only support srt download at the moment."


    # srt map deals with amara, so uses ietf codes (e.g. en-us)
    api_info_map = softload_json(SRTS_JSON_FILEPATH, raises=True)

    # get amara id
    amara_code = api_info_map.get(youtube_id, {}).get("amara_code")

    # make request
    # Please see http://amara.readthedocs.org/en/latest/api.html
    base_url = "https://amara.org/api2/partners/videos"

    resp = make_request(AMARA_HEADERS, "%s/%s/languages/%s/subtitles/?format=srt" % (
        base_url, amara_code, lang_code.lower(),
    ))
    if isinstance(resp, basestring):
        return resp
    else:
        # return the subtitle text, replacing empty subtitle lines with
        # spaces to make the FLV player happy
        try:
            resp.encoding = "UTF-8"
            response = (resp.text or u"") \
                .replace("\n\n\n", "\n   \n\n") \
                .replace("\r\n\r\n\r\n", "\r\n   \r\n\r\n")
        except Exception as e:
            logging.error(e)
            response = "client-error"
        return response
def update_video_entry(youtube_id, entry={}):
    """Return a dictionary to be appended to the current schema:
            youtube_id: {
                            "amara_code": "3x4mp1e",
                            "language_codes": ["en", "es", "etc"],
                            "api_response": "success" OR "client_error" OR "server_error",
                            "last_success": "2013-07-06",
                            "last_attempt": "2013-07-06",
                        }
    To update an entry, pass it in.

    Note: language_codes are in IETF format (e.g. en-US)
    """
    request_url = "https://www.amara.org/api2/partners/videos/?format=json&video_url=http://www.youtube.com/watch?v=%s" % (
        youtube_id)
    resp = make_request(AMARA_HEADERS, request_url)
    # add api response first to prevent empty json on errors
    entry["last_attempt"] = unicode(datetime.datetime.now().date())

    if isinstance(resp,
                  basestring):  # string responses mean some type of error
        entry["api_response"] = resp
        return entry

    try:
        content = json.loads(resp.content)
        assert "objects" in content  # just index in, to make sure the expected data is there.
        assert len(content["objects"]) == 1
        languages = content["objects"][0]["languages"]
    except Exception as e:
        logging.warn(
            "Error updating video entry %s: Could not load json response: %s" %
            (youtube_id, e))
        entry["api_response"] = "client-error"
        return entry

    # Get all the languages
    try:
        prev_languages = entry.get("language_codes") or []

        entry["language_codes"] = []
        entry["amara_code"] = None
        if languages:
            for language in languages:
                entry["language_codes"].append(language['code'])

            # pull amara video id
            amara_code = languages[0]["subtitles_uri"].split("/")[4]
            assert len(amara_code) == 12  # in case of future API change
            entry["amara_code"] = amara_code

        added_languages = set(entry["language_codes"]) - set(prev_languages)
        removed_languages = set(prev_languages) - set(entry["language_codes"])
        logging.info("Success for id=%s%s%s" % (
            youtube_id,
            "" if not added_languages else "; added languages=%s" %
            list(added_languages),
            "" if not removed_languages else "; removed languages=%s" %
            list(removed_languages),
        ))
        entry["api_response"] = "success"
        entry["last_success"] = unicode(datetime.datetime.now().date())

        return entry
    except Exception as e:
        logging.warn("Failed to grab language / amara codes for %s: %s" %
                     (youtube_id, e))
        entry["api_response"] = "client-error"
        return entry
def update_video_entry(youtube_id, entry={}):
    """Return a dictionary to be appended to the current schema:
            youtube_id: {
                            "amara_code": "3x4mp1e",
                            "language_codes": ["en", "es", "etc"],
                            "api_response": "success" OR "client_error" OR "server_error",
                            "last_success": "2013-07-06",
                            "last_attempt": "2013-07-06",
                        }
    To update an entry, pass it in.

    Note: language_codes are in IETF format (e.g. en-US)
    """
    request_url = "https://www.amara.org/api2/partners/videos/?format=json&video_url=http://www.youtube.com/watch?v=%s" % (
        youtube_id)
    resp = make_request(AMARA_HEADERS, request_url)
    # add api response first to prevent empty json on errors
    entry["last_attempt"] = unicode(datetime.datetime.now().date())

    if isinstance(resp, basestring):  # string responses mean some type of error
        entry["api_response"] = resp
        return entry

    try:
        content = json.loads(resp.content)
        assert "objects" in content  # just index in, to make sure the expected data is there.
        assert len(content["objects"]) == 1
        languages = content["objects"][0]["languages"]
    except Exception as e:
        logging.warn("Error updating video entry %s: Could not load json response: %s" % (youtube_id, e))
        entry["api_response"] = "client-error"
        return entry

    # Get all the languages
    try:
        prev_languages = entry.get("language_codes") or []

        entry["language_codes"] = []
        entry["amara_code"] = None
        if languages:
            for language in languages:
                entry["language_codes"].append(language['code'])

            # pull amara video id
            amara_code = languages[0]["subtitles_uri"].split("/")[4]
            assert len(amara_code) == 12  # in case of future API change
            entry["amara_code"] = amara_code

        added_languages = set(entry["language_codes"]) - set(prev_languages)
        removed_languages = set(prev_languages) - set(entry["language_codes"])
        logging.info("Success for id=%s%s%s" % (
            youtube_id,
            "" if not added_languages else "; added languages=%s" % list(added_languages),
            "" if not removed_languages else "; removed languages=%s" % list(removed_languages),
        ))
        entry["api_response"] = "success"
        entry["last_success"] = unicode(datetime.datetime.now().date())

        return entry
    except Exception as e:
        logging.warn("Failed to grab language / amara codes for %s: %s" % (youtube_id, e))
        entry["api_response"] = "client-error"
        return entry