def download_subtitle(youtube_id, lang_code, format="srt"):
    """Fetch the srt subtitle text for a YouTube video from Amara.

    The Amara video id is looked up under ``youtube_id`` in the local srt
    map (loaded from SRTS_JSON_FILEPATH).  ``lang_code`` is lower-cased
    before it is placed in the request URL, since the srt map deals with
    amara and therefore uses lower-cased ietf codes (e.g. en-us).

    Returns one of:
      * the string handed back by ``make_request`` when it returns a string
        rather than a response object (presumably an error code -- confirm
        against ``make_request``);
      * the response text, with empty subtitle lines padded with a space;
      * "client-error" if reading or padding the response text raised.

    Only ``format="srt"`` is accepted; anything else trips the assertion.
    """
    assert format == "srt", "We only support srt download at the moment."

    # Look up the amara id recorded for this video (None when unknown).
    srt_map = softload_json(SRTS_JSON_FILEPATH, raises=True)
    video_info = srt_map.get(youtube_id, {})
    amara_code = video_info.get("amara_code")

    # Build and issue the request.
    # Please see http://amara.readthedocs.org/en/latest/api.html
    base_url = "https://amara.org/api2/partners/videos"
    url_parts = (base_url, amara_code, lang_code.lower())
    subtitle_url = "%s/%s/languages/%s/subtitles/?format=srt" % url_parts
    resp = make_request(AMARA_HEADERS, subtitle_url)

    # A plain string is passed straight back to the caller.
    if isinstance(resp, basestring):
        return resp

    # Replace empty subtitle lines with spaces to make the FLV player happy.
    try:
        resp.encoding = "UTF-8"
        text = resp.text or u""
        text = text.replace("\n\n\n", "\n \n\n")
        text = text.replace("\r\n\r\n\r\n", "\r\n \r\n\r\n")
    except Exception as e:
        logging.error(e)
        return "client-error"
    return text
def download_subtitle(youtube_id, lang_code, format="srt"):
    """Fetch the srt subtitle text for a YouTube video from Amara.

    The Amara video id is looked up under ``youtube_id`` in the local srt
    map (loaded from SRTS_JSON_FILEPATH).  ``lang_code`` is lower-cased
    before it is placed in the request URL, since the srt map deals with
    amara and therefore uses lower-cased ietf codes (e.g. en-us).

    Returns one of:
      * the string handed back by ``make_request`` when it returns a string
        rather than a response object (presumably an error code -- confirm
        against ``make_request``);
      * the response text, with empty subtitle lines padded with a space;
      * "client-error" if reading or padding the response text raised.

    Only ``format="srt"`` is accepted; anything else trips the assertion.
    """
    assert format == "srt", "We only support srt download at the moment."

    # Look up the amara id recorded for this video (None when unknown).
    srt_map = softload_json(SRTS_JSON_FILEPATH, raises=True)
    video_info = srt_map.get(youtube_id, {})
    amara_code = video_info.get("amara_code")

    # Build and issue the request.
    # Please see http://amara.readthedocs.org/en/latest/api.html
    base_url = "https://amara.org/api2/partners/videos"
    url_parts = (base_url, amara_code, lang_code.lower())
    subtitle_url = "%s/%s/languages/%s/subtitles/?format=srt" % url_parts
    resp = make_request(AMARA_HEADERS, subtitle_url)

    # A plain string is passed straight back to the caller.
    if isinstance(resp, basestring):
        return resp

    # Replace empty subtitle lines with spaces to make the FLV player happy.
    try:
        resp.encoding = "UTF-8"
        text = resp.text or u""
        text = text.replace("\n\n\n", "\n \n\n")
        text = text.replace("\r\n\r\n\r\n", "\r\n \r\n\r\n")
    except Exception as e:
        logging.error(e)
        return "client-error"
    return text
def update_video_entry(youtube_id, entry=None):
    """Query Amara for a video and return its (updated) srt-map entry.

    The returned dictionary is meant to be stored in the current schema:

        youtube_id: {
            "amara_code": "3x4mp1e",
            "language_codes": ["en", "es", "etc"],
            "api_response": "success", "client-error", or the error
                            string returned by make_request,
            "last_success": "2013-07-06",
            "last_attempt": "2013-07-06",
        }

    To update an existing entry, pass it in as ``entry``; it is modified in
    place and returned.  When ``entry`` is omitted a brand-new dictionary is
    created for this call.

    "last_attempt" is always refreshed.  "last_success", "language_codes"
    and "amara_code" are only fully refreshed when the API response could be
    parsed; otherwise "api_response" records the failure.

    Note: language_codes are in IETF format (e.g. en-US)
    """
    if entry is None:
        # Build a fresh dict per call.  A ``{}`` default would be shared by
        # every call that omits ``entry``, leaking one video's data into
        # the next one's result.
        entry = {}

    request_url = "https://www.amara.org/api2/partners/videos/?format=json&video_url=http://www.youtube.com/watch?v=%s" % (
        youtube_id)
    resp = make_request(AMARA_HEADERS, request_url)

    # add api response first to prevent empty json on errors
    entry["last_attempt"] = unicode(datetime.datetime.now().date())

    if isinstance(resp, basestring):  # string responses mean some type of error
        entry["api_response"] = resp
        return entry

    try:
        content = json.loads(resp.content)
        # Index in explicitly, to make sure the expected data is there.
        # Explicit raises (rather than asserts) keep the validation active
        # when Python runs with -O.
        objects = content["objects"]
        if len(objects) != 1:
            raise ValueError("expected exactly 1 video object, got %d" % len(objects))
        languages = objects[0]["languages"]
    except Exception as e:
        logging.warn("Error updating video entry %s: Could not load json response: %s" % (youtube_id, e))
        entry["api_response"] = "client-error"
        return entry

    # Get all the languages
    try:
        prev_languages = entry.get("language_codes") or []
        entry["language_codes"] = []
        entry["amara_code"] = None

        if languages:  # may be an empty list
            for language in languages:
                entry["language_codes"].append(language['code'])

            # pull amara video id out of the first language's subtitles uri
            amara_code = languages[0]["subtitles_uri"].split("/")[4]
            if len(amara_code) != 12:  # in case of future API change
                raise ValueError("unexpected amara code %r" % (amara_code,))
            entry["amara_code"] = amara_code

        # Report what changed relative to the entry we were given.
        added_languages = set(entry["language_codes"]) - set(prev_languages)
        removed_languages = set(prev_languages) - set(entry["language_codes"])
        logging.info("Success for id=%s%s%s" % (
            youtube_id,
            "" if not added_languages else "; added languages=%s" % list(added_languages),
            "" if not removed_languages else "; removed languages=%s" % list(removed_languages),
        ))

        entry["api_response"] = "success"
        entry["last_success"] = unicode(datetime.datetime.now().date())
        return entry
    except Exception as e:
        logging.warn("Failed to grab language / amara codes for %s: %s" % (youtube_id, e))
        entry["api_response"] = "client-error"
        return entry
def update_video_entry(youtube_id, entry=None):
    """Query Amara for a video and return its (updated) srt-map entry.

    The returned dictionary is meant to be stored in the current schema:

        youtube_id: {
            "amara_code": "3x4mp1e",
            "language_codes": ["en", "es", "etc"],
            "api_response": "success", "client-error", or the error
                            string returned by make_request,
            "last_success": "2013-07-06",
            "last_attempt": "2013-07-06",
        }

    To update an existing entry, pass it in as ``entry``; it is modified in
    place and returned.  When ``entry`` is omitted a brand-new dictionary is
    created for this call.

    "last_attempt" is always refreshed.  "last_success", "language_codes"
    and "amara_code" are only fully refreshed when the API response could be
    parsed; otherwise "api_response" records the failure.

    Note: language_codes are in IETF format (e.g. en-US)
    """
    if entry is None:
        # Build a fresh dict per call.  A ``{}`` default would be shared by
        # every call that omits ``entry``, leaking one video's data into
        # the next one's result.
        entry = {}

    request_url = "https://www.amara.org/api2/partners/videos/?format=json&video_url=http://www.youtube.com/watch?v=%s" % (
        youtube_id)
    resp = make_request(AMARA_HEADERS, request_url)

    # add api response first to prevent empty json on errors
    entry["last_attempt"] = unicode(datetime.datetime.now().date())

    if isinstance(resp, basestring):  # string responses mean some type of error
        entry["api_response"] = resp
        return entry

    try:
        content = json.loads(resp.content)
        # Index in explicitly, to make sure the expected data is there.
        # Explicit raises (rather than asserts) keep the validation active
        # when Python runs with -O.
        objects = content["objects"]
        if len(objects) != 1:
            raise ValueError("expected exactly 1 video object, got %d" % len(objects))
        languages = objects[0]["languages"]
    except Exception as e:
        logging.warn("Error updating video entry %s: Could not load json response: %s" % (youtube_id, e))
        entry["api_response"] = "client-error"
        return entry

    # Get all the languages
    try:
        prev_languages = entry.get("language_codes") or []
        entry["language_codes"] = []
        entry["amara_code"] = None

        if languages:  # may be an empty list
            for language in languages:
                entry["language_codes"].append(language['code'])

            # pull amara video id out of the first language's subtitles uri
            amara_code = languages[0]["subtitles_uri"].split("/")[4]
            if len(amara_code) != 12:  # in case of future API change
                raise ValueError("unexpected amara code %r" % (amara_code,))
            entry["amara_code"] = amara_code

        # Report what changed relative to the entry we were given.
        added_languages = set(entry["language_codes"]) - set(prev_languages)
        removed_languages = set(prev_languages) - set(entry["language_codes"])
        logging.info("Success for id=%s%s%s" % (
            youtube_id,
            "" if not added_languages else "; added languages=%s" % list(added_languages),
            "" if not removed_languages else "; removed languages=%s" % list(removed_languages),
        ))

        entry["api_response"] = "success"
        entry["last_success"] = unicode(datetime.datetime.now().date())
        return entry
    except Exception as e:
        logging.warn("Failed to grab language / amara codes for %s: %s" % (youtube_id, e))
        entry["api_response"] = "client-error"
        return entry