def init_logger(log_file_name):
    log_rotate_windows_files = get_config('log_rotate_windows_files', int)
    log_level = get_config('log_level')
    log_to_console = get_config('log_to_console', bool)
    file_handler = MultiProcessingLog(log_file_name, when='midnight',
                                      backup_count=log_rotate_windows_files)
    file_handler.setLevel(log_level)
    file_formatter = logging.Formatter(
        '[%(asctime)s.%(msecs)d][%(process)d/%(threadName)s] [%(levelname)s] [%(name)s]: '
        '[%(funcName)s():%(lineno)s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    file_handler.setFormatter(file_formatter)
    if log_to_console:
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setLevel(log_level)
        console_formatter = logging.Formatter(
            '[%(process)d/%(threadName)s][%(levelname)s] [%(name)s] '
            '[%(funcName)s():%(lineno)s] %(message)s')
        console_handler.setFormatter(console_formatter)
        logging.root.addHandler(console_handler)
    logging.root.setLevel(log_level)
    logging.root.addHandler(file_handler)
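A minimal usage sketch, assuming the repo's Config and Logger modules are importable and configured; the log path below is illustrative, not a real config value. init_logger() wires both handlers onto the root logger once, after which any module-level logger inherits them:

import logging
from Logger.Logger import init_logger

init_logger('/var/log/kaltura/vod_upload.log')  # illustrative path
logger = logging.getLogger(__name__)
logger.info("logging initialized")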
def __init__(self, file_name, file_size, chunks_to_upload, entry_id, recorded_id,
             backend_client, logger, infile):
    self.infile = infile
    self.file_name = file_name
    self.logger = logger
    self.file_size = file_size
    self.chunks_to_upload = chunks_to_upload
    self.partner_id = backend_client.get_live_entry(entry_id).partnerId
    self.recorded_id = recorded_id
    self.entry_id = entry_id
    self.backend_client = backend_client
    self.chunk_index = 1
    # config value is in MB; convert to bytes
    self.upload_token_buffer_size = get_config('upload_token_buffer_size_mb', 'int') * 1000000
    upload_token_list_response = backend_client.upload_token_list(self.partner_id, file_name)
    if upload_token_list_response.totalCount == 0:
        # no pending token for this file, create a new one
        self.token_id = backend_client.upload_token_add(self.partner_id, file_name, file_size)
        self.uploaded_file_size = 0
    elif upload_token_list_response.totalCount in (1, 2):
        # a matching token already exists, resume from where it stopped
        self.token_id = upload_token_list_response.objects[0].id
        self.uploaded_file_size = upload_token_list_response.objects[0].uploadedFileSize
        if self.uploaded_file_size is None:  # the API returns None instead of 0
            self.uploaded_file_size = 0
        self.logger.info("Found existing token for %s, token: %s, starting from %s",
                         self.file_name, self.token_id, self.uploaded_file_size)
    else:  # more than one result is ambiguous, abort
        raise Exception('file ' + file_name + ' has ' +
                        str(upload_token_list_response.totalCount) +
                        ' (more than one) KalturaUploadToken')
def __init__(self, task, number_of_processes, output_directory, max_task_count,
             skipped_task_output):
    self.number_of_processes = number_of_processes
    self.task = task
    self.task_name = task.__name__
    self.polling_interval = get_config('polling_interval_sec', 'int')
    base_directory = get_config('recording_base_dir')
    hostname = gethostname()
    # config value is in minutes; convert to seconds
    self.failed_tasks_handling_interval = get_config('failed_tasks_handling_interval', 'int') * 60
    self.failed_tasks_max_retries = get_config('failed_tasks_max_retries')
    self.task_directory = os.path.join(base_directory, hostname, self.task_name)
    self.error_directory = os.path.join(base_directory, 'error')
    self.failed_tasks_directory = os.path.join(base_directory, hostname, self.task_name, 'failed')
    self.web_incoming_directory = os.path.join(base_directory, 'incoming')
    self.input_directory = os.path.join(base_directory, hostname, self.task_name, 'incoming')
    self.working_directory = os.path.join(base_directory, hostname, self.task_name, 'processing')
    self.output_directory = output_directory
    self.task_queue_size = max_task_count
    self.task_queue = Queue(max_task_count)
    self.logger = logging.getLogger(__name__ + '-' + self.task_name)
    self.skipped_task_output = skipped_task_output
    self.on_startup()
class TaskBase(object):
    __metaclass__ = abc.ABCMeta

    hostname = gethostname()
    base_directory = os.path.join(get_config('recording_base_dir'), hostname)
    cron_jon_stamp = get_config('cron_jon_stamp')

    def __init__(self, param, logger_info):
        self.duration = param['duration']
        self.recorded_id = param['recorded_id']
        self.entry_directory = param['directory']
        self.entry_id = param['entry_id']
        # use the job name as the log header
        self.log_header = "{}_{}_{}".format(self.entry_id, self.recorded_id, self.duration)
        self.logger = logger_decorator(self.__class__.__name__, logger_info)
        self.output_filename = self.entry_directory
        self.recording_path = os.path.join(self.base_directory, self.__class__.__name__,
                                           'processing', self.entry_directory)
        self.stamp_full_path = os.path.join(self.recording_path, 'stamp')

    def check_stamp(self):
        with open(self.stamp_full_path, "r") as stamp_file:
            stamp = stamp_file.read()
        if stamp == self.cron_jon_stamp:
            self.logger.info("[{}] Entry has no stamp since it was a zombie".format(self.log_header))
            self.duration = 0
            return
        if stamp == self.duration:
            self.logger.debug("[{}] Stamp [{}] is equal to the job duration param".format(
                self.log_header, stamp))
        else:
            msg = "[{}] Stamps are not equal! process stamp: [{}], found in file: [{}], aborting directory".format(
                self.log_header, self.duration, stamp)
            raise UnequallStampException(msg)

    @abc.abstractmethod
    def run(self):
        """Run the task."""
        return
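The stamp handshake above is easy to miss in the flow, so here is a hedged, self-contained sketch of what check_stamp() expects on disk; the duration value and temp directory are made up for the example:

import os
import tempfile

recording_path = tempfile.mkdtemp()
stamp_full_path = os.path.join(recording_path, 'stamp')
with open(stamp_full_path, 'w') as stamp_file:
    stamp_file.write('181000')  # the job duration written by the producer side

with open(stamp_full_path, 'r') as stamp_file:
    stamp = stamp_file.read()
assert stamp == '181000'  # the comparison check_stamp() performs against self.duration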
def __init__(self, task, number_of_processes, output_directory, max_task_count):
    self.number_of_processes = number_of_processes
    self.task = task
    self.task_name = task.__name__
    self.polling_interval = get_config('polling_interval_sec', 'int')
    base_directory = get_config('recording_base_dir')
    hostname = gethostname()
    # config value is in minutes; convert to seconds
    self.failed_tasks_handling_interval = get_config('failed_tasks_handling_interval', 'int') * 60
    self.failed_tasks_max_retries = get_config('failed_tasks_max_retries')
    self.task_directory = os.path.join(base_directory, hostname, self.task_name)
    self.error_directory = os.path.join(base_directory, 'error')
    self.failed_tasks_directory = os.path.join(base_directory, hostname, self.task_name, 'failed')
    self.input_directory = os.path.join(base_directory, hostname, self.task_name, 'incoming')
    self.working_directory = os.path.join(base_directory, hostname, self.task_name, 'processing')
    self.output_directory = output_directory
    self.task_queue = Queue(max_task_count)
    self.logger = logging.getLogger(__name__ + '-' + self.task_name)
    self.on_startup()
class ThreadWorkers:  # singleton object, __metaclass__ = Singleton
    num_of_thread = get_config('num_of_upload_thread', 'int')
    logger = logging.getLogger(__name__)

    def __init__(self):
        self.q = Queue.Queue()
        self.generate_upload_thread()
        self.job_failed = []

    def generate_upload_thread(self):
        for i in range(1, self.num_of_thread + 1):
            t = Thread(target=self.worker, args=(i,))
            # note: naming threads this way does not work across multiple uploader processes
            t.setName("UploadTasks-" + str(i))
            t.daemon = True
            t.start()

    def worker(self, index):
        self.logger.info("Thread %d started working", index)
        while True:
            upload_chunk_job = self.q.get()
            if not upload_chunk_job:
                self.logger.warning("Got 'None' as upload job. Check if it's a bug")
                continue
            try:
                upload_chunk_job.upload()
            except Exception as e:
                self.logger.error("Failed to upload chunk %s from file %s : %s \n %s",
                                  upload_chunk_job.chunk_index,
                                  upload_chunk_job.upload_session.file_name,
                                  str(e), traceback.format_exc())
                self.job_failed.append(upload_chunk_job)
            finally:
                self.q.task_done()

    def add_job(self, job):
        self.q.put(job)

    def wait_jobs_done(self):
        self.q.join()  # wait for all queued tasks to finish
        job_failed_to_return = self.job_failed
        self.job_failed = []  # reset the list for the next batch
        return job_failed_to_return
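A hedged sketch of the intended call pattern, assuming the surrounding config and imports are in place; pending_chunks is a hypothetical iterable of chunk-job objects exposing upload(), chunk_index and upload_session, which is all the pool requires:

workers = ThreadWorkers()               # spawns num_of_upload_thread daemon threads
for chunk in pending_chunks:            # hypothetical chunk jobs
    workers.add_job(chunk)
failed_jobs = workers.wait_jobs_done()  # blocks until the queue drains, returns failed jobs
if failed_jobs:
    raise Exception("%d chunks failed" % len(failed_jobs))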
class ConcatenationTask(TaskBase):
    nginx_port = get_config('nginx_port')
    nginx_host = get_config('nginx_host')
    nginx_url = "http://" + nginx_host + ":" + nginx_port + "/dc-0/recording/hls/p/0/e/{0}/t/0"

    def __init__(self, param, logger_info):
        TaskBase.__init__(self, param, logger_info)
        concat_task_processing_dir = os.path.join(self.base_directory,
                                                  self.__class__.__name__, 'processing')
        self.recording_path = os.path.join(concat_task_processing_dir, self.entry_directory)
        self.stamp_full_path = os.path.join(self.recording_path, 'stamp')
        self.url_base_entry = self.nginx_url.format(self.recorded_id)
        self.url_master = os.path.join(self.url_base_entry, 'master.m3u8')

    def find_source(self):
        self.logger.debug("About to load master manifest from %s", self.url_master)
        m3u8_obj = m3u8.load(self.url_master)
        flavor_list = {}
        maxbandwidth = -1
        maxbandwidth_url = None
        for element in m3u8_obj.playlists:
            flavor_list[element.stream_info.bandwidth] = element.absolute_uri
            if element.stream_info.bandwidth > maxbandwidth:
                maxbandwidth = element.stream_info.bandwidth
                maxbandwidth_url = element.absolute_uri
        if maxbandwidth == -1:
            msg = "Failed to find source from flavor list %s" % (str(flavor_list))
            raise ValueError(msg)
        self.logger.info("Got bandwidth/url pairs %s, found the source with bandwidth [%s], url: [%s]",
                         str(flavor_list), maxbandwidth, maxbandwidth_url)
        return maxbandwidth_url

    def download_chunks_and_concat(self, chunks, output_full_path):
        with open(output_full_path, 'wb') as file_output:  # 'wb' truncates any existing file
            self.logger.info("About to concat %d files from manifest into %s",
                             len(chunks), output_full_path)
            for chunk in chunks:
                chunks_url = os.path.join(self.url_base_entry, chunk)
                chunk_bytes = self.download_file(chunks_url)
                self.logger.debug("Successfully downloaded from url %s, about to write it to %s",
                                  chunks_url, output_full_path)
                file_output.write(chunk_bytes)

    def download_file(self, url):
        self.logger.debug("About to request the url: %s", url)
        # TODO: handle download failures and timeouts
        return urllib2.urlopen(url).read()

    @staticmethod
    def parse_m3u8(m3u8):
        regex = r"(.*\.ts)$"
        matches = re.findall(regex, m3u8, re.MULTILINE)
        return matches

    def run(self):
        output_full_path = os.path.join(self.recording_path, self.output_filename)
        if os.path.isfile(output_full_path):
            self.logger.warn("file [%s] already exists", output_full_path)
            return
        url_source_manifest = self.find_source()
        playlist = self.download_file(url_source_manifest)
        self.logger.debug("Loaded recording manifest:\n%s", playlist)
        chunks = m3u8.loads(playlist).files
        self.download_chunks_and_concat(chunks, output_full_path)
        self.logger.info("Successfully concatenated %d files into %s", len(chunks), output_full_path)
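A small worked example of parse_m3u8() on a manifest fragment, to make the regex concrete; only lines ending in .ts survive:

manifest = "#EXTM3U\n#EXTINF:10.0,\nseg-1.ts\n#EXTINF:10.0,\nseg-2.ts"
print(ConcatenationTask.parse_m3u8(manifest))  # ['seg-1.ts', 'seg-2.ts']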
#!/usr/bin/env python
from CULCrawler import CULCrawler
from ThreadPool import *
from Config import config
import urllib
from Daemon import *

if __name__ == "__main__":
    # createDaemon()
    crawler = CULCrawler()
    threadNum = 5
    pool = ThreadPool(threadNum)
    crawl_page_num = int(config.get_config('crawl_pages'))
    # crawl the public home pages
    for i in xrange(crawl_page_num):
        url = 'http://www.citeulike.org/home/page/' + str(i + 1)
        pool.queueTask(crawler.crawl, url)
    # keyword search pages
    f = open("keywords", "r")
    for keyword in f.readlines():
        keyword = keyword.strip()
        query = urllib.urlencode({'q': keyword})
        url_prefix = 'http://www.citeulike.org/search/all/page/'
        for i in xrange(crawl_page_num):
            url = url_prefix + str(i + 1) + '?' + query
            pool.queueTask(crawler.crawl, url)
    f.close()
    pool.joinAll()
#!/usr/bin/env python
import time
import logging.handlers
import os
import re
import traceback
import glob
from Config.config import get_config
from Logger.Logger import init_logger

logger = logging.getLogger('ZombieEntryCatchers')
recording_base_dir = get_config('recording_base_dir')
recordings_dir = os.path.join(recording_base_dir, 'recordings')
recording_incoming_dir = os.path.join(recording_base_dir, 'incoming')
entry_regex = '^[01]_\w{8}'
pattern = re.compile(entry_regex)
threshold_time_sec = 3600  # 1 hour
log_full_path = get_config('cron_job_log_file_name')
init_logger(log_full_path)
polling_interval_sec = get_config('cron_job_polling_interval_hours', 'int') * 60 * 60
cron_jon_stamp = get_config('cron_jon_stamp')


def job():
    logger.info("Start scanning directory in %s", recordings_dir)
    now = int(time.time())
    recording_list = glob.glob(recordings_dir + '/*/*/*/*')
    for recorded_id_path in recording_list:
        try:
class ConcatenationTask(TaskBase):
    nginx_port = get_config('nginx_port')
    nginx_host = get_config('nginx_host')
    secret = get_config('token_key')
    token_url_template = nginx_host + ":" + nginx_port + "/dc-0/recording/hls/p/0/e/{0}/"
    cwd = os.path.dirname(os.path.abspath(__file__))
    ts_to_mp4_convertor = os.path.join(cwd, '../bin/ts_to_mp4_convertor')

    def __init__(self, param, logger_info):
        TaskBase.__init__(self, param, logger_info)
        concat_task_processing_dir = os.path.join(self.base_directory,
                                                  self.__class__.__name__, 'processing')
        self.recording_path = os.path.join(concat_task_processing_dir, self.entry_directory)
        self.stamp_full_path = os.path.join(self.recording_path, 'stamp')
        self.token_url = self.token_url_template.format(self.recorded_id)
        self.nginx_url = "http://" + self.token_url + "t/{0}"
        self.flavor_pattern = 'index-s(?P<flavor>\d+)'
        self.iso639_wrapper = Iso639Wrapper(logger_info)

    def tokenize_url(self, url):
        if self.secret is None:
            return 0
        dir_name = os.path.dirname(url)
        dir_name = re.sub(r'https?://', '', dir_name)
        token = "{0} {1}/".format(self.secret, dir_name)
        hash = hashlib.md5(token).digest()
        encoded_hash = base64.urlsafe_b64encode(hash).rstrip('=')
        return encoded_hash

    def extract_flavor_dict(self):
        self.logger.debug("About to load master manifest from %s", self.url_master)
        m3u8_obj = m3u8.load(self.url_master)
        flavors_list = []
        for element in m3u8_obj.playlists:
            flavors_list.append(Flavor(url=element.absolute_uri, language='und'))
        for element in m3u8_obj.media:
            language = element.language
            # convert alpha_2 (iso639_1) to alpha_3 (iso639_3), see https://pypi.python.org/pypi/pycountry
            if len(element.language) == 2:
                language = self.iso639_wrapper.convert_language_to_iso639_3(unicode(language))
            flavors_list.append(Flavor(url=element.absolute_uri, language=language))
        return flavors_list

    def download_chunks_and_concat(self, chunks, output_full_path):
        try:
            with open(output_full_path, 'wb') as file_output:
                failed_chunks = 0
                self.logger.info("About to concat [%d] files from manifest into [%s]",
                                 len(chunks), output_full_path)
                for chunk in chunks:
                    chunks_url = os.path.join(self.url_base_entry, chunk)
                    try:
                        chunk_bytes = self.download_file(chunks_url)
                        self.logger.debug(
                            "Successfully downloaded from url [%s], about to write it to [%s]",
                            chunks_url, output_full_path)
                        file_output.write(chunk_bytes)
                    except urllib2.HTTPError as e:
                        if e.code == 404:
                            failed_chunks += 1
                            self.logger.error("Failed to download chunk [%s], got response 404",
                                              chunk)
                        else:
                            raise e
                if failed_chunks > 0:
                    self.logger.warn("Failed to download [%s] chunks from [%s]",
                                     failed_chunks, len(chunks))
                    if failed_chunks == len(chunks):
                        raise urllib2.HTTPError(self.url_base_entry, 404,
                                                "Failed to download all chunks from manifest",
                                                None, None)
        except urllib2.HTTPError as e:
            self.logger.error("Failed to concat file %s, removing it", output_full_path)
            os.remove(output_full_path)
            raise e

    def download_file(self, url):
        self.logger.debug("About to request the url: [%s]", url)
        return urllib2.urlopen(url).read()

    @staticmethod
    def parse_m3u8(m3u8):
        regex = r"(.*\.ts)$"
        matches = re.findall(regex, m3u8, re.MULTILINE)
        return matches

    def get_flavor_id(self, url_postfix, single_flavor):
        if single_flavor:
            flavors_dirs = filter(os.path.isdir,
                                  [os.path.join(self.recording_path, f)
                                   for f in os.listdir(self.recording_path)])
            flavor_id = flavors_dirs[0].rsplit('/', 1)[-1]
        else:
            result = re.search(self.flavor_pattern, url_postfix)
            if not result:
                error = "Error running concat task, failed to parse flavor from url: [{0}]".format(
                    url_postfix)
                self.logger.error(error)
                raise ValueError(error)
            flavor_id = result.group('flavor')
        return flavor_id

    def run(self):
        command = self.ts_to_mp4_convertor + ' '
        token = self.tokenize_url(self.token_url)
        self.url_base_entry = self.nginx_url.format(token)
        self.url_master = os.path.join(self.url_base_entry, 'master.m3u8')
        flavors_list = self.extract_flavor_dict()
        single_flavor = len(flavors_list) == 1
        for obj in flavors_list:
            url_postfix = obj.url.rsplit('/', 1)[1]
            flavor_id = self.get_flavor_id(url_postfix, single_flavor)
            ts_output_filename = self.get_output_filename(flavor_id)
            output_full_path = os.path.join(self.recording_path, ts_output_filename)
            mp4_full_path = output_full_path.replace('.ts', '.mp4')
            command = command + ' ' + output_full_path + ' ' + mp4_full_path + ' ' + obj.language
            if os.path.isfile(output_full_path):
                self.logger.warn("file [%s] already exists", output_full_path)
                continue
            playlist = self.download_file(obj.url)
            self.logger.debug("Loaded recording manifest:\n%s", playlist)
            chunks = m3u8.loads(playlist).files
            self.download_chunks_and_concat(chunks, output_full_path)
            self.logger.info("Successfully concatenated %d files into %s",
                             len(chunks), output_full_path)
        self.convert_ts_to_mp4(command)

    def convert_ts_to_mp4(self, command):
        start_time = datetime.now()
        exitcode = -1
        status = 'succeeded'
        # convert each flavor's concatenated TS file to a single MP4
        self.logger.debug('About to run TS -> MP4 conversion. Command: %s', command)
        try:
            process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT)
            log_subprocess_output(process, "ffmpeg: ts->mp4", self.logger)
            output, outerr = process.communicate()
            exitcode = process.returncode
            if exitcode == 0:
                self.logger.info('Successfully finished TS -> MP4 conversion')
            else:
                status = 'failed'
                error = 'Failed to convert TS -> MP4. Convertor process exit code {}, {}'.format(
                    exitcode, outerr)
                self.logger.error(error)
                raise subprocess.CalledProcessError(exitcode, command)
        except (OSError, subprocess.CalledProcessError) as e:
            self.logger.fatal("Failed to convert TS -> MP4 {}".format(str(e)))
            raise e
        except Exception as e:
            self.logger.fatal("Failed to convert TS -> MP4 {}".format(str(e)))
            raise e
        finally:
            end_time = datetime.now()
            duration = (end_time - start_time).total_seconds()
            self.logger.info("Conversion of TS -> MP4, {}, exit code [{}], duration [{}] seconds".format(
                status, str(exitcode), str(int(duration))))

    def get_output_filename(self, flavor):
        return self.output_filename + '_f' + flavor + '_out.ts'
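For reference, the tokenization scheme in tokenize_url() can be reproduced standalone: md5 over "<secret> <host:port/dir>/", urlsafe base64, padding stripped. The secret and URL below are illustrative values only; the real secret comes from the token_key config:

import base64
import hashlib
import os
import re

secret = 'my-secret'  # illustrative
url = 'http://nginx-host:8080/dc-0/recording/hls/p/0/e/0_abc12345/master.m3u8'
dir_name = re.sub(r'https?://', '', os.path.dirname(url))
token = "{0} {1}/".format(secret, dir_name)
print(base64.urlsafe_b64encode(hashlib.md5(token).digest()).rstrip('='))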
class UploadTask(TaskBase):
    # config value is in MB; convert to bytes
    upload_token_buffer_size = get_config('upload_token_buffer_size_mb', 'int') * 1000000

    def __init__(self, param, logger_info):
        TaskBase.__init__(self, param, logger_info)
        self.output_file_path = os.path.join(self.recording_path, self.output_filename)
        session_id = self.entry_id + '-' + self.recorded_id
        self.backend_client = BackendClient(session_id)
        self.chunk_index = 0

    def get_chunks_to_upload(self, file_size):
        # ceiling division: a partial trailing chunk still counts
        if file_size % self.upload_token_buffer_size == 0:
            return int(file_size / self.upload_token_buffer_size)
        return int(file_size / self.upload_token_buffer_size) + 1

    def upload_file(self, file_name):
        thread_workers = ThreadWorkers()
        file_size = os.path.getsize(file_name)
        chunks_to_upload = self.get_chunks_to_upload(file_size)
        with io.open(file_name, 'rb') as infile:
            upload_session = KalturaUploadSession(self.output_filename, file_size,
                                                  chunks_to_upload, self.entry_id,
                                                  self.recorded_id, self.backend_client,
                                                  self.logger, infile)
            # todo check use case
            while upload_session.chunk_index <= chunks_to_upload - 1:
                chunk = upload_session.get_next_chunk()
                if chunk is None:
                    break
                thread_workers.add_job(chunk)
            failed_jobs = thread_workers.wait_jobs_done()
            self.logger.info('Finished uploading [%s chunks], about to upload the last chunk',
                             chunks_to_upload - 1)
            # last chunk
            chunk = upload_session.get_next_chunk(last_chunk=True)
            if chunk is not None:
                thread_workers.add_job(chunk)
            job_result = thread_workers.wait_jobs_done()
            failed_jobs.extend(job_result)
            self.check_stamp()
            upload_session_json = str(vars(upload_session))
            if len(failed_jobs) == 0:
                self.logger.info("Successfully uploaded all chunks, calling append recording")
                self.check_replacement_status(upload_session.partner_id)
                self.backend_client.set_recorded_content_remote(
                    upload_session, str(float(self.duration) / 1000))
                os.rename(self.output_file_path, self.output_file_path + '.done')
            else:
                raise Exception("Failed to upload file, " + str(len(failed_jobs)) +
                                " chunks from " + str(chunks_to_upload) + " failed: " +
                                upload_session_json)

    def check_replacement_status(self, partner_id):
        self.logger.debug("About to check replacement status for [%s]", self.recorded_id)
        recorded_obj = self.backend_client.get_recorded_entry(partner_id, self.recorded_id)
        self.logger.debug("Got replacement status: %s", recorded_obj.replacementStatus.value)
        if recorded_obj.replacementStatus.value != KalturaEntryReplacementStatus.NONE:
            self.logger.info("entry %s has replacementStatus %s, calling cancel_replace",
                             self.recorded_id, recorded_obj.replacementStatus)
            self.backend_client.cancel_replace(partner_id, self.recorded_id)

    def append_recording_handler(self):
        partner_id = self.backend_client.get_live_entry(self.entry_id).partnerId
        self.check_replacement_status(partner_id)
        self.backend_client.set_recorded_content_local(partner_id, self.entry_id,
                                                       self.output_file_path,
                                                       str(float(self.duration) / 1000),
                                                       self.recorded_id)

    def run(self):
        try:
            mode = get_config('mode')
            if mode == 'remote':
                self.upload_file(self.output_file_path)
            if mode == 'local':
                self.append_recording_handler()
        except KalturaException as e:
            if e.code == 'KALTURA_RECORDING_DISABLED':
                self.logger.warn("%s, move it to done directory", e.message)
            else:
                raise e
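get_chunks_to_upload() is plain ceiling division; a quick worked example with a hypothetical 10 MB buffer, mirroring the method's truncating division plus remainder check:

buffer_size = 10 * 1000000  # as if upload_token_buffer_size_mb = 10
for file_size in (25000000, 20000000):
    chunks = file_size // buffer_size + (1 if file_size % buffer_size else 0)
    print('%d -> %d' % (file_size, chunks))  # 25000000 -> 3, 20000000 -> 2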
def signal_term_handler(signal, frame):  # handler header reconstructed; registered below for SIGTERM/SIGINT
    for my_process in processes:
        print("kill process " + str(my_process.pid))
        try:
            parent = psutil.Process(my_process.pid)
        except psutil.NoSuchProcess:
            print("No such process " + str(my_process.pid))
            continue
        children = parent.children(recursive=True)
        for child_process in children:
            print("Found child process " + str(child_process.pid) + ", sending SIGKILL")
            child_process.kill()
        my_process.terminate()
    sys.exit(0)


log_full_path = get_config('log_file_name')
init_logger(log_full_path)
processes = []
max_task_count = get_config("max_task_count", 'int')
concat_processors_count = get_config('concat_processors_count', 'int')
uploading_processors_count = get_config('uploading_processors_count', 'int')
base_directory = get_config('recording_base_dir')
tasks_done_directory = path.join(base_directory, 'done')
incoming_upload_directory = path.join(base_directory, socket.gethostname(),
                                      UploadTask.__name__, 'incoming')
signal.signal(signal.SIGTERM, signal_term_handler)
signal.signal(signal.SIGINT, signal_term_handler)
ConcatenationTaskRunner = TaskRunner(ConcatenationTask, concat_processors_count,
                                     incoming_upload_directory,
class UploadTask(TaskBase):
    # config value is in MB; convert to bytes
    upload_token_buffer_size = get_config('upload_token_buffer_size_mb', 'int') * 1000000

    def __init__(self, param, logger_info):
        TaskBase.__init__(self, param, logger_info)
        session_id = self.entry_id + '-' + self.recorded_id
        self.backend_client = BackendClient(session_id)
        mp4_filename_pattern = param['directory'] + '_f*_out.mp4'
        self.mp4_files_list = glob.glob1(self.recording_path, mp4_filename_pattern)
        self.mp4_filename_pattern = "[01]_.+_[01]_.+_\d+(\.\d+)?_f(?P<flavor_id>\d+)_out[.]mp4"

    def get_chunks_to_upload(self, file_size):
        # ceiling division: a partial trailing chunk still counts
        if file_size % self.upload_token_buffer_size == 0:
            return int(file_size / self.upload_token_buffer_size)
        return int(file_size / self.upload_token_buffer_size) + 1

    def upload_file(self, file_name, flavor_id, is_first_flavor):
        thread_workers = ThreadWorkers()
        file_size = os.path.getsize(file_name)
        chunks_to_upload = self.get_chunks_to_upload(file_size)
        with io.open(file_name, 'rb') as infile:
            upload_session = KalturaUploadSession(file_name, file_size, chunks_to_upload,
                                                  self.entry_id, self.recorded_id,
                                                  self.backend_client, self.logger, infile)
            if chunks_to_upload > 2:
                # upload the first chunk on its own so the token reaches a resumable state
                chunk = upload_session.get_next_chunk()
                if chunk is not None:
                    thread_workers.add_job(chunk)
                    failed_jobs = thread_workers.wait_jobs_done()
                    if len(failed_jobs) != 0:
                        raise Exception("Failed to upload first chunk")
                    self.logger.debug("Finished uploading the first chunk")
            while upload_session.chunk_index <= chunks_to_upload - 1:
                chunk = upload_session.get_next_chunk()
                if chunk is None:
                    break
                thread_workers.add_job(chunk)
            failed_jobs = thread_workers.wait_jobs_done()
            self.logger.info('Finished uploading [%s chunks], about to upload the last chunk',
                             chunks_to_upload - 1)
            # last chunk
            chunk = upload_session.get_next_chunk(last_chunk=True)
            if chunk is not None:
                thread_workers.add_job(chunk)
            job_result = thread_workers.wait_jobs_done()
            failed_jobs.extend(job_result)
            self.check_stamp()
            upload_session_json = str(vars(upload_session))
            if len(failed_jobs) == 0:
                self.logger.info("Successfully uploaded all chunks, calling append recording")
                if is_first_flavor:
                    self.check_replacement_status(upload_session.partner_id)
                self.backend_client.set_recorded_content_remote(
                    upload_session, str(float(self.duration) / 1000), flavor_id)
                os.rename(file_name, file_name + '.done')
            else:
                raise Exception("Failed to upload file, " + str(len(failed_jobs)) +
                                " chunks from " + str(chunks_to_upload) + " failed: " +
                                upload_session_json)

    def check_replacement_status(self, partner_id):
        self.logger.debug("About to check replacement status for [%s]", self.recorded_id)
        recorded_obj = self.backend_client.get_recorded_entry(partner_id, self.recorded_id)
        self.logger.debug("Got replacement status: %s", recorded_obj.replacementStatus.value)
        if recorded_obj.replacementStatus.value != KalturaEntryReplacementStatus.NONE:
            self.logger.info("entry %s has replacementStatus %s, calling cancel_replace",
                             self.recorded_id, recorded_obj.replacementStatus)
            self.backend_client.cancel_replace(partner_id, self.recorded_id)

    def append_recording_handler(self, file_full_path, flavor_id, is_first_flavor):
        partner_id = self.backend_client.get_live_entry(self.entry_id).partnerId
        if is_first_flavor:
            self.check_replacement_status(partner_id)
        self.backend_client.set_recorded_content_local(partner_id, self.entry_id,
                                                       file_full_path,
                                                       str(float(self.duration) / 1000),
                                                       self.recorded_id, flavor_id)

    def run(self):
        try:
            mode = get_config('mode')
            is_first_flavor = True
            count_uploaded_mp4 = 0
            code = ''
            for mp4 in self.mp4_files_list:
                try:
                    result = re.search(self.mp4_filename_pattern, mp4)
                    if not result or not result.group('flavor_id'):
                        error = "Error running upload task, failed to parse flavor id from filename: [{0}]".format(mp4)
                        self.logger.error(error)
                        raise ValueError(error)
                    flavor_id = result.group('flavor_id')
                    file_full_path = os.path.join(self.recording_path, mp4)
                    if mode == 'remote':
                        self.upload_file(file_full_path, flavor_id, is_first_flavor)
                    if mode == 'local':
                        self.append_recording_handler(file_full_path, flavor_id, is_first_flavor)
                    is_first_flavor = False
                    count_uploaded_mp4 += 1
                except KalturaException as e:
                    code = e.code
                    if e.code == 'FLAVOR_PARAMS_ID_NOT_FOUND':
                        self.logger.warn('{}, failed to upload {}, flavor id {}'.format(
                            e.message, mp4, flavor_id))
                    else:
                        raise e
            if count_uploaded_mp4 == 0:
                if len(self.mp4_files_list) > 0:
                    mp4_files = str(self.mp4_files_list)
                    err = Exception('failed to upload any of {}, check log for errors'.format(mp4_files))
                    err.code = code
                    raise err
                else:
                    self.logger.warn('there were no mp4 files to upload, check {}'.format(
                        self.recording_path))
        except KalturaException as e:
            self.logger.error('failed to upload VOD with error {}, exception details: {}'.format(
                e.code, e.message))
            if e.code == 'KALTURA_RECORDING_DISABLED':
                self.logger.warn("%s, move it to done directory", e.message)
            elif e.code == 'ENTRY_ID_NOT_FOUND':
                self.logger.warn("%s, move it to done directory", e.message)
            else:
                raise e
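A worked example of the flavor-id filename regex used above; the filename is invented to match the expected <entry>_<recorded>_<duration>_f<flavor>_out.mp4 shape:

import re

pattern = r"[01]_.+_[01]_.+_\d+(\.\d+)?_f(?P<flavor_id>\d+)_out[.]mp4"
m = re.search(pattern, '0_abc12345_0_def67890_181000.5_f34_out.mp4')
print(m.group('flavor_id'))  # 34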
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
from Config.config import get_config

print get_config("recording_base_dir")
def get_arg_params():
    if args.entyId is not None:
        param['entry_id'] = args.entyId.lstrip()
    if args.recordedId is not None:
        param['recorded_id'] = args.recordedId.lstrip()
    if args.recordingDuration is not None:
        param['duration'] = args.recordingDuration.lstrip()
    logger.info("Parameters: %s", str(param))


set_config("log_to_console", "True")
parser = argparse.ArgumentParser()
parser_argument_configure()
args = parser.parse_args()
recover_log_file_name = get_config('recover_log_file_name')
init_logger(recover_log_file_name)
logger = logging.getLogger(__name__)
path = args.path.lstrip()
path_split = path.rsplit('/', 1)
base_directory = path
directory_name = path_split[1]
has_all_custom_param = (args.entyId is not None and args.recordedId is not None
                        and args.recordingDuration is not None)
param = {}
if TaskRunner.match(directory_name) is None:
    if not has_all_custom_param:
        logger.error("Can't find all parameters, entyId [%s], recordedId [%s], recordingDuration [%s]",
                     args.entyId, args.recordedId, args.recordingDuration)
        exit(1)
    else:
        get_arg_params()
else:
class BackendClient:
    admin_secret = get_config('admin_secret')
    partner_id = get_config('partner_id')
    url = get_config('api_service_url')
    session_duration = get_config('session_duration')
    mode = get_config('mode')
    format = 2
    request_timeout = 120
    expiration_time_ks = -1
    mutex = Lock()
    config = KalturaConfiguration(url)
    client = KalturaClient(config)
    client.setPartnerId(partner_id)
    ks = None
    type = KalturaSessionType.ADMIN

    def __init__(self, session_id):
        self.logger = logger_decorator(self.__class__.__name__, session_id)
        self.logger.info("Init BackendClient: admin_secret %s, partner_id %s, session_duration %s, url %s",
                         self.admin_secret, self.partner_id, self.session_duration, self.url)

    def create_new_session(self):
        ks = self.client.generateSessionV2(self.admin_secret, None, self.type,
                                           self.partner_id, int(self.session_duration))
        BackendClient.ks = ks
        # renew one hour before the nominal expiry, as a safety margin
        BackendClient.expiration_time_ks = int(self.session_duration) + int(time.time()) - 3600
        self.client.setKs(self.ks)
        self.logger.info("Creating a new session, KS = %s", self.ks)

    def get_kaltura_session(self):
        self.mutex.acquire()
        try:
            if (self.ks is None) or self.expiration_time_ks < int(time.time()):
                self.create_new_session()
        finally:
            self.mutex.release()

    def impersonate_client(self, partner_id):
        self.get_kaltura_session()  # generate a KS if none exists or it expired
        clone_client = KalturaClient(self.config)
        clone_client.setPartnerId(partner_id)
        clone_client.setKs(self.ks)
        return clone_client

    def handle_request(self, partner_id, service, action, *parameters):
        client = self.impersonate_client(partner_id)
        service_attribute = getattr(client, service)
        action_attribute = getattr(service_attribute, action)
        self.logger.debug("[%s][%s] About to call", service, action)
        (result, header) = action_attribute(*parameters)
        self.logger.debug("[%s][%s] API result's header: %s", service, action, header)
        return result

    def cancel_replace(self, partner_id, entry_id):
        return self.handle_request(partner_id, 'media', 'cancelReplace', entry_id)

    def get_recorded_entry(self, partner_id, entry_id):
        return self.handle_request(partner_id, 'media', 'get', entry_id)

    def get_live_entry(self, entry_id):
        self.get_kaltura_session()  # generate a KS if none exists or it expired
        result = self.client.liveStream.get(entry_id)
        self.logger.info("Header: %s", result[1])
        return result[0]

    def upload_token_add(self, partner_id, file_name, file_size):
        upload_token_obj = KalturaUploadToken()
        upload_token_obj.fileName = file_name
        upload_token_obj.fileSize = file_size
        result = self.handle_request(partner_id, 'uploadToken', 'add', upload_token_obj)
        self.logger.info("Token id: [%s], file name: [%s], partnerId: [%s]",
                         result.id, file_name, partner_id)
        return result.id

    def upload_token_list(self, partner_id, file_name):
        upload_token_filter = KalturaUploadTokenFilter()
        upload_token_filter.fileNameEqual = file_name
        upload_token_filter.statusIn = ''.join([
            str(KalturaUploadTokenStatus.PENDING), ',',
            str(KalturaUploadTokenStatus.PARTIAL_UPLOAD), ',',
            str(KalturaUploadTokenStatus.FULL_UPLOAD)])
        return self.handle_request(partner_id, 'uploadToken', 'list', upload_token_filter)

    def upload_token_upload(self, upload_chunk_obj):
        token = upload_chunk_obj.upload_session.token_id
        file_name = upload_chunk_obj.upload_session.file_name
        chunks_to_upload = upload_chunk_obj.upload_session.chunks_to_upload
        sequence_number = upload_chunk_obj.chunk_index
        resume = upload_chunk_obj.resume
        final_chunk = upload_chunk_obj.final_chunk
        resume_at = upload_chunk_obj.resume_at
        self.logger.info("About to upload chunk [%s] from [%s] in file [%s] token: [%s], "
                         "resume: [%s], final_chunk: [%s], resume_at: [%s]",
                         sequence_number, chunks_to_upload, file_name, token, resume,
                         final_chunk, resume_at)
        result = self.handle_request(upload_chunk_obj.upload_session.partner_id, 'uploadToken',
                                     'upload', token, upload_chunk_obj.file_obj, resume,
                                     final_chunk, resume_at)
        self.logger.info("Finished upload, result: %s", self.upload_token_result_to_json(result))
        return result

    @staticmethod
    def upload_token_result_to_json(result):
        # callers should wrap this in try/except so that logging never fails the upload itself
        result_dictionary = {
            "fileName": result.fileName,
            "fileSize": result.fileSize,
            "token": result.id,
            "partnerId": result.partnerId,
            "status": result.status.value,
            "uploadFileSize": result.uploadedFileSize
        }
        return json.dumps(result_dictionary, ensure_ascii=False)

    def set_recorded_content(self, entry_id, resource, duration, partner_id, recorded_id):
        self.logger.info("set_recorded_content entry_id [%s], resource [%s], duration [%s], recorded_id [%s]",
                         entry_id, resource.__class__.__name__, duration, recorded_id)
        self.handle_request(partner_id, 'liveStream', 'setRecordedContent', entry_id, 0,
                            resource, duration, recorded_id)

    def set_recorded_content_remote(self, upload_session, duration):
        token_id = upload_session.token_id
        recorded_id = upload_session.recorded_id
        entry_id = upload_session.entry_id
        partner_id = upload_session.partner_id
        resource = KalturaUploadedFileTokenResource(token_id)
        self.logger.info("set_recorded_content_remote partner_id [%s], token [%s], duration [%s]",
                         partner_id, token_id, duration)
        self.set_recorded_content(entry_id, resource, duration, partner_id, recorded_id)

    def set_recorded_content_local(self, partner_id, entry_id, output_file, duration, recorded_id):
        self.logger.info("set_recorded_content_local partner_id [%s], output_file [%s], duration [%s]",
                         partner_id, output_file, duration)
        resource = KalturaServerFileResource()
        resource.localFilePath = output_file
        self.set_recorded_content(entry_id, resource, duration, partner_id, recorded_id)
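handle_request() resolves the service and action dynamically, so client.media.cancelReplace(entry_id) and two chained getattr lookups are the same call. A self-contained sketch with hypothetical stand-in classes (no real Kaltura client involved) shows the mechanics, including the (result, header) tuple it unpacks:

class _FakeService(object):  # hypothetical stand-in for client.media
    def cancelReplace(self, entry_id):
        return ('cancelled ' + entry_id, {'header': 'ok'})

class _FakeClient(object):   # hypothetical stand-in for the impersonated client
    media = _FakeService()

client = _FakeClient()
service_attribute = getattr(client, 'media')
action_attribute = getattr(service_attribute, 'cancelReplace')
(result, header) = action_attribute('0_abc12345')  # same shape handle_request() unpacks
print(result)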