def _CONFIGURE(self):
    """
    Prompt the user for each setting found in the default YAML file
    and persist the answers to the instance YAML file.

    Side effects:
        Writes ``self.instance_yaml`` and sets ``self.settings_dict``.
    Returns:
        None (also None on YAML read failure, after printing an error).
    """
    with open(self.default_yaml, 'r') as stream:
        try:
            # safe_load: config is plain data; yaml.load without a
            # Loader can construct arbitrary Python objects (unsafe)
            config_dict = yaml.safe_load(stream)
        except yaml.YAMLError:
            ErrorObject.print_error(
                message='default YAML read error'
            )
            return None

    output_dict = {}
    for setting_name, _default in config_dict.iteritems():
        sys.stdout.write('\r')
        new_value = raw_input('%s :' % (setting_name))
        # Clean trailing slashes so path-like values are uniform
        if new_value is not None and len(new_value) > 0 and new_value[-1] == '/':
            output_dict[setting_name] = new_value[:-1]
        else:
            output_dict[setting_name] = new_value
        sys.stdout.flush()

    with open(self.instance_yaml, 'w') as outfile:
        outfile.write(
            yaml.dump(output_dict, default_flow_style=False)
        )
    self.settings_dict = output_dict
def veda_tokengen():
    """
    Generate and authorize a VEDA API token.

    Returns:
        The authorized token text (stripped), or None on any failure.
    """
    '''Generate Token'''
    payload = {'grant_type': 'client_credentials'}
    r = requests.post(
        settings['veda_token_url'] + '/',
        params=payload,
        auth=(settings['veda_client_id'], settings['veda_secret_key']),
        timeout=20
    )
    if r.status_code != 200:
        ErrorObject().print_error(message='VEDA Token Generate', )
        return None
    # BUGFIX: the endpoint returns JSON -- parse with json.loads instead
    # of ast.literal_eval, which fails on JSON literals (true/false/null)
    veda_token = json.loads(r.text)['access_token']

    '''Authorize token'''
    """
    This is based around the VEDA "No Auth Server" hack

    NOTE: After much screwing around, I couldn't get nginx to pass
    the auth headers, so I'm moving to token auth

    **it's shit, and needs a rewrite. see api.py
    """
    payload = {'data': veda_token}
    t = requests.post(settings['veda_auth_url'] + '/', data=payload)
    if t.status_code == 200:  # and t.text == 'True':
        return t.text.strip()
    else:
        ErrorObject().print_error(message='VEDA Token Authorization', )
        return None
def _EXECUTE_ENCODE(self): """ if this is just a filepath, this should just work --no need to move the source-- """ if not os.path.exists( os.path.join(self.workdir, self.source_file) ): ErrorObject().print_error( message = 'Source File (local) NOT FOUND', ) return None process = subprocess.Popen( self.ffcommand, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, universal_newlines=True ) print '%s : %s' % (self.VideoObject.veda_id, self.encode_profile) Output.status_bar(process=process) # to be polite print self.output_file = self.ffcommand.split('/')[-1] if not os.path.exists( os.path.join(self.workdir, self.output_file) ): ErrorObject().print_error( message = 'Source File (local) NOT FOUND', )
def _CONFIGURE(self):
    """
    Prompt user for settings as needed for yaml, then write them to
    the instance YAML file and cache them on ``self.settings_dict``.

    Returns:
        None (also None on YAML read failure, after printing an error).
    """
    with open(self.default_yaml, 'r') as stream:
        try:
            # yaml.safe_load: settings are plain data, and yaml.load
            # without an explicit Loader is unsafe on untrusted input
            config_dict = yaml.safe_load(stream)
        except yaml.YAMLError:
            ErrorObject.print_error(message='default YAML read error')
            return None

    output_dict = {}
    for j, k in config_dict.iteritems():
        sys.stdout.write('\r')
        new_value = raw_input('%s :' % (j))
        # clean tailing slashes here
        if new_value is not None and len(
                new_value) > 0 and new_value[-1] == '/':
            output_dict[j] = new_value[:-1]
        else:
            output_dict[j] = new_value
        sys.stdout.flush()

    with open(self.instance_yaml, 'w') as outfile:
        outfile.write(yaml.dump(output_dict, default_flow_style=False))
    self.settings_dict = output_dict
def generate(self):
    """
    Generate the ffmpeg command string for this encode.

    Requires both ``self.VideoObject`` and ``self.EncodeObject`` to be
    set; prints an error and returns None if either is missing.

    Returns:
        The assembled command as a single space-joined string, or None.
    """
    # idiom fix: compare to None with `is`, not `==`
    if self.VideoObject is None:
        ErrorObject().print_error(
            message='Command Gen Fail: No Video Object')
        return None
    if self.EncodeObject is None:
        ErrorObject().print_error(
            message='Command Gen Fail: No Encode Object')
        return None
    """
    These build the command, and, unfortunately,
    must be in order
    """
    self._call()
    self._codec()
    if ENFORCE_TARGET_ASPECT is True:
        self._scalar()
    self._bitdepth()
    self._passes()
    self._destination()
    return " ".join(self.ffcommand)
def _ENG_INTAKE(self):
    """
    Copy the mezzanine file down from the AWS S3 storage bucket.

    Side effects:
        Downloads to ``self.workdir``; sets ``self.source_file`` and
        re-validates ``self.VideoObject.valid`` against the local copy.
    Returns:
        None (early None on any failure, after printing an error).
    """
    if self.VideoObject.valid is False:
        ErrorObject().print_error(
            message='Invalid Video'
        )
        return None

    conn = S3Connection(
        self.settings['aws_access_key'],
        self.settings['aws_secret_key']
    )
    try:
        bucket = conn.get_bucket(self.settings['aws_storage_bucket'])
    except Exception:
        # narrowed from a bare `except:` -- still best-effort, but no
        # longer swallows SystemExit/KeyboardInterrupt
        ErrorObject().print_error(
            message='Invalid Storage Bucket'
        )
        return None

    self.source_file = '.'.join((
        self.VideoObject.veda_id,
        self.VideoObject.mezz_extension
    ))
    source_key = bucket.get_key(self.source_file)
    if source_key is None:
        ErrorObject().print_error(
            message='S3 Intake Object NOT FOUND',
        )
        return None
    source_key.get_contents_to_filename(
        os.path.join(self.workdir, self.source_file)
    )
    if not os.path.exists(os.path.join(self.workdir, self.source_file)):
        ErrorObject().print_error(
            message='Engine Intake Download',
        )
        return None

    self.VideoObject.valid = ValidateVideo(
        filepath=os.path.join(self.workdir, self.source_file)
    ).valid
def send_veda_status(self):
    """
    Update each matched video's transcode status in VEDA.

    VEDA Stati (as of 05-2016) [salient only to NODE], kept in
    'globals' ---- 'Active Transcode' ----

    * This will update a video's status
    """
    for record in self.veda_video_dict['results']:
        # Only transcode_active is sent from here; the other queue
        # phases are controlled by other big veda pipeline steps
        if self.VideoObject.valid is not True:
            return None

        status_payload = {'video_trans_status': self.veda_video_status}
        endpoint = '/'.join(
            (settings['veda_api_url'], 'videos', str(record['id']), ''))
        response = requests.patch(
            endpoint,
            headers=self.veda_headers,
            data=json.dumps(status_payload))
        if response.status_code != 200:
            ErrorObject().print_error(
                message='VEDA API Fail: File \'GET\' Failure, no objects')
def _READ_SETTINGS(self):
    """
    Read extant settings from the instance YAML file.

    Side effects:
        Sets ``self.settings_dict`` on success.
    Returns:
        None (also None when not configured or on YAML read error,
        after printing an error).
    """
    if not os.path.exists(self.instance_yaml):
        ErrorObject.print_error(message='Not Configured')
        return None
    with open(self.instance_yaml, 'r') as stream:
        try:
            # safe_load: settings are plain data; yaml.load without a
            # Loader allows arbitrary object construction (unsafe)
            self.settings_dict = yaml.safe_load(stream)
        except yaml.YAMLError:
            ErrorObject.print_error(message='Config YAML read error')
            return None
def determine_veda_pk(self):
    """
    Look up the VEDA record for this video by its edx_id.

    To keep things manageable, we're going to presuppose an extant
    VEDA video ID -- if we want to generate new VEDA objects, we'll
    do that in a completely separate method/script, and quite frankly
    that belongs in "big VEDA" anyway.

    Returns:
        The decoded JSON response dict, or None if no VideoObject is
        set or the API call fails.
    """
    if self.VideoObject is None:
        return None
    data = {
        'edx_id': self.VideoObject.veda_id,
    }
    y = requests.get(
        '/'.join((settings['veda_api_url'], 'videos', '')),
        params=data,
        headers=self.veda_headers,
        timeout=20
    )
    if y.status_code != 200:
        ErrorObject().print_error(
            message='VEDA API Fail: Check VEDA API config')
        return None
    return json.loads(y.text)
def activate(self):
    """
    Populate this Video object's fields, either from test fixtures
    (when no veda_id is set) or from the VEDA API.

    Side effects:
        Sets the mezzanine metadata fields and ``self.valid``.
    Returns:
        None in all paths; failure leaves ``self.valid`` falsy.
    """
    # idiom fix throughout: None comparisons use `is` / `is not`
    if self.veda_id is not None and len(settings['veda_api_url']) == 0:
        ErrorObject().print_error(
            message='VEDA API Config Incorrect, run test to debug')
        return None
    """
    test case
    """
    if self.veda_id is None:
        self.mezz_extension = '.mp4'
        self.mezz_title = TEST_VIDEO_FILE
        self.mezz_filepath = os.path.join(TEST_VIDEO_DIR, TEST_VIDEO_FILE)
        self.valid = True
        return None
    """
    Generated Token
    """
    veda_token = generate_apitoken.veda_tokengen()
    if veda_token is None:
        return None

    data = {
        'edx_id': self.veda_id,
    }
    headers = {
        'Authorization': 'Token ' + veda_token,
        'content-type': 'application/json'
    }
    x = requests.get(
        '/'.join((settings['veda_api_url'], 'videos', '')),
        params=data,
        headers=headers)
    vid_dict = json.loads(x.text)
    if len(vid_dict['results']) == 0:
        return None

    for v in vid_dict['results']:
        """
        Yeah this is horrible, but it's tied to VEDA's model
        """
        self.vid_pk = v['id']
        self.class_id = v['inst_class']
        self.val_id = v['studio_id']
        self.mezz_extension = v['video_orig_extension']
        self.mezz_bitrate = v['video_orig_bitrate']
        self.mezz_title = v['client_title']
        self.mezz_filesize = v['video_orig_filesize']
        '''Do some field cleaning in case of SAR/DAR legacy errors'''
        mezz_resolution = v['video_orig_resolution'].strip().split(' ')[0]
        self.mezz_resolution = mezz_resolution
        '''Clean from unicode (00:00:00.53)'''
        uni_duration = v['video_orig_duration']
        self.mezz_duration = Output._seconds_from_string(uni_duration)
        self.mezz_filepath = '/'.join(
            ('https://s3.amazonaws.com',
             settings['aws_storage_bucket'],
             self.veda_id + '.' + self.mezz_extension))
        self.valid = True
def _READ_SETTINGS(self):
    """
    Read Extant Settings or Generate New Ones.

    Side effects:
        Sets ``self.settings_dict`` from the instance YAML on success.
    Returns:
        None (also None when not configured or on read error).
    """
    if not os.path.exists(self.instance_yaml):
        ErrorObject.print_error(
            message='Not Configured'
        )
        return None
    with open(self.instance_yaml, 'r') as stream:
        try:
            # safe_load instead of the unsafe Loader-less yaml.load
            self.settings_dict = yaml.safe_load(stream)
        except yaml.YAMLError:
            ErrorObject.print_error(
                message='Config YAML read error'
            )
            return None
def pull_data(self):
    """
    Pull the encode profile record matching ``self.profile_name``
    from the VEDA API and copy its fields onto this object.

    Side effects:
        Sets resolution, rate_factor, filetype, encode_suffix and
        encode_pk from the first active matching profile.
    Returns:
        None (also None on token failure, empty results, or when no
        suffix was found -- each after printing an error).
    """
    encode_dict = {}
    veda_token = generate_apitoken.veda_tokengen()
    if veda_token is None:
        ErrorObject().print_error(message="VEDA Token Generate")
        return None

    data = {'product_spec': self.profile_name}
    headers = {
        'Authorization': 'Token ' + veda_token,
        'content-type': 'application/json'
    }
    x = requests.get(
        '/'.join((settings['veda_api_url'], 'encodes')),
        params=data,
        headers=headers)
    enc_dict = json.loads(x.text)
    if len(enc_dict['results']) == 0:
        ErrorObject().print_error(
            message="VEDA API Encode Mismatch: No Data")
        return None

    for e in enc_dict['results']:
        # only copy from the matching, *active* profile
        if e['product_spec'] == self.profile_name and e[
                'profile_active'] is True:
            self.resolution = e['encode_resolution']
            self.rate_factor = e['encode_bitdepth']
            self.filetype = e['encode_filetype']
            self.encode_suffix = e['encode_suffix']
            self.encode_pk = e['id']

    if self.encode_suffix is None:
        ErrorObject().print_error(
            message="VEDA API Encode Data Fail: No Suffix")
        return None
def _s3_upload(self):
    """
    Upload single part (under threshold in node_config)
    node_config MULTI_UPLOAD_BARRIER

    Returns:
        True on upload, False if the S3 connection fails.
    """
    try:
        conn = boto.connect_s3(
            settings['aws_deliver_access_key'],
            settings['aws_deliver_secret_key'])
        delv_bucket = conn.get_bucket(settings['aws_deliver_bucket'])
    except Exception:
        # narrowed from a bare `except:` so system-exiting exceptions
        # are not swallowed; still best-effort by design
        ErrorObject().print_error(
            message='Deliverable Fail: s3 Connection Error - Singleton')
        return False
    upload_key = Key(delv_bucket)
    upload_key.key = self.output_file
    upload_key.set_contents_from_filename(
        os.path.join(self.workdir, self.output_file))
    return True
def run_veda(self):
    """
    Authenticate against the VEDA API and kick off the API workflow.

    Side effects:
        Sets ``self.veda_token``, ``self.veda_headers`` and
        ``self.veda_video_dict``.
    Returns:
        The result of ``send_veda_status()`` when a status update was
        requested, otherwise None.
    """
    if len(settings) == 0:
        return None
    self.veda_token = generate_apitoken.veda_tokengen()
    if self.veda_token is None:
        ErrorObject().print_error(
            message='VEDA API Conn Fail:\nInvalid Setup/Method')
        return None
    self.veda_headers = {
        'Authorization': 'Token ' + self.veda_token,
        'content-type': 'application/json'
    }
    self.veda_video_dict = self.determine_veda_pk()
    """
    Status Update Only
    """
    if self.veda_video_status is not None:
        return self.send_veda_status()
def _HLSPipeline(self):
    """
    Activate HLS, use hls lib to upload
    """
    source_path = os.path.join(self.workdir, self.source_file)
    if not os.path.exists(source_path):
        ErrorObject().print_error(
            message='Source File (local) NOT FOUND',
        )
        return None

    os.chdir(self.workdir)
    hls_job = VHLS(
        mezz_file=source_path,
        DELIVER_BUCKET=self.settings['edx_s3_endpoint_bucket'],
        ACCESS_KEY_ID=self.settings['edx_access_key_id'],
        SECRET_ACCESS_KEY=self.settings['edx_secret_access_key']
    )
    # only record the endpoint when the HLS job finished cleanly
    if hls_job.complete is not True:
        return None
    self.endpoint_url = hls_job.manifest_url
def val_tokengen():
    """
    Gen and authorize a VAL API token.

    Returns:
        The access token string, or None on a non-200 response
        (after printing an error).
    """
    payload = {
        'grant_type': 'password',
        'client_id': settings['val_client_id'],
        'client_secret': settings['val_secret_key'],
        'username': settings['val_username'],
        'password': settings['val_password']
    }
    r = requests.post(
        settings['val_token_url'] + '/', data=payload, timeout=20)
    if r.status_code != 200:
        ErrorObject().print_error(
            message='Token Gen Fail: VAL\nCheck VAL Config')
        return None
    # BUGFIX: parse the JSON body with json.loads; ast.literal_eval
    # cannot handle JSON literals such as true/false/null
    val_token = json.loads(r.text)['access_token']
    return val_token
def _boto_multipart(self):
    """
    Split file into chunks, upload chunks

    NOTE: this should never happen, as your files should be much
    smaller than this, but one never knows

    Returns:
        True on a completed multipart upload, False on connection or
        bucket failure.
    """
    part_dir = os.path.join(self.workdir, self.output_file.split('.')[0])
    if not os.path.exists(part_dir):
        os.mkdir(part_dir)
    os.chdir(part_dir)
    """
    Split File into chunks
    """
    split_command = 'split -b10m -a5'  # 5-char part names of 10mb
    sys.stdout.write('%s : %s\n' % (self.output_file, 'Generating Multipart'))
    os.system(' '.join(
        (split_command, os.path.join(self.workdir, self.output_file))))
    sys.stdout.flush()
    """
    Connect to s3
    """
    try:
        c = boto.connect_s3(
            settings['aws_deliver_access_key'],
            settings['aws_deliver_secret_key'])
        b = c.lookup(settings['aws_deliver_bucket'])
    except Exception:
        # narrowed from bare `except:`
        ErrorObject().print_error(
            message='Deliverable Fail: s3 Connection Error - Multipart')
        return False
    if b is None:
        ErrorObject().print_error(
            message='Deliverable Fail: s3 Bucket Connection Error')
        return False
    """
    Upload and stitch parts
    """
    mp = b.initiate_multipart_upload(self.output_file)
    part_number = 1
    # `part_name` (was `file`) no longer shadows the builtin
    for part_name in sorted(os.listdir(part_dir)):
        sys.stdout.write('%s : %s\r' % (part_name, 'uploading part'))
        fp = open(part_name, 'rb')
        mp.upload_part_from_file(fp, part_number)
        fp.close()
        sys.stdout.flush()
        part_number += 1
    sys.stdout.write('\n')
    mp.complete_upload()
    """Clean up multipart"""
    shutil.rmtree(part_dir)
    return True
def run(self):
    """
    Top-level worker entry point: configure, validate, intake, encode,
    deliver, and clean up for a single encode job.

    Pipeline Steps :
        I. Intake
            Ib. Validate Mezz
        II. change status in APIs
        III. Generate Encode Command
        IV. Execute Encodes
            IVa. Validate Products
        (*)V. Deliver Encodes (sftp and others?), retrieve URLs
        (*)VI. Change Status in APIs, add URLs
        VII. Clean Directory
    """
    # Build/refresh worker settings (interactive setup when requested)
    WS = WorkerSetup()
    if self.setup is True:
        WS.setup = True
    WS.run()
    self.settings = WS.settings_dict

    # An encode profile is mandatory -- nothing to do without one
    if self.encode_profile is None:
        ErrorObject().print_error(
            message='No Encode Profile Specified'
        )
        return None

    # Hydrate the video record from VEDA (or test fixtures)
    self.VideoObject = Video(
        veda_id=self.veda_id
    )
    self.VideoObject.activate()
    if self.VideoObject.valid is False:
        ErrorObject().print_error(
            message='Invalid Video / VEDA Data'
        )
        return None

    if not os.path.exists(self.workdir):
        os.mkdir(self.workdir)

    # Step I: pull the mezzanine down and re-validate the local copy
    self._ENG_INTAKE()
    if self.VideoObject.valid is False:
        ErrorObject().print_error(
            message='Invalid Video / Local'
        )
        return None

    # Step II: flag 'active transcode' in the upstream APIs
    self._UPDATE_API()

    # Steps III-V: encode + deliver, HLS and static take separate paths
    if self.encode_profile == 'hls':
        self._HLSPipeline()
    else:
        self._StaticPipeline()

    print self.endpoint_url
    if self.endpoint_url is not None:
        """
        Integrate with main
        """
        # Step VI: hand the deliverable URL back to the main pipeline
        veda_id = self.veda_id
        encode_profile = self.encode_profile
        celeryapp.deliverable_route.apply_async(
            (veda_id, encode_profile),
            queue='transcode_stat'
        )
    """
    Clean up workdir
    """
    # Step VII: only job-scoped (celery) runs own their workdir
    if self.jobid is not None:
        shutil.rmtree(
            self.workdir
        )
def validate(self):
    """
    Validate a video file via general file checks and ffprobe output.

    First: a general file test -- size > 0, file exists; then parse
    ffprobe stderr/stdout for fatal markers and the duration; finally,
    for product files, require the duration to be within 5 seconds of
    the mezzanine's.

    Returns:
        bool -- True only if every check passes.
    """
    if not os.path.exists(self.filepath):
        ErrorObject().print_error(
            message='File QA fail: File is not found\n' + self.filepath
        )
        return False
    if os.stat(self.filepath).st_size == 0:
        ErrorObject().print_error(message='File QA fail: Filesize is 0')
        return False
    """
    ffprobe file information
    """
    ffcommand = 'ffprobe -hide_banner '
    ffcommand += '\"' + self.filepath + '\"'
    p = subprocess.Popen(
        ffcommand,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        shell=True)
    # BUGFIX: explicit sentinel -- the original left `duration` unbound
    # when no "Duration:" line appeared and caught the NameError with a
    # bare try/except
    duration = None
    for line in iter(p.stdout.readline, b''):
        if 'No such file or directory' in line:
            return False
        if 'Invalid data found when processing input' in line:
            return False
        if "multiple edit list entries, a/v desync might occur, patch welcome" in line:
            return False
        if "Duration: " in line:
            """Get and Test Duration"""
            if "Duration: 00:00:00.0" in line:
                return False
            elif "Duration: N/A, " in line:
                return False
            vid_duration = line.split('Duration: ')[1].split(',')[0].strip()
            duration = Output._seconds_from_string(duration=vid_duration)
            if duration < 1.05:
                return False
    if duration is None:
        # ffprobe never reported a duration -- treat as invalid
        return False
    """
    duration test (if not mezz, is equal to mezz)
    """
    if self.VideoObject is not None and self.product_file is True:
        # within five seconds of the mezzanine duration
        if not (self.VideoObject.mezz_duration - 5) <= duration <= (self.VideoObject.mezz_duration + 5):
            return False
    return True
def send_val_data(self):
    """
    Create or update this video's record in VAL.

    VAL is very tetchy -- it needs a great deal of specific info
    or it will fail.

    '''
    sending_data = {
        encoded_videos = [{
            url="https://testurl.mp4",
            file_size=8499040,
            bitrate=131,
            profile="override",
            }, {...},],
        client_video_id = "This is a VEDA-VAL Test",
        courses = [ "TEST", "..." ],
        duration = 517.82,
        edx_video_id = "TESTID",
        status = "transcode_active"
        }

    ## "POST" for new objects to 'video' root url
    ## "PUT" for extant objects to video/id
        -- cannot send duplicate course records
    '''

    Returns:
        None in all paths; errors are printed via ErrorObject.
    """
    if self.val_token is None:
        return None

    # in case non-studio side upload
    if self.VideoObject.val_id is None or len(
            self.VideoObject.val_id) == 0:
        self.VideoObject.val_id = self.VideoObject.veda_id

    val_data = {
        'client_video_id': self.VideoObject.val_id,
        'duration': self.VideoObject.mezz_duration,
        'edx_video_id': self.VideoObject.val_id,
    }
    if not isinstance(self.VideoObject.course_url, list):
        self.VideoObject.course_url = [self.VideoObject.course_url]

    r1 = requests.get(
        '/'.join((settings['val_api_url'], self.VideoObject.val_id, '')),
        headers=self.val_headers,
        timeout=20)
    if r1.status_code != 200 and r1.status_code != 404:
        """
        Total API Failure
        """
        ErrorObject().print_error(message='VAL Communication Fail')
        return None

    if r1.status_code == 404:
        """
        Generate new VAL ID (shouldn't happen, but whatever)
        """
        val_data['encoded_videos'] = []
        val_data['courses'] = self.VideoObject.course_url
        val_data['status'] = self.val_video_status
        ## FINAL CONNECTION
        r2 = requests.post(
            settings['val_api_url'],
            data=json.dumps(val_data),
            headers=self.val_headers,
            timeout=20)
        if r2.status_code > 299:
            ErrorObject().print_error(method=self,
                                      message='VAL POST/PUT Fail: VAL')
            return None
    elif r1.status_code == 200:
        """
        ID is previously extant
        """
        # BUGFIX: parse JSON with json.loads (not ast.literal_eval,
        # which fails on true/false/null)
        val_api_return = json.loads(r1.text)
        """
        VAL will not allow duped studio urls to be sent, so
        we must scrub the data
        """
        # BUGFIX: the original removed items from course_url while
        # iterating it, which skips elements; rebuild the list instead
        self.VideoObject.course_url = [
            c for c in self.VideoObject.course_url
            if c not in val_api_return['courses']
        ]
        val_data['courses'] = self.VideoObject.course_url
        """
        Double check for profiles in case of overwrite
        """
        val_data['encoded_videos'] = []
        # add back in the encodes
        for e in val_api_return['encoded_videos']:
            val_data['encoded_videos'].append(e)
        """
        Determine Status
        """
        val_data['status'] = self.val_video_status
        """
        Make Request, finally
        """
        r2 = requests.put(
            '/'.join((settings['val_api_url'], self.VideoObject.val_id)),
            data=json.dumps(val_data),
            headers=self.val_headers,
            timeout=20)
        if r2.status_code > 299:
            ErrorObject().print_error(message='VAL POST/PUT Fail')
            return None