def _get_max_complete_date(self, job):
    """
    Return the max_complete_date from aws

    The log name is taken to be the last non-empty component of the
    prefix parsed out of the job's s3_path (``log_name`` in
    ``s3://bucket_name/logs/log_name/``).

    :param job: instance of ScheduledJob
    :type job: ScheduledJob
    :returns: the value found under ['log', 'max_complete_date'] in the
        log meta data; None when the job has no s3_path, when no log name
        can be derived from the path, when the value is missing, or when
        fetching the meta data raises (the exception is logged).
    """
    job_dict = job.get(s3_path=None)
    # s3_path sample: s3://bucket_name/logs/log_name/
    s3_path = job_dict.get('s3_path')
    if s3_path is None:
        return None

    bucket_name, prefix = parse_s3_path(s3_path)

    # Drop trailing slashes, then keep whatever follows the last remaining
    # slash. Strings are compared by value here; the previous identity
    # check against '' relied on interpreter string interning.
    log_name = prefix.rstrip("/").rpartition("/")[2]
    if not log_name:
        # Nothing usable in the prefix (e.g. the path points at the bucket
        # root), so there is no log to look up.
        return None

    try:
        log_data = get_log_meta_data(bucket_name, log_name)
        return get_deep(log_data, ['log', 'max_complete_date'], None)
    except Exception:
        # Best effort: report the failure and let the caller treat the
        # date as unknown.
        log_exception(
            "Exception in running scanner when getting max_complete_date in s3 path: " + s3_path
        )
        return None
def data_available(prefix, input_date, local, done_file_name='COMPLETE', force_et=False):
    """
    data_available takes a prefix and input_date and reports whether there
    is a done_file_name in the S3 path

    Args:
        prefix -- the s3 path prefix of the form 's3://bucket/key1/key2/key3/...'
        input_date -- a date string of the form 'YYYY/MM/DD'
        local -- boolean; run on a dev machine or stage
            (passed through to bucket_key_exists)
        done_file_name -- name of the marker file looked up under
            <prefix>/<input_date>/; defaults to 'COMPLETE'
        force_et -- boolean; True to run the mr without the "done_file_name"

    Returns:
        True when force_et is set; otherwise the result of
        bucket_key_exists for the marker key (expected to be True / False)
    """
    if force_et:
        return True

    bucket, prefix_s3 = parse_s3_path(prefix)

    # S3 keys are always '/'-delimited regardless of the host OS, so the
    # separator is spelled out rather than taken from os.sep (which is a
    # backslash on Windows and would also break the regex below).
    key = '/'.join((prefix_s3, input_date, done_file_name))
    key = re.sub(r'/+', '/', key)  # remove extra slashes if any
    return bucket_key_exists(bucket, key, local)