Example #1
0
    def _get_max_complete_date(self, job):
        """ Return the max_complete_date from aws

        :param job: instance of ScheduledJob
        :type job: ScheduledJob
        """
        job_dict = job.get(s3_path=None)
        # s3_path sample: s3://bucket_name/logs/log_name/
        s3_path = job_dict.get('s3_path')
        if s3_path is None:
            return None
        bucket_name, prefix = parse_s3_path(s3_path)
        prefix_list = prefix.split("/")
        if prefix_list[-1] is not '':
            log_name = prefix_list[-1]
        else:
            log_name = prefix_list[-2]
        try:
            log_data = get_log_meta_data(bucket_name, log_name)
            return get_deep(log_data, ['log', 'max_complete_date'], None)
        except Exception:
            log_exception(
                "Exception in running scanner when getting max_complete_date in s3 path: "
                + s3_path
            )
        return None
Example #2
0
def data_available(prefix,
                   input_date,
                   local,
                   done_file_name='COMPLETE',
                   force_et=False):
    """
    data_available takes a prefix and input_date and returns a
    True or False depending on whether there is a done_file_name
    in the S3 path

    Args:
    prefix -- the s3 path prefix of the form 's3://bucket/key1/key2/key3/...'
    input_date -- a date string of the form 'YYYY/MM/DD'
    local -- boolean; run on a dev machine or stage
    done_file_name -- name of the marker object looked up under
        prefix/input_date (defaults to 'COMPLETE')
    force_et -- boolean; True to run the mr without the "done_file_name"

    Returns:
    True / False
    """
    if force_et:
        # Caller asked to skip the marker check entirely.
        return True

    bucket, prefix_s3 = parse_s3_path(prefix)
    # S3 keys are always '/'-delimited, independent of the host OS, so the
    # key must not be assembled with os.sep (a backslash on Windows).
    key = '/'.join((prefix_s3, input_date, done_file_name))
    key = re.sub('/+', '/', key)  # remove extra slashes if any
    return bucket_key_exists(bucket, key, local)
Example #3
0
def data_available(prefix, input_date, local, done_file_name='COMPLETE', force_et=False):
    """
    data_available takes a prefix and input_date and returns a
    True or False depending on whether there is a done_file_name
    in the S3 path

    Args:
    prefix -- the s3 path prefix of the form 's3://bucket/key1/key2/key3/...'
    input_date -- a date string of the form 'YYYY/MM/DD'
    local -- boolean; run on a dev machine or stage
    done_file_name -- name of the marker object expected under
        prefix/input_date (defaults to 'COMPLETE')
    force_et -- boolean; True to run the mr without the "done_file_name"

    Returns:
    True / False
    """
    if force_et:
        # No marker lookup is performed when the run is forced.
        return True

    bucket, prefix_s3 = parse_s3_path(prefix)
    # Join with a literal '/': S3 object keys use forward slashes on every
    # platform, whereas os.sep would produce backslashes on Windows and
    # turn the clean-up regex into one matching a literal '+'.
    key_parts = [prefix_s3, input_date, done_file_name]
    raw_key = '/'.join(key_parts)
    key = re.sub(r'/+', '/', raw_key)  # remove extra slashes if any
    return bucket_key_exists(bucket, key, local)