def main():
    """
    Wait until every pipeline named in --dependencies is reported ready.

    Command line arguments:
        --dependencies: one or more pipeline names to wait for
        --refresh_rate: seconds to sleep between checks (default '900')
        --start_date: passed unchanged to check_dependencies_ready()

    Exits right away when no dependencies are given. Raises Exception when
    a dependency name is not among the pipelines from list_pipelines().
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--dependencies', type=str, nargs='+', default=None)
    arg_parser.add_argument(
        '--refresh_rate', dest='refresh_rate', default='900')
    arg_parser.add_argument('--start_date', dest='start_date')
    args = arg_parser.parse_args()

    # Nothing to wait for
    if not args.dependencies:
        sys.exit()

    # Lookup table: pipeline name -> pipeline id
    name_to_id = {}
    for pipeline in list_pipelines():
        pipeline_name = pipeline['name']
        name_to_id[pipeline_name] = pipeline['id']

    # Drop surrounding whitespace from each requested name
    names = [raw_name.strip() for raw_name in args.dependencies]

    # Validate each name and translate it to its pipeline id; the first
    # unknown name aborts the run
    pipeline_ids = []
    for name in names:
        if name not in name_to_id:
            raise Exception('Pipeline not found: %s.' % name)
        pipeline_ids.append(name_to_id[name])

    print('Start checking for dependencies')
    started_at = datetime.now()

    # Poll until the dependent pipelines are reported ready
    while True:
        if check_dependencies_ready(pipeline_ids, args.start_date):
            break
        print('checking')
        time.sleep(float(args.refresh_rate))

    # The doubled spaces reproduce what the two chained Python 2 print
    # statements used to emit
    elapsed = (datetime.now() - started_at).total_seconds()
    print('Finished checking for dependencies. Total time spent:  '
          '%s  seconds' % elapsed)
Exemple #2
0
def main():
    """
    Block until all pipelines listed in --dependencies are ready.

    Reads --dependencies, --refresh_rate and --start_date from the command
    line, maps every dependency name to its pipeline id using
    list_pipelines(), and then calls check_dependencies_ready() repeatedly,
    sleeping --refresh_rate seconds after each unsuccessful check.

    Exits immediately if no dependencies were supplied; raises Exception
    for the first dependency name that matches no pipeline.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--dependencies', type=str, nargs='+', default=None)
    cli.add_argument('--refresh_rate', dest='refresh_rate', default='900')
    cli.add_argument('--start_date', dest='start_date')
    options = cli.parse_args()

    # Without dependencies there is nothing to do
    requested = options.dependencies
    if not requested:
        sys.exit()

    # Build the name -> id table from the available pipelines
    id_by_name = {}
    for entry in list_pipelines():
        entry_name = entry['name']
        id_by_name[entry_name] = entry['id']

    # Names with surrounding whitespace removed
    cleaned = [raw.strip() for raw in requested]

    # Report the first name that does not match a known pipeline
    unknown = [name for name in cleaned if name not in id_by_name]
    if unknown:
        raise Exception('Pipeline not found: %s.' % unknown[0])

    # Replace each name by the id of its pipeline
    dependency_ids = [id_by_name[name] for name in cleaned]

    print('Start checking for dependencies')
    begin = datetime.now()

    # Keep checking until all dependent pipelines have finished
    ready = check_dependencies_ready(dependency_ids, options.start_date)
    while not ready:
        print('checking')
        time.sleep(float(options.refresh_rate))
        ready = check_dependencies_ready(dependency_ids, options.start_date)

    # Spacing matches the output of the former pair of print statements
    seconds_spent = (datetime.now() - begin).total_seconds()
    print('Finished checking for dependencies. Total time spent:  '
          '%s  seconds' % seconds_spent)
Exemple #3
0
def dependency_check():
    """
    Wait for dependent pipelines and report the ones that failed.

    Command line arguments:
        --dependencies: pipeline names to wait for
        --dependencies_ok_to_fail: additional pipeline names, handed to
            check_dependencies_ready() as the list to ignore
        --pipeline_name: name used in the subject of the SNS message
        --refresh_rate: seconds to sleep between checks (default '900')
        --start_date: passed unchanged to check_dependencies_ready()
        --sns_topic_arn: SNS topic that receives the failure message

    Exits right away when both dependency lists are empty. Raises
    Exception when a name matches no pipeline, or when failures were
    collected but no SNS topic ARN was given.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--dependencies', type=str, nargs='+', default=[])
    arg_parser.add_argument(
        '--dependencies_ok_to_fail', type=str, nargs='+', default=[])
    arg_parser.add_argument('--pipeline_name', dest='pipeline_name')
    arg_parser.add_argument(
        '--refresh_rate', dest='refresh_rate', default='900')
    arg_parser.add_argument('--start_date', dest='start_date')
    arg_parser.add_argument('--sns_topic_arn', dest="sns_topic_arn")
    args = arg_parser.parse_args()

    # Nothing to wait for
    if not (args.dependencies or args.dependencies_ok_to_fail):
        sys.exit()

    # Lookup table: pipeline name -> pipeline id
    name_to_id = {}
    for pipeline in list_pipelines():
        pipeline_name = pipeline['name']
        name_to_id[pipeline_name] = pipeline['id']

    # Strip whitespace; the ok-to-fail names are checked as well, so they
    # are appended to the required ones
    ok_to_fail = [name.strip() for name in args.dependencies_ok_to_fail]
    all_names = [name.strip() for name in args.dependencies] + ok_to_fail

    # Every name must belong to a known pipeline
    for name in all_names:
        if name not in name_to_id:
            raise Exception('Pipeline not found: %s.' % name)

    # Dependency id -> dependency name
    id_to_name = dict((name_to_id[name], name) for name in all_names)

    print('Start checking for dependencies')
    started_at = datetime.now()

    failed = []

    # Check first, sleep only if another round is needed
    while True:
        print('checking')
        ready, newly_failed = check_dependencies_ready(
            id_to_name, args.start_date, ok_to_fail)
        failed.extend(newly_failed)
        if ready:
            break
        time.sleep(float(args.refresh_rate))

    # Failures are reported through SNS, which requires a topic
    if failed:
        if not args.sns_topic_arn:
            raise Exception('ARN for SNS topic not specified in ETL config')
        message = 'Failed dependencies: %s.' % ', '.join(set(failed))
        subject = 'Dependency error for pipeline: %s.' % args.pipeline_name
        SNSConnection().publish(args.sns_topic_arn, message, subject)

    # The doubled spaces reproduce what the two chained Python 2 print
    # statements used to emit
    elapsed = (datetime.now() - started_at).total_seconds()
    print('Finished checking for dependencies. Total time spent:  '
          '%s  seconds' % elapsed)
def main():
    """
    Wait for dependent pipelines and report the ones that failed.

    Command line arguments:
        --dependencies: pipeline names to wait for
        --dependencies_ok_to_fail: additional pipeline names, handed to
            check_dependencies_ready() as the list to ignore
        --pipeline_name: name used in the subject of the SNS message
        --refresh_rate: seconds to sleep between checks (default '900')
        --start_date: passed unchanged to check_dependencies_ready()
        --sns_topic_arn: SNS topic that receives the failure message

    Exits right away when both dependency lists are empty.

    Raises:
        Exception: when a name matches no pipeline returned by
            list_pipelines(), or when failures were collected but no SNS
            topic ARN was given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dependencies', type=str, nargs='+', default=[])
    parser.add_argument(
        '--dependencies_ok_to_fail', type=str, nargs='+', default=[])
    parser.add_argument('--pipeline_name', dest='pipeline_name')
    parser.add_argument('--refresh_rate', dest='refresh_rate', default='900')
    parser.add_argument('--start_date', dest='start_date')
    parser.add_argument('--sns_topic_arn', dest="sns_topic_arn")

    args = parser.parse_args()

    # Exit if there are no dependencies
    if not args.dependencies and not args.dependencies_ok_to_fail:
        sys.exit()

    # Create mapping from pipeline name to id
    pipeline_name_to_id = dict(
        (pipeline['name'], pipeline['id']) for pipeline in list_pipelines()
    )

    # Remove whitespace from dependency lists. Real lists are built here
    # (instead of relying on map() returning one) so the names can be
    # combined and iterated more than once.
    dependencies_to_ignore = [
        name.strip() for name in args.dependencies_ok_to_fail]
    dependency_names = [name.strip() for name in args.dependencies]

    # Add the dependencies which can fail to the list of dependencies
    dependency_names.extend(dependencies_to_ignore)

    # Check if all dependencies are valid pipelines
    for dependency in dependency_names:
        if dependency not in pipeline_name_to_id:
            raise Exception('Pipeline not found: %s.' % dependency)

    # Map from dependency id to name
    dependencies = dict(
        (pipeline_name_to_id[dep], dep) for dep in dependency_names)

    print('Start checking for dependencies')
    start_time = datetime.now()

    failures = []
    dependencies_ready = False

    # Loop until all dependent pipelines have finished or failed.
    # The check runs before any sleep, so a run whose dependencies are
    # already ready does not wait a full refresh interval first.
    while not dependencies_ready:
        print('checking')
        dependencies_ready, new_failures = check_dependencies_ready(
            dependencies, args.start_date, dependencies_to_ignore)
        failures.extend(new_failures)
        if not dependencies_ready:
            time.sleep(float(args.refresh_rate))

    # Send message through SNS if there are failures
    if failures:
        if args.sns_topic_arn:
            # Sorted so the message text does not depend on set ordering
            message = 'Failed dependencies: %s.' % ', '.join(
                sorted(set(failures)))
            subject = 'Dependency error for pipeline: %s.' % args.pipeline_name
            SNSConnection().publish(args.sns_topic_arn, message, subject)
        else:
            raise Exception('ARN for SNS topic not specified in ETL config')

    # The doubled spaces keep the output identical to what the former
    # pair of print statements produced
    total_seconds = (datetime.now() - start_time).total_seconds()
    print('Finished checking for dependencies. Total time spent:  '
          '%s  seconds' % total_seconds)