Example #1
def main(args: argparse.Namespace) -> None:
    """ entry point for the 'backup' subcommand """
    if args.dry_run:
        logger.warning('Running in dry-run mode; no files will be backed up!')
    logger.info(f'Starting backup for {args.name}')
    backup_store = get_backup_store(args.name)

    with backup_store.unlock(dry_run=args.dry_run,
                             preserve_scratch=args.preserve_scratch_dir):
        marked_files: Set[str] = set()
        for base_path in staticconf.read_list('directories',
                                              namespace=args.name):
            abs_base_path = os.path.abspath(base_path)
            exclusions = compile_exclusions(
                staticconf.read_list('exclusions', [], namespace=args.name))
            marked_files |= _scan_directory(
                abs_base_path,
                backup_store,
                exclusions,
                args.dry_run,
            )

        for abs_file_name in backup_store.manifest.files() - marked_files:
            logger.info(f'{abs_file_name} has been deleted')
            if not args.dry_run:
                backup_store.manifest.delete(abs_file_name)
    logger.info(f'Backup for {args.name} finished')
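The 'directories' and 'exclusions' keys above are read from a per-backup staticconf namespace. A minimal sketch of how such a namespace might be populated with staticconf.YamlConfiguration (the file name, namespace, and values here are hypothetical):

import staticconf

# Hypothetical per-backup config; 'photos.yaml' might contain:
#   directories:
#       - /home/user/photos
#   exclusions:
#       - '.*\.tmp$'
staticconf.YamlConfiguration('photos.yaml', namespace='photos')

# The backup code then reads those lists back out of the namespace.
directories = staticconf.read_list('directories', namespace='photos')
exclusions = staticconf.read_list('exclusions', [], namespace='photos')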
Example #2
def test_create_emr_args(input_date, dev, cores, pipeline_yaml):
    print "just starting"
    load_package_config('config.yaml')
    YamlConfiguration(pipeline_yaml)

    input_prefix = read_list('pipeline.et_step.s3_prefixes')[0]
    input_file = input_prefix + input_date + '/part-*.gz'

    expected_args = EXPECTED_DEV_ARGS if dev else EXPECTED_AWS_ARGS
    expected_out_file = read_string('pipeline.s3_output_prefix')
    delimiter = read_string('redshift_column_delimiter')
    with mock.patch.dict(os.environ, {'LOGNAME': 'testuser', 'YELPCODE': '.'}):
        logname = os.environ['LOGNAME']
        expected_out_file = os.path.join(
            expected_out_file.format(logname=logname),
            input_date
        )
        extractions = pipeline_yaml_schema_file_path()
        formatted_args = expected_args.format(input_file,
                                              expected_out_file,
                                              cores,
                                              extractions,
                                              delimiter)
        output_under_test = create_emr_args(input_date, 10,
                                            input_prefix, dev)
        assert output_under_test == formatted_args
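The reads above assume a pipeline config that defines the prefix, output, and delimiter keys. A hypothetical fragment that would satisfy them, expressed with staticconf.DictConfiguration (all values are made up; note the {logname} placeholder that the test later fills from the environment):

import staticconf

staticconf.DictConfiguration({
    'pipeline': {
        'et_step': {'s3_prefixes': ['s3://bucket/logs/']},   # hypothetical bucket
        's3_output_prefix': 's3://bucket/output/{logname}',  # formatted with LOGNAME later
    },
    'redshift_column_delimiter': '|',
})

input_prefix = staticconf.read_list('pipeline.et_step.s3_prefixes')[0]
delimiter = staticconf.read_string('redshift_column_delimiter')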
Example #3
def get_autoscaling_config(config_namespace: str) -> AutoscalingConfig:
    """ Load autoscaling configuration values from the provided config_namespace, falling back to the
    values stored in the default namespace if none are specified.

    :param config_namespace: namespace to read from before falling back to the default namespace
    :returns: AutoscalingConfig object with loaded config values
    """
    default_excluded_resources = staticconf.read_list(
        'autoscaling.excluded_resources', default=[])
    default_setpoint = staticconf.read_float('autoscaling.setpoint')
    default_target_capacity_margin = staticconf.read_float(
        'autoscaling.target_capacity_margin')

    reader = staticconf.NamespaceReaders(config_namespace)
    return AutoscalingConfig(
        excluded_resources=reader.read_list(
            'autoscaling.excluded_resources',
            default=default_excluded_resources),
        setpoint=reader.read_float('autoscaling.setpoint',
                                   default=default_setpoint),
        target_capacity_margin=reader.read_float(
            'autoscaling.target_capacity_margin',
            default=default_target_capacity_margin,
        ),
    )
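The effect is that a per-pool namespace can override any of the three autoscaling values, while unset keys fall back to the default namespace. A minimal illustration with hypothetical namespaces and values:

import staticconf

# Defaults live in the global (default) namespace.
staticconf.DictConfiguration({'autoscaling': {'setpoint': 0.7, 'target_capacity_margin': 0.1}})

# A pool namespace that only overrides the setpoint.
staticconf.DictConfiguration({'autoscaling': {'setpoint': 0.9}}, namespace='pool_a.config')

reader = staticconf.NamespaceReaders('pool_a.config')
default_setpoint = staticconf.read_float('autoscaling.setpoint')                # 0.7
setpoint = reader.read_float('autoscaling.setpoint', default=default_setpoint)  # 0.9 (overridden)
margin = reader.read_float(
    'autoscaling.target_capacity_margin',
    default=staticconf.read_float('autoscaling.target_capacity_margin'),
)  # 0.1 (falls back to the default namespace)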
Example #4
def test_get_metrics(end_time):

    required_metrics = staticconf.read_list(
        'autoscale_signal.required_metrics',
        namespace='bar.mesos_config',
    )
    metrics_client = mock.Mock()
    metrics_client.get_metric_values.side_effect = [
        {
            'cpus_allocated': [(1, 2), (3, 4)]
        },
        {
            'cpus_allocated': [(5, 6), (7, 8)]
        },
        {
            'app1,cost': [(1, 2.5), (3, 4.5)]
        },
    ]
    metrics = get_metrics_for_signal('foo', 'bar', 'mesos', 'app1',
                                     metrics_client, required_metrics,
                                     end_time)
    assert metrics_client.get_metric_values.call_args_list == [
        mock.call(
            'cpus_allocated',
            SYSTEM_METRICS,
            end_time.shift(minutes=-10).timestamp,
            end_time.timestamp,
            app_identifier='app1',
            extra_dimensions={
                'cluster': 'foo',
                'pool': 'bar'
            },
            is_regex=False,
        ),
        mock.call(
            'cpus_allocated',
            SYSTEM_METRICS,
            end_time.shift(minutes=-10).timestamp,
            end_time.timestamp,
            app_identifier='app1',
            extra_dimensions={
                'cluster': 'foo',
                'pool': 'bar.mesos'
            },
            is_regex=False,
        ),
        mock.call(
            'cost',
            APP_METRICS,
            end_time.shift(minutes=-30).timestamp,
            end_time.timestamp,
            app_identifier='app1',
            extra_dimensions={},
            is_regex=False,
        ),
    ]
    assert 'cpus_allocated' in metrics
    assert 'app1,cost' in metrics
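The shape of required_metrics is not shown in the test, but a hypothetical value consistent with the asserted calls (two cpus_allocated queries over 10 minutes and one cost query over 30 minutes) might look like this; the key names are assumptions:

required_metrics = [
    {'name': 'cpus_allocated', 'type': SYSTEM_METRICS, 'minute_range': 10},  # hypothetical schema
    {'name': 'cost', 'type': APP_METRICS, 'minute_range': 30},               # hypothetical schema
]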
Example #5
def _split_root_prefix(abs_file_name: str,
                       backup_name: str) -> Tuple[str, str]:
    for directory in staticconf.read_list('directories',
                                          namespace=backup_name):
        abs_root = os.path.abspath(directory) + os.path.sep
        if abs_file_name.startswith(abs_root):
            return abs_root, abs_file_name[len(abs_root):]
    raise ValueError(
        f'{abs_file_name} does not start with any directory prefix')
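A quick sketch of the intended behavior, assuming a hypothetical 'photos' namespace configured with directories: ['/home/user/photos']:

root, relative = _split_root_prefix('/home/user/photos/2020/img.jpg', 'photos')
# root == '/home/user/photos/', relative == '2020/img.jpg'
# A file outside every configured directory raises ValueError.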
Example #6
def _load_module_configs(env_config_path: str):
    staticconf.YamlConfiguration(env_config_path)
    for config in staticconf.read_list('module_config', default=[]):
        if 'file' in config:
            staticconf.YamlConfiguration(config['file'],
                                         namespace=config['namespace'])
        staticconf.DictConfiguration(config.get('config', {}),
                                     namespace=config['namespace'])
        if 'initialize' in config:
            path = config['initialize'].split('.')
            function = path.pop()
            module_name = '.'.join(path)
            module = __import__(module_name, globals(), locals(), [path[-1]])
            getattr(module, function)()
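Each module_config entry names a namespace, optionally a YAML file and inline config to load into it, and optionally a dotted 'initialize' path whose final component is called after loading. A hypothetical entry, shown as the dict the loop would receive (all names and paths are made up):

config = {
    'namespace': 'my_metrics',              # required: target namespace
    'file': '/path/to/metrics.yaml',        # optional: YAML loaded into that namespace
    'config': {'aws_region': 'us-west-2'},  # optional: inline values for the namespace
    'initialize': 'my_metrics.initialize',  # optional: module path plus function to call
}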
Example #7
def s3_to_psv_main(args):

    mrjob = read_string('pipeline.et_step.mrjob')
    stream_name = read_string('pipeline.et_step.s3_to_s3_stream')
    DATABASE = read_string('pipeline.redshift_database')

    LOG_STREAM = PipelineStreamLogger(
        stream_name,
        args.run_local,
        mrjob,
        input_date=args.date
    )

    day_to_run = setup_dates_to_check(args.date, args.run_local, LOG_STREAM)

    try:
        if not args.run_local:
            setup_private(args.private)
        # Create a psql instance based on args
        if args.skip_progress_in_redshift:
            status_table = DynamoDbStatusTable(
                LOG_STREAM, run_local=args.run_local
            )
        else:
            status_table = RedshiftStatusTable(
                RedshiftPostgres(
                    LOG_STREAM, args.private, run_local=args.run_local
                )
            )
        load_msg = __load_data_from_s3(
            status_table,
            read_list('pipeline.et_step.s3_prefixes'),
            day_to_run,
            mrjob,
            args.run_local,
            DATABASE,
            LOG_STREAM,
            force_et=args.force_et
        )
        LOG_STREAM.write_msg("complete", extra_msg=load_msg)

    finally:
        clear_env(args.run_local)
Example #8
def sensu_checkin(
    *,
    check_name: str,
    output: str,
    source: str,
    status: Status = Status.OK,
    app: Optional[str] = None,
    pool: Optional[str] = None,
    scheduler: Optional[str] = None,
    noop: bool = False,
    page: bool = True,
    **kwargs: Any,
) -> None:
    # This function feels like a massive hack, let's revisit and see if we can make it better (CLUSTERMAN-304)
    #
    # TODO (CLUSTERMAN-126) right now there's only one app per pool so use the global pool namespace
    # We assume the "pool" name and the "app" name are the same
    #
    # Use 'no-namespace' instead of None so we don't skip the per-cluster override
    pool_namespace = POOL_NAMESPACE.format(
        pool=app, scheduler=scheduler) if app else 'no-namespace'

    # read the sensu configuration from srv-configs; signals are not required to define this, so in the case
    # that they do not define anything, we fall back to the clusterman config.  The clusterman config can override
    # alerts on a per-cluster basis, so first check there; if nothing is defined there, fall back to the default,
    # which is required to be defined, so we know that someone is going to get the notification
    #
    sensu_config = dict(
        staticconf.read_list('sensu_config',
                             default=[{}],
                             namespace=pool_namespace).pop())
    if not sensu_config:
        sensu_config = dict(
            staticconf.read_list(f'clusters.{source}.sensu_config',
                                 default=[{}]).pop())
    if not sensu_config:
        sensu_config = dict(staticconf.read_list('sensu_config').pop())

    # If we've turned off paging in the config, we don't want this function to ever page
    config_page = sensu_config.pop('page', None)
    page = False if config_page is False else page

    # So we know where alerts are coming from precisely
    output += ''.join([
        '\n\nThis check came from:\n',
        f'- Cluster/region: {source}\n',
        f'- Pool: {pool}.{scheduler}\n' if pool else '',
        f'- App: {app}\n' if app else '',
    ])

    sensu_config.update({
        'name': check_name,
        'output': output,
        'source': source,
        'status': status.value,
        'page': page,
    })
    # values passed in to this function override config file values (is this really correct??)
    sensu_config.update(kwargs)

    pysensu_yelp = _get_sensu()
    if noop or not pysensu_yelp:
        logger.info(('Would have sent this event to Sensu:\n'
                     f'{pprint.pformat(sensu_config)}'))
        return

    # team and runbook are required entries in srv-configs, so we know this will go to the "right" place
    pysensu_yelp.send_event(**sensu_config)
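The sensu_config values popped above are expected to be single-element lists wrapping a dict of Sensu event fields; a hypothetical value consistent with the reads and with the comment about required entries (team and runbook) might be:

sensu_config = [
    {
        'team': 'my_team',          # hypothetical; required in the fallback config
        'runbook': 'y/my-runbook',  # hypothetical
        'page': False,              # optional; False here keeps paging off
    },
]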