Example #1
 def upload_done(self):
     with threadlocal.tmp_bind(log, file=self.key) as tmp_log:
         if self.exist_on_remote():
             self.state = READY
             self.save()
             tmp_log.info('file_upload_state_success')
         else:
             tmp_log.warn('file_upload_state_failure', reason='not_on_remote')
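Note: tmp_bind (from structlog.threadlocal) is a context manager that binds extra key/value pairs to a logger only for the duration of the with block and restores the previous thread-local context on exit. Below is a minimal, self-contained sketch of that behaviour, assuming a structlog version that still ships the structlog.threadlocal module; the request_id key is purely illustrative:

import structlog
from structlog.threadlocal import tmp_bind, wrap_dict

# tmp_bind saves and restores the shared thread-local context, so the logger
# must be configured with the thread-local dict class.
structlog.configure(context_class=wrap_dict(dict))

log = structlog.get_logger()
with tmp_bind(log, request_id="abc123") as tmp_log:
    tmp_log.info("inside_block")   # this event carries request_id
log.info("outside_block")          # request_id has been dropped again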
Example #2
 def test_bind(self, log):
     log = log.bind(y=23)
     with tmp_bind(log, x=42, y='foo') as tmp_log:
         assert (
             {'y': 'foo', 'x': 42}
             == tmp_log._context._dict == log._context._dict
         )
     assert {'y': 23} == log._context._dict
     assert "y=23 event='foo'" == log.msg('foo')
Example #3
 def test_bind(self, log):
     """
     tmp_bind does not modify the thread-local state.
     """
     log = log.bind(y=23)
     with tmp_bind(log, x=42, y='foo') as tmp_log:
         assert {
             'y': 'foo', 'x': 42
         } == tmp_log._context._dict == log._context._dict
     assert {'y': 23} == log._context._dict
Example #4
        def wrapper(state: State) -> Process:
            with tmp_bind(logger, func=func.__qualname__) as log:
                step_in_inject_args = inject_args(func)

                try:
                    with transactional(db, log):
                        result = step_in_inject_args(state)
                        return Success(result)
                except Exception as ex:
                    return Waiting(ex)
Example #5
 def test_bind(self, log):
     """
     tmp_bind does not modify the thread-local state.
     """
     log = log.bind(y=23)
     with tmp_bind(log, x=42, y="foo") as tmp_log:
         assert ({
             "y": "foo",
             "x": 42
         } == tmp_log._context._dict == log._context._dict)
     assert {"y": 23} == log._context._dict
Example #6
 def test_bind(self, log):
     """
     tmp_bind does not modify the thread-local state.
     """
     log = log.bind(y=23)
     with tmp_bind(log, x=42, y='foo') as tmp_log:
         assert {
             'y': 'foo',
             'x': 42
         } == tmp_log._context._dict == log._context._dict
     assert {'y': 23} == log._context._dict
Example #7
 def test_bind(self, log):
     """
     tmp_bind does not modify the thread-local state.
     """
     log = log.bind(y=23)
     with tmp_bind(log, x=42, y="foo") as tmp_log:
         assert (
             {"y": "foo", "x": 42}
             == tmp_log._context._dict
             == log._context._dict
         )
     assert {"y": 23} == log._context._dict
Example #8
 def test_bind_exc(self, log):
     """
     tmp_bind cleans up properly on exceptions.
     """
     log = log.bind(y=23)
     with pytest.raises(ValueError):
         with tmp_bind(log, x=42, y='foo') as tmp_log:
             assert {
                 'y': 'foo', 'x': 42
             } == tmp_log._context._dict == log._context._dict
             raise ValueError
     assert {'y': 23} == log._context._dict
Example #9
        def wrapper(state: State) -> Process:
            with tmp_bind(logger, func=func.__qualname__) as log:

                step_in_inject_args = inject_args(func)

                try:
                    with transactional(db, log):
                        result = step_in_inject_args(state)
                        return Success(result)
                except Exception as ex:
                    log.warning("Step failed", exc_info=ex)
                    return Failed(ex)
Example #10
 def test_bind_exc(self, log):
     """
     tmp_bind cleans up properly on exceptions.
     """
     log = log.bind(y=23)
     with pytest.raises(ValueError):
         with tmp_bind(log, x=42, y='foo') as tmp_log:
             assert {
                 'y': 'foo',
                 'x': 42
             } == tmp_log._context._dict == log._context._dict
             raise ValueError
     assert {'y': 23} == log._context._dict
Example #11
    def test_bind_exc(self, log):
        """
        tmp_bind cleans up properly on exceptions.
        """
        log = log.bind(y=23)
        with pytest.raises(ValueError):
            with tmp_bind(log, x=42, y="foo") as tmp_log:
                assert ({
                    "y": "foo",
                    "x": 42
                } == tmp_log._context._dict == log._context._dict)
                raise ValueError

        assert {"y": 23} == log._context._dict
Example #12
    def test_bind_exc(self, log):
        """
        tmp_bind cleans up properly on exceptions.
        """
        log = log.bind(y=23)
        with pytest.raises(ValueError):
            with tmp_bind(log, x=42, y="foo") as tmp_log:
                assert (
                    {"y": "foo", "x": 42}
                    == tmp_log._context._dict
                    == log._context._dict
                )
                raise ValueError

        assert {"y": 23} == log._context._dict
Example #13
def build_combined_dataset_from_sources(
    target_dataset_cls: Type[dataset_base.DatasetBase],
    feature_definition_config: FeatureDataSourceMap,
    filters: List[dataset_filter.DatasetFilter] = None,
):
    """Builds a combined dataset from a feature definition.

    Args:
        target_dataset_cls: Target dataset class.
        feature_definition_config: Dictionary mapping an output field to the
            data sources that will be used to pull values from.
        filters: A list of dataset filters applied to the datasets before
            assembling features.
    """
    loaded_data_sources = load_data_sources(feature_definition_config)

    # Convert data sources to instances of `target_data_cls`.
    intermediate_datasets = {
        data_source_cls: target_dataset_cls.build_from_data_source(source)
        for data_source_cls, source in loaded_data_sources.items()
    }

    # Apply filters to datasets.
    for key in intermediate_datasets:
        dataset = intermediate_datasets[key]
        for data_filter in filters or []:
            dataset = data_filter.apply(dataset)
        intermediate_datasets[key] = dataset

    # Build feature columns from feature_definition_config.
    data = pd.DataFrame({})
    # structlog makes it very easy to bind extra attributes to `log` as it is passed down the stack.
    log = structlog.get_logger()
    for field, data_source_classes in feature_definition_config.items():
        for data_source_cls in data_source_classes:
            dataset = intermediate_datasets[data_source_cls]
            with tmp_bind(log, dataset_name=data_source_cls.SOURCE_NAME, field=field) as log:
                try:
                    data = dataset_utils.fill_fields_with_data_source(
                        log, data, dataset.data, target_dataset_cls.INDEX_FIELDS, [field],
                    )
                except Exception:
                    log.exception("trying to fill fields")
                    raise

    return target_dataset_cls(data)
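The comment in the example above about binding extra attributes to `log` as it is passed down the stack is the pattern most of these projects use tmp_bind for: a helper receives an already-bound logger, every event it emits carries the caller's context, and that context is dropped again when the block exits. A minimal sketch of the pattern, assuming the same thread-local configuration as in the earlier sketch; process_dataset and process_row are hypothetical helpers, not part of the project quoted here:

import structlog
from structlog.threadlocal import tmp_bind, wrap_dict

structlog.configure(context_class=wrap_dict(dict))
log = structlog.get_logger()

def process_row(log, row):
    # The dataset_name bound by the caller is attached to this event automatically.
    log.info("processing_row", row_id=row["id"])

def process_dataset(name, rows):
    # dataset_name is only bound while this block runs; it is unbound again on
    # exit, even if process_row raises.
    with tmp_bind(log, dataset_name=name) as bound_log:
        for row in rows:
            process_row(bound_log, row)

process_dataset("example_source", [{"id": 1}, {"id": 2}])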
Example #14
def _process_one_task_mapping_work_item(
    work_item: ProjectTaskMappingWorkItem,
    evg_api: EvergreenApi,
    mongo: MongoWrapper,
    after_date: datetime,
) -> None:
    """
    Process a task mapping work item.

    :param work_item: Task mapping to create.
    :param evg_api: An instance of the evg_api client
    :param mongo: An instance of MongoWrapper.
    :param after_date: The date at which to start analyzing commits of the project.
    """
    with tmp_bind(LOGGER, project=work_item.project, evergreen_module=work_item.module) as log:
        log.info("Starting task mapping work item processing for work_item")
        if _seed_task_mappings_for_project(evg_api, mongo, work_item, after_date, log):
            work_item.complete(mongo.task_mappings_queue())
Example #15
def _build_dataframe(
    feature_definitions: Mapping[str, List[str]],
    datasource_dataframes: Mapping[str, pd.DataFrame],
    override=Override.BY_TIMESERIES,
) -> pd.DataFrame:
    # structlog makes it very easy to bind extra attributes to `log` as it is passed down the stack.
    log = structlog.get_logger()

    # These are columns that are expected to have a single value for each FIPS. Get the columns
    # from every row of each data source and then keep one of each unique row.
    preserve_columns = [
        CommonFields.AGGREGATE_LEVEL,
        CommonFields.STATE,
        CommonFields.COUNTRY,
        CommonFields.COUNTY,
    ]
    all_identifiers = pd.concat(
        df.reset_index().loc[:, [CommonFields.FIPS] +
                             list(df.columns.intersection(preserve_columns))]
        for df in datasource_dataframes.values()).drop_duplicates()
    # Make a DataFrame with a unique FIPS index. If multiple rows are found with the same FIPS then there
    # are rows in the input data sources that have different values for county name, state etc.
    fips_indexed = all_identifiers.set_index(CommonFields.FIPS,
                                             verify_integrity=True)

    # Inspired by pd.Series.combine_first(). Create a new index which is a union of all the input dataframe
    # index.
    dataframes = list(datasource_dataframes.values())
    new_index = dataframes[0].index
    for df in dataframes[1:]:
        new_index = new_index.union(df.index)
    # Override.BY_ROW needs to preserve the rows of the input dataframes. If not going BY_ROW
    # reindex the inputs now to avoid reindexing for each field below.
    if override is not Override.BY_ROW:
        datasource_dataframes = {
            name: df.reindex(new_index, copy=False)
            for name, df in datasource_dataframes.items()
        }

    # Build feature columns from feature_definitions.
    data = pd.DataFrame(index=new_index)
    for field_name, data_source_names in feature_definitions.items():
        log.info("Working field", field=field_name)
        field_out = None
        # Go through the data sources, starting with the highest priority.
        for datasource_name in reversed(data_source_names):
            with tmp_bind(log, dataset_name=datasource_name,
                          field=field_name) as log:
                datasource_field_in = datasource_dataframes[datasource_name][
                    field_name]
                if field_out is None:
                    # Copy all values from the highest priority input to the output
                    field_out = datasource_field_in
                elif override == Override.BY_TIMESERIES:
                    keep_higher_priority = field_out.groupby(
                        level=[CommonFields.FIPS]).transform(
                            lambda x: x.notna().any())
                    # Copy from datasource_field_in only on rows where all rows of field_out with that FIPS are NaN.
                    field_out = field_out.where(keep_higher_priority,
                                                datasource_field_in)
                elif override == Override.BY_TIMESERIES_POINT:
                    # Copy from datasource_field_in only on rows where field_out is NaN
                    field_out = field_out.where(pd.notna(field_out),
                                                datasource_field_in)
                else:
                    assert override == Override.BY_ROW
                    # Copy from datasource_field_in rows that are not yet in field_out
                    this_not_in_result = ~datasource_field_in.index.isin(
                        field_out.index)
                    field_out = field_out.append(
                        datasource_field_in.loc[this_not_in_result])
                dups = field_out.index.duplicated(keep=False)
                if dups.any():
                    log.error("Found duplicates in index")
                    # This is bad, somehow the input /still/ has duplicates.
                    raise ValueError()
        data.loc[:, field_name] = field_out

    if not fips_indexed.empty:
        # See https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#joining-with-two-multiindexes
        data = data.join(fips_indexed, on=["fips"], how="left")

    return data
Example #16
 def test_yields_a_new_bound_logger_if_called_on_lazy_proxy(self, log):
     with tmp_bind(log, x=42) as tmp_log:
         assert "x=42 event='bar'" == tmp_log.msg('bar')
     assert "event='bar'" == log.msg('bar')