def upload_done(self):
    with threadlocal.tmp_bind(log, file=self.key) as tmp_log:
        if self.exist_on_remote():
            self.state = READY
            self.save()
            tmp_log.info('file_upload_state_success')
        else:
            tmp_log.warn('file_upload_state_failure', reason='not_on_remote')

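For orientation before the remaining examples, here is a minimal sketch of the behaviour these snippets rely on. It is not taken from any of the projects above (the request_id and user keys are invented), and it assumes a structlog version that still ships structlog.threadlocal: tmp_bind adds key/value pairs to a thread-local-backed logger for the duration of a with block and restores the previous context on exit.

import structlog
from structlog.threadlocal import tmp_bind, wrap_dict

# Hypothetical, minimal setup: tmp_bind needs a thread-local context class.
structlog.configure(context_class=wrap_dict(dict))
log = structlog.get_logger()
log = log.bind(request_id="abc123")           # stays bound after the block

with tmp_bind(log, user="alice") as tmp_log:  # "user" is bound only temporarily
    tmp_log.info("inside")                    # event carries request_id and user

log.info("outside")                           # request_id remains, user is gone
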
def test_bind(self, log):
    log = log.bind(y=23)
    with tmp_bind(log, x=42, y='foo') as tmp_log:
        assert (
            {'y': 'foo', 'x': 42}
            == tmp_log._context._dict
            == log._context._dict
        )
    assert {'y': 23} == log._context._dict
    assert "y=23 event='foo'" == log.msg('foo')

def test_bind(self, log): """ tmp_bind does not modify the thread-local state. """ log = log.bind(y=23) with tmp_bind(log, x=42, y='foo') as tmp_log: assert { 'y': 'foo', 'x': 42 } == tmp_log._context._dict == log._context._dict assert {'y': 23} == log._context._dict
def wrapper(state: State) -> Process:
    with tmp_bind(logger, func=func.__qualname__) as log:
        step_in_inject_args = inject_args(func)
        try:
            with transactional(db, log):
                result = step_in_inject_args(state)
            return Success(result)
        except Exception as ex:
            return Waiting(ex)

def test_bind(self, log): """ tmp_bind does not modify the thread-local state. """ log = log.bind(y=23) with tmp_bind(log, x=42, y="foo") as tmp_log: assert ({ "y": "foo", "x": 42 } == tmp_log._context._dict == log._context._dict) assert {"y": 23} == log._context._dict
def test_bind(self, log): """ tmp_bind does not modify the thread-local state. """ log = log.bind(y=23) with tmp_bind(log, x=42, y="foo") as tmp_log: assert ( {"y": "foo", "x": 42} == tmp_log._context._dict == log._context._dict ) assert {"y": 23} == log._context._dict
def test_bind_exc(self, log):
    """
    tmp_bind cleans up properly on exceptions.
    """
    log = log.bind(y=23)
    with pytest.raises(ValueError):
        with tmp_bind(log, x=42, y='foo') as tmp_log:
            assert {
                'y': 'foo', 'x': 42
            } == tmp_log._context._dict == log._context._dict
            raise ValueError
    assert {'y': 23} == log._context._dict

def wrapper(state: State) -> Process:
    with tmp_bind(logger, func=func.__qualname__) as log:
        step_in_inject_args = inject_args(func)
        try:
            with transactional(db, log):
                result = step_in_inject_args(state)
            return Success(result)
        except Exception as ex:
            log.warning("Step failed", exc_info=ex)
            return Failed(ex)

def test_bind_exc(self, log):
    """
    tmp_bind cleans up properly on exceptions.
    """
    log = log.bind(y=23)
    with pytest.raises(ValueError):
        with tmp_bind(log, x=42, y="foo") as tmp_log:
            assert (
                {"y": "foo", "x": 42}
                == tmp_log._context._dict
                == log._context._dict
            )
            raise ValueError
    assert {"y": 23} == log._context._dict

def build_combined_dataset_from_sources(
    target_dataset_cls: Type[dataset_base.DatasetBase],
    feature_definition_config: FeatureDataSourceMap,
    filters: List[dataset_filter.DatasetFilter] = None,
):
    """Builds a combined dataset from a feature definition.

    Args:
        target_dataset_cls: Target dataset class.
        feature_definition_config: Dictionary mapping an output field to the
            data sources that will be used to pull values from.
        filters: A list of dataset filters applied to the datasets before
            assembling features.
    """
    loaded_data_sources = load_data_sources(feature_definition_config)

    # Convert data sources to instances of `target_data_cls`.
    intermediate_datasets = {
        data_source_cls: target_dataset_cls.build_from_data_source(source)
        for data_source_cls, source in loaded_data_sources.items()
    }

    # Apply filters to datasets.
    for key in intermediate_datasets:
        dataset = intermediate_datasets[key]
        for data_filter in filters or []:
            dataset = data_filter.apply(dataset)
        intermediate_datasets[key] = dataset

    # Build feature columns from feature_definition_config.
    data = pd.DataFrame({})
    # structlog makes it very easy to bind extra attributes to `log` as it is
    # passed down the stack.
    log = structlog.get_logger()
    for field, data_source_classes in feature_definition_config.items():
        for data_source_cls in data_source_classes:
            dataset = intermediate_datasets[data_source_cls]
            with tmp_bind(log, dataset_name=data_source_cls.SOURCE_NAME, field=field) as log:
                try:
                    data = dataset_utils.fill_fields_with_data_source(
                        log,
                        data,
                        dataset.data,
                        target_dataset_cls.INDEX_FIELDS,
                        [field],
                    )
                except Exception:
                    log.exception("trying to fill fields")
                    raise

    return target_dataset_cls(data)

def _process_one_task_mapping_work_item(
    work_item: ProjectTaskMappingWorkItem,
    evg_api: EvergreenApi,
    mongo: MongoWrapper,
    after_date: datetime,
) -> None:
    """
    Process a task mapping work item.

    :param work_item: Task mapping to create.
    :param evg_api: An instance of the evg_api client.
    :param mongo: An instance of MongoWrapper.
    :param after_date: The date at which to start analyzing commits of the project.
    """
    with tmp_bind(LOGGER, project=work_item.project, evergreen_module=work_item.module) as log:
        log.info("Starting task mapping work item processing for work_item")
        if _seed_task_mappings_for_project(evg_api, mongo, work_item, after_date, log):
            work_item.complete(mongo.task_mappings_queue())

def _build_dataframe(
    feature_definitions: Mapping[str, List[str]],
    datasource_dataframes: Mapping[str, pd.DataFrame],
    override=Override.BY_TIMESERIES,
) -> pd.DataFrame:
    # structlog makes it very easy to bind extra attributes to `log` as it is
    # passed down the stack.
    log = structlog.get_logger()

    # These are columns that are expected to have a single value for each FIPS.
    # Get the columns from every row of each data source and then keep one of
    # each unique row.
    preserve_columns = [
        CommonFields.AGGREGATE_LEVEL,
        CommonFields.STATE,
        CommonFields.COUNTRY,
        CommonFields.COUNTY,
    ]
    all_identifiers = pd.concat(
        df.reset_index().loc[:, [CommonFields.FIPS] + list(df.columns.intersection(preserve_columns))]
        for df in datasource_dataframes.values()
    ).drop_duplicates()
    # Make a DataFrame with a unique FIPS index. If multiple rows are found with
    # the same FIPS then there are rows in the input data sources that have
    # different values for county name, state etc.
    fips_indexed = all_identifiers.set_index(CommonFields.FIPS, verify_integrity=True)

    # Inspired by pd.Series.combine_first(). Create a new index which is a union
    # of all the input dataframe indexes.
    dataframes = list(datasource_dataframes.values())
    new_index = dataframes[0].index
    for df in dataframes[1:]:
        new_index = new_index.union(df.index)
    # Override.BY_ROW needs to preserve the rows of the input dataframes. If not
    # going BY_ROW, reindex the inputs now to avoid reindexing for each field below.
    if override is not Override.BY_ROW:
        datasource_dataframes = {
            name: df.reindex(new_index, copy=False)
            for name, df in datasource_dataframes.items()
        }

    # Build feature columns from feature_definitions.
    data = pd.DataFrame(index=new_index)
    for field_name, data_source_names in feature_definitions.items():
        log.info("Working field", field=field_name)
        field_out = None
        # Go through the data sources, starting with the highest priority.
        for datasource_name in reversed(data_source_names):
            with tmp_bind(log, dataset_name=datasource_name, field=field_name) as log:
                datasource_field_in = datasource_dataframes[datasource_name][field_name]
                if field_out is None:
                    # Copy all values from the highest priority input to the output.
                    field_out = datasource_field_in
                elif override == Override.BY_TIMESERIES:
                    keep_higher_priority = field_out.groupby(
                        level=[CommonFields.FIPS]
                    ).transform(lambda x: x.notna().any())
                    # Copy from datasource_field_in only on rows where all rows of
                    # field_out with that FIPS are NaN.
                    field_out = field_out.where(keep_higher_priority, datasource_field_in)
                elif override == Override.BY_TIMESERIES_POINT:
                    # Copy from datasource_field_in only on rows where field_out is NaN.
                    field_out = field_out.where(pd.notna(field_out), datasource_field_in)
                else:
                    assert override == Override.BY_ROW
                    # Copy from datasource_field_in rows that are not yet in field_out.
                    this_not_in_result = ~datasource_field_in.index.isin(field_out.index)
                    field_out = field_out.append(datasource_field_in.loc[this_not_in_result])
        dups = field_out.index.duplicated(keep=False)
        if dups.any():
            log.error("Found duplicates in index")
            raise ValueError()  # This is bad, somehow the input /still/ has duplicates
        data.loc[:, field_name] = field_out

    if not fips_indexed.empty:
        # See https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#joining-with-two-multiindexes
        data = data.join(fips_indexed, on=["fips"], how="left")

    return data

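A toy illustration (made-up values, not from the function above) of the Override.BY_TIMESERIES_POINT branch: lower-priority values only fill the rows where the higher-priority series is NaN.

import numpy as np
import pandas as pd

# Hypothetical series standing in for field_out (higher priority) and
# datasource_field_in (lower priority).
field_out = pd.Series([1.0, np.nan, 3.0])
datasource_field_in = pd.Series([9.0, 9.0, 9.0])

merged = field_out.where(pd.notna(field_out), datasource_field_in)
# merged is [1.0, 9.0, 3.0]: only the NaN row was taken from the lower-priority source.
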
def test_yields_a_new_bound_loggger_if_called_on_lazy_proxy(self, log):
    with tmp_bind(log, x=42) as tmp_log:
        assert "x=42 event='bar'" == tmp_log.msg('bar')
    assert "event='bar'" == log.msg('bar')