def cast_any(self, value, fmt=None):
    """Return ``value`` parsed by ``parse_datetime_day_first``.

    A value that is already an instance of ``self.python_type`` is returned
    untouched. ``fmt`` is accepted but not used by this implementation.
    A ``TypeError`` or ``ValueError`` raised by the parser is re-raised as
    ``InvalidDateType`` through ``raise_with_traceback``.
    """
    already_cast = isinstance(value, self.python_type)
    if already_cast:
        return value
    try:
        parsed = parse_datetime_day_first(value)
    except (TypeError, ValueError) as error:
        raise_with_traceback(InvalidDateType(error))
    else:
        return parsed
def cast_any(self, value, fmt=None):
    """Return the date obtained from ``date_parse(value, dayfirst=True)``.

    A value that is already an instance of ``self.python_type`` is returned
    untouched. ``fmt`` is accepted but not used by this implementation.
    A ``TypeError`` or ``ValueError`` raised while parsing is re-raised as
    ``InvalidDateType`` through ``raise_with_traceback``.
    """
    already_cast = isinstance(value, self.python_type)
    if already_cast:
        return value
    try:
        parsed_day = date_parse(value, dayfirst=True).date()
    except (TypeError, ValueError) as error:
        raise_with_traceback(InvalidDateType(error))
    else:
        return parsed_day
def inner(*args, **kwargs):
    """Call ``a_func`` and re-raise the exceptions listed in ``errors`` as ``GaxError``."""
    try:
        result = a_func(*args, **kwargs)
    # pylint: disable=catching-non-exception
    except tuple(errors) as exception:
        wrapped_error = GaxError('RPC failed', cause=exception)
        raise_with_traceback(wrapped_error)
    else:
        return result
def _get_channel_number(section_name):
    """Return the integer encoded in a trailing ``_channel<NNNN>`` marker.

    ``section_name`` must end with ``_channel<`` followed by exactly four
    digits and ``>``; otherwise a ``RuntimeError`` is raised through
    ``raise_with_traceback``.
    """
    found = re.match(r'^.*_channel<(\d{4})>$', section_name)
    if found is None:
        message = "Unable to detect channel name from \"%s\"" % section_name
        raise_with_traceback(RuntimeError(message))
    else:
        return int(found.group(1))
def _type_check(self, type_constraint, datum, is_input):
    """Type-check a PTransform-related datum against a type constraint.

    Args:
      type_constraint: The constraint handed to ``check_constraint``.
      datum: The object being checked.
      is_input: True if ``datum`` is an input, False if it is an output; only
        used to word the error message.

    Raises:
      TypeCheckError: If ``check_constraint`` raises ``CompositeTypeHintError``
        (its first argument is reused as the message) or
        ``SimpleTypeHintError`` (a descriptive message is built here).
    """
    try:
        check_constraint(type_constraint, datum)
    except CompositeTypeHintError as composite_error:
        raise_with_traceback(TypeCheckError(composite_error.args[0]))
    except SimpleTypeHintError:
        datum_type = 'input' if is_input else 'output'
        details = (datum_type, type_constraint, datum, type(datum))
        error_msg = ("According to type-hint expected %s should be of type %s. "
                     "Instead, received '%s', an instance of type %s."
                     % details)
        raise_with_traceback(TypeCheckError(error_msg))
def _run_pants_with_retry(self, port, retries=3):
    """Runs pants remotely with retry and recovery for nascent executions.

    :param port: Port of the pantsd instance to connect to. It is replaced by
                 the return value of `self._restart_pantsd()` whenever a
                 restart is performed.
    :param retries: Threshold for `attempt`; once `attempt` exceeds it, a
                    recoverable failure is converted into `self.Fallback`.
    :returns: Whatever `self._connect_and_execute(port)` returns.
    :raises self.Fallback: When a recoverable exception occurs and `attempt`
                           is greater than `retries`.
    :raises self.Terminated: When `NailgunClient.NailgunError` is raised by
                             the connection attempt.
    """
    attempt = 1
    while 1:
        logger.debug(
            'connecting to pantsd on port {} (attempt {}/{})'.format(port, attempt, retries)
        )
        try:
            return self._connect_and_execute(port)
        except self.RECOVERABLE_EXCEPTIONS as e:
            if attempt > retries:
                raise self.Fallback(e)

            self._backoff(attempt)
            logger.warn(
                'pantsd was unresponsive on port {}, retrying ({}/{})'
                .format(port, attempt, retries)
            )

            # One possible cause of the daemon being non-responsive during an attempt might be if a
            # another lifecycle operation is happening concurrently (incl teardown). To account for
            # this, we won't begin attempting restarts until at least 1 second has passed (1 attempt).
            if attempt > 1:
                port = self._restart_pantsd()
            attempt += 1
        except NailgunClient.NailgunError as e:
            # Ensure a newline.
            logger.fatal('')
            logger.fatal('lost active connection to pantsd!')
            # `raise_with_traceback` expects an exception instance (its optional
            # second argument is a traceback), so build the `Terminated`
            # instance with the message instead of passing class and message
            # as two separate arguments.
            raise_with_traceback(
                self.Terminated(
                    'abruptly lost active connection to pantsd runner: {!r}'.format(e)
                )
            )
def connect(self):
    """
    Connect this `Duct` instance to its service via `Duct.connect`.

    A `DuctServerUnreachable` error is re-raised as is. The first
    `DuctAuthenticationError` is retried once, but only when `self.smartcards`
    is truthy and `self.prepare_smartcards()` returns a truthy value;
    otherwise it is re-raised.

    Returns:
        `Duct` instance: A reference to the current object.
    """
    try:
        Duct.connect(self)
    except DuctServerUnreachable as unreachable:
        raise_with_traceback(unreachable)
    except DuctAuthenticationError as auth_error:
        can_retry = self.smartcards and self.prepare_smartcards()
        if not can_retry:
            raise_with_traceback(auth_error)
        else:
            Duct.connect(self)
    return self
def jsonify(obj, pretty=False):
    """
    Serialize a nested object to a JSON string.

    Parameters
    ----------
    obj : dict
        Any kind of dictionary structure.
    pretty : bool, optional
        When true, keys are sorted and the output is indented by two spaces;
        otherwise the most compact separators are used (default False).

    Raises
    ------
    TypeError, ValueError
        Re-raised from ``json.dumps`` after logging the incompatible types.

    """
    params = {"allow_nan": False, "ensure_ascii": False}
    if pretty:
        params.update(sort_keys=True, indent=2, separators=(",", ": "))
    else:
        params.update(sort_keys=False, indent=None, separators=(",", ":"))
    try:
        serialized = json.dumps(obj, **params)
    except (TypeError, ValueError) as error:
        LOGGER.critical(
            "The memote result structure is incompatible with the JSON "
            "standard.")
        log_json_incompatible_types(obj)
        raise_with_traceback(error)
    else:
        return serialized
def __call__(self, *args, **kwargs):
    """Invoke the wrapped route while recording a ``PageView`` for the request.

    When the ``INDEXING_ENABLED`` config flag is falsy the route is called
    directly and nothing is logged. Otherwise a ``PageView`` row is added to
    the session before the route runs; if the route raises, the session is
    rolled back, an ``ErrorLog`` built from the exception is committed with
    the page view, and the exception is re-raised.

    Returns:
        Whatever ``self._route(*args, **kwargs)`` returns.
    """
    # Short-circuit: no page-view bookkeeping when indexing is disabled.
    if not current_app.config.get('INDEXING_ENABLED', True):
        return self._route(*args, **kwargs)

    log = PageView(
        page=request.full_path,
        endpoint=request.endpoint,
        user_id=current_user.id,
        ip_address=request.remote_addr,
        version=__version__
    )
    errorlog = None
    # extract_objects returns a 4-tuple; the last item says whether the object
    # details must be extracted again once the route has run.
    log.object_id, log.object_type, log.object_action, reextract_after_request = self.extract_objects(*args, **kwargs)
    db_session.add(log)  # Add log here to ensure pageviews are accurate

    try:
        return self._route(*args, **kwargs)
    except Exception as e:
        db_session.rollback()  # Ensure no lingering database changes remain after crashed route
        # The rollback happened after `log` was added above, so add it again.
        db_session.add(log)
        errorlog = ErrorLog.from_exception(e)
        db_session.add(errorlog)
        db_session.commit()
        raise_with_traceback(e)
    finally:
        # Runs on both the success and the failure path.
        # Extract object id and type after response generated (if requested) to ensure
        # most recent data is collected
        if reextract_after_request:
            log.object_id, log.object_type, log.object_action, _ = self.extract_objects(*args, **kwargs)

        # Link the page view to the error log created in the except branch.
        if errorlog is not None:
            log.id_errorlog = errorlog.id
        db_session.add(log)
        db_session.commit()
def inner(*args, **kwargs):
    """Call ``a_func`` and re-raise the exceptions listed in ``to_catch`` via ``errors.create_error``."""
    try:
        result = a_func(*args, **kwargs)
    # pylint: disable=catching-non-exception
    except tuple(to_catch) as exception:
        wrapped_error = errors.create_error("RPC failed", cause=exception)
        utils.raise_with_traceback(wrapped_error)
    else:
        return result
def start_response(status, response_headers, exc_info=None):
    """WSGI ``start_response`` callable: record status and headers for the response.

    Args:
        status: Status string such as ``'200 OK'``.
        response_headers: List of ``(name, value)`` string tuples.
        exc_info: Optional ``sys.exc_info()``-style triple supplied by the
            application when it is reporting an error.

    Returns:
        The enclosing scope's ``write`` callable.

    Raises:
        The exception described by ``exc_info`` when ``headers_sent`` is
        truthy, i.e. it is too late to replace the response.
        AssertionError: If the status or headers are malformed.
    """
    if exc_info:
        try:
            if headers_sent:
                # Re-raise if too late. Raise the original exception value
                # with the traceback the application handed over, instead of
                # wrapping the value in a new instance and losing exc_info[2].
                exc_type, exc_value, exc_tb = exc_info
                if not isinstance(exc_value, BaseException):
                    exc_value = exc_type(exc_value)
                raise_with_traceback(exc_value, exc_tb)
        finally:
            exc_info = None  # avoid dangling circular ref
    else:
        assert not headers_set, 'Headers already set!'

    assert type(status) is str, 'Status must be a string'
    assert len(status) >= 4, 'Status must be at least 4 characters'
    assert int(status[:3]), 'Status must begin with 3-digit code'
    assert status[3] == ' ', 'Status must have a space after code'
    assert type(response_headers) is list, 'Headers must be a list'
    if FCGI_DEBUG:
        logging.debug('response headers:')
        for name, val in response_headers:
            assert type(name) is str, 'Header name "%s" must be a string' % name
            assert type(val) is str, 'Value of header "%s" must be a string' % name
            logging.debug('%s: %s' % (name, val))

    # Mutate in place so the enclosing scope sees the new status/headers.
    headers_set[:] = [status, response_headers]
    return write
def cast_default(self, value):
    """Parse ``value`` using the ``self.ISO8601`` format and return a ``datetime.time``.

    Only the hour, minute and second fields of the parsed value are kept.
    A ``TypeError`` or ``ValueError`` is re-raised as
    ``exceptions.InvalidTimeType`` through ``raise_with_traceback``.
    """
    try:
        parsed = time.strptime(value, self.ISO8601)
        # Fields 3..5 of a struct_time are tm_hour, tm_min and tm_sec.
        return datetime.time(*parsed[3:6])
    except (TypeError, ValueError) as error:
        raise_with_traceback(exceptions.InvalidTimeType(error))
def _execute(self, statement, cursor, wait, session_properties):
    """
    Execute `statement` on a Presto cursor and return that cursor.

    If something goes wrong, `PrestoClient` will attempt to parse the error
    log and present the user with useful debugging information. If that fails,
    the full traceback will be raised instead.

    Parameters:
        statement: The statement passed to `cursor.execute`.
        cursor: An existing cursor to reuse; when falsy a new `presto.Cursor`
            is created from this client's connection attributes.
        wait: When truthy, poll the cursor until its reported state is
            "FINISHED" (or the status is None), reporting progress.
        session_properties: Passed to `presto.Cursor` as `session_props`.

    Returns:
        The cursor on which the statement was executed.
    """
    from pyhive import presto  # Imported here due to slow import performance in Python 3
    from pyhive.exc import DatabaseError  # Imported here due to slow import performance in Python 3
    try:
        cursor = cursor or presto.Cursor(
            host=self.host, port=self.port, username=self.username, password=self.password,
            catalog=self.catalog, schema=self.schema, session_props=session_properties,
            poll_interval=1, source=self.source, protocol=self.server_protocol
        )
        cursor.execute(statement)
        status = cursor.poll()
        if wait:
            logger.progress(0)
            # status None means command executed successfully
            # See https://github.com/dropbox/PyHive/blob/master/pyhive/presto.py#L234
            while status is not None and status['stats']['state'] != "FINISHED":
                # Progress is the share of completed splits, once any splits are reported.
                if status['stats'].get('totalSplits', 0) > 0:
                    pct_complete = round(status['stats']['completedSplits'] / float(status['stats']['totalSplits']), 4)
                    logger.progress(pct_complete * 100)
                status = cursor.poll()
            logger.progress(100, complete=True)
        return cursor
    except (DatabaseError, pandas.io.sql.DatabaseError) as e:
        # Attempt to parse database error, before ultimately reraising the same
        # exception, maintaining the full stacktrace.
        exception, exception_args, traceback = sys.exc_info()

        try:
            message = e.args[0]
            # A string message is expected to embed a dict literal between the
            # first "{" and the last "}"; it is evaluated into a dict.
            if isinstance(message, six.string_types):
                message = ast.literal_eval(re.match("[^{]*({.*})[^}]*$", message).group(1))

            # The reported line number is converted to a 0-based index.
            linenumber = message['errorLocation']['lineNumber'] - 1
            splt = statement.splitlines()
            splt[linenumber] += ' <-- {errorType} ({errorName}) occurred. {message} '.format(**message)
            # Show the offending line with up to one line of context on each side.
            context = '\n\n[Error Context]\n{}\n'.format('\n'.join([splt[l] for l in range(max(linenumber - 1, 0), min(linenumber + 2, len(splt)))]))

            class ErrContext(object):

                def __repr__(self):
                    return context

            # logged twice so that both notebook and console users see the error context
            exception_args.args = [exception_args, ErrContext()]
            logger.error(context)
        except:
            # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit
            # raised while parsing -- confirm this best-effort is intended.
            logger.warn(("Omniduct was unable to parse the database error messages. Refer to the "
                         "traceback below for full error details."))

        # sys.exc_info()[0] is the exception class; instantiate it around the
        # (possibly annotated) exception value before re-raising.
        if isinstance(exception, type):
            exception = exception(exception_args)

        raise_with_traceback(exception, traceback)
def __setattr__(self, name, value):
    """Set a declared parameter with type checking, or a regular attribute.

    Names found in ``self._parameters`` are stored there as a
    ``(value, type)`` pair, but only if ``type(value)`` equals the type
    recorded for that parameter; otherwise a ``TypeError`` is raised through
    ``raise_with_traceback``. Any other name is set as a normal attribute.
    """
    if name in self._parameters.keys():
        declared_type = self._parameters[name][1]
        if declared_type == type(value):
            self._parameters[name] = (value, type(value))
        else:
            message = "Invalid type %s for parameter \"%s\""%(type(value), name)
            raise_with_traceback(TypeError(message))
    else:
        return object.__setattr__(self, name, value)
def reraise():
    """Reraise the current contextmanager exception, if any

    The ``(type, value, traceback)`` triple stored in ``_exc_info`` for the
    current thread identifier is looked up (``nones`` is the fallback). When a
    type is present, the stored exception is raised again with the stored
    traceback.
    """
    typ, val, tb = _exc_info.get(get_ident(), nones)
    if typ:
        try:
            # Re-raise the stored exception itself with its stored traceback;
            # only build a new instance when the stored value is not already
            # an exception object.
            if not isinstance(val, BaseException):
                val = typ(val)
            raise_with_traceback(val, tb)
        finally:
            # Drop the local references to the exception triple.
            del typ, val, tb
def wrapped(*args, **kwargs):
    """Call ``function``; on failure roll back, persist an ``ErrorLog`` and re-raise."""
    try:
        result = function(*args, **kwargs)
    except Exception as error:
        db_session.rollback()
        error_entry = ErrorLog.from_exception(error)
        db_session.add(error_entry)
        db_session.commit()
        raise_with_traceback(error)
    else:
        return result
def raise_critical_error(self, err):
    """
    Log ``err`` at critical level, run the exit hooks, then raise
    ``CriticalApplicationError(err)``.

    The expectation is that the application exits after this.
    """
    self.logger.critical(err)
    self._run_exit_hooks()
    failure = CriticalApplicationError(err)
    raise_with_traceback(failure)
def exception_traceback_example():
    """
    Raise a ``ValueError`` through ``raise_with_traceback``

    >>> exception_traceback_example()
    Traceback (most recent call last):
    ValueError: exceptional
    """
    from future.utils import raise_with_traceback
    error = ValueError('exceptional')
    raise_with_traceback(error)
def wrapper(self, method, args, kwargs):
    """Call ``method(*args, **kwargs)`` and pass its result to ``self._check_type``.

    A ``TypeCheckError`` raised by ``method`` is re-raised as a new
    ``TypeCheckError`` whose message names ``self.full_label``.
    """
    try:
        outcome = method(*args, **kwargs)
    except TypeCheckError as violation:
        error_msg = ('Runtime type violation detected within ParDo(%s): %s'
                     % (self.full_label, violation))
        raise_with_traceback(TypeCheckError(error_msg))
    else:
        return self._check_type(outcome)
def cast_binary(self, value):
    """Return ``value`` unchanged once it is confirmed to decode as base64.

    Raises ``exceptions.InvalidStringType`` when ``self._type_check(value)``
    is falsy, and ``exceptions.InvalidBinary`` (through
    ``raise_with_traceback``) when decoding raises ``binascii.Error``.
    """
    passes_type_check = self._type_check(value)
    if not passes_type_check:
        raise exceptions.InvalidStringType()
    try:
        # Only decodability matters; the decoded bytes are discarded.
        base64.b64decode(value)
    except binascii.Error as decode_error:
        raise_with_traceback(exceptions.InvalidBinary(decode_error))
    return value
def _raise_deferred_exc(self):
    """Raises deferred exceptions from the daemon's synchronous path in the post-fork client.

    Does nothing when `self._deferred_exception` is falsy. A 3-item
    `(type, value, traceback)` tuple is re-raised with its traceback; anything
    that cannot be unpacked into three items is raised as a bare exception.
    """
    if self._deferred_exception:
        try:
            exc_type, exc_value, exc_traceback = self._deferred_exception
        except (TypeError, ValueError):
            # If `_deferred_exception` isn't a 3-item tuple (raising a TypeError, or a ValueError
            # for a wrongly sized sequence, on the above destructuring), treat it like a bare
            # exception.
            raise self._deferred_exception
        # Raised outside the `try` so that a deferred TypeError/ValueError is not mistaken for a
        # destructuring failure and swallowed by the handler above.
        raise_with_traceback(exc_value, exc_traceback)
def cast_default(self, value):
    """Return ``value`` as an instance of ``self.py``, decoding JSON text if needed.

    A value that already is an instance of ``self.py`` is returned as is.
    Otherwise it is decoded with ``json.loads`` and the result must be an
    instance of ``self.py``. Raises ``exceptions.InvalidObjectType`` when the
    decoded value has another type or when decoding raises ``TypeError`` or
    ``ValueError``.
    """
    if isinstance(value, self.py):
        return value
    try:
        decoded = json.loads(value)
        if not isinstance(decoded, self.py):
            raise exceptions.InvalidObjectType()
        return decoded
    except (TypeError, ValueError) as error:
        raise_with_traceback(exceptions.InvalidObjectType(error))
def decode(data, encoding='utf-8'):
    """Decode ``data`` as UTF-8, dropping undecodable bytes unless debugging.

    If strict decoding fails and the ``DEBUG`` environment variable is set,
    the error is re-raised; otherwise a warning is logged and the data is
    decoded again with ``errors='ignore'``.
    """
    assert encoding == 'utf-8', "Only UTF-8 encoding is currently supported."
    if encoding is None:
        return data
    try:
        data = data.decode(encoding)
    except Exception as error:
        debugging = os.environ.get('DEBUG')
        if debugging:
            raise_with_traceback(error)
        logger.warning("An decoding error has occurred... continuing anyway. To capture these errors, rerun the current command prefixed with `DEBUG=1 `.")
        data = data.decode(encoding, errors='ignore')
    return data
def logging_scope(func, *args, **kwargs):
    """Run ``func`` inside a logger scope and report whether it succeeded.

    The scope is entered before the call and always exited afterwards; the
    exit receives False only when ``func`` raised an ``Exception``, which is
    then re-raised.
    """
    logger._scope_enter(name, *wargs, **wkwargs)
    success = True
    try:
        return func(*args, **kwargs)
    except Exception as error:
        success = False
        raise_with_traceback(error)
    finally:
        logger._scope_exit(success)
def _raise_deferred_exc(self):
    """Raises deferred exceptions from the daemon's synchronous path in the post-fork client.

    Does nothing when `self._deferred_exception` is falsy. A 3-item
    `(type, value, traceback)` tuple is re-raised with its traceback; anything
    that cannot be unpacked into three items is raised as a bare exception.
    """
    if self._deferred_exception:
        try:
            # Expect `_deferred_exception` to be a 3-item tuple of the values returned by
            # sys.exc_info(), which lets the original traceback be preserved.
            exc_type, exc_value, exc_traceback = self._deferred_exception
        except (TypeError, ValueError):
            # If `_deferred_exception` isn't a 3-item tuple, treat it like a bare exception.
            # A non-iterable raises TypeError on destructuring; a wrongly sized sequence raises
            # ValueError.
            raise self._deferred_exception
        # Raised outside the `try` so that a deferred ValueError/TypeError is not mistaken for a
        # destructuring failure and swallowed by the handler above.
        raise_with_traceback(exc_type(exc_value), exc_traceback)
def add_input(self, accumulator, element, *args, **kwargs):
    """Type-check ``element`` (when an input hint is set) and delegate to the wrapped CombineFn.

    A ``TypeCheckError`` from the check is re-raised as a new
    ``TypeCheckError`` whose message names ``self._label``.
    """
    if self._input_type_hint:
        try:
            _check_instance_type(
                self._input_type_hint[0][0].tuple_types[1], element,
                'element', True)
        except TypeCheckError as violation:
            error_msg = ('Runtime type violation detected within %s: %s'
                         % (self._label, violation))
            raise_with_traceback(TypeCheckError(error_msg))
    combine_fn = self._combinefn
    return combine_fn.add_input(accumulator, element, *args, **kwargs)
def extract_output(self, accumulator, *args, **kwargs):
    """Delegate to the wrapped CombineFn and type-check its result when an output hint is set.

    A ``TypeCheckError`` from the check is re-raised as a new
    ``TypeCheckError`` whose message names ``self._label``.
    """
    combine_fn = self._combinefn
    result = combine_fn.extract_output(accumulator, *args, **kwargs)
    if not self._output_type_hint:
        return result
    try:
        _check_instance_type(
            self._output_type_hint.tuple_types[1], result, None, True)
    except TypeCheckError as violation:
        error_msg = ('Runtime type violation detected within %s: %s'
                     % (self._label, violation))
        raise_with_traceback(TypeCheckError(error_msg))
    return result
def _get_attr(self, attr_name, default_value=None):
    """Return ``self.attrs[attr_name]``, falling back to ``default_value``.

    When the key is missing and ``default_value`` is None, an
    ``IpcMessageException`` is raised through ``raise_with_traceback``.
    """
    try:
        return self.attrs[attr_name]
    except KeyError:
        if default_value is not None:
            return default_value
        raise_with_traceback(IpcMessageException("Missing attribute " + attr_name))
def cast_uuid(self, value):
    """Return ``value`` unchanged if ``uuid.UUID`` accepts it.

    Raises ``exceptions.InvalidStringType`` when ``self._type_check(value)``
    is falsy, and ``exceptions.InvalidUUID`` (through
    ``raise_with_traceback``) when ``uuid.UUID`` raises ``ValueError``.
    """
    passes_type_check = self._type_check(value)
    if not passes_type_check:
        message = '{0} is not of type {1}'.format(value, self.py)
        raise exceptions.InvalidStringType(message)
    try:
        # Only construction matters; the UUID object itself is discarded.
        uuid.UUID(value, version=4)
    except ValueError as error:
        raise_with_traceback(exceptions.InvalidUUID(error))
    else:
        return value
def get_param(self, param_name, default_value=None):
    """Return ``self.attrs['params'][param_name]``, falling back to ``default_value``.

    When the lookup raises ``KeyError`` and ``default_value`` is None, an
    ``IpcMessageException`` is raised through ``raise_with_traceback``.
    """
    try:
        return self.attrs['params'][param_name]
    except KeyError:
        if default_value is not None:
            return default_value
        raise_with_traceback(IpcMessageException("Missing parameter " + param_name))
def two_dim_dict_param(obj, param_name, key_type=string_types, value_type=None):
    """Check that ``obj`` is a dict, then validate it with ``_check_two_dim_key_value_types``.

    A non-dict ``obj`` raises the exception built by
    ``_param_type_mismatch_exception`` through ``raise_with_traceback``.
    Returns the result of ``_check_two_dim_key_value_types``.
    """
    is_dict = isinstance(obj, dict)
    if not is_dict:
        mismatch = _param_type_mismatch_exception(obj, dict, param_name)
        raise_with_traceback(mismatch)
    return _check_two_dim_key_value_types(obj, key_type, param_name, value_type)
def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
    """Encode a mapping or a sequence of two-element tuples as a URL query string.

    With ``doseq`` true, a value that is a sequence (but neither bytes nor
    str) contributes one ``key=element`` parameter per element. For a
    sequence of tuples, the output keeps the order of the input.

    Keys and values may be bytes or any other object; non-bytes items are
    converted with ``str`` and quoted by ``quote_plus`` using ``safe``,
    ``encoding`` and ``errors``.

    Raises ``TypeError`` when ``query`` is neither a mapping nor a sequence
    whose first element is a tuple.
    """
    if hasattr(query, "items"):
        query = query.items()
    else:
        # Strings and string-like objects are sequences too, so they need to
        # be rejected explicitly.
        try:
            # len() fails for non-sequences, and a non-empty sequence must
            # start with a tuple (which rules out non-empty strings).
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # Zero-length sequences of any type pass this check; that is kept
            # for consistency with the behaviour for empty dicts.
        except TypeError:
            tb = sys.exc_info()[2]
            raise_with_traceback(
                TypeError("not a valid non-string sequence "
                          "or mapping object"), tb)

    def _quote(item):
        # bytes are quoted as they are; everything else goes through str().
        if isinstance(item, bytes):
            return quote_plus(item, safe)
        return quote_plus(str(item), safe, encoding, errors)

    pairs = []
    for key, value in query:
        quoted_key = _quote(key)
        if not doseq or isinstance(value, bytes):
            pairs.append(quoted_key + '=' + _quote(value))
        elif isinstance(value, str):
            pairs.append(quoted_key + '=' + quote_plus(value, safe, encoding, errors))
        else:
            try:
                # len() is used as the test for "is this a sequence?".
                len(value)
            except TypeError:
                # not a sequence
                pairs.append(quoted_key + '=' + _quote(value))
            else:
                # one parameter per element of the sequence
                for element in value:
                    pairs.append(quoted_key + '=' + _quote(element))
    return str('&').join(pairs)
def numeric_param(obj: Any, param_name: str) -> Numeric:
    """Return ``obj`` unchanged if it is an ``int`` or ``float``.

    Otherwise the exception built by ``_param_type_mismatch_exception`` is
    raised through ``raise_with_traceback``.
    """
    is_numeric = isinstance(obj, (int, float))
    if not is_numeric:
        mismatch = _param_type_mismatch_exception(obj, (int, float), param_name)
        raise_with_traceback(mismatch)
    return obj
def failed(desc: str) -> NoReturn:  # type: ignore[misc]
    """Unconditionally raise a ``CheckError`` describing the failure condition ``desc``.

    A non-string ``desc`` raises a ``CheckError`` about the argument itself.
    """
    desc_is_str = isinstance(desc, str)
    if not desc_is_str:
        raise_with_traceback(CheckError("desc argument must be a string"))

    message = "Failure condition: {desc}".format(desc=desc)
    raise_with_traceback(CheckError(message))
def subclass(obj, superclass, desc=None):
    """Return ``obj`` unchanged if it is a subclass of ``superclass``.

    Otherwise the error built by ``_type_mismatch_error`` is raised through
    ``raise_with_traceback``.
    """
    is_subclass = issubclass(obj, superclass)
    if not is_subclass:
        mismatch = _type_mismatch_error(obj, superclass, desc)
        raise_with_traceback(mismatch)
    return obj
def _score_population(predictions_location, cf_dir_location):
    """
    Scores estimations of treatment effect size over the population.

    Args:
        predictions_location (str): Path to a single tabular file where the effect estimations are located.
                                    Files must be of tabular format
                                     * containing 4 columns: HEADER_POP_IDX, HEADER_EFFECT_SIZE, HEADER_CI_LEFT,
                                       HEADER_CI_RIGHT.
                                     * delimited by TABULAR_DELIMITER.
                                     * have FILENAME_EXTENSION extension to them.
                                    These global variables specified above can be changed when importing the module.
        cf_dir_location (str): Path to a directory containing the counter-factual files (i.e. labeled, ground-truth
                               data).
                               Files must be of tabular format
                                * containing 3 columns: HEADER_IND_IDX, HEADER_Y1, HEADER_Y0.
                                * delimited by TABULAR_DELIMITER.
                                * have the suffix specified in COUNTERFACTUAL_FILE_SUFFIX.
                                * have FILENAME_EXTENSION extension to them.
                               These global variables specified above can be changed when importing the module.

    Returns:
        pd.Series: Scores. Where Series' Index is the metric name and the value is the evaluation of that metric.
                   The metrics are "enormse", "rmse", "bias", "coverage", "encis" and "cic", followed by one
                   "enormse_<size>" entry per distinct dataset size.

    Raises:
        AssertionError: If a ground-truth file has no matching row in the predictions file.
    """
    # Dataset identifiers are the ground-truth file names with the suffix and extension split off.
    ufids = os.listdir(cf_dir_location)
    ufids = [
        f.rsplit(COUNTERFACTUAL_FILE_SUFFIX + FILENAME_EXTENSION)[0]
        for f in ufids
        if f.lower().endswith(COUNTERFACTUAL_FILE_SUFFIX + FILENAME_EXTENSION)
    ]

    # Gather scoring statistics:
    ratio = pd.Series(index=ufids, name="population_ratio")
    bias = pd.Series(index=ufids, name="population_bias")
    ci_size = pd.Series(index=ufids, name="population_ci-size")
    coverage = pd.Series(data=False,
                         index=ufids,
                         dtype=np.dtype(bool),
                         name="population_coverage")

    # Get data:
    # HEADER_POP_IDX | HEADER_EFFECT_SIZE | HEADER_CI_LEFT | HEADER_CI_RIGHT
    estimates = pd.read_csv(predictions_location,
                            index_col=HEADER_POP_IDX,
                            sep=TABULAR_DELIMITER)
    # Every ground-truth dataset must have a prediction row.
    if set(ufids) - set(estimates.index):
        raise_with_traceback(
            AssertionError(
                "Seems there are ground-truth files with no corresponding predictions\n"
                "Unmatched files are:\n" + "\n".join(
                    [str(i) for i in (set(ufids) - set(estimates.index))])))

    true_effects = pd.Series(index=ufids)
    dataset_sizes = pd.Series(index=ufids, name="size")

    for ufid in ufids:
        # Get the true effect:
        gt = pd.read_csv(os.path.join(
            cf_dir_location,
            ufid + COUNTERFACTUAL_FILE_SUFFIX + FILENAME_EXTENSION),
                         sep=TABULAR_DELIMITER)
        true_effect = np.mean(gt[HEADER_Y1] - gt[HEADER_Y0])
        true_effects[ufid] = true_effect

        # Get the population estimates: | HEADER_EFFECT_SIZE | HEADER_CI_LEFT | HEADER_CI_RIGHT |
        estimate = estimates.loc[
            ufid, :]  # No need to "try:" due to content assertion above

        # Calculate the sufficient statistics:
        ratio[ufid] = (estimate[HEADER_EFFECT_SIZE] +
                       EPSILON) / (true_effect + EPSILON)
        bias[ufid] = estimate[HEADER_EFFECT_SIZE] - true_effect
        ci_size[ufid] = estimate[HEADER_CI_RIGHT] - estimate[
            HEADER_CI_LEFT]  # right - left -> non-negative
        coverage[ufid] = estimate[HEADER_CI_LEFT] <= true_effect <= estimate[
            HEADER_CI_RIGHT]

        # Save the size of the current dataset:
        dataset_sizes[ufid] = gt.index.size

    dataset_sizes = dataset_sizes.astype(int)  # type: pd.Series

    # Calculate metrics
    enormse = 1.0 - ratio  # type: pd.Series
    encis = ci_size / (true_effects.abs() + EPSILON)  # type: pd.Series
    cic = bias.abs() / ci_size

    # Aggregate by sizes:
    enormse_by_size = enormse.pow(2).groupby(by=dataset_sizes).mean().pow(0.5)
    rmse_by_size = bias.pow(2).groupby(by=dataset_sizes).mean().pow(0.5)
    bias_by_size = bias.groupby(by=dataset_sizes).mean()
    coverage_by_size = coverage.groupby(by=dataset_sizes).mean()
    encis_by_size = encis.groupby(by=dataset_sizes).mean()
    cic_by_size = cic.groupby(by=dataset_sizes).mean()

    results = pd.Series()
    if dataset_sizes.nunique() == 1:
        # return the by_sizes, they are enough since there's one size so just extract the scalar value they hold
        results["enormse"] = enormse_by_size.iloc[0]
        results["rmse"] = rmse_by_size.iloc[0]
        results["bias"] = bias_by_size.iloc[0]
        results["coverage"] = coverage_by_size.iloc[0]
        results["encis"] = encis_by_size.iloc[0]
        results["cic"] = cic_by_size.iloc[0]
    else:
        # weighted_sum = lambda x, w: x.mul(w).sum() / w.sum()
        def weighted_sum(x, w):
            return x.mul(w).sum() / w.sum()

        # Calculate the Weights for aggregation:
        weights = __get_weights(dataset_sizes)
        # Aggregate
        results["enormse"] = np.sqrt(
            weighted_sum(enormse_by_size.pow(2), weights))
        results["rmse"] = np.sqrt(weighted_sum(rmse_by_size.pow(2), weights))
        results["bias"] = weighted_sum(bias_by_size, weights)
        results["coverage"] = weighted_sum(coverage_by_size, weights)
        results["encis"] = weighted_sum(encis_by_size, weights)
        results["cic"] = weighted_sum(cic_by_size, weights)
    # NOTE(review): Series.append was removed in pandas 2.0 -- confirm the pandas version in use.
    results = results.append(enormse_by_size.add_prefix("enormse_"))
    return results
def opt_nonempty_str_param(obj, param_name, default=None):
    """Return ``obj`` if it is a non-empty string, or ``default`` for None / ``''``.

    Any other non-None value that is not an instance of ``string_types``
    raises the exception built by ``_param_type_mismatch_exception``.
    """
    if obj is None:
        return default
    if not isinstance(obj, string_types):
        mismatch = _param_type_mismatch_exception(obj, str, param_name)
        raise_with_traceback(mismatch)
    return default if obj == '' else obj
def opt_int_param(obj, param_name, default=None):
    """Return ``obj`` if it is an instance of ``integer_types``, or ``default`` when it is None.

    Any other value raises the exception built by
    ``_param_type_mismatch_exception`` through ``raise_with_traceback``.
    """
    if obj is None:
        return default
    if not isinstance(obj, integer_types):
        mismatch = _param_type_mismatch_exception(obj, int, param_name)
        raise_with_traceback(mismatch)
    return obj
def int_param(obj, param_name):
    """Return ``obj`` unchanged if it is an instance of ``integer_types``.

    Otherwise the exception built by ``_param_type_mismatch_exception`` is
    raised through ``raise_with_traceback``.
    """
    is_integer = isinstance(obj, integer_types)
    if not is_integer:
        mismatch = _param_type_mismatch_exception(obj, int, param_name)
        raise_with_traceback(mismatch)
    return obj
def opt_callable_param(obj, param_name, default=None):
    """Return ``obj`` if it is callable, or ``default`` when it is None.

    A non-None, non-callable value raises the exception built by
    ``_not_callable_exception`` through ``raise_with_traceback``.
    """
    if obj is None:
        return default
    if not callable(obj):
        raise_with_traceback(_not_callable_exception(obj, param_name))
    return obj
def callable_param(obj, param_name):
    """Return ``obj`` unchanged if it is callable.

    Otherwise the exception built by ``_not_callable_exception`` is raised
    through ``raise_with_traceback``.
    """
    is_callable = callable(obj)
    if not is_callable:
        not_callable = _not_callable_exception(obj, param_name)
        raise_with_traceback(not_callable)
    return obj
def inst_param(obj, param_name, ttype, additional_message=None):
    """Return ``obj`` unchanged if it is an instance of ``ttype``.

    Otherwise the exception built by ``_param_type_mismatch_exception``
    (which also receives ``additional_message``) is raised through
    ``raise_with_traceback``.
    """
    is_instance = isinstance(obj, ttype)
    if not is_instance:
        mismatch = _param_type_mismatch_exception(
            obj, ttype, param_name, additional_message=additional_message)
        raise_with_traceback(mismatch)
    return obj
def not_implemented(desc):
    """Unconditionally raise ``NotImplementedCheckError`` mentioning ``desc``.

    A ``desc`` rejected by ``is_str`` raises a ``CheckError`` about the
    argument itself.
    """
    desc_is_str = is_str(desc)
    if not desc_is_str:
        raise_with_traceback(CheckError("desc argument must be a string"))

    message = "Not implemented: {desc}".format(desc=desc)
    raise_with_traceback(NotImplementedCheckError(message))
def tuple_param(obj, param_name):
    """Return ``obj`` unchanged if it is a ``tuple``.

    Otherwise the exception built by ``_param_type_mismatch_exception`` is
    raised through ``raise_with_traceback``.
    """
    is_tuple = isinstance(obj, tuple)
    if not is_tuple:
        mismatch = _param_type_mismatch_exception(obj, tuple, param_name)
        raise_with_traceback(mismatch)
    return obj
def bool_param(obj, param_name):
    """Return ``obj`` unchanged if it is a ``bool``.

    Otherwise the exception built by ``_param_type_mismatch_exception`` is
    raised through ``raise_with_traceback``.
    """
    is_bool = isinstance(obj, bool)
    if not is_bool:
        mismatch = _param_type_mismatch_exception(obj, bool, param_name)
        raise_with_traceback(mismatch)
    return obj
def opt_int_param(obj, param_name):
    """Return ``obj`` unchanged if it is None or an ``int``.

    Otherwise the exception built by ``_param_type_mismatch_exception`` is
    raised through ``raise_with_traceback``.
    """
    is_acceptable = obj is None or isinstance(obj, int)
    if not is_acceptable:
        mismatch = _param_type_mismatch_exception(obj, int, param_name)
        raise_with_traceback(mismatch)
    return obj
def float_param(obj, param_name):
    """Return ``obj`` unchanged if it is a ``float``.

    Otherwise the exception built by ``_param_type_mismatch_exception`` is
    raised through ``raise_with_traceback``.
    """
    is_float = isinstance(obj, float)
    if not is_float:
        mismatch = _param_type_mismatch_exception(obj, float, param_name)
        raise_with_traceback(mismatch)
    return obj
def inst(obj, ttype, desc=None):
    """Return ``obj`` unchanged if it is an instance of ``ttype``.

    Otherwise the error built by ``_type_mismatch_error`` is raised through
    ``raise_with_traceback``.
    """
    is_instance = isinstance(obj, ttype)
    if not is_instance:
        mismatch = _type_mismatch_error(obj, ttype, desc)
        raise_with_traceback(mismatch)
    return obj
def str_param(obj, param_name):
    """Return ``obj`` unchanged if ``_is_str`` accepts it.

    Otherwise the exception built by ``_param_type_mismatch_exception`` is
    raised through ``raise_with_traceback``.
    """
    is_string = _is_str(obj)
    if not is_string:
        mismatch = _param_type_mismatch_exception(obj, str, param_name)
        raise_with_traceback(mismatch)
    return obj
def opt_numeric_param(obj, param_name):
    """Return ``obj`` unchanged if it is None, an ``int`` or a ``float``.

    Otherwise the exception built by ``_param_type_mismatch_exception`` is
    raised through ``raise_with_traceback``.
    """
    is_acceptable = obj is None or isinstance(obj, (int, float))
    if not is_acceptable:
        mismatch = _param_type_mismatch_exception(obj, (int, float), param_name)
        raise_with_traceback(mismatch)
    return obj
def _score_individual(predictions_location, cf_dir_location):
    """
    Scores estimations of treatment effect size on individuals (i.e. the prediction of both outcome under no treatment
    and outcome under positive treatment for each individual).

    Args:
        predictions_location (str): Path to a directory containing tabular files with individual effect estimations
                                    (i.e. prediction of factual and counterfactual outcomes for each individual).
                                    Files must be of tabular format
                                     * containing 3 columns: HEADER_IND_IDX, HEADER_Y1, HEADER_Y0.
                                     * delimited by TABULAR_DELIMITER.
                                     * have FILENAME_EXTENSION extension to them.
                                    These global variables specified above can be changed when importing the module.
        cf_dir_location (str): Path to a directory containing the counter-factual files (i.e. labeled, ground-truth
                               data).
                               Files must be of tabular format
                                * containing 3 columns: HEADER_IND_IDX, HEADER_Y1, HEADER_Y0.
                                * delimited by TABULAR_DELIMITER.
                                * have the suffix specified in COUNTERFACTUAL_FILE_SUFFIX.
                                * have FILENAME_EXTENSION extension to them.
                               These global variables specified above can be changed when importing the module.

    Returns:
        pd.Series: Scores. Where Series' Index is the metric name and the value is the evaluation of that metric.
                   The metrics are "enormse", "rmse" and "bias", followed by one "enormse_<size>" entry per
                   distinct dataset size.
    """
    # Dataset identifiers are the prediction file names with the extension split off.
    ufids = os.listdir(predictions_location)
    ufids = [
        f.rsplit(FILENAME_EXTENSION)[0] for f in ufids
        if f.lower().endswith(FILENAME_EXTENSION)
    ]
    enormse = pd.Series(index=ufids, name="individual_enormse")
    rmse = pd.Series(index=ufids, name="individual_rmse")
    bias = pd.Series(index=ufids, name="individual_bias")
    dataset_sizes = pd.Series(index=ufids, name="size")

    for ufid in ufids:
        # Get the true effect:
        gt = pd.read_csv(os.path.join(
            cf_dir_location,
            ufid + COUNTERFACTUAL_FILE_SUFFIX + FILENAME_EXTENSION),
                         index_col=HEADER_IND_IDX,
                         sep=TABULAR_DELIMITER)
        true_effect = gt[HEADER_Y1] - gt[HEADER_Y0]

        # Get estimated effect: submission format: N rows: patient_ID | Y0 | Y1
        try:
            estimates = pd.read_csv(os.path.join(predictions_location,
                                                 ufid + FILENAME_EXTENSION),
                                    index_col=HEADER_IND_IDX,
                                    sep=TABULAR_DELIMITER)
        except IOError as e:  # Python 2 compatible for FileNotFoundError
            # Extend the first exception argument with a hint, then re-raise the same exception.
            # NOTE(review): e.args[0] is concatenated with a str; if it is an errno integer this
            # line itself raises TypeError -- confirm.
            e.args = (
                e.args[0] + "\n\t" + "A prediction might be missing.\n"
                "Seems that the file ({fn}) was found in the ground-truth directory but no "
                "corresponding estimation was found in your "
                "predictions.".format(fn=ufid), ) + e.args[1:]
            raise_with_traceback(e)
        estimated_effect = estimates[HEADER_Y1] - estimates[HEADER_Y0]

        # Evaluate:
        individual_bias = estimated_effect - true_effect
        bias[ufid] = individual_bias.mean()
        # Stored as the mean squared error; the square root is taken after grouping by size.
        rmse[ufid] = individual_bias.pow(2).mean()
        # enormse[ufid] = np.mean((individual_bias) / (true_effect + EPSILON) ** 2)
        enormse[ufid] = np.mean(
            (1 - ((estimated_effect + EPSILON) / (true_effect + EPSILON)))**2)

        # Save the size of the current dataset:
        dataset_sizes[ufid] = gt.index.size

    dataset_sizes = dataset_sizes.astype(int)  # type: pd.Series
    enormse_by_size = enormse.groupby(by=dataset_sizes).mean().pow(0.5)
    rmse_by_size = rmse.groupby(by=dataset_sizes).mean().pow(0.5)
    bias_by_size = bias.groupby(by=dataset_sizes).mean()

    results = pd.Series()
    if dataset_sizes.nunique() == 1:
        # A single dataset size: the per-size aggregates hold exactly one value each.
        results["enormse"] = enormse_by_size.iloc[0]
        results["rmse"] = rmse_by_size.iloc[0]
        results["bias"] = bias_by_size.iloc[0]
    else:
        # Several dataset sizes: combine the per-size aggregates as a weighted average.
        weights = __get_weights(dataset_sizes)
        results["enormse"] = np.sqrt(
            enormse_by_size.pow(2).mul(weights).sum() / weights.sum())
        results["rmse"] = np.sqrt(
            rmse_by_size.pow(2).mul(weights).sum() / weights.sum())
        results["bias"] = bias_by_size.mul(weights).sum() / weights.sum()
    # NOTE(review): Series.append was removed in pandas 2.0 -- confirm the pandas version in use.
    results = results.append(enormse_by_size.add_prefix("enormse_"))
    return results
def failed(desc):
    """Unconditionally raise a ``CheckError`` describing the failure condition ``desc``.

    A ``desc`` rejected by ``_is_str`` raises a ``CheckError`` about the
    argument itself.
    """
    desc_is_str = _is_str(desc)
    if not desc_is_str:
        raise_with_traceback(CheckError('desc argument must be a string'))

    message = 'Failure condition: {desc}'.format(desc=desc)
    raise_with_traceback(CheckError(message))
def find_direct_metabolites(model, reaction, tolerance=1E-06):
    """
    Return list of possible direct biomass precursor metabolites.

    Direct metabolites are biomass precursors whose reactions all belong to
    the set returned by ``helpers.find_interchange_biomass_reactions``, i.e.
    they take part in no purely metabolic reaction.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    reaction : cobra.Reaction
        The biomass reaction of the model under investigation.
    tolerance : float, optional
        Tolerance below which values will be regarded as zero.

    Returns
    -------
    list
        The result of ``detect_false_positive_direct_metabolites`` for the
        candidate metabolites.

    Raises
    ------
    KeyError, RuntimeError
        If the cytosolic and/or extracellular compartments cannot be
        identified.
    OptimizationError
        If the objective value of the optimized model is within
        ``tolerance`` of zero.

    """
    biomass_rxns = set(helpers.find_biomass_reaction(model))
    tra_bou_bio_rxns = helpers.find_interchange_biomass_reactions(
        model, biomass_rxns)
    compartment_log = ("Failed to properly identify cytosolic and "
                       "extracellular compartments.")
    compartment_error = ("The cytosolic and/or extracellular compartments "
                         "could not be identified.")
    try:
        precursors = find_biomass_precursors(model, reaction)
        main_comp = helpers.find_compartment_id_in_model(model, 'c')
        ext_space = helpers.find_compartment_id_in_model(model, 'e')
    except KeyError:
        LOGGER.error(compartment_log)
        raise_with_traceback(KeyError(compartment_error))
    except RuntimeError:
        LOGGER.error(compartment_log)
        raise_with_traceback(RuntimeError(compartment_error))
    else:
        # Precursors that only occur in interchange and biomass reactions.
        tra_bou_bio_mets = [
            met for met in precursors
            if met.reactions.issubset(tra_bou_bio_rxns)
        ]
        # Their reactions, without the biomass reactions themselves.
        rxns_of_interest = {
            rxn
            for met in tra_bou_bio_mets
            for rxn in met.reactions
            if rxn not in biomass_rxns
        }

    solution = model.optimize(raise_error=True)
    objective_is_zero = np.isclose(solution.objective_value, 0, atol=tolerance)
    if objective_is_zero:
        LOGGER.error("Failed to generate a non-zero objective value with "
                     "flux balance analysis.")
        raise OptimizationError(
            "The flux balance analysis on this model returned an "
            "objective value of zero. Make sure the model can "
            "grow! Check if the constraints are not too strict!")

    tra_bou_bio_fluxes = {rxn: solution[rxn.id] for rxn in rxns_of_interest}
    met_flux_sum = {met: 0 for met in tra_bou_bio_mets}

    return detect_false_positive_direct_metabolites(
        tra_bou_bio_mets, biomass_rxns, main_comp, ext_space,
        tra_bou_bio_fluxes, met_flux_sum)
def subclass_param(obj, param_name, superclass):
    """Return ``obj`` unchanged if it passes ``type_param`` and is a subclass of ``superclass``.

    A failed subclass check raises the exception built by
    ``_param_subclass_mismatch_exception`` through ``raise_with_traceback``.
    """
    type_param(obj, param_name)
    is_subclass = issubclass(obj, superclass)
    if not is_subclass:
        mismatch = _param_subclass_mismatch_exception(obj, superclass, param_name)
        raise_with_traceback(mismatch)
    return obj
def str_param(obj: Any, param_name: str) -> str:
    """Return ``obj`` unchanged if it is a ``str``.

    Otherwise the exception built by ``_param_type_mismatch_exception`` is
    raised through ``raise_with_traceback``.
    """
    is_string = isinstance(obj, str)
    if not is_string:
        mismatch = _param_type_mismatch_exception(obj, str, param_name)
        raise_with_traceback(mismatch)
    return obj
def opt_type_param(obj, param_name, default=None):
    """Return ``obj`` if it is a type, or ``default`` when it is None.

    A non-None value that is not an instance of ``type`` raises the exception
    built by ``_not_type_param_subclass_mismatch_exception``.
    """
    if obj is None:
        return default
    if not isinstance(obj, type):
        mismatch = _not_type_param_subclass_mismatch_exception(obj, param_name)
        raise_with_traceback(mismatch)
    return obj
def int_param(obj: Any, param_name: str) -> int:
    """Return ``obj`` unchanged if it is an ``int``.

    Otherwise the exception built by ``_param_type_mismatch_exception`` is
    raised through ``raise_with_traceback``.
    """
    is_integer = isinstance(obj, int)
    if not is_integer:
        mismatch = _param_type_mismatch_exception(obj, int, param_name)
        raise_with_traceback(mismatch)
    return obj
def type_param(obj, param_name):
    """Return ``obj`` unchanged if it is an instance of ``type``.

    Otherwise the exception built by
    ``_not_type_param_subclass_mismatch_exception`` is raised through
    ``raise_with_traceback``.
    """
    is_type = isinstance(obj, type)
    if not is_type:
        mismatch = _not_type_param_subclass_mismatch_exception(obj, param_name)
        raise_with_traceback(mismatch)
    return obj
def param_invariant(condition, param_name, desc=None):
    """Raise the exception built by ``_param_invariant_exception`` when ``condition`` is falsy."""
    if condition:
        return
    violation = _param_invariant_exception(param_name, desc)
    raise_with_traceback(violation)
def exec_one_test(self, test_name, test_method):
    """Executes one test and update test results.

    Executes setup_test, the test method, and teardown_test; then creates a
    records.TestResultRecord object with the execution information and adds
    the record to the test class's test results.

    Args:
        test_name: string, Name of the test.
        test_method: function, The test method to execute.

    Raises:
        signals.TestAbortSignal: Re-raised after the record has been marked
            as failed.
    """
    tr_record = records.TestResultRecord(test_name, self.TAG)
    tr_record.test_begin()
    self.current_test_info = runtime_test_info.RuntimeTestInfo(
        test_name, self.log_path, tr_record)
    expects.recorder.reset_internal_states(tr_record)
    logging.info('%s %s', TEST_CASE_TOKEN, test_name)
    # Did teardown_test throw an error.
    teardown_test_failed = False
    try:
        try:
            try:
                self._setup_test(test_name)
            except signals.TestFailure as e:
                # A failure signalled during setup is converted into a TestError.
                raise_with_traceback(signals.TestError(
                    e.details, e.extras))
            test_method()
        except signals.TestPass:
            # An explicit pass is propagated without being logged as an exception.
            raise
        except Exception:
            logging.exception('Exception occurred in %s.',
                              self.current_test_name)
            raise
        finally:
            # Teardown always runs; remember the `expects` error count so new
            # errors recorded during teardown can be detected.
            before_count = expects.recorder.error_count
            try:
                self._teardown_test(test_name)
            except signals.TestAbortSignal:
                raise
            except Exception as e:
                logging.exception(e)
                tr_record.test_error()
                tr_record.add_error(STAGE_NAME_TEARDOWN_TEST, e)
                teardown_test_failed = True
            else:
                # Check if anything failed by `expects`.
                if before_count < expects.recorder.error_count:
                    teardown_test_failed = True
    except (signals.TestFailure, AssertionError) as e:
        tr_record.test_fail(e)
    except signals.TestSkip as e:
        # Test skipped.
        tr_record.test_skip(e)
    except signals.TestAbortSignal as e:
        # Abort signals, pass along.
        tr_record.test_fail(e)
        raise e
    except signals.TestPass as e:
        # Explicit test pass.
        tr_record.test_pass(e)
    except Exception as e:
        # Exception happened during test.
        tr_record.test_error(e)
    else:
        # No exception is thrown from test and teardown, if `expects` has
        # error, the test should fail with the first error in `expects`.
        if expects.recorder.has_error and not teardown_test_failed:
            tr_record.test_fail()
        # Otherwise the test passed.
        elif not teardown_test_failed:
            tr_record.test_pass()
    finally:
        tr_record.update_record()
        try:
            # Run the result-specific procedure function for the final result.
            if tr_record.result in (
                    records.TestResultEnums.TEST_RESULT_ERROR,
                    records.TestResultEnums.TEST_RESULT_FAIL):
                self._exec_procedure_func(self._on_fail, tr_record)
            elif tr_record.result == records.TestResultEnums.TEST_RESULT_PASS:
                self._exec_procedure_func(self._on_pass, tr_record)
            elif tr_record.result == records.TestResultEnums.TEST_RESULT_SKIP:
                self._exec_procedure_func(self._on_skip, tr_record)
        finally:
            # The record is always logged, stored and written to the summary,
            # and the per-test state is always cleared.
            logging.info(RESULT_LINE_TEMPLATE, tr_record.test_name,
                         tr_record.result)
            self.results.add_record(tr_record)
            self.summary_writer.dump(tr_record.to_dict(),
                                     records.TestSummaryEntryType.RECORD)
            self.current_test_info = None
            self.current_test_name = None