def __init__(self,
             readonly=False,
             provide_run_metadata=None,
             overwrite='if_broken',
             take_only=tuple(),
             exclude=tuple()):
    """
    :param readonly: If True, throws CannotWriteData whenever saving is
        attempted.
    :param overwrite: When to overwrite data that already exists.
        - 'never': Never overwrite any data.
        - 'if_broken': Only overwrite data if it is incomplete or broken.
        - 'always': Always overwrite data. Use with caution!
    :param take_only: Provide/accept only these data types.
    :param exclude: Do NOT provide/accept these data types.
    :param provide_run_metadata: Whether to provide run-level metadata
        (run docs). If None, use the class-specific default.

    If take_only and exclude are both omitted, provide all data types.
    If a data type is listed in both, it will not be provided.
    Attempting to read/write unwanted data types throws DataTypeNotWanted.
    """
    if overwrite not in 'never if_broken always'.split():
        raise RuntimeError(f"Invalid 'overwrite' setting {overwrite}. ")
    self.take_only = strax.to_str_tuple(take_only)
    self.exclude = strax.to_str_tuple(exclude)
    self.overwrite = overwrite
    if provide_run_metadata is not None:
        self.provide_run_metadata = provide_run_metadata
    self.readonly = readonly
    self.log = logging.getLogger(self.__class__.__name__)
def test_first_run_plugins(self):
    self.script.purge_below('match_acceptance_extended')
    for t in strax.to_str_tuple(self.script.target):
        for r in strax.to_str_tuple(self.script.run_id):
            if (self.script.st.get_save_when(t) > strax.SaveWhen.NEVER):
                self.script.st.make(r, t)
                assert self.script.st.is_stored(r, t)
def available_for_run(self: strax.Context,
                      run_id: str,
                      include_targets: ty.Union[None, list, tuple, str] = None,
                      exclude_targets: ty.Union[None, list, tuple, str] = None,
                      pattern_type: str = 'fnmatch') -> pd.DataFrame:
    """
    For a given single run, check which of the targets are stored.
    Excludes targets that are never stored anyway.

    :param run_id: requested run
    :param include_targets: targets to include, e.g. raw_records,
        raw_records* or *_nv. If multiple targets (e.g. a list) are
        provided, the target should match any of the arguments!
    :param exclude_targets: targets to exclude, e.g. raw_records,
        raw_records* or *_nv. If multiple targets (e.g. a list) are
        provided, the target should match none of the arguments!
    :param pattern_type: either 'fnmatch' (Unix filename pattern
        matching) or 're' (regular expression operations).
    :return: Table of available data per target
    """
    if not isinstance(run_id, str):
        raise ValueError(f'Only a single run_id is allowed (str), '
                         f'got {run_id} ({type(run_id)})')

    if exclude_targets is None:
        exclude_targets = []
    if include_targets is None:
        include_targets = []

    is_stored = defaultdict(list)
    for target in self._plugin_class_registry.keys():
        # Skip targets that are never stored
        save_when = self.get_save_when(target)
        if save_when == strax.SaveWhen.NEVER:
            continue

        # Should we include this target or exclude it?
        include_t = []
        exclude_t = False

        for excl in strax.to_str_tuple(exclude_targets):
            # Simple logic: if we match the excluded target, we should
            # not continue
            if _tag_match(target, excl, pattern_type, False):
                exclude_t = True
                break

        # We may match any of the "incl" targets, so keep a list and
        # check whether any of the "incl" patterns matches the target.
        for incl in strax.to_str_tuple(include_targets):
            include_t.append(_tag_match(target, incl, pattern_type, False))

        # Convert to a simple bool. If no include_targets are specified,
        # all are fine; otherwise check that at least one matches.
        include_t = True if not len(include_t) else any(include_t)

        if include_t and not exclude_t:
            is_stored['target'].append(target)
            is_stored['is_stored'].append(self.is_stored(run_id, target))
    return pd.DataFrame(is_stored)
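# Usage sketch for available_for_run (the run id '024399' and the online
# context are placeholders, not part of the original listing; this needs a
# configured straxen installation with runs-database access). Calling the
# function directly avoids assuming it is bound onto the context class.
import straxen

st = straxen.contexts.xenonnt_online()
# Which *_nv targets are stored for this run, ignoring raw_records*?
df = available_for_run(st, '024399',
                       include_targets='*_nv',
                       exclude_targets='raw_records*')
print(df[df['is_stored']])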
def __init__(self, take_only=tuple(), exclude=tuple(),
             recover=False, readonly=False):
    self._take_only = strax.to_str_tuple(take_only)
    self._exclude = strax.to_str_tuple(exclude)
    self.recover = recover
    self.readonly = readonly
    self.log = logging.getLogger(self.__class__.__name__)
def check_loading_allowed(data, run_id, target,
                          max_in_disallowed=1,
                          disallowed=('event_positions',
                                      'corrected_areas',
                                      'energy_estimates')):
    """
    Check that loading the specified targets is not disallowed.

    :param data: chunk of data
    :param run_id: run_id of the run
    :param target: list of targets requested by the user
    :param max_in_disallowed: the maximum number of requested targets
        that may be in the disallowed list
    :param disallowed: list of targets that are not allowed to be
        loaded simultaneously by the user
    :return: data
    :raise: RuntimeError if more than max_in_disallowed targets are
        requested
    """
    n_targets_in_disallowed = sum(
        [t in disallowed for t in strax.to_str_tuple(target)])
    if n_targets_in_disallowed > max_in_disallowed:
        raise RuntimeError(
            f'Don\'t load {disallowed} separately, use "event_info" instead')
    return data
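# Minimal sketch of check_loading_allowed behaviour (toy inputs and run id,
# not from the original listing): a single disallowed target passes through
# unchanged, two disallowed targets at once raise.
import numpy as np

dummy_chunk = np.zeros(0)
check_loading_allowed(dummy_chunk, '009104', 'event_positions')  # OK
try:
    check_loading_allowed(
        dummy_chunk, '009104', ('event_positions', 'corrected_areas'))
except RuntimeError as e:
    print(e)  # suggests loading "event_info" instead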
def pre_apply_function(data, run_id, target,
                       function_name='pre_apply_function'):
    """
    Prior to returning the data (from one chunk), see if any function(s)
    need to be applied.

    :param data: one chunk of data for the requested target(s)
    :param run_id: single run-id of the chunk of data
    :param target: one or more targets
    :param function_name: the name of the function to be applied. The
        <function_name>.py file should be stored in the database.
    :return: data with the function applied
    """
    if function_name not in _resource_cache:
        # Only load the function once and put it in the resource cache
        function_file = f'{function_name}.py'
        function_file = _load_function_file_from_home(function_file)
        function = get_resource(function_file, fmt='txt')
        # pylint: disable=exec-used
        exec(function)
        # Cache the function to reduce reloading & eval operations
        _resource_cache[function_name] = locals().get(function_name)
    data = _resource_cache[function_name](data,
                                          run_id,
                                          strax.to_str_tuple(target))
    return data
def __init__(self):
    if not hasattr(self, 'depends_on'):
        raise ValueError('depends_on not provided for '
                         f'{self.__class__.__name__}')

    self.depends_on = strax.to_str_tuple(self.depends_on)

    # Store compute parameter names, see if we take chunk_i too
    compute_pars = list(
        inspect.signature(self.compute).parameters.keys())
    if 'chunk_i' in compute_pars:
        self.compute_takes_chunk_i = True
        del compute_pars[compute_pars.index('chunk_i')]
    if 'start' in compute_pars:
        if 'end' not in compute_pars:
            raise ValueError(f"Compute of {self} takes start, "
                             f"so it should also take end.")
        self.compute_takes_start_end = True
        del compute_pars[compute_pars.index('start')]
        del compute_pars[compute_pars.index('end')]

    if not isinstance(self.save_when, (IntEnum, immutabledict, int)):
        raise ValueError(
            'save_when must be either a SaveWhen object or an '
            'immutabledict representing the different data_types provided.')

    if hasattr(self, 'provides') and not isinstance(self.save_when,
                                                    immutabledict):
        # The ParallelSource plugin does not provide anything as it
        # inlines only already existing components, therefore we also
        # do not have to update save_when
        self.save_when = immutabledict.fromkeys(self.provides,
                                                self.save_when)

    self.compute_pars = compute_pars
    self.input_buffer = dict()
def dataframe_to_wiki(df, float_digits=5, title='Awesome table',
                      force_int=tuple()):
    """Convert a pandas dataframe to a dokuwiki table
    (which you can copy-paste onto the XENON wiki)

    :param df: dataframe to convert
    :param float_digits: Round floating-point values to this number of
        digits.
    :param title: title of the table.
    :param force_int: column names whose values are forced to integers.
    """
    table = '^ %s ' % title + '^' * (len(df.columns) - 1) + '^\n'
    table += '^ ' + ' ^ '.join(df.columns) + ' ^\n'

    def do_round(x):
        if isinstance(x, float):
            return round(x, float_digits)
        return x

    force_int = np.where(np.in1d(df.columns.values,
                                 strax.to_str_tuple(force_int)))[0]

    for _, row in df.iterrows():
        table += "| " + ' | '.join([
            str(int(x) if i in force_int else do_round(x))
            for i, x in enumerate(row.values.tolist())]) + ' |\n'
    return table
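# Small usage sketch (toy dataframe, not from the original listing):
# build a two-column dokuwiki table and force the 'run' column to ints.
import pandas as pd

toy_df = pd.DataFrame({'run': [9104.0, 9105.0], 'rate': [12.3456789, 8.7]})
print(dataframe_to_wiki(toy_df, float_digits=3, title='Rates', force_int='run'))
# ^ Rates ^^
# ^ run ^ rate ^
# | 9104 | 12.346 |
# | 9105 | 8.7 |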
def rekey_dict(d, replace_keys='', with_keys=''):
    """
    :param d: dictionary that will have its keys renamed
    :param replace_keys: comma-separated string of keys that will be
        replaced
    :param with_keys: comma-separated string of keys that will replace
        the replace_keys
    :return: dictionary with renamed keys
    """
    new_dict = d.copy()
    replace_keys = strax.to_str_tuple(replace_keys.split(','))
    with_keys = strax.to_str_tuple(with_keys.split(','))
    if len(replace_keys) != len(with_keys):
        raise RuntimeError(
            "replace_keys and with_keys must have the same length")
    for old_key, new_key in zip(replace_keys, with_keys):
        new_dict[new_key] = new_dict.pop(old_key)
    return new_dict
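# Usage sketch (toy dict, not part of the original listing): rename two
# keys in one call using the comma-separated key strings.
renamed = rekey_dict({'x': 1, 'y': 2, 'z': 3},
                     replace_keys='x,y',
                     with_keys='x_nt,y_nt')
print(renamed)  # {'z': 3, 'x_nt': 1, 'y_nt': 2}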
def print_versions(modules=('strax', 'straxen'), return_string=False):
    """
    Print versions of modules installed.

    :param modules: Modules to print, should be str, tuple or list. E.g.
        print_versions(modules=('strax', 'straxen', 'wfsim',
        'cutax', 'pema'))
    :param return_string: optional. Instead of printing the message,
        return a string
    :return: optional, the message that would have been printed
    """
    message = (f'Working on {socket.getfqdn()} with the following '
               f'versions and installation paths:')
    py_version = sys.version.replace(' (', '\t(').replace('\n', '')
    message += f"\npython\tv{py_version}"
    for m in strax.to_str_tuple(modules):
        try:
            # pylint: disable=exec-used
            exec(f'import {m}')
            # pylint: disable=eval-used
            message += f'\n{m}\tv{eval(m).__version__}\t{eval(m).__path__[0]}'
        except (ModuleNotFoundError, ImportError):
            print(f'{m} is not installed')
    if return_string:
        return message
    print(message)
def get_array(self, run_id: ty.Union[str, tuple, list],
              targets, save=tuple(), max_workers=None,
              **kwargs) -> np.ndarray:
    """Compute target for run_id and return as numpy array
    {get_docs}
    """
    run_ids = strax.to_str_tuple(run_id)
    if len(run_ids) > 1:
        results = strax.multi_run(self.get_array, run_ids,
                                  targets=targets,
                                  save=save,
                                  max_workers=max_workers,
                                  **kwargs)
    else:
        source = self.get_iter(run_ids[0], targets,
                               save=save,
                               max_workers=max_workers,
                               **kwargs)
        results = [x.data for x in source]
    return np.concatenate(results)
def make(self, run_id: ty.Union[str, tuple, list], targets,
         save=tuple(), max_workers=None,
         _skip_if_built=True,
         **kwargs) -> None:
    """Compute target for run_id. Returns nothing (None).
    {get_docs}
    """
    # Multi-run support
    run_ids = strax.to_str_tuple(run_id)
    if len(run_ids) == 0:
        raise ValueError("Cannot build empty list of runs")
    if len(run_ids) > 1:
        return strax.multi_run(self.get_array, run_ids,
                               targets=targets,
                               throw_away_result=True,
                               save=save,
                               max_workers=max_workers,
                               **kwargs)

    if _skip_if_built and self.is_stored(run_id, targets):
        return

    for _ in self.get_iter(run_ids[0], targets,
                           save=save,
                           max_workers=max_workers,
                           **kwargs):
        pass
def remap_old(data, targets, run_id, works_on_target=''):
    """
    If the data is from before the time the sectors were re-cabled,
    apply a software remap; otherwise just return the data as it is.

    :param data: numpy array of data with at least the field time. It
        is assumed the data is sorted by time
    :param targets: targets in the st.get_array to get
    :param run_id: required positional argument of
        apply_function_to_data in strax
    :param works_on_target: regex match string to match any of the
        targets. By default set to '' such that any target in the
        targets would be remapped (which is what we want as channels
        are present in most data types). If one only wants records
        (no raw-records) and peaks*, use e.g.
        works_on_target = 'records|peaks'.
    """
    if np.any(data['time'][:2] >= TSTART_FIRST_CORRECTLY_CABLED_RUN):
        # We leave the 'new' data be
        pass
    elif not np.any([match(works_on_target, t)
                     for t in strax.to_str_tuple(targets)]):
        # None of the targets are such that we want to remap
        pass
    elif len(data):
        # Select the old data and do the remapping for it
        mask = data['time'] < TSTART_FIRST_CORRECTLY_CABLED_RUN
        data = data.copy()
        data[mask] = remap_channels(data[mask])
    return data
def define_run(self,
               name: str,
               data: ty.Union[np.ndarray, pd.DataFrame, dict],
               from_run: ty.Union[str, None] = None):
    if isinstance(data, (pd.DataFrame, np.ndarray)):
        # Array of events / regions of interest
        start, end = data['time'], strax.endtime(data)
        if from_run is not None:
            return self.define_run(
                name,
                {from_run: np.transpose([start, end])})
        else:
            df = pd.DataFrame(dict(starts=start, ends=end,
                                   run_id=data['run_id']))
            return self.define_run(
                name,
                {run_id: rs[['start', 'stop']].values.transpose()
                 for run_id, rs in df.groupby('fromrun')})

    if isinstance(data, (list, tuple)):
        # List of run_ids
        data = strax.to_str_tuple(data)
        return self.define_run(
            name,
            {run_id: 'all' for run_id in data})

    if not isinstance(data, dict):
        raise ValueError(f"Can't define run from {type(data)}")

    # Dict mapping run_id: array of time ranges or 'all'
    for sf in self.storage:
        if not sf.readonly and sf.can_define_runs:
            sf.define_run(name, data)
            break
    else:
        raise RuntimeError("No storage frontend registered that allows"
                           " run definition")
def remap_old(data, targets, works_on_target=''):
    """
    If the data is from before the time the sectors were re-cabled,
    apply a software remap; otherwise just return the data as it is.

    :param data: numpy array of data with at least the field time. It
        is assumed the data is sorted by time
    :param targets: targets in the st.get_array to get
    :param works_on_target: regex match string to match any of the
        targets. By default set to '' such that any target in the
        targets would be remapped (which is what we want as channels
        are present in most data types). If one only wants records
        (no raw-records) and peaks*, use e.g.
        works_on_target = 'records|peaks'.
    """
    if np.any(data['time'][:2] >= TSTART_FIRST_CORRECTLY_CABLED_RUN):
        # We leave the 'new' data be
        pass
    elif not np.any([match(works_on_target, t)
                     for t in strax.to_str_tuple(targets)]):
        # None of the targets are such that we want to remap
        pass
    elif len(data):
        # Select the old data and do the remapping for it
        warn("Correcting data of runs with mis-cabled PMTs. \nSee: https://"
             "xe1t-wiki.lngs.infn.it/doku.php?id=xenon:xenonnt:dsg:daq:sector_swap. "
             "Don't use '' selection_str='channel == xx' '' "
             "(github.com/XENONnT/straxen/issues/239)")
        mask = data['time'] < TSTART_FIRST_CORRECTLY_CABLED_RUN
        data[mask] = remap_channels(data[mask])
    return data
def make(self, run_id: ty.Union[str, tuple, list], targets,
         save=tuple(), max_workers=None,
         **kwargs) -> None:
    """Compute target for run_id. Returns nothing (None).
    {get_docs}
    """
    # Multi-run support
    run_ids = strax.to_str_tuple(run_id)
    if len(run_ids) > 1:
        return multi_run(self, run_ids,
                         targets=targets,
                         save=save,
                         max_workers=max_workers,
                         **kwargs)

    for _ in self.get_iter(run_ids[0], targets,
                           save=save,
                           max_workers=max_workers,
                           **kwargs):
        pass
def get_array(self, run_id: ty.Union[str, tuple, list],
              targets, save=tuple(), max_workers=None,
              **kwargs) -> np.ndarray:
    """Compute target for run_id and return as numpy array
    {get_docs}
    """
    run_ids = strax.to_str_tuple(run_id)
    if len(run_ids) > 1:
        results = multi_run(self.get_array, run_ids,
                            targets=targets,
                            save=save,
                            max_workers=max_workers,
                            **kwargs)
    else:
        results = list(self.get_iter(run_ids[0], targets,
                                     save=save,
                                     max_workers=max_workers,
                                     **kwargs))
    if len(results):
        return np.concatenate(results)
    raise ValueError("No results returned?")
def estimate_run_start(self, run_id, targets=None):
    """Return run start time in ns since epoch.

    This fetches from run metadata, and if this fails, it estimates
    it using data metadata from the targets.
    """
    try:
        # Use run metadata, if it is available, to get
        # the run start time (floored to seconds)
        t0 = self.run_metadata(run_id, 'start')['start']
        t0 = t0.replace(tzinfo=datetime.timezone.utc)
        return int(t0.timestamp()) * int(1e9)
    except (strax.RunMetadataNotAvailable, KeyError):
        pass
    # Get an approximate start from the data itself,
    # then floor it to seconds for consistency
    if targets:
        for t in strax.to_str_tuple(targets):
            try:
                t0 = self.get_meta(run_id, t)['chunks'][0]['start']
                return (int(t0) // int(1e9)) * int(1e9)
            except strax.DataNotAvailable:
                pass
    warnings.warn(
        "Could not estimate run start time from "
        "run metadata: assuming it is 0",
        UserWarning)
    return 0
def get_itp_dict(loaded_json,
                 run_id=None,
                 time_key='time',
                 itp_keys='correction',
                 **kwargs
                 ) -> typing.Union[np.ndarray, typing.Dict[str, np.ndarray]]:
    """
    Interpolate a dictionary at the start time that is queried from
    a run-id.

    :param loaded_json: a dictionary with a time-series
    :param run_id: run_id
    :param time_key: key that gives the timestamps
    :param itp_keys: which keys from the dict to read. Should be
        comma (',') separated!
    :return: Interpolated values of the dict at the start time, either
        returned as an np.ndarray (single key) or as a dict
        (multiple itp_keys)
    """
    keys = strax.to_str_tuple(itp_keys.split(','))
    for key in list(keys) + [time_key]:
        if key not in loaded_json:
            raise KeyError(
                f"The json does not contain the key '{key}'. "
                f"Try one of: {loaded_json.keys()}")

    times = loaded_json[time_key]

    # Get the start time of this run. Need to make it tz-aware
    start = xent_collection().find_one({'number': int(run_id)},
                                       {'start': 1})['start']
    start = pytz.utc.localize(start).timestamp() * 1e9

    try:
        if len(strax.to_str_tuple(keys)) > 1:
            return {key: interp1d(times, loaded_json[key],
                                  bounds_error=True)(start)
                    for key in keys}
        else:
            interp = interp1d(times, loaded_json[keys[0]],
                              bounds_error=True)
            return interp(start)
    except ValueError as e:
        raise ValueError(f"Correction is not defined for run {run_id}") from e
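# Usage sketch for get_itp_dict (toy time-series and a hypothetical run id,
# not from the original listing). Note the call needs access to the XENONnT
# runs database through xent_collection() to look up the run start time,
# and the time range in the json must bracket that start time.
toy_json = {
    'time': [1_600_000_000e9, 1_700_000_000e9],   # ns since epoch
    'correction': [0.95, 1.05],
    'baseline': [10.0, 12.0],
}
# Single key -> np.ndarray, multiple comma-separated keys -> dict of arrays
value = get_itp_dict(toy_json, run_id='024399', itp_keys='correction')
values = get_itp_dict(toy_json, run_id='024399', itp_keys='correction,baseline')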
def __init__(self,
             st: strax.Context,
             run_id: ty.Union[str, tuple, list],
             target: ty.Union[str, tuple],
             config: ty.Union[dict, immutabledict, None] = None,
             ):
    self.st = st.new_context()
    self.run_id = strax.to_str_tuple(run_id)
    self.target = strax.to_str_tuple(target)
    if config is None:
        config = {}
    self.config = config
    self.st.set_config(config)
    self.base_dir = self.extract_base_dir(st)
    for subdir in self.base_dir_requires:
        os.makedirs(os.path.join(self.base_dir, subdir), exist_ok=True)
def define_run(self: strax.Context,
               name: str,
               data: ty.Union[np.ndarray, pd.DataFrame, dict],
               from_run: ty.Union[str, None] = None):
    if isinstance(data, (pd.DataFrame, np.ndarray)):
        # Array of events / regions of interest
        start, end = data['time'], strax.endtime(data)
        if from_run is not None:
            return self.define_run(
                name,
                {from_run: np.transpose([start, end])})
        elif 'run_id' not in data:
            raise ValueError(
                "Must provide from_run or data with a run_id column "
                "to define a superrun")
        else:
            df = pd.DataFrame(dict(starts=start, ends=end,
                                   run_id=data['run_id']))
            return self.define_run(
                name,
                {run_id: rs[['start', 'stop']].values.transpose()
                 for run_id, rs in df.groupby('fromrun')})

    if isinstance(data, (list, tuple)):
        # List of run_ids
        data = strax.to_str_tuple(data)
        return self.define_run(
            name,
            {run_id: 'all' for run_id in data})

    if not isinstance(data, dict):
        raise ValueError(f"Can't define run from {type(data)}")

    # Find start and end time of the new run = earliest start and
    # latest end time of the subruns
    run_md = dict(start=float('inf'), end=0, livetime=0)
    for _subrunid in data:
        doc = self.run_metadata(_subrunid, ['start', 'end'])
        run_md['start'] = min(run_md['start'], doc['start'])
        run_md['end'] = max(run_md['end'], doc['end'])
        run_md['livetime'] += doc['end'] - doc['start']

    # Superrun names must start with an underscore
    if not name.startswith('_'):
        name = '_' + name

    # Dict mapping run_id: array of time ranges or 'all'
    for sf in self.storage:
        if not sf.readonly and sf.can_define_runs:
            sf.define_run(name, sub_run_spec=data, **run_md)
            break
    else:
        raise RuntimeError("No storage frontend registered that allows"
                           " run definition")
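# Usage sketch for the superrun path of define_run (hypothetical run ids;
# assumes `st` is a context whose storage includes a writable frontend with
# can_define_runs=True and whose run metadata is available for the subruns).
st.define_run('sr_example', ['009104', '009105'])
# The name is prefixed with '_' automatically; afterwards the superrun can
# be processed like any other run, e.g. st.get_array('_sr_example', 'event_info')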
def _scan_runs(self, store_fields):
    cursor = self.collection.find(
        filter={},
        projection=strax.to_str_tuple(list(store_fields)
                                      + ['reader.ini.name']))
    for doc in tqdm(cursor,
                    desc='Fetching run info from MongoDB',
                    total=cursor.count()):
        del doc['_id']
        if self.reader_ini_name_is_mode:
            doc['mode'] = \
                doc.get('reader', {}).get('ini', {}).get('name', '')
        yield doc
def __init__(self):
    if not hasattr(self, 'depends_on'):
        raise ValueError('depends_on not provided for '
                         f'{self.__class__.__name__}')

    self.depends_on = strax.to_str_tuple(self.depends_on)

    # Store compute parameter names, see if we take chunk_i too
    compute_pars = list(
        inspect.signature(self.compute).parameters.keys())
    if 'chunk_i' in compute_pars:
        self.compute_takes_chunk_i = True
        del compute_pars[compute_pars.index('chunk_i')]
    self.compute_pars = compute_pars
def _scan_runs(self, store_fields):
    cursor = self.collection.find(
        filter={},
        projection=strax.to_str_tuple(list(store_fields)
                                      + ['reader.ini.name']))
    for doc in tqdm(cursor,
                    desc='Fetching run info from MongoDB',
                    total=cursor.count()):
        # Remove the Mongo document ID and add the run mode
        del doc['_id']
        doc.setdefault(
            'mode',
            doc.get('reader', {}).get('ini', {}).get('name', ''))
        yield doc
def _scan_runs(self, store_fields):
    if self.minimum_run_number:
        query = {'number': {'$gt': self.minimum_run_number}}
    else:
        query = {}
    cursor = self.collection.find(
        filter=query,
        projection=strax.to_str_tuple(list(store_fields)))
    for doc in tqdm(cursor,
                    desc='Fetching run info from MongoDB',
                    total=cursor.count()):
        del doc['_id']
        if self.reader_ini_name_is_mode:
            doc['mode'] = \
                doc.get('reader', {}).get('ini', {}).get('name', '')
        yield doc
def __init__(self, store_files_at=None, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # We are going to set a place where to store the files. It's
    # either specified by the user or we use these defaults:
    if store_files_at is None:
        store_files_at = ('./resource_cache',
                          '/tmp/straxen_resource_cache/',
                          '/dali/lgrandi/strax/resource_cache',
                          )
    elif not isinstance(store_files_at, (tuple, str, list)):
        raise ValueError(f'{store_files_at} should be tuple of paths!')
    elif isinstance(store_files_at, str):
        store_files_at = to_str_tuple(store_files_at)

    self.storage_options = store_files_at
def _scan_runs(self, store_fields):
    query = self.number_query()
    projection = strax.to_str_tuple(list(store_fields))
    # If a field and one of its subfields are both requested, keep
    # only the most "specific" (subfield) projection
    projection = [f1 for f1 in projection
                  if not any([f2.startswith(f1 + ".")
                              for f2 in projection])]
    cursor = self.collection.find(
        filter=query,
        projection=projection)
    for doc in tqdm(cursor,
                    desc='Fetching run info from MongoDB',
                    total=cursor.count()):
        del doc['_id']
        if self.reader_ini_name_is_mode:
            doc['mode'] = \
                doc.get('reader', {}).get('ini', {}).get('name', '')
        yield doc
def scan_runs(self, check_available=std_dtypes, extra_fields=tuple()):
    """Update and return self.runs with runs currently available
    in the runs db.

    :param check_available: Check whether these data types are
        available. Availability of xxx is stored as a boolean in the
        xxx_available column.
    :param extra_fields: Additional fields from the run doc to include
        as columns in the dataframe.
    """
    base_fields = ['name', 'number', 'reader.ini.name', 'tags.name',
                   'start', 'end', 'trigger.events_built']

    if self.rundb is None:
        raise RuntimeError("Cannot scan runs db if no "
                           "rundb frontend is registered in the context.")

    docs = []
    cursor = self.rundb.find(
        filter={},
        projection=(base_fields
                    + list(strax.to_str_tuple(extra_fields))))
    for doc in tqdm(cursor, desc='Loading run info', total=cursor.count()):
        # TODO: Perhaps we should turn this query into an aggregation
        # to return also availability of key data types
        # (records, peaks, events?)

        # If there is no name, make one from the number
        doc.setdefault('name', str(doc['number']))

        # Process and flatten the doc
        # Convert tags to a single string
        doc['tags'] = ','.join([t['name'] for t in doc.get('tags', [])])
        doc = straxen.flatten_dict(doc, separator='__')
        del doc['_id']  # Remove the Mongo document ID
        docs.append(doc)

    self.runs = pd.DataFrame(docs)

    for d in tqdm(check_available, desc='Checking data availability'):
        self.runs[d + '_available'] = np.in1d(self.runs.name.values,
                                              self.list_available(d))
    return self.runs
def test_later_compare(self):
    st = self.script.st
    st2 = st.new_context()
    for t in strax.to_str_tuple(self.script.target):
        print(run_id, t)
        st2.make(run_id, t)

    peaks_1 = st.get_array(run_id, 'match_acceptance_extended')
    peaks_2 = st2.get_array(run_id, 'match_acceptance_extended')
    if 'run_id' not in peaks_1.dtype.names:
        peaks_1 = pema.append_fields(peaks_1, 'run_id',
                                     [run_id] * len(peaks_1))
        peaks_2 = pema.append_fields(peaks_2, 'run_id',
                                     [run_id] * len(peaks_2))

    pema.compare_truth_and_outcome(st, peaks_1,
                                   max_peaks=2,
                                   show=False,
                                   fig_dir=self.tempdir,
                                   raw=True)
    plt.clf()
    pema.compare_outcomes(st, peaks_1,
                          st2, peaks_2,
                          max_peaks=2,
                          show=False,
                          different_by=None,
                          fig_dir=self.tempdir,
                          raw=False,
                          )
    plt.clf()
    if len(peaks_1):
        pema.summary_plots.plot_peak_matching_histogram(
            peaks_1,
            'n_photon',
            bin_edges=[0, int(peaks_1['n_photon'].max())])
        plt.clf()
        pema.summary_plots.acceptance_plot(
            peaks_1,
            'n_photon',
            bin_edges=[0, int(peaks_1['n_photon'].max())])
        plt.clf()
def print_versions(
        modules=('strax', 'straxen', 'cutax'),
        print_output=not _is_jupyter,
        include_python=True,
        return_string=False,
        include_git=True,
):
    """
    Print versions of modules installed.

    :param modules: Modules to print, should be str, tuple or list.
        E.g. print_versions(modules=('numpy', 'dddm',))
    :param print_output: if True, print the version table
    :param include_python: include the python version and executable path
    :param return_string: optional. Instead of printing the message,
        return a string
    :param include_git: Include the current branch and latest commit hash
    :return: optional, the message that would have been printed
    """
    versions = defaultdict(list)
    if include_python:
        versions['module'] = ['python']
        versions['version'] = [python_version()]
        versions['path'] = [sys.executable]
        versions['git'] = [None]
    for m in strax.to_str_tuple(modules):
        result = _version_info_for_module(m, include_git=include_git)
        if result is None:
            continue
        version, path, git_info = result
        versions['module'].append(m)
        versions['version'].append(version)
        versions['path'].append(path)
        versions['git'].append(git_info)
    df = pd.DataFrame(versions)
    info = f'Host {socket.getfqdn()}\n{df.to_string(index=False,)}'
    if print_output:
        print(info)
    if return_string:
        return info
    return df