def cls(obj, context=None, continuous=None, space_formatter=lambda s: sub(r'\s', "_", s), indent=None):
    """Render an annotation/factor table as plaintext CLS.

    `obj` must expose `cls_valid is True`; its first `metadata_columns`
    entry, joined with ".", names the target column, which is looked up in
    `obj._column_key_dispatcher`.

    `continuous` selects the flavour:
      * None  -- try the continuous representation, fall back to discrete;
      * True  -- continuous only, raising if it cannot be produced;
      * False -- discrete only.
    `space_formatter` is applied in the discrete case (falls back to `as_is`
    when falsy). `context` and `indent` are accepted but not used here.

    Returns a `(content, "text/plain")` pair where `content` is a generator
    function yielding the output lines.

    Raises GeneFabFormatException if `obj.cls_valid` is not True, or (when the
    generator runs) if `continuous=True` cannot be honoured.
    """
    if getattr(obj, "cls_valid", None) is not True:
        raise GeneFabFormatException(
            "Exactly one target assay/study metadata field must be present",
            target_columns=getattr(obj, "metadata_columns", []),
            format="cls",
        )
    target_name = ".".join(obj.metadata_columns[0])
    target = obj._column_key_dispatcher[target_name]

    def content(continuous=continuous, space_formatter=space_formatter):
        # Defaults capture the outer arguments so the generator can rebind
        # them locally without touching the enclosing scope.
        attempt_continuous = (continuous is None) or (continuous is True)
        if attempt_continuous:
            try:
                lines = _list_continuous_cls(obj, target, target_name)
            except ValueError:
                if continuous is not True:
                    # Continuous was optional: switch to the discrete path.
                    continuous = False
                else:
                    raise GeneFabFormatException(
                        "Cannot represent target annotation as continuous",
                        target=target_name, format="cls",
                    )
        if continuous is False:
            lines = _iter_discrete_cls(
                obj, target, space_formatter or as_is,
            )
        yield from lines

    return content, "text/plain"
def combine_objects(objects, context, limit=None):
    """Merge retrieved objects into one and apply column constraints.

    * no objects            -> None
    * exactly one object    -> that object
    * several table wizards -> concatenated along axis 1
    * anything else         -> NotImplementedError

    A resulting StreamedDataTableWizard is column-constrained by `context`
    and its `.get(context=context)` result is returned; GCT output combined
    with explicit `context.data_columns` is rejected with
    GeneFabFormatException. A non-table result is returned untouched unless
    the context requests column operations, which raises
    GeneFabFileException. `limit` is accepted but not used in this function.
    """
    n_objects = len(objects)
    if n_objects == 0:
        return None
    if n_objects == 1:
        combined = objects[0]
    elif all(isinstance(obj, StreamedDataTableWizard) for obj in objects):
        combined = StreamedDataTableWizard.concat(objects, axis=1)
    else:
        raise NotImplementedError("Merging non-table data objects")

    if not isinstance(combined, StreamedDataTableWizard):
        # Non-table payloads cannot be subset or compared column-wise.
        if context.data_columns or context.data_comparisons:
            raise GeneFabFileException(
                "Column operations on non-table data objects are not supported",
                columns=context.data_columns,
                comparisons=context.data_comparisons,
            )
        return combined

    if context.data_columns and (context.format == "gct"):
        raise GeneFabFormatException(
            "GCT format is disabled for arbitrarily subset tables",
            columns="|".join(context.data_columns),
        )
    combined.constrain_columns(context=context)
    return combined.get(context=context)
def gct(obj, context=None, indent=None, level_formatter="/".join):
    """Display StreamedDataTable in plaintext GCT format, if supported.

    `level_formatter` turns each entry of `obj.columns` into a single header
    cell (by default the levels are joined with "/"). `context` and `indent`
    are accepted but not used here.

    Returns a `(content, "text/plain")` pair where `content` is a generator
    function yielding the GCT text.

    Raises:
        GeneFabConfigurationException: `obj` is not a StreamedDataTable or
            carries no datatypes.
        GeneFabFormatException: `obj` has more than one datatype, or
            `obj.gct_valid` is falsy.
    """
    if (not isinstance(obj, StreamedDataTable)) or (len(obj.datatypes) == 0):
        msg = "No datatype information associated with retrieved data"
        raise GeneFabConfigurationException(msg)
    elif len(obj.datatypes) > 1:
        msg = "GCT format does not support mixed datatypes"
        raise GeneFabFormatException(msg, datatypes=obj.datatypes)
    elif not obj.gct_valid:
        msg = "GCT format is not valid for given datatype"
        # Exactly one datatype remains on this branch; read it without
        # pop() so that reporting the error does not mutate `obj`.
        datatype = next(iter(obj.datatypes))
        raise GeneFabFormatException(msg, datatype=datatype)
    else:
        def content():
            obj.na_rep = ""
            # https://www.genepattern.org/file-formats-guide#GCT
            # Version line, then the two values of obj.shape, tab-separated.
            yield "#1.2\n{}\t{}\n".format(*obj.shape)
            yield "Name\tDescription"
            for level in obj.columns:
                yield "\t" + level_formatter(level)
            yield "\n"
            _iter_value_lines = _iter_xsv_chunks(obj.values, "", "\t", 0)
            # The first index level is emitted twice: as Name and Description.
            for (index, *_), value_line in zip(obj.index, _iter_value_lines):
                yield f"{index}\t{index}\t{value_line}"
        return content, "text/plain"
def content(continuous=continuous, space_formatter=space_formatter):
    """Yield CLS lines for the enclosing scope's `obj` / `target`.

    With `continuous` None or True the continuous representation is tried
    first. A ValueError from it is fatal when `continuous` is True and
    otherwise triggers the discrete representation, which is also used
    directly when `continuous` is False. `space_formatter` falls back to
    `as_is` when falsy.
    """
    attempt_continuous = (continuous is None) or (continuous is True)
    if attempt_continuous:
        try:
            lines = _list_continuous_cls(obj, target, target_name)
        except ValueError:
            if continuous is not True:
                # Continuous was optional: switch to the discrete path.
                continuous = False
            else:
                raise GeneFabFormatException(
                    "Cannot represent target annotation as continuous",
                    target=target_name, format="cls",
                )
    if continuous is False:
        lines = _iter_discrete_cls(obj, target, space_formatter or as_is)
    yield from lines
def _call_and_cache():
    """Call the wrapped method, render its result, and cache it if allowed.

    Uses names from the enclosing scope: `method`, `args`, `kwargs`,
    `context`, `self`, `response_container` and `response_cache`.
    When `context.schema == "1"` the result is replaced by its `.schema`;
    an AttributeError during that step is reported as
    GeneFabFormatException. The rendered content is stored in
    `response_container`, which is put into `response_cache` only when the
    object has `cacheable is True` and a cache is available.
    """
    obj = method(*args, context=context, **kwargs)
    try:
        if context.schema == "1":
            obj = obj.schema
    except AttributeError:
        raise GeneFabFormatException(
            "'schema=1' is not valid for requested data",
            type=type(obj).__name__,
        )
    fallback_format = getattr(obj, "default_format", "raw")
    content, mimetype = self.dispatch_renderer(
        obj, context=context, default_format=fallback_format,
    )
    response_container.update(content, mimetype, obj)
    is_cacheable = getattr(obj, "cacheable", None) is True
    if is_cacheable and (response_cache is not None):
        response_cache.put(response_container, context)
def dispatch_renderer(self, obj, context, default_format, indent=None):
    """Pick a renderer for `obj` by type and format, and return its result.

    TYPE_RENDERERS is scanned in order; the first entry whose types match
    `obj` supplies a format-to-renderer mapping. `default_format` is used
    when `context.format` is None, otherwise `context.format` must be a key
    of that mapping. The chosen renderer is called as
    `renderer(obj, context, indent=indent)` and its return value is passed
    through unchanged.

    Raises:
        GeneFabFormatException: the requested format is not available for
            the matched type.
        GeneFabConfigurationException: no entry of TYPE_RENDERERS matches
            the type of `obj`.
    """
    for types, fmt_to_renderer in TYPE_RENDERERS.items():
        if not isinstance(obj, types):
            continue
        if context.format is None:
            renderer = fmt_to_renderer[default_format]
        elif context.format in fmt_to_renderer:
            renderer = fmt_to_renderer[context.format]
        else:
            raise GeneFabFormatException(
                "Requested format not valid for requested data",
                type=type(obj).__name__,
                format=context.format,
                default_format=default_format,
            )
        return renderer(obj, context, indent=indent)
    # Reached only when no registered type matched.
    raise GeneFabConfigurationException(
        "Route returned unsupported object", type=type(obj).__name__,
    )
def speed_up_data_schema(get, self, *, context, limit=None, offset=0):
    """Shortcut for `get` when only the data schema is requested.

    If `context.schema` is not "1", simply delegates to
    `get(self, context=context, limit=limit, offset=offset)`.
    Otherwise builds a StreamedDataTableSub from per-part sub-frames
    (via `get_sub_df`, `get_part_index` and `merge_subs`) instead of running
    the underlying query.

    Raises:
        GeneFabFormatException: schema mode combined with
            `context.data_columns` or `context.data_comparisons`.
        GeneFabConfigurationException: `self` is neither a
            StreamedDataTableWizard_Single nor a
            StreamedDataTableWizard_OuterJoined.
    """
    if context.schema != "1":
        return get(self, context=context, limit=limit, offset=offset)
    if context.data_columns or context.data_comparisons:
        raise GeneFabFormatException(
            "Data schema does not support column subsetting / comparisons",
            suggestion="Remove comparisons and/or column, row slicing from query",
        )

    from genefab3.db.sql.streamed_tables import (
        SQLiteIndexName,
        StreamedDataTableWizard_Single, StreamedDataTableWizard_OuterJoined,
    )
    GeneFabLogger.info(f"apply_hack(speed_up_data_schema) for {self.name}")
    sub_dfs, sub_indices = OrderedDict(), {}
    sub_columns, index_name = [], []

    def _extend_parts(source):
        """Collect sub-frames, part indices and full column names of `source`."""
        for partname, partcols in source._inverse_column_dispatcher.items():
            if isinstance(partcols[0], SQLiteIndexName):
                # This part carries the index column itself: remember it so
                # later parts get it prepended to their column lists.
                index_name[:] = [partcols[0]]
                requested = partcols
            else:
                requested = [*index_name, *partcols]
            sub_df = get_sub_df(source, partname, requested)
            sub_indices[partname] = get_part_index(source, partname)
            sub_dfs[partname] = sub_df
            raw_to_full = source._columns_raw2full
            for raw_column in sub_df.columns:
                sub_columns.append(raw_to_full[raw_column])

    if isinstance(self, StreamedDataTableWizard_Single):
        sources = [self]
    elif isinstance(self, StreamedDataTableWizard_OuterJoined):
        sources = self.objs
    else:
        msg = "Schema speedup applied to unsupported object type"
        raise GeneFabConfigurationException(msg, type=type(self))
    for source in sources:
        _extend_parts(source)

    sub_merged = merge_subs(self, sub_dfs, sub_indices)
    return StreamedDataTableSub(sub_merged, sub_columns)