コード例 #1
0
def cls(obj, context=None, continuous=None, space_formatter=lambda s: sub(r'\s', "_", s), indent=None):
    """Render an annotation/factor table (presumably a StreamedAnnotationTable) as plaintext CLS

    `obj` must expose `cls_valid is True`; otherwise a GeneFabFormatException
    is raised. The target is the first entry of `obj.metadata_columns`, joined
    with "." and looked up in `obj._column_key_dispatcher`.

    `continuous` selects the flavour: True forces continuous output (and fails
    if that is impossible), False forces discrete output, None tries continuous
    first and silently falls back to discrete.

    Returns a `(content, "text/plain")` pair where `content` is a generator
    function producing the output; `context` and `indent` are accepted but not
    used here.
    """
    # Guard clause: anything but an explicit `cls_valid is True` is rejected
    if getattr(obj, "cls_valid", None) is not True:
        raise GeneFabFormatException(
            "Exactly one target assay/study metadata field must be present",
            target_columns=getattr(obj, "metadata_columns", []),
            format="cls",
        )
    target_name = ".".join(obj.metadata_columns[0])
    target = obj._column_key_dispatcher[target_name]

    def content(continuous=continuous, space_formatter=space_formatter):
        # Nothing below runs until the returned generator is iterated
        must_be_continuous = continuous is True
        if must_be_continuous or (continuous is None):
            try:
                lines = _list_continuous_cls(obj, target, target_name)
            except ValueError:
                if not must_be_continuous:
                    # Auto mode: fall through to the discrete representation
                    continuous = False
                else:
                    raise GeneFabFormatException(
                        "Cannot represent target annotation as continuous",
                        target=target_name, format="cls",
                    )
        if continuous is False:
            if not space_formatter:
                # A falsy formatter means "leave values untouched"
                space_formatter = as_is
            lines = _iter_discrete_cls(obj, target, space_formatter)
        yield from lines

    return content, "text/plain"
コード例 #2
0
ファイル: data.py プロジェクト: LankyCyril/genefab3
def combine_objects(objects, context, limit=None):
    """Merge retrieved objects into one and apply context-driven post-processing

    Returns None for an empty `objects`, the sole object when there is exactly
    one, or a column-wise (axis=1) concatenation when all of them are
    StreamedDataTableWizard instances; any other mix raises
    NotImplementedError.

    A resulting StreamedDataTableWizard is constrained by `context` and its
    `.get(context=context)` result is returned, unless data columns were
    requested together with the "gct" format (GeneFabFormatException).
    A non-table result is returned untouched, unless the context asks for
    column operations (GeneFabFileException). `limit` is accepted but unused.
    """
    n_objects = len(objects)
    if n_objects == 0:
        return None
    if n_objects == 1:
        combined = objects[0]
    elif all(isinstance(obj, StreamedDataTableWizard) for obj in objects):
        combined = StreamedDataTableWizard.concat(objects, axis=1)
    else:
        raise NotImplementedError("Merging non-table data objects")

    # Non-table results: only a plain pass-through is supported
    if not isinstance(combined, StreamedDataTableWizard):
        if context.data_columns or context.data_comparisons:
            raise GeneFabFileException(
                "Column operations on non-table data objects are not supported",
                columns=context.data_columns,
                comparisons=context.data_comparisons,
            )
        return combined

    # Table results: GCT cannot be produced for a column subset
    if context.data_columns and (context.format == "gct"):
        raise GeneFabFormatException(
            "GCT format is disabled for arbitrarily subset tables",
            columns="|".join(context.data_columns),
        )
    combined.constrain_columns(context=context)
    return combined.get(context=context)
コード例 #3
0
def gct(obj, context=None, indent=None, level_formatter="/".join):
    """Display StreamedDataTable in plaintext GCT format, if supported

    Parameters:
        obj: the table to render; must be a StreamedDataTable with exactly one
            entry in `obj.datatypes` and a truthy `obj.gct_valid`.
        context, indent: accepted for renderer-signature compatibility; unused.
        level_formatter: callable turning one column level (an iterable of
            strings) into a single header cell; defaults to "/".join.

    Returns:
        `(content, "text/plain")`, where `content` is a generator function
        yielding the GCT text piece by piece.

    Raises:
        GeneFabConfigurationException: `obj` is not a StreamedDataTable or has
            no datatypes.
        GeneFabFormatException: `obj` has mixed datatypes, or its single
            datatype is not GCT-valid.
    """
    if (not isinstance(obj, StreamedDataTable)) or (len(obj.datatypes) == 0):
        msg = "No datatype information associated with retrieved data"
        raise GeneFabConfigurationException(msg)
    if len(obj.datatypes) > 1:
        msg = "GCT format does not support mixed datatypes"
        raise GeneFabFormatException(msg, datatypes=obj.datatypes)
    if not obj.gct_valid:
        msg = "GCT format is not valid for given datatype"
        # Read the sole datatype without removing it: `.pop()` would mutate
        # `obj.datatypes` just to build an error message, leaving the object
        # with no datatypes for any later use.
        raise GeneFabFormatException(msg, datatype=next(iter(obj.datatypes)))

    def content():
        obj.na_rep = "" # https://www.genepattern.org/file-formats-guide#GCT
        # Version line followed by the table shape
        yield "#1.2\n{}\t{}\n".format(*obj.shape)
        yield "Name\tDescription"
        for level in obj.columns:
            yield "\t" + level_formatter(level)
        yield "\n"
        _iter_value_lines = _iter_xsv_chunks(obj.values, "", "\t", 0)
        for (index, *_), value_line in zip(obj.index, _iter_value_lines):
            # The first index field fills both the Name and Description columns
            yield f"{index}\t{index}\t{value_line}"

    return content, "text/plain"
コード例 #4
0
 def content(continuous=continuous, space_formatter=space_formatter):
     """Yield CLS lines for the enclosing scope's `obj` / `target`

     `continuous` True forces the continuous representation (raising
     GeneFabFormatException if it cannot be built), False forces the
     discrete one, None tries continuous and falls back to discrete.
     """
     must_be_continuous = continuous is True
     if must_be_continuous or (continuous is None):
         try:
             lines = _list_continuous_cls(obj, target, target_name)
         except ValueError:
             if not must_be_continuous:
                 # Auto mode: fall through to the discrete representation
                 continuous = False
             else:
                 raise GeneFabFormatException(
                     "Cannot represent target annotation as continuous",
                     target=target_name, format="cls",
                 )
     if continuous is False:
         if not space_formatter:
             # A falsy formatter means "leave values untouched"
             space_formatter = as_is
         lines = _iter_discrete_cls(obj, target, space_formatter)
     yield from lines
コード例 #5
0
ファイル: renderer.py プロジェクト: LankyCyril/genefab3
 def _call_and_cache():
     """Call the wrapped method, render its result, and cache the response if allowed

     Uses `method`, `args`, `kwargs`, `context`, `self`, `response_container`
     and `response_cache` from the enclosing scope.
     """
     obj = method(*args, context=context, **kwargs)
     try:
         # With schema == "1" the object's `.schema` is rendered instead
         if context.schema == "1":
             obj = obj.schema
     except AttributeError:
         raise GeneFabFormatException(
             "'schema=1' is not valid for requested data",
             type=type(obj).__name__,
         )
     content, mimetype = self.dispatch_renderer(
         obj,
         context=context,
         default_format=getattr(obj, "default_format", "raw"),
     )
     response_container.update(content, mimetype, obj)
     # Only objects that explicitly declare `cacheable is True` are stored
     is_cacheable = getattr(obj, "cacheable", None) is True
     if is_cacheable and (response_cache is not None):
         response_cache.put(response_container, context)
コード例 #6
0
ファイル: renderer.py プロジェクト: LankyCyril/genefab3
 def dispatch_renderer(self, obj, context, default_format, indent=None):
     """Pick a renderer for `obj` by type and requested format; return its (content, mimetype)

     The first TYPE_RENDERERS entry whose types match `obj` decides the
     outcome: `context.format` selects the renderer, with `default_format`
     used when no format was requested. Raises GeneFabFormatException for a
     format the matched entry does not offer, and
     GeneFabConfigurationException when no entry matches `obj` at all.
     """
     for types, fmt_to_renderer in TYPE_RENDERERS.items():
         if not isinstance(obj, types):
             continue
         requested_format = context.format
         if requested_format is None:
             renderer = fmt_to_renderer[default_format]
         elif requested_format in fmt_to_renderer:
             renderer = fmt_to_renderer[requested_format]
         else:
             raise GeneFabFormatException(
                 "Requested format not valid for requested data",
                 type=type(obj).__name__,
                 format=requested_format,
                 default_format=default_format,
             )
         return renderer(obj, context, indent=indent)
     # The loop only finishes when no registered type matched
     raise GeneFabConfigurationException(
         "Route returned unsupported object", type=type(obj).__name__,
     )
コード例 #7
0
def speed_up_data_schema(get, self, *, context, limit=None, offset=0):
    """If context.schema == '1', replace the underlying query with a quick retrieval of just the values informative for schema

    For any other `context.schema` the call is passed straight through to
    `get(self, context=context, limit=limit, offset=offset)`.

    In schema mode, column subsetting / comparisons are rejected with
    GeneFabFormatException; an unsupported `self` type raises
    GeneFabConfigurationException. Otherwise per-part sub-dataframes are
    collected, merged, and returned as a StreamedDataTableSub.
    """
    if context.schema != "1":
        return get(self, context=context, limit=limit, offset=offset)
    if context.data_columns or context.data_comparisons:
        raise GeneFabFormatException(
            "Data schema does not support column subsetting / comparisons",
            suggestion="Remove comparisons and/or column, row slicing from query",
        )

    from genefab3.db.sql.streamed_tables import (
        SQLiteIndexName,
        StreamedDataTableWizard_Single,
        StreamedDataTableWizard_OuterJoined,
    )
    GeneFabLogger.info(f"apply_hack(speed_up_data_schema) for {self.name}")
    sub_dfs = OrderedDict()
    sub_indices = {}
    sub_columns = []
    index_name = []  # holds at most the most recently seen index column

    def _extend_parts(table):
        for partname, partcols in table._inverse_column_dispatcher.items():
            leading = partcols[0]
            if isinstance(leading, SQLiteIndexName):
                # This part carries the index itself: remember it for later parts
                index_name[:] = [leading]
                wanted = partcols
            else:
                # Prepend the remembered index column (if any) to this part
                wanted = [*index_name, *partcols]
            sub_df = get_sub_df(table, partname, wanted)
            sub_indices[partname] = get_part_index(table, partname)
            sub_dfs[partname] = sub_df
            raw2full = table._columns_raw2full
            sub_columns.extend(raw2full[c] for c in sub_df.columns)

    if isinstance(self, StreamedDataTableWizard_Single):
        tables = [self]
    elif isinstance(self, StreamedDataTableWizard_OuterJoined):
        tables = self.objs
    else:
        msg = "Schema speedup applied to unsupported object type"
        raise GeneFabConfigurationException(msg, type=type(self))
    for table in tables:
        _extend_parts(table)

    sub_merged = merge_subs(self, sub_dfs, sub_indices)
    return StreamedDataTableSub(sub_merged, sub_columns)