コード例 #1
0
ファイル: parser.py プロジェクト: LankyCyril/genefab3
 def _ingest_raw_isa(self, data, status_kwargs):
     """Read ISA-Tab members out of an in-memory ZIP and parse each one.

     Archive members whose base name looks like ``i_<name>.txt``,
     ``s_<name>.txt`` or ``a_<name>.txt`` are handed to
     ``self._read_investigation`` (``i``) or ``self._read_tab`` (``s``/``a``);
     every other member is ignored.

     Args:
         data: bytes of the ZIP archive.
         status_kwargs: passed through to ``self._read_tab``; also forwarded
             (minus "collection") to the exception raised on failure.

     Returns:
         A ``SimpleNamespace`` with ``investigation`` (result of the
         investigation reader), ``studies`` and ``assays`` (dicts keyed by the
         ``<name>`` part of the file name).

     Raises:
         GeneFabISAException: if any of the three attributes ends up falsy.
     """
     parsed = SimpleNamespace(investigation=None, studies={}, assays={})
     # "s" and "a" members are parsed identically; only the destination differs
     tab_targets = {"s": parsed.studies, "a": parsed.assays}
     with ZipFile(BytesIO(data)) as archive:
         for member in archive.namelist():
             basename = path.split(member)[1]
             hit = search(r'^([isa])_(.+)\.txt$', basename)
             if not hit:
                 continue
             prefix, label = hit.groups()
             with archive.open(member) as stream:
                 if prefix == "i":
                     parsed.investigation = self._read_investigation(stream)
                 else:
                     tab_targets[prefix][label] = self._read_tab(
                         stream, status_kwargs,
                     )
     # All three kinds of tabs must have been found and be non-empty
     for tab, content in vars(parsed).items():
         if not content:
             raise GeneFabISAException(
                 "Missing ISA tab", tab=tab,
                 **copy_except(status_kwargs, "collection"),
             )
     return parsed
コード例 #2
0
ファイル: parser.py プロジェクト: LankyCyril/genefab3
 def __init__(self, raw_tabs, status_kwargs):
     """Turn every row of every raw tab into a nested JSON entry.

     Each row must carry a 'Sample Name'. Rows with a non-null sample name
     are converted with ``self._row_to_json`` and appended to this object
     through the parent class's ``append``. Rows with a null sample name are
     skipped after a warning is reported through ``update_status``.

     When ``self._self_identifier`` is "Study", entries are additionally
     indexed in ``self._by_sample_name`` and duplicated sample names are
     rejected. For any other identifier ``self._by_sample_name`` is a
     ``defaultdict`` whose factory is ``self._abort_lookup``.

     Args:
         raw_tabs: mapping of tab name -> table exposing ``iterrows()``.
         status_kwargs: context forwarded to ``update_status``,
             ``self._row_to_json`` and (minus "collection") to exceptions.

     Raises:
         GeneFabISAException: on a missing 'Sample Name', on conflicting
             'Sample Name' values within one row, or on a duplicate sample
             name in a Study tab.
     """
     identifier = self._self_identifier
     is_study = (identifier == "Study")
     if is_study:
         self._by_sample_name = {}
     else:  # lookup in classes like AssayEntries would be ambiguous
         self._by_sample_name = defaultdict(self._abort_lookup)
     for tab_name, table in raw_tabs.items():
         for _, record in table.iterrows():
             if "Sample Name" not in record:
                 raise GeneFabISAException(
                     f"{identifier} entry missing 'Sample Name'",
                     **copy_except(status_kwargs, "collection"),
                 )
             sample_name = record["Sample Name"]
             # A Series here means the label matched more than one value in
             # the row (presumably repeated columns); all must agree
             if isinstance(sample_name, Series):
                 if len(set(sample_name)) > 1:
                     raise GeneFabISAException(
                         f"{identifier} entry has multiple 'Sample Name' values",
                         **copy_except(status_kwargs, "collection"),
                     )
                 sample_name = sample_name.iloc[0]
             if isnull(sample_name):
                 update_status(
                     **status_kwargs,
                     status="warning",
                     warning="Null 'Sample Name'",
                     tab=identifier,
                 )
                 continue
             entry = self._row_to_json(
                 record, tab_name,
                 {**status_kwargs, "sample_name": sample_name},
             )
             super().append(entry)
             if not is_study:
                 continue
             if sample_name in self._by_sample_name:
                 error_kwargs = dict(
                     sample_name=sample_name,
                     **copy_except(status_kwargs, "collection"),
                 )
                 raise GeneFabISAException(
                     "Duplicate 'Sample Name' in Study tab", **error_kwargs,
                 )
             self._by_sample_name[sample_name] = entry
コード例 #3
0
ファイル: types.py プロジェクト: LankyCyril/genefab3
 def _INPLACE_extend_with_dataset_files(self):
     """Set ``self["File"]`` to descriptors of the dataset files that apply.

     A file from ``self.dataset.files`` is selected when its descriptor has
     a truthy "internal" value or its name is among the values yielded by
     ``iterate_terminal_leaf_elements(self)``, and, in either case, its
     optional "condition" callable returns a truthy value for
     ``(self, filename)``. Files without a "condition" are treated as always
     passing.

     Each stored descriptor is a copy of the original one without the
     "condition" key and with an added "filename" key.
     """
     def _always(*_):
         return True

     leaf_values = set(iterate_terminal_leaf_elements(self))
     files = self.dataset.files
     # Collect names into a set first: the output list is produced by
     # iterating over this set, exactly as the selection is defined
     selected = set()
     for filename, filedata in files.items():
         if not (filedata.get("internal") or (filename in leaf_values)):
             continue
         condition = filedata.get("condition", _always)
         if condition(self, filename):
             selected.add(filename)
     annotations = []
     for f in selected:
         annotations.append(
             {**copy_except(files[f], "condition"), "filename": f},
         )
     self["File"] = annotations
コード例 #4
0
ファイル: parser.py プロジェクト: LankyCyril/genefab3
 def _row_to_json(self, row, name, status_kwargs):
     """Build the nested JSON representation of one table row.

     Columns are processed in order. Each column label is split with
     ``self._parse_field`` into (field, subfield, extra) and then:

     * a ``None`` field is reported as a warning via ``update_status`` and
       skipped;
     * "Protocol REF" is remembered and passed along with later fields;
     * a non-qualifier field (per ``self._is_not_qualifier``) is added via
       ``self._INPLACE_add_toplevel_field`` (no subfield) or
       ``self._INPLACE_add_metadatalike`` (with subfield); whatever that
       call returns becomes the target for subsequent qualifiers;
     * any other field qualifies the current target through
       ``self._INPLACE_qualify``.

     Args:
         row: object whose ``items()`` yields (column, value) pairs.
         name: tab name, stored under "Id" and added to the status kwargs
             given to ``self._INPLACE_qualify``.
         status_kwargs: context for status reporting and exceptions.

     Returns:
         dict starting as ``{"Id": {"<identifier> Name": name}}`` and
         populated by the helper methods.

     Raises:
         GeneFabISAException: if a qualifier column appears before any
             field it could qualify.
     """
     entry = {"Id": {f"{self._self_identifier} Name": name}}
     protocol_ref = nan
     qualifiable = None
     for column, value in row.items():
         field, subfield, _extra = self._parse_field(column)
         if field is None:
             update_status(
                 **status_kwargs,
                 status="warning",
                 tab=self._self_identifier,
                 field=repr(column),
                 warning="ISA field is not a string",
             )
             continue
         if field == "Protocol REF":
             protocol_ref = value
             continue
         if not self._is_not_qualifier(field):
             # second-level field: qualifies the most recent main entry
             if qualifiable is None:
                 # NOTE(review): the cell value (not the column label) is
                 # passed as `field` here -- confirm this is intended
                 raise GeneFabISAException(
                     "Qualifier before main field", field=value,
                     **copy_except(status_kwargs, "collection"),
                 )
             self._INPLACE_qualify(
                 qualifiable, field, subfield, value,
                 status_kwargs={**status_kwargs, "name": name},
             )
             continue
         # top-level field
         if subfield:  # e.g. "Characteristics[Age]"
             qualifiable = self._INPLACE_add_metadatalike(
                 entry, field, subfield, value, protocol_ref, status_kwargs,
             )
         else:  # e.g. "Source Name"
             qualifiable = self._INPLACE_add_toplevel_field(
                 entry, field, value, protocol_ref,
             )
     return entry
コード例 #5
0
ファイル: parser.py プロジェクト: LankyCyril/genefab3
 def __init__(self, raw_investigation, status_kwargs):
     """Convert the sections of a raw investigation into JSON-like values.

     ``self._key_dispatcher`` is iterated as 4-tuples
     ``(real_name, isatools_name, target, pattern)``. For every
     ``isatools_name`` present in ``raw_investigation`` the content (or each
     item of it, when it is a list) is converted with ``self._jsonify`` and
     stored under ``real_name`` through the parent class's ``__setitem__``:

     * ``target`` and ``pattern`` both ints: the converted value must have
       length ``pattern``; the element at index ``target`` is stored;
     * ``target`` and ``pattern`` both truthy otherwise: entries are stored
       as a dict keyed by group 1 of ``pattern`` searched in
       ``entry[target]``;
     * anything else: the converted value is stored as is.

     Args:
         raw_investigation: container supporting ``in`` and item access by
             ``isatools_name``.
         status_kwargs: context passed to ``self._jsonify`` and (minus
             "collection") to exceptions.

     Raises:
         GeneFabISAException: if the converted value does not have the
             expected structure or cannot be keyed by name.
     """
     for real_name, isatools_name, target, pattern in self._key_dispatcher:
         if isatools_name not in raw_investigation:
             continue
         content = raw_investigation[isatools_name]
         jsonify_kwargs = dict(
             coerce_comments=True, status_kwargs=status_kwargs,
         )
         if isinstance(content, list):
             converted = [
                 self._jsonify(df, **jsonify_kwargs) for df in content
             ]
         else:
             converted = self._jsonify(content, **jsonify_kwargs)
         # Unwrap a single nested list: [[...]] -> [...]
         if (
             isinstance(converted, list)
             and (len(converted) == 1)
             and isinstance(converted[0], list)
         ):
             converted = converted[0]
         if isinstance(target, int) and isinstance(pattern, int):
             # `pattern` is the required length, `target` the index to keep
             try:
                 if len(converted) != pattern:
                     raise IndexError
                 super().__setitem__(real_name, converted[target])
             except (TypeError, IndexError, KeyError):
                 raise GeneFabISAException(
                     "Unexpected structure of field", field=real_name,
                     **copy_except(status_kwargs, "collection"),
                 )
         elif target and pattern:
             # `pattern` is a regex; its first group names each entry
             try:
                 by_name = {}
                 for entry in converted:
                     key = search(pattern, entry[target]).group(1)
                     by_name[key] = entry
                 super().__setitem__(real_name, by_name)
             except (TypeError, AttributeError, IndexError, KeyError):
                 raise GeneFabISAException(
                     "Could not break up field by name", field=real_name,
                     **copy_except(status_kwargs, "collection"),
                 )
         else:
             super().__setitem__(real_name, converted)
コード例 #6
0
ファイル: parser.py プロジェクト: LankyCyril/genefab3
 def _INPLACE_add_metadatalike(self, json, field, subfield, value,
                               protocol_ref, status_kwargs):
     """Insert ``json[field][subfield] = {"": value}`` and return that dict.

     The ``json[field]`` container is created when absent. When ``field`` is
     "Parameter Value", the new entry also receives ``protocol_ref`` under
     the "Protocol REF" key. The returned dict is the very object stored in
     ``json``, so callers can keep adding qualifiers to it.

     Args:
         json: dict being populated (modified in place).
         field: top-level key, e.g. "Characteristics".
         subfield: second-level key, e.g. "Age".
         value: value stored under the empty-string key of the new entry.
         protocol_ref: value stored under "Protocol REF" for
             "Parameter Value" fields; unused otherwise.
         status_kwargs: context forwarded (minus "collection") to the
             exception raised on failure.

     Raises:
         GeneFabISAException: if ``subfield`` already exists under ``field``.
     """
     bucket = json.setdefault(field, {})
     if subfield in bucket:
         raise GeneFabISAException(
             "Duplicate field[subfield]",
             field=field,
             subfield=subfield,
             **copy_except(status_kwargs, "collection"),
         )
     # make {"Characteristics": {"Age": {"": "36"}}}
     qualifiable = {"": value}
     bucket[subfield] = qualifiable
     if field == "Parameter Value":
         qualifiable["Protocol REF"] = protocol_ref
     return qualifiable