def create_adjusted_atlases(
    linear: Model,
    poly: Model,
    ids: AnalysisIdentifiers,
    atlas_indices: Optional[List[int]] = None,
    free_text: str = "",
) -> List[str]:
    """Generate RT-adjusted atlases for both models across polarities and templates.

    inputs:
        linear: instance of class Model with first order model
        poly: instance of class Model with second order model
        ids: an AnalysisIds object matching the one used in the main notebook
        atlas_indices: list of integers for which adjusted atlases to create
                       0: EMA_Unlab
                       1: QCv3_Unlab
                       2: ISv5_Unlab
                       3: ISv5_13C15N
                       4: IS_LabUnlab2
        free_text: arbitrary string to append to atlas name
    returns a list of the names of atlases
    """
    # pylint: disable=too-many-locals
    assert ids.chromatography in ["HILIC", "C18"]
    # Template selection differs by chromatography when the caller does not choose.
    if atlas_indices is None:
        atlas_indices = [0, 1] if ids.chromatography == "C18" else [0, 4]
    # Cartesian product: every polarity x template index x model gets its own atlas.
    combos = [
        (pol, template_idx, fit)
        for pol in ["positive", "negative"]
        for template_idx in atlas_indices
        for fit in [linear, poly]
    ]
    names: List[str] = []
    for pol, template_idx, fit in tqdm(combos, unit="atlas"):
        template_atlas = get_template_atlas(ids, pol, template_idx)
        atlas_name = get_atlas_name(template_atlas.name, ids, fit, free_text)
        names.append(atlas_name)
        logger.info("Creating atlas %s", atlas_name)
        csv_path = os.path.join(ids.output_dir, f"{atlas_name}.csv")
        adjusted_df = adjust_atlas(template_atlas, fit, ids)
        # Persist the adjusted atlas to disk before registering it in the database.
        write_utils.export_dataframe_die_on_diff(
            adjusted_df, csv_path, "predicted atlas", index=False, float_format="%.6e"
        )
        dp.make_atlas_from_spreadsheet(
            adjusted_df,
            atlas_name,
            filetype="dataframe",
            sheetname="",
            polarity=pol,
            store=True,
            mz_tolerance=10 if ids.chromatography == "C18" else 12,
        )
    return names
def make_atlas_from_df(data: pd.DataFrame, name: str, polarity: str, mz_tolerance: float) -> metob.Atlas:
    """Build an (unstored) atlas from a dataframe and populate its compound fields.

    inputs:
        data: dataframe of atlas rows (one compound identification per row)
        name: name to assign the new atlas
        polarity: ionization polarity, e.g. "positive" or "negative"
        mz_tolerance: m/z tolerance to apply to every compound identification
    returns the atlas with compound fields filled in
    """
    raw_atlas = dp.make_atlas_from_spreadsheet(
        data,
        name,
        filetype="dataframe",
        polarity=polarity,
        store=False,
        mz_tolerance=mz_tolerance,
    )
    return fill_atlas_compound_fields(raw_atlas)
def _clone_source_atlas(self) -> metob.Atlas:
    """Copy the configured source atlas into a new stored atlas named self.ids.atlas.

    Retrieves the source atlas (any owner, hence username "*"), converts it to a
    dataframe, and stores a clone under the new name. The clone inherits the
    mz_tolerance of the first mz reference of the first compound identification
    in the source atlas — NOTE(review): assumes the source atlas is non-empty;
    an empty atlas would raise IndexError here.
    """
    # Fix: corrected typo in log message ("Retriving" -> "Retrieving").
    logger.info("Retrieving source atlas: %s", self.ids.source_atlas)
    source_atlas = get_atlas(cast(AtlasName, self.ids.source_atlas), cast(Username, "*"))
    source_atlas_df = ma_data.make_atlas_df(source_atlas)
    logger.info("Cloning atlas %s", self.ids.source_atlas)
    return dp.make_atlas_from_spreadsheet(
        source_atlas_df,
        self.ids.atlas,
        filetype="dataframe",
        sheetname="",
        polarity=self.ids.polarity,
        store=True,
        mz_tolerance=source_atlas.compound_identifications[0].mz_references[0].mz_tolerance,
    )
def test_make_atlas_from_spreadsheet(mocker, sqlite_with_atlas):
    """make_atlas_from_spreadsheet builds one identification with the expected rt_peak."""
    # Stub the CSV-reading helper so no file named "foo.csv" is actually needed.
    fake_rows = pd.DataFrame(
        {
            "rt_min": [1.1],
            "rt_peak": [1.3],
            "rt_max": [1.5],
            "mz": [234.6578],
        }
    )
    mocker.patch("metatlas.plots.dill2plots._get_dataframe", return_value=fake_rows)
    result = dill2plots.make_atlas_from_spreadsheet(
        "foo.csv", "test_atlas_99", "csv", polarity="positive", mz_tolerance=5
    )
    cids = result.compound_identifications
    assert len(cids) == 1
    assert cids[0].rt_references[0].rt_peak == 1.3
def generate_template_atlas(
    raw_file_name: str,
    confidence_levels: List[str],
    polarity: str,
    name: str,
    mz_tolerance: float = 10,
) -> metob.Atlas:
    """Create an (unstored) template atlas from a tab-separated compound file.

    inputs:
        raw_file_name: path to a tab-separated file of candidate compounds
        confidence_levels: keep only rows whose confidence_category is in this list
        polarity: keep only rows matching this polarity; also passed to the atlas
        name: name to assign the new atlas
        mz_tolerance: m/z tolerance for every compound identification (default 10)
    returns the atlas with compound fields filled from PubChem
    """
    raw = pd.read_csv(raw_file_name, sep="\t")
    # Filter to acceptable confidence levels and the requested polarity,
    # then blank out any pre-existing labels.
    selected = raw[raw["confidence_category"].isin(confidence_levels)]
    selected = selected[selected["polarity"] == polarity].assign(label=None)
    atlas = dp.make_atlas_from_spreadsheet(
        selected,
        name,
        filetype="dataframe",
        polarity=polarity,
        store=False,
        mz_tolerance=mz_tolerance,
    )
    # One PubChem batch query for all compounds, then per-compound fill-in.
    keys = [cid.compound[0].inchi_key for cid in atlas.compound_identifications]
    pubchem_results = query_pubchem(keys)
    for cid in atlas.compound_identifications:
        fill_fields(cid.compound[0], pubchem_results)
        cid.name = cid.compound[0].name
    return atlas