def get_atom_masses(df):
    """Obtain monoisotopic masses for every atom used by the listed adducts.

    Atoms that appear in ``abbrev_to_formula`` are expanded to the mapped
    formula before their mass is computed.

    Parameters
    ----------
    df : DataFrame
        DataFrame of all adducts and charges given; only the ``'adduct'``
        column is read here.

    Returns
    -------
    atom_dict : dict
        Dictionary {atom: count}, populated by ``get_ions``.
    mass_dict : dict
        Dictionary {atom: mass}; always contains the electron under ``'e'``.
    all_atoms : list
        One ``get_ions`` result per adduct (described by the original
        documentation as lists of atom/multiplier/sign).
    """
    atom_dict = {}
    # Pre-store the electron mass because molmass cannot process 'e'.
    mass_dict = {'e': MASS_ELECTRON_DALTON}
    all_atoms = [get_ions(adduct, atom_dict) for adduct in df['adduct']]

    for atom in atom_dict:
        # Charge signs carry no mass, and pre-stored entries (the electron)
        # must never be handed to Formula at all.
        if atom == '+' or atom == '-' or atom in mass_dict:
            continue
        if atom in abbrev_to_formula:
            formula_text = abbrev_to_formula[atom]
        else:
            formula_text = atom
        # Use the monoisotopic mass.
        mass_dict[atom] = Formula(formula_text).isotope.mass

    return atom_dict, mass_dict, all_atoms
def get_iso_intense(mzml_file_obj, target_rt_range, formula, adduct_type,
                    mz_tol=(5, 'ppm'), scan_delta=2, max_iso_n=5):
    """Find the most intense signal for each isotope peak of an adduct ion.

    The first (lowest m/z) isotope is searched in every scan inside the
    retention-time window. Later isotopes are only searched in the scans
    within ``scan_delta`` positions of the scan where the first isotope was
    most intense.

    Parameters
    ----------
    mzml_file_obj : object
        Object exposing ``scans``; each scan needs ``rt_in_seconds`` and
        ``scan_no`` and must be accepted by ``get_max_mz``.
    target_rt_range : sequence
        ``(min_rt, max_rt)`` in seconds, both inclusive.
    formula : str
        Chemical formula passed to ``Formula``.
    adduct_type : str
        Adduct passed to ``AdductTransformer.mass2ion``. ``'[M-H+FA]-'`` is
        rewritten to ``'[M-H+CH2O2]-'`` first.
    mz_tol : tuple
        ``(value, unit)``. With unit ``'ppm'`` the tolerance is relative to
        each target m/z; any other unit is treated as an absolute value.
    scan_delta : int
        Number of scans on either side of the apex scan to search for the
        higher isotopes.
    max_iso_n : int
        Maximum number of isotopes reported after the first one.

    Returns
    -------
    list of tuple
        ``(isotope_index, target_mz, intensity, observed_mz, rt, scan_no)``
        per isotope. When nothing is found the intensity is 0, the observed
        m/z is -1 and rt / scan number are ``None``.
    """
    rt_min, rt_max = target_rt_range[0], target_rt_range[1]
    relevant_scans = [scan for scan in mzml_file_obj.scans
                      if rt_min <= scan.rt_in_seconds <= rt_max]

    spectrum = Formula(formula).spectrum()
    if adduct_type == '[M-H+FA]-':
        adduct_type = '[M-H+CH2O2]-'
    # Build the transformer once instead of once per isotope peak.
    transformer = AdductTransformer()
    target_mz = sorted(transformer.mass2ion(peak[0], adduct_type)
                       for peak in spectrum.values())

    def _abs_tolerance(mz):
        # A ppm tolerance is relative, so it is evaluated per target m/z.
        if mz_tol[1] == 'ppm':
            return mz * mz_tol[0] / 1e6
        return mz_tol[0]

    def _best_peak(scans, mz):
        # Return (intensity, observed_mz, rt, scan_no, position) of the
        # strongest hit; ties go to the later scan.
        tol = _abs_tolerance(mz)
        best = (0, -1, None, None, None)
        for position, scan in enumerate(scans):
            intensity, exact_mz = get_max_mz(scan, mz - tol, mz + tol)
            if intensity >= best[0]:
                best = (intensity, exact_mz, scan.rt_in_seconds,
                        scan.scan_no, position)
        return best

    first_mz = target_mz[0]
    max_i, max_mz, max_rt, max_scan_no, apex_idx = _best_peak(relevant_scans,
                                                              first_mz)
    isos = [(0, first_mz, max_i, max_mz, max_rt, max_scan_no)]

    if apex_idx is None:
        # No scan produced a usable intensity (e.g. empty RT window). There
        # is no apex to centre on, so higher isotopes report "not found"
        # instead of crashing on None arithmetic.
        window = []
    else:
        start = max(apex_idx - scan_delta, 0)
        stop = min(apex_idx + scan_delta + 1, len(relevant_scans))
        window = relevant_scans[start:stop]

    for pos, mz in enumerate(target_mz[1:], start=1):
        if pos > max_iso_n:
            break
        max_i, max_mz, max_rt, max_scan_no, _ = _best_peak(window, mz)
        isos.append((pos, mz, max_i, max_mz, max_rt, max_scan_no))

    return isos
def weight_ratio(scale: Series, compositions: List[str]) -> Series:
    """Calculate the weight ratios of phases from scale factors and compositions.

    Each scale factor is divided by the square of the value returned by
    ``calc_fs_from_comps`` for that phase and normalised to a molar fraction.
    The molar fractions are then weighted by the molar masses and normalised
    again.

    Parameters
    ----------
    scale : Series
        A series of scale factors with the name of the phases as index.
    compositions : Iterable
        An iterable of the compositions, one per phase, in the same order as
        ``scale``.

    Returns
    -------
    fraction : Series
        A series of weight ratios with the names of the phases as index.

    Examples
    --------
    >>> scale = pd.Series([0.3, 0.4], index=['scale_a', 'scale_b'])
    >>> compositions = [{'atom_A': 1}, {'atom_B': 1}]
    >>> weight_ratio(scale, compositions)
    """
    phases = [Formula(entry) for entry in compositions]
    phase_index = scale.index

    f_values = Series(calc_fs_from_comps(*phases), index=phase_index)
    molar_masses = Series(calc_molar_masses(*phases), index=phase_index)

    scaled = scale / f_values.pow(2)
    molar_fraction = scaled / scaled.sum()

    weighted = molar_fraction * molar_masses
    return weighted / weighted.sum()
def _get_mass(formula):
    """Return the isotopic mass of ``formula`` as a float.

    The formula is first round-tripped through ``formula_split`` and
    ``formula_to_string`` so that inputs too complex for the ``Formula``
    class are rewritten before parsing.
    """
    parts = formula_split(formula)
    normalised = formula_to_string(parts)
    isotope = Formula(normalised).isotope
    return float(isotope.mass)
def euler_pred_grid(grid, gas, model_file=""):
    """Run a saved Keras model on a copy of ``grid`` and store its output.

    For both the cell arrays and the point arrays of the copy, the function
    writes ``rho``, ``Temp``, ``dt`` and one ``<species>_c`` array per
    species, feeds ``[<species>_c..., Temp, dt]`` to the model, and stores
    each predicted species column multiplied by the species' formula mass
    under ``RR.<species>``.

    Parameters
    ----------
    grid : object
        Mesh exposing ``copy()``, ``cell_arrays``, ``point_arrays``,
        ``n_cells`` and ``n_points``. The arrays must contain ``p``,
        ``thermo:psi``, ``T`` and one entry per species name.
    gas : object
        Object whose ``species_names`` is a list of formula strings.
    model_file : str
        Path handed to ``tf.keras.models.load_model``.

    Returns
    -------
    object
        The modified copy of ``grid``; the input grid is not written to.
    """
    input_species = gas.species_names
    dt = 1e-8

    # Loop-invariant work: the masses and the model do not depend on which
    # array set is processed, so compute/load them once.
    molar_mass = {sp: Formula(sp).mass for sp in input_species}
    model = tf.keras.models.load_model(model_file)

    grid_out = grid.copy()
    Gca = namedtuple("Gca", ["arr", "n"])
    array_sets = [
        Gca(grid_out.cell_arrays, grid.n_cells),
        Gca(grid_out.point_arrays, grid.n_points),
    ]

    for gca in array_sets:
        print(gca.n)
        gca.arr["rho"] = gca.arr["p"] * gca.arr["thermo:psi"]
        gca.arr["Temp"] = gca.arr["T"]
        gca.arr["dt"] = np.ones([gca.n]) * dt

        # Start from an empty float block so the stacked result keeps the
        # same dtype promotion as incremental stacking onto it would give.
        columns = [np.empty((gca.n, 0), float)]
        for sp in input_species:
            gca.arr[sp + "_c"] = gca.arr[sp] * gca.arr["rho"] / molar_mass[sp]
            columns.append(gca.arr[sp + "_c"].reshape(-1, 1))
        columns.append(gca.arr["Temp"].reshape(-1, 1))
        columns.append(gca.arr["dt"].reshape(-1, 1))
        input_arr = np.hstack(columns)

        pred = model.predict(input_arr, batch_size=1024 * 8)
        df_dnn = pd.DataFrame(pred, columns=input_species + ["Temp"])
        for sp in input_species:
            gca.arr["RR." + sp] = np.array(df_dnn[sp].values) * molar_mass[sp]

    return grid_out
def calculate_mass(formula, isotope=True):
    '''
    Accept a formula and calculate its mass.

    Parameters
    ----------
    formula: string or dict
        Molecular formula of compound; it is normalised with
        ``formula_to_string`` before parsing.
    isotope: boolean
        If True, calculates the isotopic mass. If False, calculates the
        average mass. Default is True.

    Returns
    -------
    float
        The requested mass.
    '''
    parsed = Formula(formula_to_string(formula))
    mass_source = parsed.isotope if isotope else parsed
    return float(mass_source.mass)
def create_targets_from_toxid(toxid_file_name, file_rt_units='minutes',
                              mz_delta=10, rt_delta=60.,
                              polarity_filter=('+',),
                              adducts_to_use=('[M+H]+', '[M+K]+', '[M+Na]+')):
    """Build one ``Target`` per compound and adduct from a ToxID CSV export.

    Rows are read after the header row whose first cell is ``'Index'``.
    Empty rows, rows whose first cell is ``'-'`` (undetected compound) and
    rows whose polarity is not in ``polarity_filter`` are skipped.

    Parameters
    ----------
    toxid_file_name : str or path-like
        CSV file to read.
    file_rt_units : str
        ``'minutes'`` converts the expected retention time to seconds; any
        other value leaves it unchanged.
    mz_delta : float
        Half-width of the m/z window, in ppm of the theoretical m/z.
    rt_delta : float
        Half-width of the retention-time window, in the same unit as the
        (converted) expected retention time.
    polarity_filter : iterable of str
        Polarities to keep.
    adducts_to_use : iterable of str
        Adducts passed to ``AdductTransformer.mass2ion``.

    Returns
    -------
    list
        The created ``Target`` objects, in file order.

    Raises
    ------
    ValueError
        If the file has no header row starting with ``'Index'``.
    """
    target_list = []
    # newline='' is what the csv module expects for files it reads.
    with open(str(toxid_file_name), 'r', newline='') as f:
        reader = csv.reader(f)

        # Skip the preamble. Report a missing header explicitly rather than
        # letting next() leak a StopIteration out of this function.
        for line in reader:
            if line and line[0] == 'Index':
                break
        else:
            raise ValueError(
                "No header row starting with 'Index' found in {}".format(
                    toxid_file_name))

        # We are now in the data.
        at = AdductTransformer()
        for line in reader:
            if not line or line[0] == '-':
                # empty line, or undetected compound
                continue
            name = line[1]
            formula = line[2]
            polarity = line[3]
            if polarity not in polarity_filter:
                continue
            expected_rt = float(line[5])
            if file_rt_units == 'minutes':
                expected_rt *= 60.
            # Sanity check kept from the original: trailing cells must be
            # blank or '-'.
            for val in line[8:]:
                assert val == '-' or val == ''
            metadata = {
                'name': name,
                'formula': formula,
                'polarity': polarity,
                'expected_rt': expected_rt
            }
            min_rt = expected_rt - rt_delta
            max_rt = expected_rt + rt_delta
            # The neutral monoisotopic mass is the same for every adduct.
            mono_mass = Formula(formula).isotope.mass
            for adduct in adducts_to_use:
                theoretical_mz = at.mass2ion(mono_mass, adduct)
                min_mz = theoretical_mz - theoretical_mz * mz_delta / 1e6
                max_mz = theoretical_mz + theoretical_mz * mz_delta / 1e6
                new_target = Target(theoretical_mz, min_mz, max_mz,
                                    min_rt, max_rt,
                                    name=name, metadata=metadata,
                                    adduct=adduct)
                target_list.append(new_target)
    return target_list
def setAdductIndex(self, value):
    """Select the adduct at ``value`` and refresh the ion mass.

    Parameters
    ----------
    value : int
        Index into ``self.adduct_list``. An index at or beyond the end of
        the list is ignored and leaves the current selection untouched.

    Notes
    -----
    The mass is derived from the monoisotopic mass of
    ``self.lipid_formula``. If the formula cannot be parsed
    (``FormulaError``) the index is still updated but the mass is left
    unchanged.
    """
    # The original range check was a no-op followed by an IndexError;
    # reject the index before any state is modified.
    if value >= len(self.adduct_list):
        return
    self.adduct_index = value

    adduct = self.adduct_list[value]
    mass_multi = adduct[2]
    mass_add = adduct[1]
    try:
        # Monoisotopic mass, matching what validate_lipid_formula shows for
        # the same field (plain `.mass` would be the average mass).
        mono_iso_mass = Formula(self.lipid_formula).isotope.mass
        self.setMass(mass2ion(mono_iso_mass, mass_multi, mass_add))
    except FormulaError:
        # Best effort: an unparsable formula simply leaves the mass as is.
        pass
def calc_molar_masses(*comps: Dict[str, int]) -> List[float]:
    """Calculate the molar masses from the compositions.

    Each composition mapping is flattened to a formula string by
    concatenating every key with its value in iteration order, and that
    string is parsed by ``Formula``. One mass is returned per composition,
    in argument order.
    """
    return [
        Formula(
            ''.join(f'{element}{amount}' for element, amount in comp.items())
        ).mass
        for comp in comps
    ]
def check_rxn_mass_sum(rxn, reaction_dict, metabolite_dict):
    """Return the stoichiometry-weighted mass sum of a reaction.

    Parameters
    ----------
    rxn : hashable
        Key of the reaction in ``reaction_dict``.
    reaction_dict : dict
        Maps reaction keys to dicts holding a ``'bigg_string'`` entry, which
        is parsed with ``parseRxnString``.
    metabolite_dict : dict
        Maps metabolite names to dicts holding a ``'formula'`` entry.

    Returns
    -------
    number
        Sum over all metabolites of ``mass * stoichiometric coefficient``.
    """
    mets, stoich, _rev = parseRxnString(reaction_dict[rxn]['bigg_string'])
    mass_sum = 0
    # Pair each metabolite with its own coefficient. Looking the position up
    # with mets.index() returned the first occurrence, so a metabolite
    # listed more than once was given the wrong coefficient.
    for met, coefficient in zip(mets, stoich):
        met_name = findMetNameFromID(met, metabolite_dict)
        met_formula = metabolite_dict[met_name]['formula']
        met_mass = Formula(met_formula).mass
        mass_sum = mass_sum + met_mass * coefficient
    return mass_sum
def read_of(xy, gas, plane="xy"):
    """Collect point data of ``xy`` into a DataFrame.

    ``rho`` is computed as ``p * thermo:psi`` and written back into
    ``xy.point_arrays``. For every species in ``gas.species_names`` three
    columns are produced: ``<sp>_Y`` (the raw field), ``<sp>`` (field times
    ``rho`` divided by the formula mass) and ``<sp>_RR`` (``RR.<sp>``
    divided by the formula mass).

    Parameters
    ----------
    xy : object
        Mesh exposing ``points``, ``point_arrays`` and ``scalar_names``.
    gas : object
        Object exposing ``species_names``.
    plane : str
        ``"xy"`` takes point columns 0 and 1 as x/y, ``"yz"`` takes columns
        1 and 2. Any other value adds no coordinate columns.

    Returns
    -------
    pandas.DataFrame
    """
    points = xy.points
    fields = xy.point_arrays
    fields["rho"] = fields["p"] * fields["thermo:psi"]

    df = pd.DataFrame()
    for sp in gas.species_names:
        molar_mass = Formula(sp).mass
        df[sp + "_Y"] = fields[sp]
        df[sp] = fields[sp] * fields["rho"] / molar_mass
        df[sp + "_RR"] = fields["RR." + sp] / molar_mass

    for column, field_name in (("Hs", "hs"), ("Temp", "T"),
                               ("rho", "rho"), ("p", "p")):
        df[column] = fields[field_name]

    if "f_Bilger" in xy.scalar_names:
        df["f"] = fields["f_Bilger"]
    df["dt"] = 1e-6
    df["thermo:Df"] = fields["thermo:Df"]
    if "Dft" in xy.scalar_names:
        df["Dft"] = fields["Dft"]

    # Pick which point columns act as the in-plane x and y coordinates.
    if plane == "xy":
        x_col, y_col = 0, 1
    elif plane == "yz":
        x_col, y_col = 1, 2
    else:
        return df
    df["x"] = np.around(points[:, x_col], decimals=5)
    df["y"] = np.around(points[:, y_col], decimals=5)
    return df
def __init__(self, adducts, chemical_formulas):
    """Build a sorted list of ``FragmentPeak`` objects for every adduct.

    For each adduct, every formula's ``Formula.mass`` is converted with
    ``t.mass2ion`` and wrapped in a ``FragmentPeak``; the per-adduct lists
    are sorted and stored in ``self.masslibraries``.

    NOTE(review): ``t`` is not defined in this method; it is presumably a
    module-level adduct transformer -- confirm.
    """
    self.adducts = adducts
    self.chemical_formulas = chemical_formulas

    libraries = {adduct: [] for adduct in adducts}
    for adduct in adducts:
        peaks = libraries[adduct]
        for mol in chemical_formulas:
            ion_mass = t.mass2ion(Formula(mol).mass, adduct)
            peaks.append(FragmentPeak(ion_mass, None, None))
    for adduct in adducts:
        libraries[adduct] = sorted(libraries[adduct])

    self.masslibraries = libraries
def load_hmdb_msms_records(folder, accession_to_formula_file,
                           target_mode='positive',
                           transformations=('[M+H]+',), records=None):
    """Load HMDB MS/MS XML files from ``folder`` into spectral records.

    For every XML file with peaks, a matching ionisation mode and a known
    formula, one ``SpectralRecord`` is created per transformation. Its
    precursor m/z is derived from the monoisotopic mass of the formula.

    Parameters
    ----------
    folder : str
        Directory searched (non-recursively) for ``*.xml`` files.
    accession_to_formula_file : str
        File read with ``load_csv``; the result is used as a mapping from
        database id to formula.
    target_mode : str
        ``'positive'`` skips spectra marked negative, ``'negative'`` skips
        spectra marked positive (case-insensitive on the spectrum side).
    transformations : iterable of str
        Adduct transformations passed to ``AdductTransformer.mass2ion``.
    records : dict, optional
        Existing mapping to add to. A fresh dict is created when omitted,
        so separate calls no longer share (and accumulate into) one default.

    Returns
    -------
    dict
        ``records``, keyed by ``"<spectrum_id>:<db_id>:<transformation>"``.
    """
    if records is None:
        records = {}

    accession_to_formula = load_csv(accession_to_formula_file)
    xml_files = glob.glob(os.path.join(folder, '*.xml'))
    at = AdductTransformer()
    n_loaded = 0
    n_total = len(xml_files)

    for xml_file in xml_files:
        spectrum_id, db_id, mode, peaks = parse_msms_file(xml_file)
        if len(peaks) == 0:
            # no peaks, ignore
            continue
        # The mode is sometimes capitalised in the files.
        if target_mode == 'positive' and mode.lower() == 'negative':
            continue
        if target_mode == 'negative' and mode.lower() == 'positive':
            continue
        if db_id not in accession_to_formula:
            print("{} not in accession_to_formula, skipping".format(db_id))
            continue

        try:
            f_mass = Formula(accession_to_formula[db_id]).isotope.mass
            for transformation in transformations:
                precursor_mz = at.mass2ion(f_mass, transformation)
                # make a spectral record with this as the precursor mz
                metadata = {
                    'precursor_mz': precursor_mz,
                    'hmdb_id': db_id,
                    'mode': mode,
                    'adduct_type': transformation,
                    'spectrum_id': spectrum_id
                }
                ion_id = ':'.join([str(spectrum_id), db_id, transformation])
                records[ion_id] = SpectralRecord(precursor_mz, peaks,
                                                 metadata, xml_file, ion_id)
        except Exception:
            # Best effort: report the formula and carry on with the next
            # file, but let KeyboardInterrupt / SystemExit propagate.
            print("Failed on fromula {}".format(accession_to_formula[db_id]))

        n_loaded += 1
        if n_loaded % 100 == 0:
            print("Loaded {} of {}".format(n_loaded, n_total))

    return records
def deal_with_mol_in_unit(df, DATAMODEL_HGC, unit_conversion={}, user_defined_feature_units={}, **kwargs): ''' To deal with units that contain mol or umol This step is done before converting units to standard HGC units ''' # record old unit for mols df['Unit_old_mol'] = df['Unit'] # spilt units and store in a df unit0 = df['Unit'].where(pd.notnull(df['Unit']), None) unit0 = unit0.replace([r''], [None]) unit0_split = _list_to_array( [re.split('/| ', str(unit)) for unit in unit0]) unit0_split = pd.DataFrame(unit0_split, columns=['Col0', 'Col1', 'Col2']) # create a empty column for storing ration_mol ratio_mol = [None] * len(unit0) # get default dictionary unit_default = { **DATAMODEL_HGC['HGC_default_feature_units'], **user_defined_feature_units } # replace mmol by mg and get ratio for conversion for i in range(len(unit0)): if df['Feature'][i] in unit_default.keys( ) and 'mol' in unit0_split.iloc[i, 0]: ratio_mol[i] = Formula( df['Feature'][i] ).mass # has to use a loop as Formula does not support vector operation with nan unit0_split.iloc[i, 2] = df['Feature'][i] unit0_split.iloc[i, 0] = unit0_split.iloc[i, 0].replace('mol', 'g') # put units back from split unit1_0 = unit0_split.Col0 unit1_1 = pd.Series(['/' + str(str_unit) for str_unit in unit0_split.Col1]).replace([r'/None'], '') unit1_2 = ' ' + unit0_split.Col2.fillna('') unit1 = unit1_0 + unit1_1 + unit1_2 unit1 = unit1.replace([r'None/ '], [None]) # get a ratio df['ratio_mol'] = ratio_mol # write new unit for mols df['Unit'] = unit1 # write log logger.info('"mol" has been mapped to "g"') return df
def validate_lipid_formula(self, text):
    """Validate the formula typed by the user and refresh the widgets.

    On success the formula is stored on ``self.lipid``, the input gets a
    green border, the label shows the normalised formula and the mass field
    is set to the ion mass for the currently selected adduct. On a
    ``FormulaError`` the input gets a red border and the label is cleared.
    ``self.on_change()`` is called in both cases.

    Parameters
    ----------
    text : str
        Formula text to validate.
    """
    try:
        # Construct the Formula inside the try block so that a parse error
        # raised at construction time is handled like any other parse error
        # and on_change() still runs.
        f = Formula(text)
        self.lipid.setLipidFormula(text)
        self.lipid_formula.setStyleSheet("border: 1px solid green")
        self.lipid_formula_label.setText(f.formula)
        adduct = self.lipid.adduct_list[self.lipid.adduct_index]
        self.mass.setValue(mass2ion(f.isotope.mass, adduct[2], adduct[1]))
        self.mass.update()
    except FormulaError:
        self.lipid_formula.setStyleSheet("border: 1px solid red")
        self.lipid_formula_label.setText("")
    finally:
        self.on_change()
def generate_url(formula_entry, smiles_entry):
    """Build a Dash table of adduct m/z values for a formula or a SMILES.

    If ``formula_entry`` is non-empty its monoisotopic mass is computed
    locally. Otherwise the exact mass is requested from the GNPS structure
    service using ``smiles_entry``.

    Parameters
    ----------
    formula_entry : str or None
        Molecular formula; takes precedence when non-empty.
    smiles_entry : str
        SMILES string, used only when no formula is given.

    Returns
    -------
    list
        A single-element list holding the ``dash_table.DataTable``.

    Raises
    ------
    requests.RequestException
        If the mass service cannot be reached in time or answers with an
        HTTP error status.
    ValueError
        If the service response is not a number.
    """
    # Never wait forever on the remote service.
    request_timeout_seconds = 30

    if formula_entry:
        exact_mass = Formula(formula_entry).isotope.mass
    else:
        # Getting exact mass
        url = "https://gnps-structure.ucsd.edu/structuremass?smiles={}".format(
            urllib.parse.quote(smiles_entry))
        r = requests.get(url, timeout=request_timeout_seconds)
        # Fail with the HTTP error itself instead of a confusing float()
        # error on an error page.
        r.raise_for_status()
        exact_mass = float(r.text)

    adducts_to_report = [
        "M", "M+H", "M+Na", "M+K", "M+NH4", "M-H", "M+Br", "M+Cl"
    ]

    output_list = []
    for adduct in adducts_to_report:
        adduct_mass, charge = get_adduct_mass(exact_mass, adduct)
        output_list.append({
            "adduct": adduct,
            "charge": charge,
            "mz": adduct_mass,
        })

    table_fig = dash_table.DataTable(
        columns=[{
            "name": i,
            "id": i,
            "deletable": True,
            "selectable": True
        } for i in ["adduct", "charge", "mz"]],
        data=output_list,
        editable=True,
        filter_action="native",
        sort_action="native",
        sort_mode="multi",
        column_selectable="single",
        selected_columns=[],
        selected_rows=[],
        page_action="native",
        page_current=0,
        page_size=10,
    )
    return [table_fig]
def get_mz_mean_from_formulas(formulas, ms_mode=None, verbose=False):
    """Return the monoisotopic m/z for each formula, rounded to 4 decimals.

    Parameters
    ----------
    formulas : iterable of str
        Chemical formulas.
    ms_mode : str, optional
        ``'positive'`` adds ``M_PROTON`` to each mass, ``'negative'``
        subtracts it; any other value returns the neutral mass.
    verbose : bool
        Print the input, each formula and the result.

    Returns
    -------
    list
        One entry per formula, in order; ``None`` where the mass could not
        be computed.
    """
    if verbose:
        print(formulas)
    masses = []
    for formula in formulas:
        if verbose:
            print(f'Mass from formula: "{formula}"')
        try:
            mass = Formula(formula).isotope.mass
        except Exception:
            # Best effort per formula, but do not swallow KeyboardInterrupt
            # or SystemExit the way a bare `except:` would.
            masses.append(None)
            continue
        if ms_mode == 'positive':
            mass += M_PROTON
        elif ms_mode == 'negative':
            mass -= M_PROTON
        masses.append(np.round(mass, 4))
    if verbose:
        print(masses)
    return masses
def aCSF():
    """Render the aCSF recipe page.

    For every compound in the fixed recipe the amount
    ``volume * formula mass * value * 0.001`` is computed, rounded to three
    significant figures and passed to the ``aCSF.html`` template as a
    mapping from ``Formula`` object to the formatted string.

    NOTE(review): the recipe values are presumably concentrations in mM and
    the volume in litres, which would make the amounts grams -- confirm.
    """
    recipe = {
        'NaCl': 124,
        'KCl': 5,
        'C8H18N2O4S': 20,
        'C6H12O6': 10,
        'MgCl2': 1.3,
        'CaCl2.2H2O': 1.5,
        'NaHCO3': 26,
        'NaH2PO4.2H2O': 1.25
    }
    volume = 0.4

    result = {}
    for compound, amount in recipe.items():
        form = Formula(compound)
        quantity = volume * form.mass * amount * 0.001
        # Round to three significant figures, then show as a plain float.
        rounded = float('%.3g' % quantity)
        result[form] = '%s' % rounded

    return render_template('aCSF.html', result=result, volume=volume)
def compute_thermal_velocity(molecule_name, temp):
    '''
    Compute the thermal velocity given a molecule name and temperature.

    The velocity is ``sqrt(k_B * temp / mu)``, where ``mu`` is the isotopic
    mass of the molecule multiplied by ``u.value``.

    Parameters
    ----------
    molecule_name: string
        Molecule name (e.g., 'CO', 'H2O')
    temp : float
        Temperature at which to compute thermal velocity

    Returns
    -------
    v_thermal : float
        Thermal velocity (m/s)
    '''
    isotopic_mass = Formula(molecule_name).isotope.mass
    mu = isotopic_mass * u.value  # kg
    v_thermal = np.sqrt(k_B.value * temp / mu)  # m/s
    return v_thermal
def get_monoisotopic_mass_formula(formula):
    """Return the monoisotopic mass of ``formula`` (``Formula.isotope.mass``)."""
    return Formula(formula).isotope.mass
def ex(formula):
    """Return the isotopic mass for a formula string.

    Anything that is not exactly a ``str`` is printed and returned
    unchanged.
    """
    if type(formula) is not str:
        print(formula)
        return formula
    isotope = Formula(formula).isotope
    return isotope.mass
from molmass import Formula

# Interactive limiting-reagent helper. One line of space-separated input is
# expected: four formulas (reactants 1 and 2, products 1 and 2), the product
# the yield refers to, the two reactant masses, then four coefficients.
RawData = input(
    "A1+B2 => C3+D4, yield of which, mass of 1 2, A B C D, Coefficients: ")
# NOTE(review): only part of this loop is visible here; RawData is not
# re-read in the visible portion -- confirm the remainder prompts again.
while (RawData != "quit"):
    SplitData = RawData.split(" ")
    # Fields 0-3: reactant and product formulas.
    R1 = Formula(SplitData[0])
    R2 = Formula(SplitData[1])
    P1 = Formula(SplitData[2])
    P2 = Formula(SplitData[3])
    # Field 4: formula of the product whose yield is requested.
    PYield = Formula(SplitData[4])
    # Fields 5-6: masses of reactant 1 and reactant 2.
    M = [float(SplitData[5]), float(SplitData[6])]
    # Fields 7-10: coefficients in the order R1, R2, P1, P2.
    C = [
        int(SplitData[7]),
        int(SplitData[8]),
        int(SplitData[9]),
        int(SplitData[10])
    ]
    # Formula masses in the same order as the coefficients.
    Mr = [R1.mass, R2.mass, P1.mass, P2.mass]
    # Amount of each reactant: mass divided by formula mass.
    Mol = [M[0] / Mr[0], M[1] / Mr[1]]
    # Amount per unit coefficient; the smaller value marks the limiting
    # reactant.
    UnitMol0 = Mol[0] / C[0]
    UnitMol1 = Mol[1] / C[1]
    Coefficient = 0
    if (UnitMol0 > UnitMol1):
        print(R2.formula, " Limiting")
        LimitingM = M[1]
        Limiting = R2
        LimitingC = C[1]
    elif (UnitMol0 < UnitMol1):
        print(R1.formula, " Limiting")
        LimitingM = M[0]
        Limiting = R1
# Worked examples of unit-aware arithmetic.
# NOTE(review): Q, u and cnst are defined outside this snippet; presumably a
# quantity constructor, its unit registry and a module of physical
# constants -- confirm against the imports.

# Speed from distance and time. The first value of `speed` is overwritten
# two lines below.
speed = Q(60, 'm/seconds')
distance = Q(8, 'm')
time = Q(15, 'seconds')
speed = distance / time
#print(speed.to('m/seconds'))
speed = speed.to(u.km / u.hour)

# Length of a light-year: speed of light times one year, printed in metres.
c = Q(cnst.c, 'm/s')
lightyear = c * Q(1, 'year')
print(lightyear.to('m'))

# What is the density of NaCl if the cell edge is a = 0.563 nm?
# Mass of the formula units in one cell divided by the cell volume a**3.
formulas_per_cell = 4
NaCl = Formula('NaCl')
mass = formulas_per_cell * Q(NaCl.mass, 'g/mole') / Q(cnst.Avogadro, '1/mole')
volume = Q(0.563, 'nm')**3
density = mass / volume
print(density.to('grams/cm^3'))

# Custom units: define the smoot (alias "sm") before converting to it.
u.define('smoot=1.702m=sm')
print(distance.to('sm'))

# Electrical conductivity: sigma = n * mobility * charge
n = Q(1e17, 'cm^-3')
mu = Q(1.5e4, 'cm^2/V/s')
e = Q(cnst.e, 'coulombs')
sigma = n * mu * e
print(sigma.to('ohms^-1*m^-1'))
def _get_atommass(self):
    """Return the mass of ``self.atom`` converted to ``u.g / u.ct``.

    The ``Formula.mass`` of the atom is taken in ``u.u / u.ct`` and then
    converted.
    """
    from molmass import Formula

    mass_per_count = Formula(self.atom).mass * u.u / u.ct
    return mass_per_count.to(u.g / u.ct)
def read_content(self):
    '''
    Reads the dict content and returns the needed values for the opacity models.

    Values that are already ``u.Quantity`` objects are converted to the
    target unit; any other value is multiplied (or, for wavenumbers,
    divided) in place by the target unit.

    Returns
    -------
    str
        molecule name
    astropy.units.Quantity:
        molecular mass
    astropy.units.Quantity:
        data pressure grid in si units
    astropy.units.Quantity:
        data temperature grid in si units
    astropy.units.Quantity:
        data wavenumber grid
    astropy.units.Quantity:
        data opacities grid in si units
    bool
        always ``False``; its meaning is not visible from this method

    Raises
    ------
    KeyError
        If the molecule name, a pressure, temperature or wavenumber grid,
        or the opacities are missing from ``self.input_data``.
    '''
    # NOTE(review): the three grids are bound as module-level globals, so
    # each call overwrites them for the whole module -- confirm this is
    # intended rather than plain locals.
    global pressure_grid, temperature_grid, wavenumber_grid
    try:
        mol = self.input_data['mol']
    except KeyError:
        raise KeyError('molecule name not found')
    try:
        mol_mass = self.input_data['mol_mass']
    except KeyError:
        # No mass supplied: derive it from the molecule's formula.
        from molmass import Formula
        f = Formula(mol)
        mol_mass = (f.mass * u.u / u.ct).to(u.g / u.ct)

    # Pressure grid: several key spellings are accepted; if more than one is
    # present, the last one in the list below wins.
    if any(key in self.input_data.keys()
           for key in ['p', 'P', 'pressure', 'pressure_grid']):
        for press_key in ['p', 'P', 'pressure', 'pressure_grid']:
            try:
                pressure_grid = self.input_data[press_key]
            except KeyError:
                pass
        if isinstance(pressure_grid, u.Quantity):
            pressure_grid = pressure_grid.to(u.Pa)
        else:
            # Unitless input is taken to be in Pa already.
            pressure_grid *= u.Pa
    else:
        raise KeyError('Pressure grid not found')

    # Temperature grid: same lookup scheme, target unit K.
    if any(key in self.input_data.keys()
           for key in ['t', 'T', 'temperature', 'temperature_grid']):
        for temp_key in ['t', 'T', 'temperature', 'temperature_grid']:
            try:
                temperature_grid = self.input_data[temp_key]
            except KeyError:
                pass
        if isinstance(temperature_grid, u.Quantity):
            temperature_grid = temperature_grid.to(u.K)
        else:
            # Unitless input is taken to be in K already.
            temperature_grid *= u.K
    else:
        raise KeyError('Temperature grid not found')

    # Wavenumber grid: same lookup scheme, target unit 1/cm.
    if any(key in self.input_data.keys()
           for key in ['wn', 'wavenumber', 'wavenumbers', 'wavenumbers_grid', 'wavenumber_grid']):
        for wn_key in ['wn', 'wavenumber', 'wavenumbers', 'wavenumbers_grid', 'wavenumber_grid']:
            try:
                wavenumber_grid = self.input_data[wn_key]
            except KeyError:
                pass
        if isinstance(wavenumber_grid, u.Quantity):
            wavenumber_grid = wavenumber_grid.to(1 / u.cm)
        else:
            # Unitless input is taken to be in 1/cm already.
            wavenumber_grid /= u.cm
    else:
        raise KeyError('Wavenumber grid not found')

    # Opacities: a single key, target unit m^2/kg.
    try:
        opacities = self.input_data['opacities']
        if isinstance(opacities, u.Quantity):
            opacities = opacities.to(u.m ** 2 / u.kg)
        else:
            # Unitless input is taken to be in m^2/kg already.
            opacities *= u.m ** 2 / u.kg
    except KeyError:
        raise KeyError('opacities not found')

    return mol, mol_mass, pressure_grid, temperature_grid, wavenumber_grid, opacities, False
def test_mol_mass(self):
    """Both loaders must report the formula mass of H2O in ``u.g / u.ct``."""
    from molmass import Formula

    expected = (Formula("H2O").mass * u.u / u.ct).to(u.g / u.ct)
    # Look the models up one at a time so they are checked in order.
    for model_name in ("file_model", "dict_model"):
        self.assertEqual(getattr(self, model_name).mol_mass, expected)
def setLipidFormula(self, text):
    """Store the formula string that ``Formula`` derives from ``text``."""
    parsed = Formula(text)
    self.lipid_formula = parsed.formula
def get_isotope_intensities(self, lipid_details, filepair, scan_delta=2):
    """Find the most intense signal for each isotope peak of a lipid ion.

    The first (lowest mass) isotope is searched in every scan inside the
    retention-time window. Later isotopes are only searched in the scans
    within ``scan_delta`` positions of the scan where the first isotope was
    most intense.

    Parameters
    ----------
    lipid_details : dict
        Reads ``'retentionTime'``, ``'retentionTimeTolerance'``,
        ``'formula'``, ``'adduct'`` (elements 1 and 2 are passed to
        ``mass2ion``), ``'massTolerance'``, ``'massToleranceUnits'`` and
        ``'isotopeDepth'``.
    filepair : list
        ``filepair[0]`` is replaced in place by ``MZMLFile(filepair[0])``.
        NOTE(review): this mutates the caller's list -- presumably intended
        so the caller can reuse the loaded file; confirm.
    scan_delta : int
        Number of scans on either side of the apex scan to search for the
        higher isotopes.

    Returns
    -------
    list of tuple
        ``(isotope_number, target_mass, intensity, observed_mass,
        retention_time, scan_no)`` per isotope.
    """
    filepair[0] = MZMLFile(filepair[0])

    rt = lipid_details['retentionTime']
    rt_tol = lipid_details['retentionTimeTolerance']
    scans_in_range = [scan for scan in filepair[0].scans
                      if rt - rt_tol <= scan.rt_in_seconds <= rt + rt_tol]

    spectrum = Formula(lipid_details['formula']).spectrum()
    adduct = lipid_details['adduct']
    target_mass = sorted(mass2ion(peak[0], adduct[2], adduct[1])
                         for peak in spectrum.values())

    def _tolerance(mass):
        # A ppm tolerance is relative to the mass being searched for.
        if lipid_details['massToleranceUnits'] == 'ppm':
            return self.ppm_to_da(mass, lipid_details['massTolerance'])
        return lipid_details['massTolerance']

    # First isotope: scan the whole retention-time window.
    current_mass = target_mass[0]
    absolute_mass_tolerance = _tolerance(current_mass)
    max_intensity = 0
    max_intensity_index = 0
    max_mass = 0
    max_retention_time = 0
    max_scan_no = 0
    # enumerate() gives the position directly; list.index() inside the loop
    # was quadratic and depended on scan equality.
    for position, scan in enumerate(scans_in_range):
        intensity, exact_mass = self.get_max_mass(
            scan,
            current_mass - absolute_mass_tolerance,
            current_mass + absolute_mass_tolerance)
        if intensity >= max_intensity:
            max_intensity = intensity
            max_intensity_index = position
            max_mass = exact_mass
            max_retention_time = scan.rt_in_seconds
            max_scan_no = scan.scan_no
    isotopes = [(0, current_mass, max_intensity, max_mass,
                 max_retention_time, max_scan_no)]

    # Higher isotopes: only look at scans around the apex of the first one.
    window_start = max(max_intensity_index - scan_delta, 0)
    window_stop = min(max_intensity_index + scan_delta + 1,
                      len(scans_in_range))
    apex_window = scans_in_range[window_start:window_stop]

    for isotope_num, current_mass in enumerate(target_mass[1:], start=1):
        if isotope_num > lipid_details['isotopeDepth']:
            break
        max_intensity = 0
        max_mass = -1
        max_retention_time = None
        max_scan_no = None
        absolute_mass_tolerance = _tolerance(current_mass)
        for scan in apex_window:
            intensity, exact_mass = self.get_max_mass(
                scan,
                current_mass - absolute_mass_tolerance,
                current_mass + absolute_mass_tolerance)
            if intensity >= max_intensity:
                max_intensity = intensity
                max_mass = exact_mass
                max_retention_time = scan.rt_in_seconds
                max_scan_no = scan.scan_no
        isotopes.append((isotope_num, current_mass, max_intensity, max_mass,
                         max_retention_time, max_scan_no))

    return isotopes
def _get_mol_mass(self, mol):
    """Compute, store and return the mass of ``mol`` in ``u.g / u.ct``.

    The ``Formula.mass`` of ``mol`` is taken in ``u.u / u.ct``, converted,
    and cached on ``self.mol_mass``.
    """
    from molmass import Formula

    mass_per_count = Formula(mol).mass * u.u / u.ct
    self.mol_mass = mass_per_count.to(u.g / u.ct)
    return self.mol_mass
def molarM(data):
    """Return ``Formula(data).mass`` for the given formula."""
    parsed = Formula(data)
    return parsed.mass