def match_decon2ls_isos(spectrum_db, hypothesis, adducts=None, ms1_match_tolerance=1e-5, output_path=None):
    """Match each composition in `hypothesis` against precursor spectra.

    For every composition and every adduct, `spectrum_db` is queried through
    `ppm_match_tolerance_search` for spectra near
    `composition.intact_mass + adduct.mass`. Each hit becomes a
    `PrecursorMatch`; the matches are grouped, combined, and stored on the
    composition together with the scans and charge states that were hit.
    The annotated composition is then written to a new `RecordDatabase`.

    Parameters
    ----------
    spectrum_db
        Object exposing `ppm_match_tolerance_search(mass, tolerance)`.
    hypothesis
        Iterable of compositions; must also expose `record_type`.
    adducts
        Iterable of adducts (each with `mass` and `name`). Defaults to
        `[NoShift]` when `None`.
    ms1_match_tolerance
        Tolerance forwarded unchanged to `ppm_match_tolerance_search`.
        NOTE(review): presumably a fractional ppm value (1e-5 == 10 ppm) --
        confirm against the spectrum database implementation.
    output_path
        Location handed to `RecordDatabase` for the result store.

    Returns
    -------
    RecordDatabase
        The result database, after `commit()` and `apply_indices()`.
    """
    adduct_list = [NoShift] if adducts is None else adducts

    results = RecordDatabase(output_path, record_type=hypothesis.record_type)
    results.apply_schema()

    for composition in hypothesis:
        found = []
        seen_scans = set()
        seen_charges = set()

        for adduct in adduct_list:
            # The same shifted mass is both the search target and the
            # reference for the ppm error of each hit.
            target_mass = composition.intact_mass + adduct.mass
            hits = spectrum_db.ppm_match_tolerance_search(target_mass, ms1_match_tolerance)
            for spectrum in hits:
                error = ppm_error(target_mass, spectrum.neutral_mass)
                label = str(composition.id) + ":" + adduct.name
                found.append(PrecursorMatch(
                    label,
                    spectrum.neutral_mass,
                    error,
                    spectrum.get("abundance"),
                    spectrum.charge,
                    spectrum.other_data,
                    spectrum.scan_ids[0]))
                seen_scans.update(spectrum.scan_ids)
                seen_charges.add(spectrum.charge)

        # `groupby` must return a mapping here (it is consumed via .items()),
        # so this is a project helper rather than itertools.groupby.
        grouped = groupby(found, match_key_getter)
        combined = {}
        for key, members in grouped.items():
            combined[key] = combine_results(members)

        composition.precursor_matches = combined
        composition.precursor_scans_searched = seen_scans
        composition.precursor_scan_density = density(seen_scans)
        composition.precursor_charge_states = seen_charges

        # Commit is deferred so all compositions go in with a single commit.
        results.load_data([composition], set_id=False, cast=True, commit=False)

    results.commit()
    results.apply_indices()
    return results
# Example #2
# 0
def download_all_structures(db_path, record_type=GlycanRecordWithTaxon):  # pragma: no cover
    """Download the GlycomeDB structure dump and load it into a database.

    The gzipped XML dump is fetched over HTTP, and every ``<structure>``
    element is parsed from GlycoCT, round-tripped through ``str`` and
    ``glycoct.loads`` as a sanity check on its mass, wrapped in
    `record_type`, and loaded into a `RecordDatabase` at `db_path`.
    Structures that fail at any step are skipped, printed, and recorded under
    the ``"misses"`` metadata key as ``(glycomedb_id, exception)`` pairs.

    Parameters
    ----------
    db_path
        Location handed to `RecordDatabase`.
    record_type
        Record class called as ``record_type(glycan, taxa=..., id=...)``.

    Returns
    -------
    RecordDatabase
        The populated database, after ``commit()``.

    Raises
    ------
    requests.HTTPError
        If the download returns an error status (``raise_for_status``).
    """
    # Function-scope import so the fix does not depend on the file's
    # top-level imports. The HTTP body is bytes, so it needs a binary buffer;
    # a text StringIO rejects bytes on Python 3.
    from io import BytesIO

    response = requests.get(u'http://www.glycome-db.org/http-services/getStructureDump.action?user=eurocarbdb')
    response.raise_for_status()
    handle = gzip.GzipFile(fileobj=BytesIO(response.content))
    xml = etree.parse(handle)
    db = RecordDatabase(db_path, record_type=record_type)
    misses = []
    i = 0
    for structure in xml.iterfind(".//structure"):
        # Bind the id before the try block so the except handler never sees
        # an unbound name or the id left over from the previous structure.
        glycomedb_id = structure.attrib.get('id')
        try:
            glycomedb_id = int(glycomedb_id)
            i += 1
            glycoct_str = structure.find("sequence").text
            taxa = [Taxon(t.attrib['ncbi'], None, None) for t in structure.iterfind(".//taxon")]
            glycan = glycoct.loads(glycoct_str)
            # Compare the absolute difference so a reparsed mass that is too
            # low is rejected as well as one that is too high.
            reparsed_mass = glycoct.loads(str(glycan)).mass()
            if abs(reparsed_mass - glycan.mass()) > 0.00001:
                raise Exception("Mass did not match on reparse")
            record = record_type(glycan, taxa=taxa, id=glycomedb_id)
            db.load_data(record, commit=False, set_id=False)
            if i % 1000 == 0:
                print(i, "Records parsed.")
        except Exception as e:
            # Deliberately best-effort: one bad structure must not abort the
            # whole dump, so record the failure and continue.
            misses.append((glycomedb_id, e))
            print(glycomedb_id, e)
    db.set_metadata("misses", misses)
    db.commit()
    return db