コード例 #1
0
ファイル: csifingerid.py プロジェクト: Mrqeoqqt/chemdistiller
    def configure_scorer(self, peak, annotation):
        """Load the CSI:FingerID predictions attached to the annotation's peak.

        The ``peak`` argument is ignored: the peak actually used is always
        ``annotation.parent_peak`` (kept that way for interface compatibility).

        For every candidate ``1..csifingerid_count`` whose
        ``csifingerid_score_<n>`` is greater than zero, the comma-separated
        predicted fingerprint is expanded to the full mask length (positions
        where the unpacked mask is zero stay 0.0) and stored, together with
        its formula filter, mask and score, on ``self``.

        Attributes set on success: ``csicount``, ``csi_fpts``,
        ``csi_formulas``, ``csi_formulascores``, ``csi_fpt_masks``.

        Returns:
            0 when ``csifingerid_count`` is present in the peak parameters,
            -1 otherwise (in which case ``self`` is left untouched).
        """
        peak = annotation.parent_peak
        params = peak.parameters

        if 'csifingerid_count' not in params:
            return -1

        self.csicount = int(params['csifingerid_count'])
        self.csi_fpts = []
        self.csi_formulas = []
        self.csi_formulascores = []
        self.csi_fpt_masks = []

        for rank in range(1, self.csicount + 1):
            fscore = float(params['csifingerid_score_%s' % rank])
            # Written as "not >" so that a NaN score is skipped, exactly as
            # the original "if fscore > 0.0" did.
            if not fscore > 0.0:
                continue

            pred_fpt = params['csifingerid_predfpt_%s' % rank]
            formula = params['csifingerid_formula_%s' % rank]
            mask = np.unpackbits(
                decode_from_base64(params['csifingerid_fptmask_%s' % rank]))

            fpt = np.array([float(value) for value in pred_fpt.split(',')],
                           dtype=np.float64)

            # Scatter the predicted values into the positions enabled by the
            # mask; the number of predictions must equal the number of set
            # mask bits or NumPy raises.
            full_fpt = np.zeros(mask.shape, dtype=np.float64)
            full_fpt[mask > 0] = fpt[:]

            self.csi_fpts.append(full_fpt)
            self.csi_formulas.append(FormulasFilter(formula))
            self.csi_fpt_masks.append(mask)
            self.csi_formulascores.append(fscore)

        # Only candidates with a positive score are counted.
        self.csicount = len(self.csi_fpts)
        return 0
コード例 #2
0
def generate_fpt(index, peak, subpath, mode, mask):
    """Write the consensus fingerprint of a peak's spectrum to a ``.fpt`` file.

    The ``fptcount`` base64-encoded fingerprints stored on
    ``peak.parent_spectrum.parameters`` (keys ``fpt_0`` .. ``fpt_<n-1>``) are
    unpacked to bits and averaged; the mean is rounded with ``np.rint`` and
    mapped from {0, 1} to {-1, +1}.  Only positions where ``mask`` is
    positive are written, space separated, to
    ``<subpath>/<crossvalidation_batch_index>/<index>.fpt``.

    Args:
        index: value used as the output file's base name.
        peak: object exposing ``parent_spectrum.parameters``.
        subpath: output root; the batch sub-directory is created on demand.
        mode: unused by this function.
        mask: per-bit sequence; entries > 0 select the bits that are written.

    Note:
        ``fptcount == 0`` makes the average a division by zero, so the
        written values are NaN.
    """
    spectrum_params = peak.parent_spectrum.parameters
    fptcount = int(spectrum_params['fptcount'])

    votes = np.zeros((len(mask), ), dtype=np.float32)
    for i in range(fptcount):
        bits = np.unpackbits(decode_from_base64(spectrum_params['fpt_%s' % i]))
        votes = np.add(votes, np.array(bits, dtype=np.float32))

    consensus = np.rint(np.divide(votes, fptcount))
    signed = np.subtract(np.multiply(consensus, 2), 1)

    batch = int(spectrum_params['crossvalidation_batch_index'])
    subpath = subpath + '/%s' % batch
    # exist_ok avoids the check-then-create race when several workers target
    # the same batch directory.
    os.makedirs(subpath, exist_ok=True)
    fname = subpath + '/%s.fpt' % index

    with open(fname, 'w') as fout:
        for selected, value in zip(mask, signed):
            if selected > 0:
                fout.write('%s ' % value)
コード例 #3
0
ファイル: database.py プロジェクト: Mrqeoqqt/chemdistiller
    def _get_next_raw_record(self):
        """Return the next record of the ``.st2`` text database.

        Reads one tab-separated line from the currently open shard, moving
        on to the next existing shard (by increasing ``self.mzcurrent``)
        whenever the current one is exhausted, and converts it into a
        ``MolecularRecord`` holding ``MZ``, ``Mass``, ``Charge`` plus every
        optional field named in ``self.required_fields``.

        Side effects: updates ``self.currentfile``, ``self.datafile``,
        ``self.mzcurrent`` and ``self.record_index``.

        Raises:
            StopIteration: when ``self.mzcurrent`` passes ``self.mzmax_int``.
        """
        if self.currentfile == '':
            self._open_st2_at_or_after_current_mz()

        line = self.datafile.readline()
        self.record_index += 1
        while line == '':
            # End of the current shard: continue with the next m/z bucket.
            self.datafile.close()
            self.mzcurrent += 1
            self._open_st2_at_or_after_current_mz()
            line = self.datafile.readline()
            self.record_index += 1

        s = line.rstrip('\n').split('\t')
        required = self.required_fields

        record = MolecularRecord()
        record['MZ'] = float(s[0])
        if self.charged:
            record['Mass'] = float(s[1])
            record['Charge'] = float(s[2])
        else:
            record['Mass'] = record['MZ']
            record['Charge'] = 0

        # Column positions below are shifted by self.offs.
        if 'ShortInChI' in required:
            record['ShortInChI'] = parse_inchi(s[2 + self.offs])[0]

        if 'InChI' in required:
            record['InChI'] = s[2 + self.offs]

        if 'SMILES' in required:
            record['SMILES'] = s[3 + self.offs]

        if 'IDs' in required:
            record['IDs'] = s[4 + self.offs]

        if 'FPT' in required:
            record['FPT'] = decode_from_base64(s[5 + self.offs])
            # Mask FPT here !

        if 'Frag' in required:
            record['Frag'] = s[6 + self.offs]
            if self.charged:
                # Fixed column index (not offset by self.offs).
                record['FragCharge'] = s[9]

        if 'InChIKeyValues' in required:
            record['InChIKeyValues'] = inchikeyvalues_from_inchi(
                s[2 + self.offs])

        if 'InChIKey' in required:
            record['InChIKey'] = inchikey_from_inchi(s[2 + self.offs])

        if (('Formula' in required) or ('ElementVector' in required)
                or ('FormulaVector' in required)):
            # The formula is the part of the column before the first '/'.
            fla = parse_formula(s[1 + self.offs].split('/')[0])

            if 'Formula' in required:
                record['Formula'] = fla

            if 'ElementVector' in required:
                record['ElementVector'] = formula_to_element_vector(fla)

            if 'FormulaVector' in required:
                record['FormulaVector'] = encode_formula_to_array(fla)

        return record

    def _st2_path_for_current_mz(self):
        """Build the shard path for ``self.mzcurrent`` (one directory level per digit group)."""
        mz = self.mzcurrent
        return os.path.join(self.database_path, self.db_name, self.subf,
                            str(mz // 1000),
                            str(mz % 1000 // 100),
                            str(mz % 100 // 10),
                            '%s.st2' % str(mz % 10))

    def _open_st2_at_or_after_current_mz(self):
        """Open the first existing shard at or after ``self.mzcurrent``; raise StopIteration past ``self.mzmax_int``."""
        self.currentfile = self._st2_path_for_current_mz()
        while (not os.path.isfile(self.currentfile)) and (
                self.mzcurrent <= self.mzmax_int):
            self.mzcurrent += 1
            self.currentfile = self._st2_path_for_current_mz()
        if self.mzcurrent > self.mzmax_int:
            raise StopIteration()
        self.datafile = open(self.currentfile, 'r')
        self.record_index = -1
コード例 #4
0
    # Query the selected chemical databases for this m/z and charge, applying
    # inchifilter and requesting only the 'FPT' field of each hit.
    result=dbmanager.query_by_mz_scored(mz, 20, charge, db_indexes=dbmanager.db_indexes_from_db_names(test_chemical_databases,case_sensitive=False),\
    filters=[inchifilter], scorers=[], required_fields=set(['FPT']), results_limit=-1, save_memory=False)
    ff = []
    if len(result.mol_list) == 0:
        # No database hit: count it and report the running total.
        missing += 1
        print('Missing: %s' % missing)
    else:
        # Collect the base64 form of every hit's fingerprint ...
        for mol in result.mol_list:
            ff.append(encode_to_base64(mol['FPT']))
        # ... and drop duplicates.
        ff = set(ff)
        spectrum.parameters['FPTCount'] = len(ff)
        # cc is the zero-based index used in the 'FPT_<n>' parameter names.
        cc = -1
        for subfpt in ff:
            cc += 1
            spectrum.parameters['FPT_%s' % cc] = subfpt
            # Unpack to individual bits and add them to the running per-bit
            # totals kept in fpts.
            subfpt = decode_from_base64(subfpt)
            subfpt = np.unpackbits(subfpt)
            fpts = np.add(fpts, subfpt)
#%%
# Export every spectrum as a text file into the output folder.
print('Exporting spectra')
specmanager.export_textfile_spectra_to_folder(test_spectral_database_outpath)
#%%
#test_fpt_stat_outfile='e:/Imperial/TestDB/FPT.txt';

# Dump the accumulated per-bit totals as "<bit index>\t<total>" lines.
print('Exporting FPT stats')
# The context manager closes the file even if a write fails part-way.
with open(test_fpt_stat_outfile, 'w') as fout:
    for i in range(11416):
        fout.write('%s\t%s\n' % (i, fpts[i]))

#%%
コード例 #5
0
    def hdf5_import_from_st2raw(self,
                                inpath,
                                fptmask=np.ones((11416, ), np.uint8)):
        """Import a raw ``.st2`` text database into ``self.HDF5container``.

        Steps:
          1. Copy the ``key=value`` lines of ``<inpath>/dbinfo.dat`` into the
             container attributes (``DBFORMAT`` is forced to ``'3'``).
          2. Store the fingerprint mask information in ``/FingerPrints``.
          3. For each of ``/Negative``, ``/Positive`` and ``/Neutral``: create
             the datasets, list every ``<i>/<j>/<k>/<l>.st2`` file below
             ``inpath + subpath`` and import its records (records with a
             mass below 12.0 are ignored; lines that fail to parse are
             reported with ``'Error! Skipping!'`` and skipped).

        Variable-length text (SMILES, IDs and the four InChI parts) is
        appended to a per-group ``ASCII`` byte dataset; the corresponding
        index datasets hold ``[start, end)`` offsets into it.  Fragment
        values are stored the same way in ``FragPrintValues`` /
        ``FragPrintIndex``.

        Args:
            inpath: root folder of the raw database (must contain
                ``dbinfo.dat``).
            fptmask: array of 11416 entries; bits equal to 1 are kept in the
                stored fingerprints.

        Raises:
            IOError: if ``dbinfo.dat`` is missing.

        NOTE(review): ``self.fragprintpos`` and ``self.asciipos`` are not
        reset here between the three charge groups although each group has
        its own datasets - confirm this is intended.
        NOTE(review): the ``FPTsubmask`` dataset is created but never
        written in this method.
        """
        total_bits = 11416  # bit count of the unmasked fingerprint

        info_path = os.path.join(inpath, 'dbinfo.dat')
        if not os.path.isfile(info_path):
            raise IOError('Database info file not found: %s' % info_path)

        container = self.HDF5container
        # np.bytes_ is the same type as the former np.string_ alias, which
        # no longer exists in NumPy 2.
        container.attrs['HDF5ContainerType'] = np.bytes_(
            'DistilledChemicalDatabase')
        container.attrs['HDF5ContainerVersion'] = np.bytes_('1.0')
        with open(info_path, 'r') as finp:
            for line in finp:
                entry = line.rstrip('\n').lstrip().split('=', 1)
                if entry[0].upper() == 'DBFORMAT':
                    entry[1] = '3'
                if entry[0] != '':
                    container.attrs[entry[0]] = np.bytes_(entry[1])

        # Indexes of the fingerprint bits retained by the mask.
        fptlist = [i for i in range(total_bits) if fptmask[i] == 1]
        fptlen = len(fptlist)
        # Number of bytes needed to hold fptlen packed bits.
        fptmasklen = len(np.packbits(np.ones((fptlen, ), np.uint8)))
        fptindexes = np.array(fptlist, dtype=np.uint32)
        packedmask = np.packbits(fptmask)
        packedmasklen = len(packedmask)

        def growable(group, name, row_shape, chunk_rows, dtype):
            # One-row, gzip-compressed dataset that can grow along axis 0.
            return group.create_dataset(name, (1, ) + row_shape,
                                        chunks=(chunk_rows, ) + row_shape,
                                        maxshape=(None, ) + row_shape,
                                        compression="gzip",
                                        compression_opts=4,
                                        dtype=dtype)

        fptgroup = container.create_group('FingerPrints')
        # Original mask, packed
        original_mask_ds = fptgroup.create_dataset("FPTOriginalMask",
                                                   (packedmasklen, ),
                                                   maxshape=(packedmasklen, ),
                                                   dtype=np.uint8)
        original_mask_ds[:] = packedmask[:]

        # List of indexes of the retained original FPT bits
        mask_index_ds = fptgroup.create_dataset("FPTMask", (fptlen, ),
                                                maxshape=(fptlen, ),
                                                compression="gzip",
                                                compression_opts=4,
                                                dtype=np.uint32)
        mask_index_ds[:] = fptindexes[:]

        # Mask for working bits (packed); created only, see NOTE above.
        growable(fptgroup, "FPTsubmask", (fptmasklen, ), 100, np.uint8)

        # FPT info: 0 - original bit count, 1 - length of the fpt after
        # masking, 2 - length of the packed fpt, 3 - number of padding bits
        fptinfo = fptgroup.create_dataset("FPTInfo", (4, ),
                                          maxshape=(4, ),
                                          dtype=np.uint32)
        fptinfo[0] = total_bits
        fptinfo[1] = fptlen
        fptinfo[2] = fptmasklen
        fptinfo[3] = fptmasklen * 8 - fptlen

        print('Listing input files')

        for subpath in ['/Negative', '/Positive', '/Neutral']:
            print(subpath)

            fptgroup = container.create_group(subpath + '/FingerPrints')
            fraggroup = container.create_group(subpath + '/FragPrints')
            chemgroup = container.create_group(subpath + '/ChemInfo')
            chargegroup = container[subpath]

            # New FPT array, packed and trimmed to fptmask
            fptdataset = growable(fptgroup, "FPTArray", (fptmasklen, ), 100,
                                  np.uint8)

            if subpath != '/Neutral':
                masschargedataset = growable(chargegroup, "MZMassCharge",
                                             (3, ), 10000, np.float32)
            else:
                mzdataset = growable(chargegroup, "MZ", (), 10000,
                                     np.float32)

            inchikey_dataset = growable(chemgroup, "InChiKeyValues", (15, ),
                                        10000, np.uint8)
            elementsvector_dataset = growable(chemgroup, "ElementsVector",
                                              (12, ), 10000, np.uint8)
            formulavector_dataset = growable(chemgroup, "FormulaVector",
                                             (96, ), 10000, np.uint16)
            fragprintindex_dataset = growable(fraggroup, "FragPrintIndex",
                                              (2, ), 10000, np.int64)
            fragprintvalues_dataset = growable(fraggroup, "FragPrintValues",
                                               (), 10000, np.float32)
            smiles_dataset = growable(chemgroup, "SMILES", (2, ), 10000,
                                      np.int64)
            inchi_dataset = growable(chemgroup, "InChi", (4, 2), 10000,
                                     np.int64)
            ids_dataset = growable(chemgroup, "IDs", (2, ), 10000, np.int64)
            ascii_dataset = growable(chemgroup, "ASCII", (), 10000, np.uint8)

            recordindex = -1

            # Collect <root>/<0..1999>/<0..9>/<0..9>/<0..9>.st2 in order.
            fileslist = []
            root = inpath + subpath
            for i in range(0, 2000):
                level1 = root + '/%s' % i
                if not os.path.exists(level1):
                    continue
                print(level1)
                for j in range(0, 10):
                    level2 = level1 + '/%s' % j
                    if not os.path.exists(level2):
                        continue
                    for k in range(0, 10):
                        level3 = level2 + '/%s' % k
                        if not os.path.exists(level3):
                            continue
                        for m in range(0, 10):
                            candidate = level3 + '/%s.st2' % m
                            if os.path.isfile(candidate):
                                fileslist.append(candidate)

            print('Total number of input files: %s' % len(fileslist))

            for filename in fileslist:
                fpath, fname = os.path.split(filename)
                # Separate name so the outer loop variable is not clobbered.
                relpath = fpath.replace(inpath, '')
                charged = 'Neutral' not in relpath
                # Charged files carry two extra leading columns.
                offs = 2 if charged else 0
                print('Importing: .../%s/%s' % (relpath, fname))

                dblist = []
                with open(filename, 'r') as finp:
                    for line in finp:
                        try:
                            s = line.replace('\n', '').replace('\r',
                                                               '').split('\t')
                            mz = float(s[0])
                            if charged:
                                mass = float(s[1])
                                charge = float(s[2])
                            else:
                                mass = mz
                                charge = 0.0
                            if mass >= 12.0:
                                inchi = s[2 + offs]
                                smiles = s[3 + offs]
                                ids = s[4 + offs]
                                fpt = np.unpackbits(
                                    decode_from_base64(s[5 + offs]))
                                frag = s[6 + offs]
                                fragcharge = s[9] if charged else ''
                                # Count the record only once every column
                                # has been parsed successfully.
                                recordindex += 1
                                if recordindex % 1000 == 0:
                                    print('Total: %s' % (recordindex + 1))

                                dblist.append([
                                    recordindex, mz, charged, mass, charge,
                                    inchi, fpt, frag, fragcharge, smiles, ids
                                ])
                        except Exception:
                            # Malformed line: report and continue.  Unlike a
                            # bare except this lets KeyboardInterrupt and
                            # SystemExit propagate.
                            print('Error! Skipping!')

                if len(dblist) == 0:
                    continue

                # Grow the per-record datasets to hold the new records.
                rows = recordindex + 1
                fptdataset.resize((rows, fptmasklen))
                if charged:
                    masschargedataset.resize((rows, 3))
                else:
                    mzdataset.resize((rows, ))
                inchikey_dataset.resize((rows, 15))
                elementsvector_dataset.resize((rows, 12))
                formulavector_dataset.resize((rows, 96))
                fragprintindex_dataset.resize((rows, 2))
                smiles_dataset.resize((rows, 2))
                ids_dataset.resize((rows, 2))
                inchi_dataset.resize((rows, 4, 2))

                for (currentindex, mz, charged, mass, charge, inchitext, fpt,
                     frag, fragcharge, smiles, ids) in dblist:
                    fptdataset[currentindex, :] = np.packbits(
                        fpt[fptindexes])[:]

                    inchi = parse_inchi(inchitext)
                    inchikeyvalues = inchikeyvalues_from_inchi(inchitext)

                    # inchi[0] is "<formula>/<rest>"; rest is '' when there
                    # is no '/'.
                    sformula, _, sinchi = inchi[0].partition('/')
                    formula = parse_formula(sformula)
                    elementsvector = formula_to_element_vector(formula)
                    encodedformula = encode_formula_to_array(formula)

                    if charged:
                        frags = parse_string_fragment_charges(
                            charge, frag, fragcharge)
                        masschargedataset[currentindex, 0] = mz
                        masschargedataset[currentindex, 1] = mass
                        masschargedataset[currentindex, 2] = charge
                    else:
                        frags = parse_string_fragments(frag)
                        mzdataset[currentindex] = mz

                    inchikey_dataset[currentindex, :] = inchikeyvalues[:]
                    elementsvector_dataset[currentindex, :] = elementsvector[:]
                    formulavector_dataset[currentindex, :] = encodedformula[:]

                    # Append the fragment values and record their range.
                    fragcount = len(frags)
                    frags = np.array(frags, dtype=np.float32)
                    frag_end = self.fragprintpos + fragcount
                    fragprintindex_dataset[currentindex,
                                           0] = self.fragprintpos
                    fragprintindex_dataset[currentindex, 1] = frag_end
                    fragprintvalues_dataset.resize((frag_end, ))
                    fragprintvalues_dataset[
                        self.fragprintpos:frag_end] = frags[:]
                    self.fragprintpos = frag_end

                    # Text pieces in the order they are laid out in ASCII:
                    # (index dataset, leading index, text).  InChI slot 2
                    # holds inchi[2] and slot 3 holds inchi[1].
                    segments = [
                        (smiles_dataset, (currentindex, ), smiles),
                        (ids_dataset, (currentindex, ), ids),
                        (inchi_dataset, (currentindex, 0), sformula),
                        (inchi_dataset, (currentindex, 1), sinchi),
                        (inchi_dataset, (currentindex, 2), inchi[2]),
                        (inchi_dataset, (currentindex, 3), inchi[1]),
                    ]
                    payloads = [
                        np.array(bytearray(text.encode('ascii')),
                                 dtype=np.uint8)
                        for _, _, text in segments
                    ]

                    ascii_dataset.resize(
                        (self.asciipos + sum(len(p) for p in payloads), ))

                    for (index_dataset, where, _), payload in zip(
                            segments, payloads):
                        end = self.asciipos + len(payload)
                        index_dataset[where + (0, )] = self.asciipos
                        index_dataset[where + (1, )] = end
                        ascii_dataset[self.asciipos:end] = payload[:]
                        self.asciipos = end

        print('Import Finished!')
コード例 #6
0
ファイル: results.py プロジェクト: Mrqeoqqt/chemdistiller
    def _pipe_from_textfile(self, finp):
        while True:
            s = finp.readline()
            if s == '':
                return
            s = s.rstrip('\n').lstrip()
            if '##' in s:
                s = s[:s.index('##')]

            if '=' in s:
                s = s.split('=', 1)
                if s[0].lower().startswith('totalscore'):
                    self['TotalScore'] = float(s[1])

                elif s[0].lower().startswith('adduct'):
                    self['Adduct'] = s[1]
                elif s[0].lower().startswith('isotopeextramass'):
                    self['IsotopeExtraMass'] = float(s[1])
                elif s[0].lower().startswith('isotope'):
                    self['Isotope'] = int(s[1])

                elif s[0].lower().startswith('mz'):
                    self['MZ'] = float(s[1])
                elif s[0].lower().startswith('mass'):
                    self['Mass'] = float(s[1])
                elif s[0].lower().startswith('charge'):
                    self['Charge'] = int(s[1])
                elif s[0].lower().startswith('dbformat'):
                    self['DBFormat'] = int(s[1])
                elif s[0].lower().startswith('dbindex'):
                    self['DBIndex'] = int(s[1])
                elif s[0].lower().startswith('rindex'):
                    self['RIndex'] = int(s[1])
                elif s[0].lower().startswith('dbname'):
                    self['DBName'] = s[1]
                elif s[0].lower().startswith('rfile'):
                    self['RFile'] = s[1]
                elif s[0].lower().startswith('smiles'):
                    self['SMILES'] = s[1]
                elif s[0].lower().startswith('ids'):
                    self['IDs'] = s[1]
                elif s[0].lower().startswith('annotation'):
                    self['Annotation'] = s[1]
                elif s[0].lower().startswith('shortinchi'):
                    self['ShortInChI'] = s[1]
                elif s[0].lower().startswith('inchikeyvalues'):
                    self['InChIKeyValues'] = string_to_numpy_byte_array(s[1])
                elif s[0].lower().startswith('inchikey'):
                    self['InChiKey'] = s[1]
                elif s[0].lower().startswith('inchi'):
                    self['InChI'] = s[1]
                elif s[0].lower().startswith('formulavector'):
                    self['FormulaVector'] = string_to_numpy_uint16_array(s[1])
                elif s[0].lower().startswith('elementvector'):
                    self['ElementVector'] = string_to_numpy_byte_array(s[1])
                elif s[0].lower().startswith('frag'):
                    self['Frag'] = string_to_float_list(s[1])
                elif s[0].lower().startswith('formula'):
                    self['Formula'] = parse_formula(s[1])
                elif s[0].lower().startswith('fpt'):
                    self['FPT'] = decode_from_base64(s[1])
                elif s[0].lower().startswith('scores'):
                    if not ('Scores' in self):
                        self['Scores'] = {}
                    score = s[1].split(':', 1)
                    self['Scores'][score[0]] = float(score[1])

            elif s.lower().startswith('end'):
                return
コード例 #7
0
# Accumulators for the two batches (filled by code beyond this excerpt).
positivebatch = []
negativebatch = []

# scount is the 1-based progress counter printed for every spectrum.
scount = 0
for spectrum in specmanager.ms_spectra:
    scount += 1
    print('%s of %s' % (scount, len(specmanager.ms_spectra)))
    fpts = []
    fptcount = int(spectrum.parameters['fptcount'])
    ff = set()

    # Gather the stored fingerprint strings 'fpt_0' .. 'fpt_<fptcount-1>';
    # the set removes duplicates.
    for i in range(fptcount):
        ff.add(spectrum.parameters['fpt_%s' % i])

    # Decode each distinct fingerprint and unpack it to individual bits.
    for fpt in ff:
        fpts.append(np.unpackbits(decode_from_base64(fpt)))

    batch = int(spectrum.parameters['crossvalidation_batch_index'])
    for peak in spectrum.peaks:
        # Prefer the peak's own 'ion_type'; otherwise fall back to the
        # 'precursor_ion' of its first child spectrum, if there is one.
        ion_type = ''
        if 'ion_type' in peak.parameters:
            ion_type = peak.parameters['ion_type']
        elif peak.ms_spectra:
            ion_type = peak.ms_spectra[0].parameters['precursor_ion']
        # Only [M+H]+ and [M-H]- peaks are processed.
        if ion_type == '[M+H]+' or ion_type == '[M-H]-':
            subcount = 0
            vector = np.zeros((20000, 1), dtype=np.float32)
            values = []
            for subspectrum in peak.ms_spectra:
                # Child spectra whose 'level' parameter equals 2 are
                # normalised via normalize_to_one().
                if subspectrum.parameters['level'] == 2:
                    subspectrum.normalize_to_one()