Esempio n. 1
0
        try:
            mass = CalcExactMolWt(Chem.MolFromSmiles(smi))
        except:
            continue
        molwt.append(mass)
        smiles.append(smi)
        spec.append(ms2vec(ms[:, 0], ms[:, 1]))

    # Load the MLP result table (tab-separated, no header row) and keep the
    # ids of the rows whose f1 column is above 0.5.
    mlp = pd.read_csv('Fingerprint/results/mlp_result.txt',
                      sep='\t',
                      header=None)
    mlp.columns = ['id', 'accuracy', 'precision', 'recall', 'f1']
    # np.where yields row positions; indexing the 'id' Series with them relies
    # on the default 0..n-1 index that read_csv produces here.
    fpkeep = mlp['id'][np.where(mlp['f1'] > 0.5)[0]]

    spec = np.array(spec)
    pred_fps = predict_fingerprint(
        spec, fpkeep)  # predict fingerprint of the "unknown"

    # Reference library.  The JSON file is read inside a context manager so
    # the handle is closed as soon as it has been parsed, instead of being
    # left open until garbage collection.
    with open('DeepEI/data/all_smiles.json') as smiles_file:
        nist_smiles = np.array(json.load(smiles_file))
    nist_masses = np.load('DeepEI/data/molwt.npy')
    nist_fps = load_npz('DeepEI/data/fingerprints.npz')
    # Only the columns listed in fpkeep are kept before densifying.
    nist_fps = csr_matrix(
        nist_fps)[:, fpkeep].todense()  # fingerprints of nist compounds
    nist_spec = load_npz('DeepEI/data/peakvec.npz').todense()

    pred_spec = np.load(
        'DeepEI/data/neims_spec_massbank.npy')  # spectra predicted by NEIMS

    # Empty result table; presumably filled by the loop that follows.
    output = pd.DataFrame(columns=['smiles', 'mass', 'score', 'rank'])
    for i in tqdm(range(len(smiles))):
        smi = smiles[i]
        std_smi = Chem.MolToSmiles(Chem.MolFromSmiles(smi))
Esempio n. 2
0
    keep, :]

# Data stored under the "neims_msbk" file names: SMILES, masses, spectra and
# CDK fingerprints.  The sparse .npz matrices are densified on load.  The JSON
# file is read inside a context manager so its handle is closed right after
# parsing rather than leaked until garbage collection.
with open('DeepEI/data/neims_msbk_smiles.json') as json_file:
    neims_msbk_smiles = np.array(json.load(json_file))
neims_msbk_masses = np.load('DeepEI/data/neims_msbk_masses.npy')
neims_msbk_spec = load_npz('DeepEI/data/neims_spec_msbk.npz').todense()
neims_msbk_cdkfps = load_npz('DeepEI/data/neims_msbk_cdkfps.npz').todense()

# Data stored under the "msbk" file names; these are the spectra that
# fingerprints are predicted from below.
with open('DeepEI/data/msbk_smiles.json') as json_file:
    msbk_smiles = np.array(json.load(json_file))
msbk_masses = np.load('DeepEI/data/msbk_masses.npy')
msbk_spec = load_npz('DeepEI/data/msbk_spec.npz').todense()

# Keep the ids of the MLP result rows whose f1 column is above 0.5
# (tab-separated table without a header row).
mlp = pd.read_csv('Fingerprint/results/mlp_result.txt', sep='\t', header=None)
mlp.columns = ['id', 'accuracy', 'precision', 'recall', 'f1']
fpkeep = mlp['id'][np.where(mlp['f1'] > 0.5)[0]]
pred_fps = predict_fingerprint(msbk_spec, fpkeep)

# Combined search database: the NIST entries followed by the neims_msbk
# entries, in that order for every array so that row k of each refers to the
# same compound.  nist_smiles, nist_masses, neims_nist_spec and
# nist_fingerprint are defined earlier in the file.
db_smiles = np.array(list(nist_smiles) + list(neims_msbk_smiles))
db_masses = np.append(nist_masses, neims_msbk_masses)
db_spec = np.append(neims_nist_spec, neims_msbk_spec, axis=0)
# Fingerprint columns are restricted to fpkeep after stacking.
db_fingerprints = np.append(nist_fingerprint, neims_msbk_cdkfps,
                            axis=0)[:, fpkeep]

if __name__ == '__main__':

    output = pd.DataFrame(
        columns=['smiles', 'mass', 'score', 'rank', 'inNIST'])
    for i, smi in enumerate(tqdm(msbk_smiles)):

        specr = msbk_spec[i]  # true spectrum
        mass = msbk_masses[i]  # true mol weight
Esempio n. 3
0
    # Restrict test_ri, test_spec and test_masses to the rows selected by `i`.
    # `i` is defined above this snippet; presumably an index array, since
    # len(test) is taken in the ranking loop below -- confirm.
    test_rindex = test_ri[i,0]  # only column 0 of test_ri is kept
    test_spec = test_spec[i,:]  # NOTE: rebinds test_spec to the selected rows
    test_mass = test_masses[i]
    # Keep the selection under its own name; the ranking loop below reuses
    # `i` as its loop variable.
    test = i


    # Only keep the fingerprint ids whose f1 column in the MLP result table
    # (tab-separated, no header row) is above 0.5.
    mlp = pd.read_csv('Fingerprint/results/mlp_result.txt', sep='\t', header=None)
    mlp.columns = ['id', 'accuracy', 'precision', 'recall', 'f1']
    # np.where yields row positions; indexing the 'id' Series with them relies
    # on the default 0..n-1 index produced by read_csv.
    fpkeep = mlp['id'][np.where(mlp['f1'] > 0.5)[0]]
    
    # Load the stored fingerprint matrix and keep only the fpkeep columns
    # before converting it to a dense matrix.
    cdk_fp = load_npz('DeepEI/data/fingerprints.npz')
    cdk_fp = csr_matrix(cdk_fp)[:, fpkeep].todense()
    
    # Predict fingerprints from the selected test spectra, for the kept ids.
    pred_fp = predict_fingerprint(test_spec, fpkeep)
    
    # Empty result table for the ranking loop below; presumably one row is
    # added per test compound -- confirm against the rest of the loop.
    output = pd.DataFrame(columns=['smiles', 'mass', 'true RI', 'predict RI', 'mass filter', 'RI filter', 'Without filter'])
    for i in tqdm(range(len(test))):
        smi = test_smiles[i]
        mass = test_mass[i]
        ri = test_rindex[i]
        pred_fpi = pred_fp[i,:]
        trueindex = np.where(all_smiles == smi)[0][0]
        
        # mass filter
        candidate = np.where(np.abs(all_masses - mass) < 5)[0]
        w_true = np.where(candidate==trueindex)[0]
        if len(w_true)==0:
            rank_mass = 99999