def make_opsin_input():
    """Write extracted names that have no Tetko match to ``opsin_input.txt``.

    Walks every ``.json`` file in ``../examples/mp/results``. For each file
    that has a companion ``../examples/mp/tetko/<patent_id>.json``, every
    entry in each result's ``names`` list is compared, after
    ``excess_normalize``, with the ``name`` of every Tetko record for the
    same patent. Names without a match are written to ``opsin_input.txt``,
    one per line and UTF-8 encoded; each distinct name is written at most
    once across the whole run.

    Side effects: overwrites ``opsin_input.txt`` in the current working
    directory and prints each patent id to stdout.
    """
    result_dir = '../examples/mp/results'
    seen = set()
    # Binary mode on both ends keeps the explicit decode/encode calls valid
    # whether ``read()`` yields a byte ``str`` (Python 2) or ``bytes``
    # (Python 3). Newlines are therefore written as a bare '\n'.
    with open('opsin_input.txt', 'wb') as op_out:
        for filename in os.listdir(result_dir):
            if not filename.endswith('.json'):
                continue
            # NOTE(review): nine characters are stripped although only
            # '.json' (five) is checked above -- presumably the result files
            # carry a longer suffix; confirm against the directory contents.
            patent_id = filename[:-9]
            print(patent_id)
            tetko_file = '../examples/mp/tetko/%s.json' % patent_id
            if not os.path.isfile(tetko_file):
                continue
            with open('%s/%s' % (result_dir, filename), 'rb') as fin:
                results = json.loads(fin.read().decode('utf8'))
            with open(tetko_file, 'rb') as tin:
                tetko_results = json.loads(tin.read().decode('utf8'))
            # Normalize the Tetko names once per patent so each extracted
            # name needs a single set lookup instead of a full rescan.
            # Assumes excess_normalize is pure and returns a hashable value
            # (presumably a string) -- TODO confirm.
            tetko_names = {
                excess_normalize(tetko_result['name'])
                for tetko_result in tetko_results
            }
            for result in results:
                if 'names' not in result:
                    continue
                for name in result['names']:
                    # Already emitted: skip before paying for normalization.
                    if name in seen:
                        continue
                    if excess_normalize(name) in tetko_names:
                        continue
                    seen.add(name)
                    op_out.write(('%s\n' % name).encode('utf8'))
def _get_standardized_result(result, tetko_results, n2s):
    """Attach a structure to one extracted melting-point record.

    The first melting point of ``result`` is standardized and converted to
    a float. Candidate names are then tried longest first: a name whose
    normalized form equals a Tetko record's normalized ``name`` wins and
    takes that record's ``smiles`` and ``inchikey``. If no name matches any
    Tetko record, the names are tried again against ``n2s``, and the first
    single-component SMILES that RDKit can parse is used.

    Args:
        result: Mapping with a ``names`` list and a ``melting_points`` list
            whose first item has a ``value`` entry.
        tetko_results: Sequence of mappings with ``name``, ``smiles`` and
            ``inchikey`` entries. It is iterated once per candidate name,
            so it must be re-iterable.
        n2s: Mapping from name to a SMILES string. It is only read here;
            no resolver lookup is attempted for absent names.
            NOTE(review): values are used as strings (``'.' in smiles``,
            ``Chem.MolFromSmiles``) -- confirm the mapping is stored in
            that shape.

    Returns:
        A dict with keys ``name``, ``smiles``, ``inchikey``, ``value`` (the
        raw melting-point value) and ``float_value``, or ``None`` when the
        value is rejected or no name can be resolved.

    Raises:
        KeyError, IndexError: If ``result`` lacks ``names`` or has no
            melting point; such records are not filtered here.
    """
    names = sorted(result['names'], key=len, reverse=True)
    raw_value = result['melting_points'][0]['value']
    fv = float_value(standardize_value(raw_value))
    # NOTE(review): this truthiness test also rejects a parsed value of
    # exactly 0.0 -- confirm whether that is intended or whether
    # float_value signals failure with None.
    if not fv:
        print('INVALID')
        print(names)
        print(raw_value)
        return None

    # First pass: prefer the curated Tetko structure for any matching name.
    for name in names:
        # Loop-invariant for the inner scan, so normalize it only once.
        normalized_name = excess_normalize(name)
        for tetko_result in tetko_results:
            if normalized_name == excess_normalize(tetko_result['name']):
                return {
                    'name': name,
                    'smiles': tetko_result['smiles'],
                    'inchikey': tetko_result['inchikey'],
                    'value': raw_value,
                    'float_value': fv,
                }

    # Second pass: fall back to the name -> SMILES lookup table.
    for name in names:
        if name not in n2s:
            print('MISSING: %s' % name)
            continue
        smiles = n2s[name]
        # Skip unresolved names and multi-component SMILES ('.' separates
        # disconnected fragments).
        if not smiles or '.' in smiles:
            continue
        mol = Chem.MolFromSmiles(smiles)
        if mol:
            inchikey = Chem.InchiToInchiKey(Chem.MolToInchi(mol))
            return {
                'name': name,
                'smiles': smiles,
                'inchikey': inchikey,
                'value': raw_value,
                'float_value': fv,
            }
    return None