def test_file_to_dict_list():
    """
    GIVEN compound files of different types (.tsv, .csv, and .json)
    WHEN each file is parsed with utils.file_to_dict_list
    THEN every parse yields 15 entries and entry 7 matches the expected
         Thiocyanate record, field for field and in the same order
    """
    # Column names and the matching values of the record at index 7, kept as
    # parallel tuples so the expected OrderedDict preserves column order.
    field_names = (
        'id', 'abbreviation', 'name', 'formula', 'mass', 'source',
        'structure', 'charge', 'is_core', 'is_obsolete', 'linked_compound',
        'is_cofactor', 'deltag', 'deltagerr', 'pka', 'pkb',
        'abstract_compound', 'comprised_of', 'aliases',
    )
    field_values = (
        'cpd01211', 'tcynt', 'Thiocyanate', 'CNS', '58', 'ModelSEED',
        'InChI=1S/CHNS/c2-1-3/h3H', '-1', '1', '0', 'null',
        '0', '22.2', '5.68687', '3:0.5', '',
        'null', 'null', 'null',
    )
    expected_record = OrderedDict(zip(field_names, field_values))

    for filename in ('test_compounds.tsv',
                     'test_compounds.csv',
                     'test_compounds.json'):
        records = utils.file_to_dict_list(DATA_DIR + filename)
        assert len(records) == 15
        assert records[7] == expected_record
def load_compound_set(self, compound_file=None, structure_field=None,
                      id_field='id'):
    """Load starting compounds from a file or from the associated MINE
    database and register them in the internal compound dictionary.

    If a compound file is provided, its compounds are loaded. If not, the
    compounds are read from the MINE database named by ``self.mine``.

    :param compound_file: Path to a file containing compounds (parsed with
        utils.file_to_dict_list)
    :type compound_file: basestring
    :param structure_field: the name of the column containing the
        structure incarnation as InChI or SMILES. Passed through unchanged
        to ``_mol_from_dict`` (Default: None)
    :type structure_field: str
    :param id_field: the name of the column containing the desired
        compound ID (Default: 'id')
    :type id_field: str
    :return: SMILES strings of the compounds that were loaded
    :rtype: list
    :raises ValueError: if neither a compound file nor a MINE database is
        available
    """
    compound_smiles = []
    if compound_file:
        for line in utils.file_to_dict_list(compound_file):
            mol = self._mol_from_dict(line, structure_field)
            if not mol:
                # No usable structure could be built for this row
                continue
            # Add compound to internal dictionary as a starting
            # compound and store SMILES string to be returned
            smi = AllChem.MolToSmiles(mol, True)
            _id = line[id_field]
            # Do not operate on inorganic compounds. This is a plain
            # substring test for a carbon symbol in the SMILES string.
            if "C" in smi or "c" in smi:
                AllChem.SanitizeMol(mol)
                self._add_compound(_id, smi, mol=mol,
                                   type='Starting Compound')
                compound_smiles.append(smi)
    # If a MINE database is being used instead, load every compound it
    # holds and return their SMILES strings
    elif self.mine:
        db = MINE(self.mine)
        for compound in db.compounds.find():
            _id = compound['_id']
            smi = compound['SMILES']
            # Assume unannotated compounds are starting compounds. The
            # lookup uses the same 'Type' key that is passed on below, so
            # an existing annotation is preserved instead of being
            # overwritten, and the fetched document is left unmodified.
            compound_type = compound.get('Type', 'Starting Compound')
            self._add_compound(_id, smi, type=compound_type)
            compound_smiles.append(smi)
    else:
        raise ValueError('No input file or database specified for '
                         'starting compounds')

    print("%s compounds loaded" % len(compound_smiles))
    return compound_smiles
def test_file_to_dict_list():
    """Check that the .tsv, .csv and .json compound files each parse to 15
    entries and that the entry at index 7 is the expected record."""
    # Build the expected record key by key; insertion order is the column
    # order the comparison relies on.
    expected = OrderedDict()
    expected['id'] = 'cpd01211'
    expected['abbreviation'] = 'tcynt'
    expected['name'] = 'Thiocyanate'
    expected['formula'] = 'CNS'
    expected['mass'] = '58'
    expected['source'] = 'ModelSEED'
    expected['structure'] = 'InChI=1S/CHNS/c2-1-3/h3H'
    expected['charge'] = '-1'
    expected['is_core'] = '1'
    expected['is_obsolete'] = '0'
    expected['linked_compound'] = 'null'
    expected['is_cofactor'] = '0'
    expected['deltag'] = '22.2'
    expected['deltagerr'] = '5.68687'
    expected['pka'] = '3:0.5'
    expected['pkb'] = ''
    expected['abstract_compound'] = 'null'
    expected['comprised_of'] = 'null'
    expected['aliases'] = 'null'

    compound_files = [
        'test_compounds.tsv',
        'test_compounds.csv',
        'test_compounds.json',
    ]
    for name in compound_files:
        parsed = utils.file_to_dict_list(data_dir + name)
        assert len(parsed) == 15
        assert parsed[7] == expected
        # Report each file once its checks have passed
        print(name)
def test_file_to_dict_list():
    """
    GIVEN compound files of different types (.tsv, .csv, and .json)
    WHEN each one is read through utils.file_to_dict_list
    THEN each result holds 15 compound dicts and the one at index 7 equals
         the expected Thiocyanate entry
    """
    # (column, value) pairs of the expected entry, in column order.
    expected_pairs = [
        ("id", "cpd01211"),
        ("abbreviation", "tcynt"),
        ("name", "Thiocyanate"),
        ("formula", "CNS"),
        ("mass", "58"),
        ("source", "ModelSEED"),
        ("structure", "InChI=1S/CHNS/c2-1-3/h3H"),
        ("charge", "-1"),
        ("is_core", "1"),
        ("is_obsolete", "0"),
        ("linked_compound", "null"),
        ("is_cofactor", "0"),
        ("deltag", "22.2"),
        ("deltagerr", "5.68687"),
        ("pka", "3:0.5"),
        ("pkb", ""),
        ("abstract_compound", "null"),
        ("comprised_of", "null"),
        ("aliases", "null"),
    ]
    expected_compound = OrderedDict(expected_pairs)

    for filename in (
        "test_compounds.tsv",
        "test_compounds.csv",
        "test_compounds.json",
    ):
        compounds = utils.file_to_dict_list(DATA_DIR / filename)
        assert len(compounds) == 15
        assert compounds[7] == expected_compound
neutralise=options.bnice, image_dir=options.image_dir, database=options.database) # Create a directory for image output file if it doesn't already exist if options.image_dir and not os.path.exists(options.image_dir): os.mkdir(options.image_dir) # If starting compound specified as SMILES string, then add it if options.smiles: pk._add_compound("Start", options.smiles, type='Starting Compound') else: pk.load_compound_set(compound_file=options.compound_file) # Generate reaction network pk.transform_all(max_generations=options.generations, num_workers=options.max_workers) if options.pruning_whitelist: mols = [ pk._mol_from_dict(line) for line in utils.file_to_dict_list(options.pruning_whitelist) ] pk.prune_network([utils.compound_hash(x) for x in mols if x]) # Save to database (e.g. Mongo) if present, otherwise create output file if options.database: print("Saving results to %s" % options.database) pk.save_to_MINE(options.database) else: pk.assign_ids() pk.write_compound_output_file(options.output_dir + '/compounds.tsv') pk.write_reaction_output_file(options.output_dir + '/reactions.tsv') print("Execution took %s seconds." % (time.time() - t1))