def load_from_file(self, file_path=gc.reactionhistorian_data, testing=False):
    """Fills the occurrence tables from a local pickle file.

    When the file is missing, the data is loaded through
    ``load_databases``/``load`` and then written out with ``dump_to_file``.

    Args:
        file_path (str, optional): Path to the pickle file; it is read as two
            consecutive pickled objects (occurrences, then occurrences_flat).
            (default: {gc.reactionhistorian_data})
        testing (bool, optional): If true, no file or database is touched and
            a small hard-coded fixture is installed instead.
            (default: {False})
    """

    def blank_entry():
        # Default record for unseen keys: [count, list of ids].
        return [0, []]

    def test_fixture():
        # Built fresh on every call so both tables own independent lists.
        return defaultdict(
            blank_entry, {
                'CCO>>CCBr': [2, ['rxn1', 'rxn2']],
                'CCCCC>>CCC=CC': [1, ['rxn3']],
            })

    if testing:
        self.occurrences = test_fixture()
        self.occurrences_flat = test_fixture()
        return

    if not os.path.isfile(file_path):
        # No cached file: load from the databases and write the cache.
        self.load_databases()
        self.load()
        self.dump_to_file()
        return

    with open(file_path, 'rb') as handle:
        self.occurrences = defaultdict(blank_entry, pickle.load(handle))
        self.occurrences_flat = defaultdict(blank_entry, pickle.load(handle))
def load_from_file(self, file_path=gc.reactionhistorian_data, testing=False):
    """Loads the reaction occurrence tables from a locally stored file.

    Falls back to ``load_databases``/``load`` followed by ``dump_to_file``
    when the file does not exist.

    Args:
        file_path (str, optional): Path to the input file.
            (default: {gc.reactionhistorian_data})
        testing (bool, optional): Whether to only install a small in-memory
            fixture instead of reading anything. (default: {False})
    """
    table_names = ('occurrences', 'occurrences_flat')

    if testing:
        for table_name in table_names:
            # A fresh literal per table keeps the inner lists unshared.
            setattr(self, table_name, defaultdict(
                lambda: [0, []], {
                    'CCO>>CCBr': [2, ['rxn1', 'rxn2']],
                    'CCCCC>>CCC=CC': [1, ['rxn3']],
                }))
        return

    if os.path.isfile(file_path):
        with open(file_path, 'rb') as pickled:
            # The file holds one pickled object per table, in this order.
            for table_name in table_names:
                setattr(self, table_name,
                        defaultdict(lambda: [0, []], pickle.load(pickled)))
    else:
        self.load_databases()
        self.load()
        self.dump_to_file()
def get_data(max_N_c=None, shuffle=False):
    """Builds the train/validation/test generators from the pickled files.

    The split sizes come from the module-level ``split_ratio`` and the batch
    size from the module-level ``batch_size``.

    Args:
        max_N_c (int, optional): Forwarded to the training data generator
            only. (default: {None})
        shuffle (bool, optional): Forwarded to the training data generator
            only. (default: {False})

    Returns:
        dict: Sample counts, the data/label generators for each split and
            the batch size.
    """
    with open(DATA_FPATH, 'rb') as data_file:
        data_legend = pickle.load(data_file)
    # The labels legend is read and discarded, as before, so a missing or
    # unreadable labels file still fails here rather than later.
    with open(LABELS_FPATH, 'rb') as labels_file:
        pickle.load(labels_file)

    N_samples = data_legend['N_examples']
    train_frac = split_ratio[0]
    val_frac = split_ratio[1]
    N_train = int(N_samples * train_frac)
    N_val = int(N_samples * val_frac)
    N_test = N_samples - N_train - N_val
    val_end = N_train + N_val

    print('Total number of samples: {}'.format(N_samples))
    print('Training on {}% - {}'.format(train_frac * 100, N_train))
    print('Validating on {}% - {}'.format(val_frac * 100, N_val))
    print('Testing on {}% - {}'.format(
        (1 - val_frac - train_frac) * 100, N_test))

    result = {'N_samples': N_samples, 'N_train': N_train}

    # Training split: the only one that honours max_N_c and shuffle.
    result['train_generator'] = data_generator(
        0, N_train, batch_size, max_N_c=max_N_c, shuffle=shuffle)
    result['train_label_generator'] = label_generator(0, N_train, batch_size)
    result['train_nb_samples'] = N_train

    # Validation split.
    result['val_generator'] = data_generator(N_train, val_end, batch_size)
    result['val_label_generator'] = label_generator(
        N_train, val_end, batch_size)
    result['val_nb_samples'] = N_val

    # Test split: everything after the validation samples.
    result['test_generator'] = data_generator(val_end, N_samples, batch_size)
    result['test_label_generator'] = label_generator(
        val_end, N_samples, batch_size)
    result['test_nb_samples'] = N_test

    result['batch_size'] = batch_size
    return result
def load_model(self, FP_len=1024, model_tag='1024bool'):
    """Loads the SCScore model identified by ``model_tag``.

    Unknown tags are logged and replaced by '1024bool'. Also installs
    ``self.mol_to_fp`` (the fingerprint function matching the model) and
    loads a ``Pricer``.

    Args:
        FP_len (int, optional): Fingerprint length. (default: {1024})
        model_tag (str, optional): Tag of model to load; one of '1024bool',
            '1024uint8' or '2048bool'. (default: {'1024bool'})
    """
    self.FP_len = FP_len
    if model_tag not in ('1024bool', '1024uint8', '2048bool'):
        MyLogger.print_and_log(
            'Non-existent SCScore model requested: {}. Using "1024bool" model'.format(model_tag),
            scscore_prioritizer_loc, level=2)
        model_tag = '1024bool'

    model_path = gc.SCScore_Prioritiaztion['trained_model_path_' + model_tag]
    # NOTE(security): pickle.load executes arbitrary code; the configured
    # model path must point to a trusted file.
    with open(model_path, 'rb') as fid:
        self.vars = pickle.load(fid)
    if gc.DEBUG:
        MyLogger.print_and_log(
            'Loaded synthetic complexity score prioritization model from {}'.format(model_path),
            scscore_prioritizer_loc)

    if 'uint8' in model_path:
        def mol_to_fp(mol):
            """Returns the folded count fingerprint (uint8) of ``mol``."""
            if mol is None:
                # np.zeros, not np.array: np.array((n,)) would build a
                # one-element array holding n instead of an all-zero
                # fingerprint of length n.
                return np.zeros((self.FP_len,), dtype=np.uint8)
            fp = AllChem.GetMorganFingerprint(
                mol, self.FP_rad, useChirality=True)  # uintsparsevect
            fp_folded = np.zeros((self.FP_len,), dtype=np.uint8)
            for k, v in fp.GetNonzeroElements().items():
                fp_folded[k % self.FP_len] += v
            return fp_folded
    else:
        def mol_to_fp(mol):
            """Returns the bit-vector fingerprint (bool) of ``mol``."""
            if mol is None:
                return np.zeros((self.FP_len,), dtype=np.float32)
            # Builtin bool replaces the np.bool alias, which NumPy 1.24
            # removed.
            return np.array(
                AllChem.GetMorganFingerprintAsBitVect(
                    mol, self.FP_rad, nBits=self.FP_len, useChirality=True),
                dtype=bool)

    self.mol_to_fp = mol_to_fp
    self.pricer = Pricer()
    self.pricer.load()
    self._restored = True
    self._loaded = True
def load_model():
    """Loads the pickled relevance model variables into ``self.vars``.

    ``self`` is not a parameter; it is taken from the enclosing scope. The
    model path is looked up in ``gc.Relevance_Prioritization`` under
    'trained_model_path_<self.retro>'.

    Returns:
        The ``self`` object from the enclosing scope.
    """
    path_key = 'trained_model_path_{}'.format(self.retro)
    model_path = gc.Relevance_Prioritization[path_key]

    with open(model_path, 'rb') as model_file:
        self.vars = pickle.load(model_file)

    if gc.DEBUG:
        message = 'Loaded relevance based template prioritization model from {}'.format(
            model_path)
        MyLogger.print_and_log(message, relevance_template_prioritizer_loc)
    return self
def load_model(depth=5, hidden_size=300, output_size=gc.Relevance_Prioritization['output_size']):
    """Builds the TensorFlow relevance network and loads its pickled weights.

    ``self`` is not a parameter; it is taken from the enclosing scope. The
    graph is ``depth`` ReLU layers followed by a linear output layer, and the
    top ``self.NK`` indices of the output are exposed as ``self.topk``.

    Args:
        depth (int, optional): Number of hidden layers. (default: {5})
        hidden_size (int, optional): Width of each hidden layer.
            (default: {300})
        output_size (int, optional): Width of the output layer.
            (default: {gc.Relevance_Prioritization['output_size']})
    """
    from functools import reduce

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    self.session = tf.Session(config=session_config)

    # Graph construction; the statement order is kept as-is.
    self.input_mol = tf.placeholder(
        tf.float32, [self.batch_size, self.FP_len])
    self.mol_hiddens = tf.nn.relu(
        linearND(self.input_mol, hidden_size, scope="encoder0",
                 reuse=tf.AUTO_REUSE))
    for layer in range(1, depth):
        self.mol_hiddens = tf.nn.relu(
            linearND(self.mol_hiddens, hidden_size,
                     scope="encoder%i" % layer, reuse=tf.AUTO_REUSE))
    self.score = linearND(self.mol_hiddens, output_size, scope="output",
                          reuse=tf.AUTO_REUSE)
    _, self.topk = tf.nn.top_k(self.score, k=self.NK)

    tf.global_variables_initializer().run(session=self.session)

    def element_count(variable):
        # Product of the variable's dimensions.
        return reduce(lambda x, y: x*y, variable.get_shape().as_list())

    n = sum(element_count(v) for v in tf.trainable_variables())
    print(("Model size: %dK" % (n/1000,)))

    self.coord = tf.train.Coordinator()

    weights_path = gc.Relevance_Prioritization[
        'trained_model_path_{}'.format(self.retro)]
    with open(weights_path, 'rb') as fid:
        variables = pickle.load(fid)

    # Stored values are matched to trainable variables by position.
    for index, tf_variable in enumerate(tf.trainable_variables()):
        self.session.run(tf.assign(tf_variable, variables[index]))
    print('Loaded tf model from numpy arrays')
def load_from_file(self, file_path=gc.historian_data, refs=False, compressed=False):
    """Loads the chemhistorian occurrences from a locally stored file.

    The file actually opened is ``file_path`` with '_no_refs' appended when
    ``refs`` is false and '_compressed' appended when ``compressed`` is true.

    Args:
        file_path (str, optional): Base path to the input file.
            (default: {gc.historian_data})
        refs (bool, optional): Whether to load the file that includes the
            references rather than just the counts. (default: {False})
        compressed (bool, optional): Whether to load the compressed variant.
            (default: {False})

    Raises:
        ValueError: If the resolved file does not exist.
    """
    MyLogger.print_and_log('Loading chemhistorian from file...', historian_loc)

    if not refs:
        file_path = file_path + '_no_refs'
    if compressed:
        file_path = file_path + '_compressed'

    if not os.path.isfile(file_path):
        raise ValueError('File does not exist!')

    with open(file_path, 'rb') as handle:
        self.occurrences = pickle.load(handle)
    self._loaded = True
    if compressed:
        self._compressed = True
def load(self):
    """Connects to the databases, falling back to a flat file of prices.

    ``load_databases`` always runs first. The pickle file is read only when
    ``self.BUYABLE_DB`` is falsy afterwards and the file exists; it is read
    as three consecutive pickled objects (prices, prices_flat,
    prices_by_xrn).
    """
    from makeit.utilities.io.files import get_pricer_path

    chemicals = gc.CHEMICALS
    buyables = gc.BUYABLES
    file_path = get_pricer_path(
        chemicals['database'],
        chemicals['collection'],
        buyables['database'],
        buyables['collection'],
    )

    self.load_databases()

    # Nothing more to do if the database is available or no file exists.
    if self.BUYABLE_DB or not os.path.isfile(file_path):
        return

    with open(file_path, 'rb') as price_file:
        self.prices = defaultdict(float, pickle.load(price_file))
        self.prices_flat = defaultdict(float, pickle.load(price_file))
        self.prices_by_xrn = defaultdict(float, pickle.load(price_file))
    MyLogger.print_and_log('Loaded prices from flat file', pricer_loc)
def load(self):
    """Loads the pricer data from a local file, else from the database.

    If the pickle file exists it is read as two consecutive pickled objects
    (prices, prices_flat). Otherwise the data is loaded through
    ``load_databases``/``load_from_database`` and written to that path with
    ``dump_to_file``.
    """
    from makeit.utilities.io.files import get_pricer_path

    chemicals = gc.CHEMICALS
    buyables = gc.BUYABLES
    file_path = get_pricer_path(
        chemicals['database'],
        chemicals['collection'],
        buyables['database'],
        buyables['collection'],
    )

    if not os.path.isfile(file_path):
        # No cached file: load from the database and write the cache.
        self.load_databases()
        self.load_from_database()
        self.dump_to_file(file_path)
        return

    with open(file_path, 'rb') as price_file:
        self.prices = defaultdict(float, pickle.load(price_file))
        self.prices_flat = defaultdict(float, pickle.load(price_file))
def label_generator(start_at, end_at, batch_size):
    """Yields label batches matching the data produced by data_generator.

    The labels file (``LABELS_FPATH``) is a stream of pickled objects: a
    legend mapping field names to indices, followed by one document per
    example. Documents ``start_at`` to ``end_at - 1`` are served in batches
    of ``batch_size``, and the same range is repeated forever.

    The legend is read and the leading documents skipped only once. Every
    later pass seeks back to the remembered offset of document
    ``start_at``, so no pass re-reads the legend from the middle of the
    file.

    Args:
        start_at (int): Index of the first document to serve.
        end_at (int): Index one past the last document to serve.
        batch_size (int): Maximum number of documents per batch.

    Yields:
        dict: Lists keyed by 'candidate_smiles', 'candidate_edits',
            'reaction_true' and 'rxdid', one entry per document in the batch.
    """
    # NOTE(security): pickle.load executes arbitrary code; LABELS_FPATH must
    # point to a trusted file.
    with open(LABELS_FPATH, 'rb') as fid:
        legend_labels = pickle.load(fid)  # first doc is legend
        CANDIDATE_SMILES = legend_labels['candidate_smiles']
        CANDIDATE_EDITS = legend_labels['candidate_edits_compact']
        REACTION_TRUE = legend_labels['reaction_true']
        RXDID = legend_labels['rxdid']

        for _ in range(start_at):
            pickle.load(fid)  # throw away the documents before start_at
        data_start = fid.tell()

        # Keep returning forever and ever
        while True:
            fid.seek(data_start)
            for start_index in range(start_at, end_at, batch_size):
                end_index = min(start_index + batch_size, end_at)
                docs = [pickle.load(fid)
                        for _ in range(start_index, end_index)]
                yield {
                    'candidate_smiles': [doc[CANDIDATE_SMILES] for doc in docs],
                    'candidate_edits': [doc[CANDIDATE_EDITS] for doc in docs],
                    'reaction_true': [doc[REACTION_TRUE] for doc in docs],
                    'rxdid': [doc[RXDID] for doc in docs],
                }
def load_from_file(self, file_path=gc.historian_data, refs=False, compressed=False):
    """Loads the chemhistorian occurrences from a locally stored file.

    Args:
        file_path (str, optional): Base path to the input file; '_no_refs'
            and/or '_compressed' are appended depending on the flags.
            (default: {gc.historian_data})
        refs (bool, optional): If false, the '_no_refs' variant of the file
            is loaded. (default: {False})
        compressed (bool, optional): If true, the '_compressed' variant of
            the file is loaded and ``self._compressed`` is set.
            (default: {False})

    Raises:
        ValueError: If the resolved file does not exist.
    """
    MyLogger.print_and_log('Loading chemhistorian from file...', historian_loc)

    # The variant suffixes are appended in a fixed order: refs first.
    suffix = '' if refs else '_no_refs'
    if compressed:
        suffix += '_compressed'
    file_path += suffix

    if os.path.isfile(file_path):
        with open(file_path, 'rb') as pickled:
            self.occurrences = pickle.load(pickled)
        self._loaded = True
        if compressed:
            self._compressed = True
        return

    raise ValueError('File does not exist!')
# chemhistorian.load_from_file()
# The historian is left unloaded here.
chemhistorian = None

from makeit.prioritization.precursors.scscore import SCScorePrecursorPrioritizer
scscorer = SCScorePrecursorPrioritizer()
scscorer.load_model(model_tag='1024bool')
print('Loaded SCScorer on website')
# Score one small molecule and print the result.
print(scscorer.get_score_from_smiles('CCCC', noprice=True))

# Solvent choices - the save file is created by the template-based forward predictor
solvent_choices = []
from makeit.utilities.io.files import get_abraham_solvents_path
file_path = get_abraham_solvents_path()
if not os.path.isfile(file_path):
    # No save file: read the solvents (except the 'default' entry) from MongoDB.
    db_client = MongoClient(gc.MONGO['path'], gc.MONGO['id'],
                            connect=gc.MONGO['connect'])
    db = db_client[gc.SOLVENTS['database']]
    SOLVENT_DB = db[gc.SOLVENTS['collection']]
    for doc in SOLVENT_DB.find({'_id': {'$ne': 'default'}}):
        solvent_choices.append({
            'smiles': doc['smiles'],
            'name': doc['name'],
        })
else:
    with open(file_path, 'rb') as fid:
        solvent_name_to_smiles = pickle.load(fid)
    solvent_choices = [
        {'smiles': smiles, 'name': name}
        for name, smiles in solvent_name_to_smiles.items()
    ]
def load(self, folder="", worker_no=0):
    """Loads the neural network scoring model and the solvent tables.

    The model hyperparameters come from ``<folder>/args.json`` and the
    weights from ``<folder>/weights.h5``. Solvent data is read from the
    Abraham solvents pickle file when it exists; otherwise it is read from
    MongoDB and that file is written.

    Args:
        folder (str, optional): Directory holding 'args.json' and
            'weights.h5'. An empty value is logged at level 3.
            (default: {""})
        worker_no (int, optional): Worker index; only worker 0 logs the
            start and finish messages. (default: {0})
    """
    logs_progress = worker_no == 0
    if logs_progress:
        MyLogger.print_and_log('Starting to load scorer...',
                               template_nn_scorer_loc)

    # First load neural network
    if not folder:
        MyLogger.print_and_log(
            'Cannot load neural network without the directory in which the parameters are saved. Exiting...',
            template_nn_scorer_loc, level=3)

    # Get model args
    with open(os.path.join(folder, 'args.json'), 'r') as fid:
        args = json.load(fid)

    # Keys are read in the original order, including the two that are not
    # used afterwards, so a missing key raises the same KeyError.
    N_h2 = int(args['Nh2'])
    N_h1 = int(args['Nh1'])
    N_h3 = int(args['Nh3'])
    N_hf = int(args['Nhf'])
    l2v = float(args['l2'])
    lr = float(args['lr'])  # read but not used below
    context_weight = float(args['context_weight'])
    enhancement_weight = float(args['enhancement_weight'])
    optimizer = args['optimizer']  # read but not used below
    inner_act = args['inner_act']

    self.model = build(
        F_atom=self.F_atom, F_bond=self.F_bond,
        N_h1=N_h1, N_h2=N_h2, N_h3=N_h3, N_hf=N_hf,
        l2v=l2v, inner_act=inner_act,
        context_weight=context_weight,
        enhancement_weight=enhancement_weight,
        TARGET_YIELD=False, absolute_score=True)
    self.model.load_weights(os.path.join(folder, 'weights.h5'), by_name=True)

    # Now load solvent information
    from makeit.utilities.io.files import get_abraham_solvents_path
    file_path = get_abraham_solvents_path()

    if not os.path.isfile(file_path):
        # No save file: read every solvent from MongoDB, then write the file.
        db_client = MongoClient(gc.MONGO['path'], gc.MONGO['id'],
                                connect=gc.MONGO['connect'])
        db = db_client[gc.SOLVENTS['database']]
        SOLVENT_DB = db[gc.SOLVENTS['collection']]
        for doc in SOLVENT_DB.find():
            try:
                solvent_id = doc['_id']
                if solvent_id == 'default':
                    self.solvent_name_to_smiles['default'] = solvent_id
                else:
                    self.solvent_name_to_smiles[doc['name']] = solvent_id
                self.solvent_smiles_to_params[solvent_id] = doc
            except KeyError:
                MyLogger.print_and_log(
                    'Solvent doc {} missing a name'.format(doc),
                    template_nn_scorer_loc, level=1)

        with open(file_path, 'wb') as fid:
            pickle.dump(self.solvent_name_to_smiles, fid)
            pickle.dump(self.solvent_smiles_to_params, fid)
    else:
        # The file holds two pickled objects, in this order.
        with open(file_path, 'rb') as fid:
            self.solvent_name_to_smiles = pickle.load(fid)
            self.solvent_smiles_to_params = pickle.load(fid)

    if logs_progress:
        MyLogger.print_and_log('Scorer has been loaded.',
                               template_nn_scorer_loc)
def load_from_file(self, retro, file_path, chiral=False, rxns=True, refs=False, efgs=False, rxn_ex=False):
    """Read the template database from a previously saved file.

    Args:
        retro (bool): Whether in the retrosynthetic direction.
        file_path (str): Pickle file to read dumped templates from.
        chiral (bool, optional): Whether to handle chirality properly
            (only for retro for now). (default: {False})
        rxns (bool, optional): Whether to actually load the reaction
            objects (or just the info). (default: {True})
        refs (bool, optional): Whether to include references.
            (default: {False})
        efgs (bool, optional): Whether to include efg information.
            (default: {False})
        rxn_ex (bool, optional): Whether to include reaction examples.
            (default: {False})

    Raises:
        IOError: If the file does not exist, or if a requested optional
            field is absent from the first loaded template.
    """
    MyLogger.print_and_log('Loading templates from {}'.format(file_path),
                           transformer_loc)

    if not os.path.isfile(file_path):
        MyLogger.print_and_log("No file to read data from.",
                               transformer_loc, level=1)
        raise IOError('File not found to load template_transformer from!')

    # NOTE(security): pickle.load executes arbitrary code; file_path must
    # point to a trusted file.
    with open(file_path, 'rb') as file:
        loaded_templates = pickle.load(file)

    if retro and chiral and rxns:
        # cannot pickle rdchiralReactions, so need to reload from SMARTS
        self.templates = []
        for template in loaded_templates:
            try:
                template['rxn'] = rdchiralReaction(
                    str('(' + template['reaction_smarts'].replace(
                        '>>', ')>>(') + ')'))
            except Exception:
                # Deliberate best effort: a template whose SMARTS cannot be
                # rebuilt is kept, with no reaction object.
                template['rxn'] = None
            self.templates.append(template)
    else:
        self.templates = loaded_templates

    # Clear out unnecessary info. A field that was not requested is dropped
    # from every template; a requested one must be present in the first
    # template. An empty template list has nothing to check.
    optional_fields = (
        (refs, 'references',
         'Save file does not contain references (which were requested!)'),
        (efgs, 'efgs',
         'Save file does not contain efg info (which was requested!)'),
        (rxn_ex, 'rxn_example',
         'Save file does not contain a reaction example (which was requested!)'),
    )
    for requested, key, error_message in optional_fields:
        if not requested:
            for template in self.templates:
                template.pop(key, None)
        elif self.templates and key not in self.templates[0]:
            raise IOError(error_message)

    self.num_templates = len(self.templates)
    MyLogger.print_and_log(
        'Loaded templates. Using {} templates'.format(self.num_templates),
        transformer_loc)
def data_generator(start_at, end_at, batch_size, max_N_c=None, shuffle=False):
    """Yields padded input/target batches read from the pickled data file.

    The data file (``DATA_FPATH``) is a stream of pickled objects: a legend
    mapping field names to indices, followed by one document per example.
    All the data can't fit in memory, so a first pass records the file
    offset and the padded dimensions of every batch. The generator then
    loops forever, seeking to each batch and building its tensors on the
    fly. The start and end indices are explicit so the same function serves
    training, validation and testing.

    Args:
        start_at (int): Index of the first document to serve.
        end_at (int): Index one past the last document to serve.
        batch_size (int): Maximum number of documents per batch.
        max_N_c (int, optional): Maximum number of candidates to consider
            per example. This should ONLY be used for training, not for
            validation or testing. (default: {None})
        shuffle (bool, optional): Whether to shuffle the batch order on
            every pass. (default: {False})

    Yields:
        tuple: ``(inputs, targets)``. ``inputs`` is the list
            [x_h_lost, x_h_gain, x_bond_lost, x_bond_gain, reagent, solvent,
            temperature]. ``targets`` is a one-element list holding the
            yields if the module-level ``TARGET_YIELD`` is set, else the
            one-hot true-reaction matrix.

    Raises:
        ValueError: If an edit vector has more entries than the padded size
            computed in the first pass.
    """

    def bond_string_to_tuple(string):
        split = string.split('-')
        return (split[0], split[1], float(split[2]))

    def parse_edit_string(edit_string):
        # 'a,b;c;x-y-1.0;...' -> [h_lost, h_gain, bond_lost, bond_gain]
        parts = edit_string.split(';')
        return [
            [atom for atom in parts[0].split(',') if atom],
            [atom for atom in parts[1].split(',') if atom],
            [bond_string_to_tuple(bond)
             for bond in parts[2].split(',') if bond],
            [bond_string_to_tuple(bond)
             for bond in parts[3].split(',') if bond],
        ]

    batch_starts = range(start_at, end_at, batch_size)
    fileInfo = []  # (filePos, startIndex, endIndex) per batch
    batchDims = []  # padded dimensions of each batch
    batchNums = np.array(list(range(len(batch_starts))))  # shuffled later

    # NOTE(security): pickle.load executes arbitrary code; DATA_FPATH must
    # point to a trusted file.
    with open(DATA_FPATH, 'rb') as fid:

        # Do a first pass through the data
        legend_data = pickle.load(fid)  # first doc is legend

        # Pre-load indices
        CANDIDATE_EDITS_COMPACT = legend_data['candidate_edits_compact']
        ATOM_DESC_DICT = legend_data['atom_desc_dict']
        T = legend_data['T']
        SOLVENT = legend_data['solvent']
        REAGENT = legend_data['reagent']
        YIELD = legend_data['yield']
        REACTION_TRUE_ONEHOT = legend_data['reaction_true_onehot']

        for _ in range(start_at):
            pickle.load(fid)  # throw away the documents before start_at

        for startIndex in batch_starts:
            endIndex = min(startIndex + batch_size, end_at)

            # Remember this starting position
            fileInfo.append((fid.tell(), startIndex, endIndex))

            N = endIndex - startIndex  # number of samples this batch
            docs = [pickle.load(fid) for _ in range(startIndex, endIndex)]

            # Need to figure out size of padded batch
            N_c = max([len(doc[REACTION_TRUE_ONEHOT]) for doc in docs])
            if max_N_c is not None:  # allow truncation during training
                N_c = min(N_c, max_N_c)

            N_e1 = N_e2 = N_e3 = N_e4 = 1
            for doc in docs:
                for c, edit_string in enumerate(doc[CANDIDATE_EDITS_COMPACT]):
                    if c >= N_c:
                        break
                    edit_string_split = edit_string.split(';')
                    N_e1 = max(N_e1, edit_string_split[0].count(',') + 1)
                    N_e2 = max(N_e2, edit_string_split[1].count(',') + 1)
                    N_e3 = max(N_e3, edit_string_split[2].count(',') + 1)
                    N_e4 = max(N_e4, edit_string_split[3].count(',') + 1)

            # Remember sizes of x_h_lost, x_h_gain, x_bond_lost,
            # x_bond_gain, reaction_true_onehot
            batchDims.append((N, N_c, N_e1, N_e2, N_e3, N_e4))

        # Keep returning forever and ever
        while True:
            if shuffle:
                np.random.shuffle(batchNums)

            for batchNum in batchNums:
                (filePos, startIndex, endIndex) = fileInfo[batchNum]
                (N, N_c, N_e1, N_e2, N_e3, N_e4) = batchDims[batchNum]
                fid.seek(filePos)

                docs = [pickle.load(fid)
                        for _ in range(startIndex, endIndex)]

                # Initialize numpy arrays for x_h_lost, etc.
                x_h_lost = np.zeros((N, N_c, N_e1, F_atom), dtype=np.float32)
                x_h_gain = np.zeros((N, N_c, N_e2, F_atom), dtype=np.float32)
                x_bond_lost = np.zeros((N, N_c, N_e3, F_bond), dtype=np.float32)
                x_bond_gain = np.zeros((N, N_c, N_e4, F_bond), dtype=np.float32)
                reaction_true_onehot = np.zeros((N, N_c), dtype=np.float32)
                yields = np.zeros((N, 1), dtype=np.float32)

                for i, doc in enumerate(docs):
                    for c, edit_string in enumerate(
                            doc[CANDIDATE_EDITS_COMPACT]):
                        if c >= N_c:
                            break

                        edits = parse_edit_string(edit_string)
                        try:
                            edit_h_lost_vec, edit_h_gain_vec, \
                                edit_bond_lost_vec, edit_bond_gain_vec = edits_to_vectors(
                                    edits, None,
                                    atom_desc_dict=doc[ATOM_DESC_DICT])
                        except KeyError:
                            # sometimes molAtomMapNumber not found if
                            # hydrogens were explicit
                            continue

                        for e, edit_h_lost in enumerate(edit_h_lost_vec):
                            if e >= N_e1:
                                raise ValueError('N_e1 not large enough!')
                            x_h_lost[i, c, e, :] = edit_h_lost
                        for e, edit_h_gain in enumerate(edit_h_gain_vec):
                            if e >= N_e2:
                                raise ValueError('N_e2 not large enough!')
                            x_h_gain[i, c, e, :] = edit_h_gain
                        for e, edit_bond_lost in enumerate(edit_bond_lost_vec):
                            if e >= N_e3:
                                raise ValueError('N_e3 not large enough!')
                            x_bond_lost[i, c, e, :] = edit_bond_lost
                        for e, edit_bond_gain in enumerate(edit_bond_gain_vec):
                            if e >= N_e4:
                                # Was the misspelt 'ValueRrror', which
                                # raised NameError instead.
                                raise ValueError('N_e4 not large enough!')
                            x_bond_gain[i, c, e, :] = edit_bond_gain

                    # Add truncated reaction true (eventually will not
                    # truncate)
                    true_onehot = doc[REACTION_TRUE_ONEHOT]
                    if max_N_c is not None:
                        true_onehot = true_onehot[:max_N_c]
                    reaction_true_onehot[i, :len(true_onehot)] = true_onehot
                    yields[i, 0] = doc[YIELD] / 100.0

                # Get rid of NaNs and infinities
                for tensor in (x_h_lost, x_h_gain, x_bond_lost, x_bond_gain):
                    tensor[~np.isfinite(tensor)] = 0.0

                # yield (x, y) as tuple, but each one is a list
                if TARGET_YIELD:
                    y = yields
                else:
                    y = reaction_true_onehot

                yield (
                    [
                        x_h_lost,
                        x_h_gain,
                        x_bond_lost,
                        x_bond_gain,
                        np.array([doc[REAGENT] for doc in docs],
                                 dtype=np.float32),  # reagent
                        np.array([doc[SOLVENT] for doc in docs],
                                 dtype=np.float32),  # solvent
                        np.array([doc[T] for doc in docs],
                                 dtype=np.float32),  # temperature
                    ],
                    [
                        y,
                    ],
                )
def load_model(self, FP_len=1024, model_tag='1024bool'):
    """Loads model from given tag.

    Unknown tags are logged and replaced by '1024bool'. Also installs
    ``self.mol_to_fp`` (the fingerprint function matching the model) and
    loads a ``Pricer``.

    Args:
        FP_len (int, optional): Fingerprint length. (default: {1024})
        model_tag (str, optional): Tag of model to load; one of '1024bool',
            '1024uint8' or '2048bool'. (default: {'1024bool'})
    """
    self.FP_len = FP_len
    known_tags = ('1024bool', '1024uint8', '2048bool')
    if model_tag not in known_tags:
        MyLogger.print_and_log(
            'Non-existent SCScore model requested: {}. Using "1024bool" model'
            .format(model_tag),
            scscore_prioritizer_loc,
            level=2)
        model_tag = '1024bool'

    filename = 'trained_model_path_' + model_tag
    model_path = gc.SCScore_Prioritiaztion[filename]
    # NOTE(security): pickle.load executes arbitrary code; the configured
    # model path must point to a trusted file.
    with open(model_path, 'rb') as fid:
        self.vars = pickle.load(fid)
    if gc.DEBUG:
        MyLogger.print_and_log(
            'Loaded synthetic complexity score prioritization model from {}'
            .format(model_path), scscore_prioritizer_loc)

    if 'uint8' in model_path:

        def mol_to_fp(mol):
            """Returns fingerprint of molecule for uint8 model.

            Args:
                mol (Chem.rdchem.Mol or None): Molecule to get fingerprint
                    of.

            Returns:
                np.ndarray of np.uint8: Fingerprint of given molecule;
                    all zeros when ``mol`` is None.
            """
            if mol is None:
                # np.zeros, not np.array: np.array((n,)) would build a
                # one-element array holding n instead of an all-zero
                # fingerprint of length n.
                return np.zeros((self.FP_len, ), dtype=np.uint8)
            fp = AllChem.GetMorganFingerprint(
                mol, self.FP_rad, useChirality=True)  # uintsparsevect
            fp_folded = np.zeros((self.FP_len, ), dtype=np.uint8)
            for k, v in fp.GetNonzeroElements().items():
                fp_folded[k % self.FP_len] += v
            return fp_folded
    else:

        def mol_to_fp(mol):
            """Returns fingerprint of molecule for bool model.

            Args:
                mol (Chem.rdchem.Mol or None): Molecule to get fingerprint
                    of.

            Returns:
                np.ndarray of bool or np.float32: Fingerprint of given
                    molecule; float32 zeros when ``mol`` is None.
            """
            if mol is None:
                return np.zeros((self.FP_len, ), dtype=np.float32)
            # Builtin bool replaces the np.bool alias, which NumPy 1.24
            # removed.
            return np.array(AllChem.GetMorganFingerprintAsBitVect(
                mol, self.FP_rad, nBits=self.FP_len, useChirality=True),
                            dtype=bool)

    self.mol_to_fp = mol_to_fp
    self.pricer = Pricer()
    self.pricer.load()
    self._restored = True
    self._loaded = True