def canonical_cycle_decomp(self, cycle_decomp):
    '''
    This is the same as finding every single connected component of the graph.
    '''
    if not hasattr(self, '_map'):
        raise RuntimeError('Need to create the permutation map before '
                           'computing the cycle decomposition!')

    to_visit = set(range(1, self._max + 1))
    cycles = []
    while to_visit:
        # dfs starting from the smallest unvisited element
        curr = fst = min(to_visit)
        current_cycle = []
        while to_visit:
            current_cycle.append(curr)
            to_visit.remove(curr)
            nxt = self[curr]        # follow the permutation map
            if nxt == fst:          # the cycle has closed
                break
            curr = nxt
        cycles.append(current_cycle)
    return canonicalize(cycles)
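# For illustration, a standalone sketch of the same idea on a plain dict-based
# permutation (hypothetical helper, independent of the class above): walk from
# the smallest unvisited element and follow the map until the cycle closes.
def cycle_decomposition(perm):
    """`perm` maps each element to its image, e.g. {1: 2, 2: 1, 3: 3}."""
    to_visit = set(perm)
    cycles = []
    while to_visit:
        curr = first = min(to_visit)
        cycle = []
        while True:
            cycle.append(curr)
            to_visit.remove(curr)
            curr = perm[curr]
            if curr == first:   # the cycle has closed
                break
        cycles.append(cycle)
    return cycles

# (1 2)(3) for the permutation 1 -> 2, 2 -> 1, 3 -> 3
assert cycle_decomposition({1: 2, 2: 1, 3: 3}) == [[1, 2], [3]]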
def _verifyEq(v, exp):
    v = canonicalize(v)
    exp = canonicalize(exp)
    if v != exp:
        return "Found `%s`, expected `%s`" % (v, exp)
    return None
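# Example use in a test (assuming `canonicalize` normalises cycle order and
# rotation so that logically equal decompositions compare equal):
msg = _verifyEq([[2, 3], [1]], [[1], [3, 2]])   # same permutation, written differently
if msg is not None:
    raise AssertionError(msg)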
def inverse(self):
    # should this return another object?
    # the list of reversed perms?
    # Reversing each cycle inverts the permutation.
    inv = [cycle[::-1] for cycle in self.decomp]
    inv = canonicalize(inv)
    return Perm(inv)
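# Why reversing each cycle works: if a cycle sends a -> b, the reversed cycle
# sends b -> a. A standalone sanity check (not using the Perm class):
def apply_cycles(cycles, x):
    """Apply a permutation given in cycle notation to a single element x."""
    for cycle in cycles:
        if x in cycle:
            return cycle[(cycle.index(x) + 1) % len(cycle)]
    return x    # elements not mentioned are fixed points

perm = [[1, 2, 3], [4, 5]]            # (1 2 3)(4 5)
inv = [c[::-1] for c in perm]         # (3 2 1)(5 4)
assert all(apply_cycles(inv, apply_cycles(perm, x)) == x for x in range(1, 6))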
def optimise(self, objective, start_population, keep_top, n_epochs, mols_to_sample,
             optimize_n_epochs, optimize_batch_size, pretrain_n_epochs) -> List[OptResult]:
    """
    Takes an objective and tries to optimise it.

    :param objective: MPO
    :param start_population: Initial compounds (list of smiles) or request new (random?) population
    :param keep_top: number of molecules to keep at each iterative finetune step
    :param n_epochs: number of iterative finetune steps
    :param mols_to_sample: number of molecules to sample at each iterative finetune step
    :param optimize_n_epochs: number of epochs to finetune
    :param optimize_batch_size: batch size for fine-tuning
    :param pretrain_n_epochs: number of epochs to pretrain on the start population
    :return: Candidate molecules
    """
    int_results = self.pretrain_on_initial_population(objective, start_population,
                                                      pretrain_epochs=pretrain_n_epochs)

    results: List[OptResult] = []
    seen: Set[str] = set()
    for k in int_results:
        if k.smiles not in seen:
            results.append(k)
            seen.add(k.smiles)

    for epoch in range(1, 1 + n_epochs):

        t0 = time.time()
        samples = self.sampler.sample(self.model, mols_to_sample, max_seq_len=self.max_len)
        t1 = time.time()

        #### Modification
        # Score all samples
        scores = objective(samples, flt=True)
        int_results = [OptResult(smiles=canonicalize(smiles), score=score)
                       for smiles, score in zip(samples, scores)]
        # Keep 'payload' i.e. unseen samples
        int_results = [OR for OR in int_results
                       if (OR.smiles not in seen) and (OR.smiles is not None)]
        # Sort for reproducibility between different runs
        int_results = sorted(int_results, key=lambda x: x.smiles)
        # Update seen
        seen.update(set(canonicalize_list(samples, include_stereocenters=True)))
        ####

        #### Original
        # This removes duplicates and invalid smiles which I want to capture in MolScore
        # canonicalized_samples = set(canonicalize_list(samples, include_stereocenters=True))
        # payload = list(canonicalized_samples.difference(seen))
        # payload.sort()  # necessary for reproducibility between different runs
        # seen.update(canonicalized_samples)
        # scores = objective(payload, flt=True)
        # int_results = [OptResult(smiles=smiles, score=score)
        #                for smiles, score in zip(payload, scores)]
        ####

        t2 = time.time()

        results.extend(sorted(int_results, reverse=True)[0:keep_top])
        results.sort(reverse=True)

        subset = [i.smiles for i in results][0:keep_top]
        np.random.shuffle(subset)

        sub_train = subset[0:int(3 * len(subset) / 4)]
        sub_test = subset[int(3 * len(subset) / 4):]

        train_seqs, _ = load_smiles_from_list(sub_train, max_len=self.max_len)
        valid_seqs, _ = load_smiles_from_list(sub_test, max_len=self.max_len)

        train_set = get_tensor_dataset(train_seqs)
        valid_set = get_tensor_dataset(valid_seqs)

        opt_batch_size = min(len(sub_train), optimize_batch_size)
        print_every = int(len(sub_train) / opt_batch_size)

        if optimize_n_epochs > 0:
            self.trainer.fit(train_set, valid_set,
                             n_epochs=optimize_n_epochs,
                             batch_size=opt_batch_size,
                             print_every=print_every,
                             valid_every=print_every)

        t3 = time.time()

        logger.info(f'Generation {epoch} --- timings: '
                    f'sample: {(t1 - t0):.3f} s, '
                    f'score: {(t2 - t1):.3f} s, '
                    f'finetune: {(t3 - t2):.3f} s')

        top4 = '\n'.join(f'\t{result.score:.3f}: {result.smiles}' for result in results[:4])
        logger.info(f'Top 4:\n{top4}')

    return sorted(results, reverse=True)
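# Hypothetical call site, for orientation only. The `generator` object, the
# scoring callable and every parameter value below are illustrative
# assumptions, not taken from the source.
top_candidates = generator.optimise(
    objective=scoring_function,         # MPO / MolScore-style scoring callable
    start_population=starting_smiles,   # list of SMILES strings
    keep_top=512,
    n_epochs=20,
    mols_to_sample=1024,
    optimize_n_epochs=2,
    optimize_batch_size=64,
    pretrain_n_epochs=2,
)
best = top_candidates[0]                # results are returned sorted best-first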
print("\nInitial feasible basic solution: ") print_vars(vars) s = np.argmin(c) # idx of min cost coefficient r = print_table(a, b, c, s, basis, vars) # the idx min b/a value i = 1 while True: input() print("\nIteration number : %d" % i) # we have pivot element at coordinates = [r, s], we need to modify `basis` # a basis variable will be converted into non-basis variable. basis = update_basis(a, basis, r, s) # get the table in canonical form a, b, c = canonicalize(a, b, c, r, s) # evaluate variables vars = update_vars(a, b, vars, basis) s = np.argmin(c) # idx of min cost coefficient r = print_table(a, b, c, s, basis, vars) # the idx min b/a value # check if any non basic variable's cost coeff has become zero # if yes, then infinite solutions are possible for var in vars: if var not in basis: # non basic variable if c[int(var[-1]) - 1] == 0: print("Cost coeff corresponding to a non basic variable: " + var + " is zero.") final_print("Infinite Solutions possible") exit() if np.min(c) >= 0: # all cost coefficients are positive