Beispiel #1
0
    def canonical_cycle_decomp(self, cycle_decomp):
        '''
        Build the cycle decomposition of this permutation and canonicalize it.

        Starting from the smallest element that has not been placed in a cycle
        yet, the permutation map is followed until it returns to that starting
        element. This is the same as finding every single connected component
        of the permutation's graph.

        cycle_decomp is accepted but not used: the cycles are always rebuilt
        from the permutation map over the elements 1..self._max.

        Returns the result of `canonicalize` applied to the list of cycles,
        where each cycle is a list beginning with the element the walk
        started from.

        Raises RuntimeError if the permutation map (`_map`) does not exist yet.
        '''
        if not hasattr(self, '_map'):
            # Adjacent literals are concatenated verbatim, so the separating
            # space has to live inside one of them.
            raise RuntimeError('Need to create the permutation map before '
                               'computing the cycle decomposition!')

        to_visit = set(range(1, self._max + 1))
        cycles = []
        # Scanning in ascending order means the first element still present in
        # to_visit is always the minimum unvisited one, so there is no need to
        # call min() on the set for every cycle.
        for fst in range(1, self._max + 1):
            if fst not in to_visit:
                continue

            curr = fst
            current_cycle = []
            while to_visit:
                current_cycle.append(curr)
                to_visit.remove(curr)
                nxt = self[curr]
                if nxt == fst:
                    # walked all the way around: the cycle is closed
                    break
                curr = nxt

            cycles.append(current_cycle)

        return canonicalize(cycles)
Beispiel #2
0
 def _verifyEq(v, exp):
     """Compare two values after passing both through ``canonicalize``.

     Returns ``None`` when the canonical forms agree; otherwise returns a
     message string that shows the canonical form found and the one expected.
     """
     found, wanted = canonicalize(v), canonicalize(exp)
     return ("Found `%s`, expected `%s`" % (found, wanted)) if found != wanted else None
Beispiel #3
0
 def inverse(self):
     """Return a new ``Perm`` built from the reversed cycles of this one.

     Every entry of ``self.decomp`` is reversed, the result is passed through
     ``canonicalize`` and wrapped in a fresh ``Perm``. Assuming ``self.decomp``
     holds the cycle decomposition, reversing each cycle yields the inverse
     permutation.

     NOTE(review): the original author left open whether this should return
     a new object or just the list of reversed cycles; the current behaviour
     (a new ``Perm``) is kept.
     """
     # Build a real list: on Python 3 ``map`` returns a lazy, single-pass
     # iterator, which breaks any consumer that needs len(), indexing or a
     # second iteration.
     reversed_cycles = [cyc[::-1] for cyc in self.decomp]
     return Perm(canonicalize(reversed_cycles))
Beispiel #4
0
    def optimise(self, objective, start_population, keep_top, n_epochs, mols_to_sample,
                 optimize_n_epochs, optimize_batch_size, pretrain_n_epochs) -> List[OptResult]:
        """
        Take an objective and try to optimise it by repeated sample / score / fine-tune rounds.

        :param objective: scoring callable; invoked as ``objective(samples, flt=True)`` and
                expected to return one score per sample
        :param start_population: initial compounds, handed to ``pretrain_on_initial_population``
        :param keep_top: number of molecules to keep at each iterative finetune step
        :param n_epochs: number of sample / score / fine-tune generations to run
        :param mols_to_sample: number of molecules to sample at each iterative finetune step
        :param optimize_n_epochs: number of epochs to finetune per generation
                (fine-tuning is skipped when this is not positive)
        :param optimize_batch_size: upper bound on the batch size used for fine-tuning
        :param pretrain_n_epochs: number of epochs to pretrain on start population
        :return: all retained results, sorted in descending order
        """

        int_results = self.pretrain_on_initial_population(objective, start_population,
                                                          pretrain_epochs=pretrain_n_epochs)

        results: List[OptResult] = []
        seen: Set[str] = set()

        # Seed the result list with the pretraining results, one entry per SMILES.
        for k in int_results:
            if k.smiles not in seen:
                results.append(k)
                seen.add(k.smiles)

        for epoch in range(1, 1 + n_epochs):

            t0 = time.time()
            samples = self.sampler.sample(self.model, mols_to_sample, max_seq_len=self.max_len)
            t1 = time.time()

            # Score the raw samples first, so the objective sees every sample.
            # (An earlier version de-duplicated and dropped already-seen SMILES
            # *before* scoring; that hid duplicates and invalid SMILES from the
            # scoring function.)
            scores = objective(samples, flt=True)
            int_results = [OptResult(smiles=canonicalize(smiles), score=score) for
                           smiles, score in zip(samples, scores)]
            # Keep the 'payload', i.e. samples not seen in earlier rounds, and
            # drop entries whose canonical SMILES is None.
            # NOTE(review): `seen` is only updated after this filter, so the same
            # SMILES sampled twice within one round is kept twice -- confirm
            # whether that is intended.
            int_results = [cand for cand in int_results
                           if (cand.smiles not in seen) and (cand.smiles is not None)]
            # Sort for reproducibility between different runs
            int_results = sorted(int_results, key=lambda x: x.smiles)
            # Update seen
            seen.update(canonicalize_list(samples, include_stereocenters=True))

            t2 = time.time()

            results.extend(sorted(int_results, reverse=True)[0:keep_top])
            results.sort(reverse=True)
            subset = [res.smiles for res in results[0:keep_top]]

            # Shuffled unconditionally so the consumption of the global NumPy
            # random state does not depend on whether fine-tuning runs.
            np.random.shuffle(subset)

            # 3/4 of the kept molecules train the model, the rest validate it.
            split = 3 * len(subset) // 4
            sub_train = subset[:split]
            sub_test = subset[split:]

            if optimize_n_epochs > 0 and sub_train:
                train_seqs, _ = load_smiles_from_list(sub_train, max_len=self.max_len)
                valid_seqs, _ = load_smiles_from_list(sub_test, max_len=self.max_len)

                train_set = get_tensor_dataset(train_seqs)
                valid_set = get_tensor_dataset(valid_seqs)

                # sub_train is non-empty here, so a positive optimize_batch_size
                # can no longer lead to a division by zero below.
                opt_batch_size = min(len(sub_train), optimize_batch_size)

                print_every = len(sub_train) // opt_batch_size

                self.trainer.fit(train_set, valid_set,
                                 n_epochs=optimize_n_epochs,
                                 batch_size=opt_batch_size,
                                 print_every=print_every,
                                 valid_every=print_every)
            elif optimize_n_epochs > 0:
                # Fewer than two molecules were kept, so the training split is empty.
                logger.warning('Generation %d --- no molecules available for fine-tuning, '
                               'skipping the fine-tune step', epoch)

            t3 = time.time()

            logger.info(f'Generation {epoch} --- timings: '
                        f'sample: {(t1 - t0):.3f} s, '
                        f'score: {(t2 - t1):.3f} s, '
                        f'finetune: {(t3 - t2):.3f} s')

            top4 = '\n'.join(f'\t{result.score:.3f}: {result.smiles}' for result in results[:4])
            logger.info(f'Top 4:\n{top4}')

        return sorted(results, reverse=True)
Beispiel #5
0
# Report the starting solution and select the first pivot before iterating.
# NOTE: `vars` shadows the Python builtin of the same name; it is left as-is
# because the rest of the script refers to it.
print("\nInitial feasible basic solution: ")
print_vars(vars)
s = np.argmin(c)  # idx of min cost coefficient (second pivot coordinate, see loop below)
r = print_table(a, b, c, s, basis, vars)  # the idx min b/a value, as returned by print_table (first pivot coordinate)

# iteration counter, printed at the start of every pass of the loop below
i = 1

while True:
    input()
    print("\nIteration number : %d" % i)
    # we have pivot element at coordinates = [r, s], we need to modify `basis`
    # a basis variable will be converted into non-basis variable.
    basis = update_basis(a, basis, r, s)
    # get the table in canonical form
    a, b, c = canonicalize(a, b, c, r, s)
    # evaluate variables
    vars = update_vars(a, b, vars, basis)
    s = np.argmin(c)  # idx of min cost coefficient
    r = print_table(a, b, c, s, basis, vars)  # the idx min b/a value

    # check if any non basic variable's cost coeff has become zero
    # if yes, then infinite solutions are possible
    for var in vars:
        if var not in basis:  # non basic variable
            if c[int(var[-1]) - 1] == 0:
                print("Cost coeff corresponding to a non basic variable: " + var + " is zero.")
                final_print("Infinite Solutions possible")
                exit()

    if np.min(c) >= 0:  # all cost coefficients are positive