Ejemplo n.º 1
0
    def _get_generations(self, num_generations, map_):
        """
        Yield successive generations, starting with the initial one.

        The first generation yielded is the initial population after
        its fitness values have been calculated and normalized. Each
        later generation is built from the previous population:
        crossover and mutation records are collected, their molecule
        records are merged with the population and deduplicated by
        key, fitness values are calculated and normalized, and the
        generation selector picks the next population.

        Parameters:
            num_generations: Exclusive upper bound of the generation
                counter, which starts at ``1``. The initial
                generation is always yielded, so for values of at
                least ``1`` exactly ``num_generations`` generations
                are yielded.
            map_: Forwarded unchanged to
                ``self._with_fitness_values``.

        Yields:
            A ``Generation`` holding the population's molecule
            records together with the mutation and crossover records
            collected while producing it (both empty for the initial
            generation).

        """

        def molecule_key(molecule_record):
            # Records whose molecules share a key count as duplicates.
            return self._key_maker.get_key(molecule_record.get_molecule())

        initial_population = self._initial_population

        self._logger.info('Calculating fitness values of initial population.')
        evaluated = tuple(
            self._with_fitness_values(map_, initial_population)
        )
        population = tuple(
            self._fitness_normalizer.normalize(population=evaluated)
        )
        yield Generation(
            molecule_records=population,
            mutation_records=(),
            crossover_records=(),
        )

        for generation in range(1, num_generations):
            self._logger.info(f'Starting generation {generation}.')
            self._logger.info(f'Population size is {len(population)}.')

            self._logger.info('Doing crossovers.')
            crossover_records = tuple(self._get_crossover_records(population))

            self._logger.info('Doing mutations.')
            successful_mutations = []
            for batch in self._mutation_selector.select(population):
                # Only the first member of each selected batch is mutated.
                mutation_record = self._mutator.mutate(batch[0])
                # The mutator may return None; such results are dropped.
                if mutation_record is not None:
                    successful_mutations.append(mutation_record)
            mutation_records = tuple(successful_mutations)

            self._logger.info('Calculating fitness values.')

            # The two generator expressions stay lazy, so the molecule
            # records are only fetched while dedupe walks the chain.
            candidates = it.chain(
                population,
                (record.get_molecule_record()
                 for record in crossover_records),
                (record.get_molecule_record()
                 for record in mutation_records),
            )
            unique_candidates = tuple(
                dedupe(iterable=candidates, key=molecule_key)
            )
            evaluated = tuple(
                self._with_fitness_values(
                    map_=map_,
                    population=unique_candidates,
                )
            )
            normalized = tuple(self._fitness_normalizer.normalize(evaluated))

            survivors = []
            for batch in self._generation_selector.select(normalized):
                # Every selected batch must hold exactly one record.
                (molecule_record,) = batch
                survivors.append(molecule_record)
            population = tuple(survivors)

            yield Generation(
                molecule_records=population,
                mutation_records=mutation_records,
                crossover_records=crossover_records,
            )
Ejemplo n.º 2
0
    def get_all(self):
        """
        Yield every stored molecule that has a position matrix.

        Molecule documents are joined to position matrix documents by
        one aggregation run on ``self._molecules``. The join fields
        are the leading fields of the indices reported by either
        collection, excluding ``_id``.

        Nothing is yielded when no such field exists, because there
        is then no way to pair a molecule with its position matrix.

        Yields:
            Whatever ``self._dejsonizer.from_json`` returns for each
            molecule document for which ``get_any_value`` finds a
            position matrix document.

        """
        # Get all potential indices.
        indices = itertools.chain(
            self._position_matrices.index_information().values(),
            self._molecules.index_information().values(),
        )
        keys = tuple(
            dedupe(
                index['key'][0][0] for index in indices
                # Ignore "_id" index which is unique in a collection and
                # cannot be used to match molecular data split across
                # collections.
                if index['key'][0][0] != '_id'
            )
        )

        if not keys:
            # MongoDB rejects an empty "$or" array in a query, so the
            # first "$match" stage below would make the aggregation
            # fail. With no shared key there is nothing to match.
            return

        # Keep only the molecule documents holding at least one key.
        query = [
            {
                '$match': {
                    '$or': [{key: {'$exists': True}} for key in keys],
                },
            },
        ]
        # For every key, attach the position matrix documents which
        # share the molecule's value for that key as "posmat_<key>".
        query.extend(
            {
                '$lookup': {
                    'from': self._position_matrices.name,
                    'let': {'molecule_key': f'${key}'},
                    'as': f'posmat_{key}',
                    'pipeline': [
                        # Skip position matrix documents in which the
                        # key is missing or null - presumably so that
                        # an absent key on both sides is not treated
                        # as a match.
                        {'$match': {key: {'$ne': None}}},
                        {
                            '$match': {
                                '$expr': {
                                    '$eq': [f'${key}', '$$molecule_key'],
                                },
                            },
                        },
                    ],
                },
            }
            for key in keys
        )
        # Drop the molecules for which every lookup came back empty.
        query.append(
            {
                '$match': {
                    '$expr': {
                        '$or': [
                            {'$gt': [{'$size': f'$posmat_{key}'}, 0]}
                            for key in keys
                        ],
                    },
                },
            },
        )

        cursor = self._molecules.aggregate(query)
        for entry in cursor:
            # get_any_value is defined elsewhere; entries for which it
            # returns None are skipped.
            position_matrix_document = get_any_value(
                mapping=entry,
                keys=(f'posmat_{key}' for key in keys),
            )
            if position_matrix_document is not None:
                yield self._dejsonizer.from_json({
                    'molecule': entry,
                    'matrix': position_matrix_document,
                })