def _get_generations(self, num_generations, map_):
    """
    Yield the generations produced by the algorithm, one at a time.

    The first generation yielded holds the initial population after
    its fitness values have been calculated and normalized, and has
    empty mutation and crossover records. Each later generation is
    built from the previous one by applying crossovers and mutations,
    removing duplicates, recalculating and normalizing the fitness
    values, and finally running the generation selector.

    Parameters
    ----------
    num_generations
        Upper bound of ``range(1, num_generations)``, which drives
        the loop creating every generation after the initial one.
        The initial generation is always yielded.

    map_
        Forwarded unchanged to ``self._with_fitness_values``.
        NOTE(review): presumably a ``map``-like callable used to
        evaluate fitness values -- confirm against the caller.

    Yields
    ------
    Generation
        Holds the molecule, mutation and crossover records of a
        single generation.

    """

    def molecule_key(molecule_record):
        # Records are treated as duplicates when their molecules
        # share a key.
        return self._key_maker.get_key(molecule_record.get_molecule())

    current = self._initial_population

    self._logger.info(
        'Calculating fitness values of initial population.'
    )
    current = tuple(self._with_fitness_values(map_, current))
    current = tuple(
        self._fitness_normalizer.normalize(population=current)
    )
    yield Generation(
        molecule_records=current,
        mutation_records=(),
        crossover_records=(),
    )

    for generation in range(1, num_generations):
        self._logger.info(f'Starting generation {generation}.')
        self._logger.info(f'Population size is {len(current)}.')

        self._logger.info('Doing crossovers.')
        crossover_records = tuple(
            self._get_crossover_records(current)
        )

        self._logger.info('Doing mutations.')
        selected_batches = self._mutation_selector.select(current)
        attempted_mutations = (
            # Only the first record of each selected batch is mutated.
            self._mutator.mutate(batch[0])
            for batch in selected_batches
        )
        # A mutation result of None is dropped.
        mutation_records = tuple(
            result
            for result in attempted_mutations
            if result is not None
        )

        self._logger.info('Calculating fitness values.')
        crossover_products = (
            crossover_record.get_molecule_record()
            for crossover_record in crossover_records
        )
        mutation_products = (
            mutation_record.get_molecule_record()
            for mutation_record in mutation_records
        )
        # Existing members come first in the chain, ahead of any
        # newly created record.
        candidates = tuple(
            dedupe(
                iterable=it.chain(
                    current,
                    crossover_products,
                    mutation_products,
                ),
                key=molecule_key,
            )
        )
        current = tuple(
            self._with_fitness_values(
                map_=map_,
                population=candidates,
            )
        )
        current = tuple(self._fitness_normalizer.normalize(current))

        # Each selected batch is unpacked as exactly one record.
        current = tuple(
            survivor
            for (survivor,)
            in self._generation_selector.select(current)
        )
        yield Generation(
            molecule_records=current,
            mutation_records=mutation_records,
            crossover_records=crossover_records,
        )
def get_all(self):
    """
    Yield every stored entry which has a matching position matrix.

    Key names are taken from the first field of every index found on
    ``self._position_matrices`` and ``self._molecules``. An
    aggregation over ``self._molecules`` then joins each document
    with the position matrix documents sharing a value for one of
    those keys, and keeps only documents with at least one match.

    NOTE(review): both attributes are presumably pymongo
    collections -- confirm against the class constructor.

    Yields
    ------
    object
        The result of ``self._dejsonizer.from_json`` for each
        aggregated document, called with the document under
        ``'molecule'`` and the value found by ``get_any_value``
        under ``'matrix'``.

    """

    # Collect the descriptions of all indices on both collections.
    index_descriptions = itertools.chain(
        self._position_matrices.index_information().values(),
        self._molecules.index_information().values(),
    )
    leading_fields = (
        description['key'][0][0]
        for description in index_descriptions
    )
    # The "_id" index is unique within a collection, so it cannot be
    # used to match molecular data split across collections.
    keys = tuple(
        dedupe(
            field for field in leading_fields if field != '_id'
        )
    )

    def lookup_stage(key):
        # Join the position matrix documents whose value for key is
        # not None and equals the value held by the current document.
        return {
            '$lookup': {
                'from': self._position_matrices.name,
                'let': {
                    'molecule_key': f'${key}',
                },
                'as': f'posmat_{key}',
                'pipeline': [
                    {
                        '$match': {
                            key: {'$ne': None},
                        },
                    },
                    {
                        '$match': {
                            '$expr': {
                                '$eq': [
                                    f'${key}',
                                    '$$molecule_key',
                                ],
                            },
                        },
                    },
                ],
            },
        }

    # Keep documents which define at least one of the keys.
    has_any_key = {
        '$match': {
            '$or': [{key: {'$exists': True}} for key in keys],
        },
    }
    # Keep documents for which at least one join found a match.
    has_any_matrix = {
        '$match': {
            '$expr': {
                '$or': [
                    {'$gt': [{'$size': f'$posmat_{key}'}, 0]}
                    for key in keys
                ],
            },
        },
    }
    stages = [has_any_key, *map(lookup_stage, keys), has_any_matrix]

    for entry in self._molecules.aggregate(stages):
        matrix_document = get_any_value(
            mapping=entry,
            keys=(f'posmat_{key}' for key in keys),
        )
        if matrix_document is None:
            continue
        yield self._dejsonizer.from_json({
            'molecule': entry,
            'matrix': matrix_document,
        })