Ejemplo n.º 1
0
    def __init__(self, parent, id, title):
        """Build the demo frame: one genome, two feature lists and a view.

        Args:
            parent: Parent window forwarded to ``wx.Frame.__init__``.
            id: Window identifier forwarded to ``wx.Frame.__init__``.
            title: Frame title forwarded to ``wx.Frame.__init__``.
        """
        wx.Frame.__init__(self, parent, id, title, size=(800, 400))

        # The demo genome carries an empty sequence.
        demo_genome = Genome()
        demo_genome.setSequence("")

        # First feature list ("test") with two hard-coded features.
        test_features = FeatureList("test")
        for entry in (
            Feature("protein1", "protein asdgqiuw", 0, 49),
            Feature("protein2", "asdgqidu", 270, 300),
        ):
            test_features.addFeature(entry)

        # Second feature list ("gff"), also with two hard-coded features.
        gff_features = FeatureList("gff")
        for entry in (
            Feature("protein1", "protein asdgqiuw", 60, 70),
            Feature("protein1", "protein asdgqiuw", 78, 80),
        ):
            gff_features.addFeature(entry)

        # Bundle both lists, then wire genome and container into the model.
        container = FeatureListContainer()
        for feature_list in (test_features, gff_features):
            container.addFeatureList(feature_list)

        self.model = GenomeModel()
        self.model.setGenome(demo_genome)
        self.model.setFeatureListContainer(container)

        self.view = GenomeView(self.model, self)
        self.Show()
Ejemplo n.º 2
0
 def initial_generation(self):
     """Return a ``Generation`` of ``self.population`` freshly seeded genomes.

     Each genome's ``solution`` comes from
     ``random_solution(self.genome_length)``; the generation is built with
     ``self.answer`` as its second argument.
     """
     def spawn():
         # Create one genome and give it its own random solution.
         member = Genome()
         member.solution = random_solution(self.genome_length)
         return member

     founders = [spawn() for _ in range(self.population)]
     return Generation(founders, self.answer)
Ejemplo n.º 3
0
    def from_gen_file(file_name, old=False):
        """Import an (unaligned) gen file and return a GenomeCompare object.

        Every CSV row is read as ``name, mutation_rate, locus, locus, ...``.

        Args:
            file_name: Name of the file containing the genomes.
            old: Pass True only for files created before June 23rd 2015,
                which are likely to be in the float format. Loci are parsed
                with ``float`` in that case and with ``int`` otherwise.

        Returns:
            A ``GenomeCompare`` built from a dict that maps each row's
            integer name to the ``Genome`` created from that row.
        """
        import csv

        # The two file formats differ only in how a locus is parsed.
        parse_locus = float if old else int

        genomes = {}
        with open(file_name, 'r') as handle:
            for row in csv.reader(handle):
                name = int(row[0])
                genomes[name] = Genome.from_mutated_loci(
                    map(parse_locus, row[2:]),
                    mutation_rate=int(row[1]),
                    name=name)

        return GenomeCompare(genomes=genomes)
Ejemplo n.º 4
0
class Player:
    """Template organism: a ``Genome`` brain plus bookkeeping attributes.

    ``update``, ``look``, ``think`` and ``calculateFitness`` are empty hooks
    that are meant to be filled in for a concrete problem.
    """

    def __init__(self):
        """Create a player with a new brain and default bookkeeping values.

        Raises:
            AssertionError: If ``INPUTS`` or ``OUTPUTS`` is still 0, i.e. the
                initialize method has not been called yet.
        """
        assert INPUTS != 0 and OUTPUTS != 0, "You must call the initialize method before creating players!"
        self.fitness = -1
        self.unadjustedFitness = -1
        self.brain = Genome(INPUTS, OUTPUTS, False)
        self.vision = []
        self.actions = []
        self.lifespan = 0
        self.dead = False
        self.replay = False
        self.gen = 0
        self.name = ""
        self.speciesName = "Not yet defined"

    def update(self):
        """Hook: advance the organism towards its eventual death or victory."""
        pass

    def look(self):
        """Hook: read the input; this is where ``vision`` should be filled."""
        pass

    def think(self):
        """Hook: decide on actions based on the input."""
        pass

    def clone(self):
        """Return a new, non-replay player with this fitness, gen and brain.

        The brain is duplicated with ``self.brain.clone()``.
        """
        out = Player()
        out.replay = False
        out.fitness = self.fitness
        out.gen = self.gen
        out.brain = self.brain.clone()
        return out

    def cloneForReplay(self):
        """Return a replay copy carrying this fitness, brain and species name.

        The copy has ``replay`` set to True, so its ``getFitness`` reports the
        stored fitness instead of calling ``calculateFitness``.
        """
        out = Player()
        out.replay = True
        out.fitness = self.fitness
        out.brain = self.brain.clone()
        out.speciesName = self.speciesName
        # Without this return the caller would receive None.
        return out

    def calculateFitness(self):
        """Hook: return the calculated fitness at the current point in time."""
        pass

    def getFitness(self):
        """Return the stored fitness for replays, else ``calculateFitness()``."""
        if not self.replay:
            return self.calculateFitness()
        return self.fitness

    def crossover(self, parent2):
        """Return a child whose brain is this brain crossed with ``parent2``'s.

        ``generateNetwork()`` is called on the child's brain before the child
        is returned.
        """
        child = Player()
        child.brain = self.brain.crossover(parent2.brain)
        child.brain.generateNetwork()
        return child
Ejemplo n.º 5
0
 def regexsearch(self, regex):
     """Print the sequence of a new Genome and all matches of ``regex`` in it.

     The search is case-insensitive (``re.I``). Nothing is returned; the
     results are only printed. The print statements are Python 2 syntax.
     """
     # A fresh Genome is created here, so the sequence searched is whatever
     # a newly constructed Genome reports.
     genome = Genome()
     # NOTE(review): ``container`` is created but never used in this method.
     container = FeatureListContainer()
     print genome.getSequence()
     flist = []
     # findall returns a list, so flist ends up as a one-element list of lists.
     flist.append(re.findall(regex, genome.getSequence() , re.I))
     print flist
Ejemplo n.º 6
0
 def tf_idf_training(comment_cnt_lower_bound, train_ratio):
     """Compute TF-IDF data for the training set and then for the genomes.

     Both parameters are currently unused: an earlier variant, since
     disabled, partitioned the data with ``Train.simple_partition`` and
     computed the IDF table separately.
     """
     tf_idf_path = Config.train_tf_idf_path

     # Step 1: per-item TF-IDF over the training set.
     Feature.cal_tf_idf(
         Train.get_train_set(),
         Config.train_idf_path,
         tf_idf_path,
         True,
         200,
     )

     # Step 2: genome TF-IDF, read from the file written in step 1.
     Genome.cal_tf_idf(tf_idf_path, Config.train_genome_tf_idf_path, 200)
Ejemplo n.º 7
0
def initializePool(env):
    """Create a pool for ``env``, seed it with basic genomes and start a run.

    ``Population`` genomes are created; ``basicGenome()`` is called on each
    before it is handed to ``pool.addToSpecies``.

    Returns:
        The populated pool, after ``initializeRun()`` has been called on it.
    """
    pool = Pool(env)

    remaining = Population
    while remaining > 0:
        seed = Genome(pool)
        seed.basicGenome()
        pool.addToSpecies(seed)
        remaining -= 1

    pool.initializeRun()
    return pool
Ejemplo n.º 8
0
	def get_child(self, parent1, parent2):
		"""Create a new genome by crossing over the connection genes of two parents.

		The parent with the strictly higher fitness becomes ``genome1`` (ties go
		to ``parent2``). Only ``genome1``'s connection genes are iterated: each
		one is matched against ``genome2`` by innovation number and either
		copied from ``genome1`` or picked at random from the two matching
		copies. The end nodes of every added connection are copied from
		``genome1`` when the child does not have them yet.

		Consumes one genome id (``self.genome_id`` is incremented).

		Returns:
			The newly built ``Genome``.
		"""
		new_genome = Genome(weight_mutation=self.weight_mutation, input_nodes=self.inputs, output_nodes=self.outputs, genome_id=self.genome_id)
		self.genome_id += 1

		fitness1 = parent1.get_fitness()
		fitness2 = parent2.get_fitness()

		# genome1 is the fitter parent; on equal fitness parent2 takes that role.
		if(fitness1 > fitness2):
			genome1 = parent1
			genome2 = parent2
		else:
			genome1 = parent2
			genome2 = parent1

		# NOTE(review): genes that exist only in genome2 are never visited by this loop.
		for conn1 in genome1.get_connection_genes():
			copy_con1 = copy.deepcopy(conn1)

			# Flags describing how conn1 relates to genome2; excessGene stays True
			# when no gene in genome2 shares conn1's innovation number.
			excessGene = True
			disjointGene = False
			newConn = 0
			for conn2 in genome2.get_connection_genes():
				copy_con2 = copy.deepcopy(conn2)
				# Both parents have a connection with this innovation number
				if(conn1.get_innovation_number() == conn2.get_innovation_number()):
					excessGene = False
					# The expressed flags differ: genome1's copy will be used
					if(conn1.expressed != conn2.expressed):
						disjointGene = True
					# Matching gene with equal expressed flag: pick one of the two copies at random
					else:
						newConn = copy_con1 if(randint(0,1) == 1) else copy_con2
			
			# Unmatched or mismatched genes come from genome1; otherwise use the random pick.
			if(excessGene == True):
				new_genome.connection_genes.append(copy_con1)
			elif(disjointGene == True):
				new_genome.connection_genes.append(copy_con1)
			else:
				new_genome.connection_genes.append(newConn)

			# Ids of the two nodes joined by the connection that was just appended
			#nodeIn, nodeOut = copy.deepcopy(new_genome.connection_genes[-1].get_connected_nodes())
			nodeInID, nodeOutID = copy.copy(new_genome.connection_genes[-1].get_connected_nodes_id())

			# Add each end node if the child has no node with that id yet
			# (the lookup result is compared against False); nodes are copied from genome1.
			if(new_genome.get_node_by_id(nodeInID) == False):
				node = copy.deepcopy(genome1.get_node_by_id(nodeInID))
				new_genome.node_genes.append(node)
			if(new_genome.get_node_by_id(nodeOutID) == False):
				node = copy.deepcopy(genome1.get_node_by_id(nodeOutID))
				new_genome.node_genes.append(node)

			# Take the higher global node id of the two parents.
			# NOTE(review): this sits inside the loop, so it is recomputed for every
			# connection and is never assigned here when genome1 has no connection genes.
			copy_gnID1 = copy.copy(parent1.global_node_id)
			copy_gnID2 = copy.copy(parent2.global_node_id)
			new_genome.global_node_id = copy_gnID1 if(copy_gnID1>copy_gnID2) else copy_gnID2

		return new_genome
Ejemplo n.º 9
0
    def tournament(self, prev_generation):
        """Breed one new genome from two parents picked from ``prev_generation``.

        The parents' crossover result is passed through ``mutate`` with
        ``self.mutation_rate`` and stored as the new genome's ``solution``.
        """
        offspring = Genome()

        # Two independent picks; the same parent may be chosen twice.
        first_parent = pick_parent(prev_generation)
        second_parent = pick_parent(prev_generation)

        recombined = crossover(first_parent, second_parent)
        offspring.solution = mutate(recombined, self.mutation_rate)
        return offspring
Ejemplo n.º 10
0
    def values(self, genome: Genome) -> List[float]:
        """Evaluate the two ZDT1 objectives for ``genome``.

        With ``x = genome.variable(i)`` and ``n = genome.nvariables``:

            f1 = x[0]
            g  = 1 + 9 / (n - 1) * sum(x[1:])
            f2 = g * (1 - sqrt(f1 / g))

        Args:
            genome: Object exposing ``nobjectives``, ``nvariables`` and
                ``variable(i)``. At least two variables and two objectives
                are required.

        Returns:
            A list of ``genome.nobjectives`` floats whose first two entries
            are ``f1`` and ``f2``; any further entries stay 0.0.

        Raises:
            ZeroDivisionError: If the genome has exactly one variable.
        """
        objectives: List[float] = [.0 for _ in range(genome.nobjectives)]
        first = genome.variable(0)
        objectives[0] = first

        # ZDT1 defines g over x_2..x_n only, so the first variable is left
        # out of the sum (it already determines f1).
        tail_sum = sum(genome.variable(i) for i in range(1, genome.nvariables))
        g = 1 + (9.0 / (genome.nvariables - 1)) * tail_sum
        objectives[1] = g * (1 - sqrt(first / g))

        return objectives
Ejemplo n.º 11
0
 def evolve_generation_speciated(self):
     """Produce the next generation from ``self.previous_generation``.

     Steps:
       1. Score every genome with ``self.fitness_function`` and put it in
          the first species whose representor is within
          ``COMPATIBILITY_THRESHOLD``; otherwise it founds a new species.
       2. Divide each genome's fitness by its species' ``count_genomes``.
       3. Keep the top ``SELECTION_RATIO * POPULATION_SIZE`` genome ids as
          parents and drop every other id from the species.
       4. Give each species a child count proportional to its
          ``total_fitness`` and breed that many children inside the species.
       5. Fill the remaining slots with single-parent offspring of random
          parents.

     ``self.current_generation`` is rebuilt, ``self.max_id`` advances by one
     per child, and the new generation becomes ``self.previous_generation``.
     """
     config = self.config
     previous = self.previous_generation
     species_list = []
     self.current_generation = {}

     # Score all individuals of the previous generation and assign each one
     # to a species.
     for genome in previous.values():
         genome.fitness = self.fitness_function(genome)
         for species in species_list:
             distance = Genome.compatibility_distance(
                 previous[species.representor], genome,
                 config.PARAM_C1, config.PARAM_C2, config.PARAM_C3)
             if distance <= config.COMPATIBILITY_THRESHOLD:
                 species.add_genome(genome.id, genome.fitness)
                 break
         else:
             # No compatible species was found: the genome founds its own.
             species_list.append(Species(genome.id, genome.fitness))
         # Re-sorting after every genome means the fittest species is tried
         # first for the next genome; this affects which species it joins.
         species_list.sort(key=lambda s: s.total_fitness, reverse=True)

     # Recalculate the adjusted fitness based on species size.
     for species in species_list:
         for genome_id in species.genomes:
             previous[genome_id].fitness /= species.count_genomes

     # Kill off low performing individuals.
     ranked_ids = [
         genome_id for genome_id, _ in sorted(
             previous.items(), key=lambda item: item[1].fitness, reverse=True)
     ]
     parents = ranked_ids[:int(config.SELECTION_RATIO * config.POPULATION_SIZE)]
     survivors = set(parents)
     for species in species_list:
         # Rebuild in place: calling remove() while iterating the same list
         # skips the element after every removal.
         species.genomes[:] = [
             genome_id for genome_id in species.genomes
             if genome_id in survivors
         ]

     # Allocate a child count per species and create the children.
     total_fitness = sum(s.total_fitness for s in species_list)
     if total_fitness > 0.0:
         allocation_ratio = config.POPULATION_SIZE / total_fitness
         for species in species_list:
             members = species.genomes
             if not members:
                 # Every member was culled, so there is nobody to breed from.
                 continue
             child_count = int(species.total_fitness * allocation_ratio)
             for _ in range(child_count):
                 parent1_id = self.random.choice(members)
                 # Compare ids with ids so the mate really differs from
                 # parent1; fall back to parent1 for a one-member species.
                 mates = [gid for gid in members if gid != parent1_id]
                 parent2_id = self.random.choice(mates) if mates else parent1_id
                 child = Genome.generate_offspring(
                     previous[parent1_id], self.max_id, self.random,
                     self.node_classes, self.innovator, config,
                     previous[parent2_id])
                 self.current_generation[child.id] = child
                 # A fresh id per child keeps children from overwriting
                 # each other in current_generation.
                 self.max_id += 1

     # Top up with single-parent offspring until the population is full
     # (strict '<' so the generation does not end up one genome too large).
     present_population_size = len(self.current_generation)
     while present_population_size < config.POPULATION_SIZE:
         parent = previous[self.random.choice(parents)]
         child = Genome.generate_offspring(
             parent, self.max_id, self.random, self.node_classes,
             self.innovator, config)
         self.current_generation[child.id] = child
         present_population_size += 1
         self.max_id += 1

     # The new generation becomes the baseline for the next call.
     self.previous_generation = self.current_generation
Ejemplo n.º 12
0
    def breedChild(self):
        """Return a mutated child bred from this object's ``genomes``.

        With probability ``CrossoverChance`` the child is the crossover of
        two randomly chosen genomes (the same genome may be picked twice);
        otherwise it is a clone of one randomly chosen genome. ``mutate()``
        is always applied before the child is returned.

        Raises:
            IndexError: If ``self.genomes`` is empty.
        """
        # random.choice covers every index 0..len-1. The former
        # randint(1, len(...)) was 1-based, so it never selected the first
        # genome and could index one past the end.
        if random.random() < CrossoverChance:
            g1 = random.choice(self.genomes)
            g2 = random.choice(self.genomes)
            child = g1.crossover(g2)
        else:
            child = random.choice(self.genomes).clone()

        child.mutate()
        return child
Ejemplo n.º 13
0
 def __init__(self):
     """Create a player with a new brain and default bookkeeping values.

     Raises:
         AssertionError: If ``INPUTS`` or ``OUTPUTS`` is still 0, i.e. the
             initialize method has not been called yet.
     """
     assert INPUTS != 0 and OUTPUTS != 0, "You must call the initialize method before creating players!"

     # Fitness bookkeeping; -1 is the starting value for both fields.
     self.fitness = self.unadjustedFitness = -1

     # The brain is built from the module-level input/output sizes.
     self.brain = Genome(INPUTS, OUTPUTS, False)

     # Per-run state.
     self.vision, self.actions = [], []
     self.lifespan = 0
     self.dead = False
     self.replay = False

     # Identity / lineage.
     self.gen = 0
     self.name = ""
     self.speciesName = "Not yet defined"
Ejemplo n.º 14
0
	def get_clone(self, genome):
		"""Return a new genome carrying copies of ``genome``'s genes.

		The clone gets the next free genome id (``self.genome_id`` is then
		incremented). Connection and node genes are deep-copied, and
		``global_node_id`` is copied with ``copy.copy``. The clone is not
		mutated here; mutation happens for all genomes together elsewhere.
		"""
		clone = Genome(
			weight_mutation=self.weight_mutation,
			input_nodes=self.inputs,
			output_nodes=self.outputs,
			genome_id=self.genome_id,
		)
		self.genome_id += 1

		# Deep copies keep the clone's genes independent of the parent's.
		for gene_attr in ("connection_genes", "node_genes"):
			setattr(clone, gene_attr, copy.deepcopy(getattr(genome, gene_attr)))
		clone.global_node_id = copy.copy(genome.global_node_id)

		return clone
Ejemplo n.º 15
0
    def initialize_population(self, population=None, size=None):
        """Install an existing population or build a fully connected one.

        Args:
            population: Mapping of genome id to genome. When given it becomes
                ``self.previous_generation`` and ``self.max_id`` is set to its
                number of keys; ``size`` is then ignored.
            size: Indexable pair ``(input_size, output_size)``. Used only when
                ``population`` is None: ``config.POPULATION_SIZE`` genomes are
                created, each with that many input and output nodes and one
                enabled connection of weight 1.0 from every input to every
                output.

        Raises:
            ValueError: If both ``population`` and ``size`` are None.
        """
        if population is not None:
            self.previous_generation = population
            self.max_id = len(population.keys())
            return

        if size is None:
            raise ValueError("Invalid Parameters")

        self.max_id = self.config.POPULATION_SIZE
        genome_count = self.config.POPULATION_SIZE
        n_inputs, n_outputs = size[0], size[1]

        # Node ids run 0..n_inputs-1 for inputs, then onwards for outputs.
        node_roles = [NodeType.INPUT] * n_inputs + [NodeType.OUTPUT] * n_outputs

        for genome_id in range(genome_count):
            genome = Genome(genome_id)

            # The node class of every node is drawn at random.
            for node_id, role in enumerate(node_roles):
                node_class = self.random.choice(self.node_classes)
                genome.add_node_gene(node_class(node_id, role))

            # Connect each input to every output.
            for source in range(n_inputs):
                for offset in range(n_outputs):
                    # The innovation key uses the output's offset, while the
                    # gene itself targets node id offset + n_inputs.
                    innovation = self.innovator.next_innovation_number((source, offset))
                    genome.add_connection_gene(
                        ConnectionGene(source, offset + n_inputs, 1.0, True, innovation))

            self.previous_generation[genome_id] = genome
Ejemplo n.º 16
0
	def create_members(self):
		"""Create ``self.members`` genomes and put them all in one new niche.

		A ``Nieche`` is registered under ``str(self.niecheID)``. Every genome
		is stored in ``self.genomes`` under its stringified id, gets its
		inputs, outputs and initial connections created, is tagged with the
		niche id and added to the niche. Afterwards the initial innovations
		are grouped and ``self.niecheID`` is incremented.
		"""
		niche_key = str(self.niecheID)
		self.nieches[niche_key] = Nieche(self.niecheID)

		for _ in range(self.members):
			# Build the genome and store it under its stringified id.
			member = Genome(
				weight_mutation=self.weight_mutation,
				input_nodes=self.inputs,
				output_nodes=self.outputs,
				genome_id=self.genome_id,
			)
			self.genomes[str(self.genome_id)] = member

			# Create input and output nodes, then connect them.
			member.create_inputs()
			member.create_outputs()
			member.create_innitial_connections()

			# At init every genome belongs to the same niche.
			member.set_nieche_id(self.niecheID)
			self.nieches[niche_key].add_member(self.genome_id)

			self.genome_id += 1

		# The new genomes carry initial innovations (input-output
		# connections) which need to be grouped together.
		self.group_innovations()

		self.niecheID += 1
Ejemplo n.º 17
0
    def run(self, ngeneration: int, populationsize: int, crossoverrate: float,
            mutationrate: float, problem: ZDTOne):
        """Evolve a population on ``problem`` and return it.

        Args:
            ngeneration: Number of generations to run.
            populationsize: Number of initial genomes, number of breeding
                steps per generation, and the size the population is
                truncated to after each generation.
            crossoverrate: Probability that a selected pair is passed to
                ``self.crossover``; otherwise both parents are added to the
                population unchanged.
            mutationrate: Probability that the two most recent genomes in the
                population are mutated.
            problem: Problem passed to every ``calculate_fitnesses`` call.

        Returns:
            ``self._population`` after the last generation.
        """
        self._population = NonDominatingSortingPopulation()

        # Seed the population with evaluated genomes.
        for _ in range(populationsize):
            seed = Genome(2, 2)
            seed.calculate_fitnesses(problem)
            self._population.add_genome(seed)

        self._population.rank()

        newest_two = (-1, -2)
        for _ in tqdm(range(ngeneration)):
            for _ in range(populationsize):
                mother = self.tournament_selection(populationsize)
                father = self.tournament_selection(populationsize)

                if random() < crossoverrate:
                    self.crossover(mother, father)
                else:
                    for chosen in (mother, father):
                        self._population.add_genome(chosen)

                # Mutation and evaluation always address the last two
                # genomes currently stored in the population.
                if random() < mutationrate:
                    for position in newest_two:
                        self.mutate(self._population.get_genome(position))

                for position in newest_two:
                    self._population.get_genome(position).calculate_fitnesses(problem)

            self._population.rank()
            self._population.truncate(populationsize)

        return self._population
def create_asexual_genome(parent,
                          mutation_tracker,
                          newNodeProb=0.03,
                          newConnectionProb=0.05,
                          alterConnectionProb=0.8,
                          newConnectionValueProb=0.1):
    """Return a mutated single-parent child of ``parent``.

    Args:
        parent: Genome whose node and connection genes are deep-copied.
        mutation_tracker: Passed on to the add-node / add-connection
            mutations.
        newNodeProb: Probability of applying ``add_node_mutation``.
        newConnectionProb: Probability of applying
            ``add_connection_mutation``.
        alterConnectionProb: Per-connection probability of passing a
            connection whose ``disable`` flag is falsy to
            ``alter_connection``.
        newConnectionValueProb: Forwarded to ``alter_connection``.

    Returns:
        A new ``Genome`` one generation after ``parent``, with
        ``[parent.species_id]`` as its parents' species ids.
    """
    # Clone the parent's genes so mutations never touch the parent.
    node_genes = {key: copy.deepcopy(gene)
                  for key, gene in parent.n_genes.items()}
    conn_genes = {key: copy.deepcopy(gene)
                  for key, gene in parent.c_genes.items()}

    # Connections with a truthy ``disable`` flag are left untouched.
    for key in conn_genes:
        gene = conn_genes[key]
        if gene.disable:
            continue
        if np.random.uniform(0, 1) < alterConnectionProb:
            conn_genes[key] = alter_connection(gene, newConnectionValueProb)

    child = Genome(parent.input_size, parent.output_size, node_genes,
                   conn_genes, parent.generation + 1, [parent.species_id])

    # Structural mutations: a new node first, then a new connection.
    if np.random.uniform(0, 1) < newNodeProb:
        add_node_mutation(child, mutation_tracker)
    if np.random.uniform(0, 1) < newConnectionProb:
        add_connection_mutation(child, mutation_tracker)

    return child
Ejemplo n.º 19
0
def simple_trial():
    """Cross two pruned complete genomes and plot both parents and the child.

    Two complete 4-node genomes are generated, two randomly chosen
    connection genes are deleted from each, and an offspring is produced
    with every mutation rate set to 0.0 and
    ``DISABLED_GENE_INHERITING_CHANCE`` set to 1.0. ``g1`` has fitness 10.0
    and ``g2`` has fitness 0.0. All three genomes are visualized and shown
    with ``plt.show()``.
    """
    i = Innovator()
    r = Random()
    config = {
        "MUTATION_RATE": 0.0,
        "CONNECTION_MUTATION_RATE": 0.0,
        "NODE_MUTATION_RATE": 0.0,
        "DISABLED_GENE_INHERITING_CHANCE": 1.0,
    }

    config = DottedDict(config)

    nodes = 4
    to_remove = 2

    g1 = generate_complete_genome(1, nodes, r, i)
    g2 = generate_complete_genome(2, nodes, r, i)

    g1.fitness = 10.0
    g2.fitness = 0.0

    # random.sample needs a sequence: a dict keys view raises TypeError on
    # Python 3.11+. The list is also a snapshot, so deleting while looping
    # over the sampled keys is safe.
    for parent in (g1, g2):
        for key in r.sample(list(parent.connection_genes.keys()), to_remove):
            del parent.connection_genes[key]

    gc = Genome.generate_offspring(g1, 3, r, [TestNode], i, config, genomeB=g2)

    g1.vizualize_genome(1, "g1")
    g2.vizualize_genome(2, "g2")
    gc.vizualize_genome(3, "gc")

    plt.show()
def create_new_genome(input_size, output_size, fully_connected=False):
    """Build a generation-0 genome with only input and output nodes.

    Node ids ``0 .. input_size - 1`` are inputs (neuron type ``'i'``) and the
    next ``output_size`` ids are outputs (neuron type ``'o'``).

    Args:
        input_size: Number of input nodes.
        output_size: Number of output nodes.
        fully_connected: When True, a connection gene is created from every
            input to every output. Each gets a consecutive number starting at
            ``input_size + output_size`` and a weight from
            ``Mutation.get_new_weight()``.

    Returns:
        A ``Genome`` with generation 0 and no parent species ids.
    """
    first_output = input_size
    node_total = input_size + output_size

    nodes_genes = {}
    for node_id in range(0, first_output):
        nodes_genes[node_id] = NodeGene(
            input_nodes=None, output_nodes=[], neuron_type='i')
    for node_id in range(first_output, node_total):
        nodes_genes[node_id] = NodeGene(
            input_nodes=[], output_nodes=[], neuron_type='o')

    connection_genes = dict()
    if fully_connected:
        next_number = node_total
        for source in range(0, first_output):
            for target in range(first_output, node_total):
                gene = ConnectionGene(
                    next_number, Mutation.get_new_weight(), False)
                connection_genes[source, target] = gene
                # Record the link on both end nodes.
                nodes_genes[source].output_nodes.append(target)
                nodes_genes[target].input_nodes.append(source)
                next_number += 1

    return Genome(input_size=input_size,
                  output_size=output_size,
                  nodes_genes=nodes_genes,
                  connection_genes=connection_genes,
                  generation=0,
                  parents_species_id=[])
Ejemplo n.º 21
0
def genome_halving():
    """Run guided genome halving on the first two genomes of the input file.

    The first parsed genome is used as the tetrad and the second as the
    outgroup. The BPG distances tetrad -> ancestor_AA and
    outgroup -> ancestor_A are computed and printed with their total,
    followed by the chromosomes of both ancestors.

    Raises:
        Exception: If the configured genome-to-replace value is not an
            ``int`` or is not 0, 1 or 2.
    """
    def show_ancestor(heading, ancestor):
        # Print a heading, then one chromosome per line.
        print(heading)
        for chromosome in ancestor.chromosomes:
            print(str(chromosome))

    def bpg(source, ancestor):
        # Compute the BPG distance from ``source`` to ``ancestor``.
        measure = BPGDistance(source, ancestor)
        measure.calculate_distance()
        return measure.distance

    # Only the first two genomes of the input file are used.
    genome_rows = iter(parse_genomes().values())
    tetrad = next(genome_rows)
    outgroup = next(genome_rows)

    # Validate the GenomeHalving configuration option.
    to_replace = config_get(CONFIG_GENOME_REPLACE)
    if type(to_replace) is not int:
        raise Exception(
            "Config attribute \"genome_to_replace\" needs to be a number.\n")
    if to_replace not in (0, 1, 2):
        raise Exception("Genome to replace must be 0, 1, or 2.\n")

    # Perform Guided Genome Halving on the tetrad and outgroup genomes.
    ggh = GroupGraph(Genome.from_strings(tetrad),
                     Genome.from_strings(outgroup), to_replace)
    ggh.get_result()

    distance_1 = bpg(Genome.from_strings(tetrad), ggh.ancestor_AA)
    distance_2 = bpg(Genome.from_strings(outgroup), ggh.ancestor_A)
    total_distance = distance_1 + distance_2

    # Two print arguments: print() itself inserts one space between them.
    distances_part = "".join([
        "\nd(AA, tetra) = ", str(distance_1),
        " | d(A,outgroup) = ", str(distance_2),
    ])
    total_part = "".join([" | total = ", str(total_distance), "\n"])
    print(distances_part, total_part)

    show_ancestor("\n-\nGenome ancestor_AA:\n", ggh.ancestor_AA)
    show_ancestor("\n-\nGenome ancestor_A:\n", ggh.ancestor_A)
Ejemplo n.º 22
0
    def run(self):
        """Run the genetic algorithm and return the best score per simulation.

        A population of ``population_total`` genomes is generated from
        ``problem_grid``. In each of the ``simulations`` rounds every genome
        is evaluated, the genome with the highest ``fit`` is carried over
        unchanged, and the rest of the next population is filled through
        tournament selection with optional crossover and mutation.

        Returns:
            A list holding ``round(1 / best.getFitness())`` for every round.
        """
        data = []
        best_genome = None
        population = [
            Genome(generate(problem_grid)) for _ in range(population_total)
        ]

        for i in range(simulations):
            # Evaluate every genome; the running total is not used afterwards.
            population_fitness = 0
            for member in population:
                member.fitness()
                population_fitness = population_fitness + member.getFitness()

            # Snapshot of the current population used for selection below.
            sorted_population = list(population)
            best_genome = max(population, key=lambda member: member.fit)
            best_fitness = round(1 / best_genome.getFitness())
            data.append(best_fitness)

            if i % 1000 == 0:
                self.show(i, best_genome)

            if best_fitness <= limit:
                print('DONE\n')

            # Elitism: the next population starts with the current best.
            population = [best_genome]

            while len(population) < population_total:
                candidate = tournamentSelection(sorted_population)
                second_pick = tournamentSelection(sorted_population)

                if npR.uniform() < crossover_rate:
                    third_pick = tournamentSelection(sorted_population)
                    candidate = Genome(
                        crossover(candidate, second_pick, third_pick))

                if npR.uniform() < mutation_rate:
                    candidate.mutateSell(problem_grid)

                population.append(candidate)

        self.show(i, best_genome)
        return data
Ejemplo n.º 23
0
    def movie_genome_sim(douban_id, genome_id, movie_tf_idf_path,
                         genome_tf_idf_path):
        """Return ``Tagging.cos_sim`` of a movie's and a genome's TF-IDF data.

        Args:
            douban_id: Key looked up in the movie TF-IDF file.
            genome_id: Key looked up in the genome TF-IDF file.
            movie_tf_idf_path: Path handed to ``Feature.get_tf_idf_from_file``.
            genome_tf_idf_path: Path handed to ``Genome.get_tf_idf_from_file``.
        """
        movie_entries = Feature.get_tf_idf_from_file(douban_id,
                                                     movie_tf_idf_path)
        movie_vector = dict(movie_entries)

        genome_entries = Genome.get_tf_idf_from_file(genome_id,
                                                     genome_tf_idf_path)
        genome_vector = dict(genome_entries)

        return Tagging.cos_sim([movie_vector, genome_vector])
Ejemplo n.º 24
0
    def loadFile(self, filename, env):
        """Re-initialise this object for ``env`` and restore state from a file.

        The file is read line by line in this order:

            generation
            maxFitness
            number of species
            per species: topFitness, staleness, number of genomes
              per genome: fitness, maxneuron,
                          (name line, value line) pairs for mutationRates
                          terminated by a line reading "done",
                          number of genes,
                          one line per gene:
                          "into out weight innovation enabled"

        After loading, genomes whose fitness is already measured are skipped
        with ``nextGenome()``, ``initializeRun()`` is called and
        ``currentFrame`` is incremented.

        Args:
            filename: Path of the saved file.
            env: Forwarded to ``self.__init__``.

        Raises:
            ValueError: If a line cannot be parsed as the expected number or
                a gene line does not hold exactly five fields.
        """
        def parse_number(token):
            # Gene fields are ints where possible and floats otherwise.
            try:
                return int(token)
            except ValueError:
                return float(token)

        # The with-block closes the file even when parsing fails part-way.
        with open(filename, "r") as handle:
            def next_line():
                return handle.readline().replace("\n", "")

            self.__init__(env)
            self.generation = int(next_line())
            self.maxFitness = int(next_line())
            numSpecies = int(next_line())
            for _ in range(numSpecies):
                species = Species()
                self.species.append(species)
                species.topFitness = float(next_line())
                species.staleness = int(next_line())
                numGenomes = int(next_line())
                for _ in range(numGenomes):
                    genome = Genome(self)
                    species.genomes.append(genome)
                    genome.fitness = float(next_line())
                    genome.maxneuron = int(next_line())

                    # Mutation rates: name line, value line, until "done".
                    line = next_line()
                    while line != "done":
                        genome.mutationRates[line] = float(next_line())
                        line = next_line()

                    numGenes = int(next_line())
                    for _ in range(numGenes):
                        gene = Gene()
                        genome.genes.append(gene)
                        data = [parse_number(token)
                                for token in handle.readline().split(" ")]
                        (gene.into, gene.out, gene.weight,
                         gene.innovation, enabled) = data
                        gene.enabled = enabled == 1

        while self.fitnessAlreadyMeasured():
            self.nextGenome()
        self.initializeRun()
        self.currentFrame = self.currentFrame + 1
Ejemplo n.º 25
0
class UnitOfWork:
    """Holds the data set and the hyper-parameter genomes for a search run."""

    # Class-level placeholder; __init__ replaces it per instance.
    _dataSet=[]
    # One Genome per tunable setting, built as Genome(name, number, number).
    # NOTE(review): the two numbers are presumably lower/upper bounds;
    # confirm against Genome.
    _genomes=[Genome('Num_units',20,50),
              Genome('learning_rate',0.0020,0.0030),
              Genome('lambda_loss_amount',0.0010,0.0020),
              Genome('Batch_size',1000,2000),
              Genome('Num_iterations',100,500),
              Genome('Segment_size',100,200),
              ]
    _popSize=10
    _perMut=0.5
    _iteration=22

    # _test_user_ids=[2, 4, 9, 10, 12, 13, 18, 20, 24] in DataSet

    def __init__(self, pathDataset='datasets/uci_raw_data'):
        """Load the data set found at ``pathDataset``.

        Args:
            pathDataset: Location passed to ``DataSet`` together with the
                mode string ``'l'``.
        """
        # NOTE(review): an earlier body bound a throwaway local named
        # ``_genomes`` here, which had no effect. If the intent was to reset
        # the genome list per instance, assign ``self._genomes`` explicitly.
        self._dataSet=DataSet(pathDataset,'l')
Ejemplo n.º 26
0
def genome_aliquoting():
    """
    Genome aliquoting driver (see the 2010 paper, section 2.5).

    The feature is currently switched off: the function raises right away,
    so every statement after the ``raise`` is unreachable and is kept only
    as the intended implementation.

    Raises:
        Exception: always, with the message
            "Genome Aliquoting is not functional yet."
    """
    raise Exception("Genome Aliquoting is not functional yet.")

    # Build the dictionary of genomes from the input file.
    parsed: Dict[str, List[str]] = parse_genomes()

    # The first entry is the polyploid genome, the second the reference.
    entries: Iterator[List[str]] = iter(parsed.values())
    polyd: Genome = Genome.from_strings(next(entries))
    reference: List[str] = next(entries)
    ploidy: int = count_ploidy(polyd)

    # Read and validate the aliquoting configuration option.
    to_replace: int = config_get(CONFIG_GENOME_REPLACE)
    if type(to_replace) is not int:
        raise Exception(
            "Config attribute \"genome_to_replace\" needs to be a number.\n")
    if not 0 <= to_replace <= ploidy:
        raise Exception(
            "Genome to replace must be non-negative and less than the number of poly genome copies (n).\n"
        )

    # Run the aliquoting itself on the polyploid genome.
    reference_genome: Genome = Genome.from_strings(reference)
    alq: Aliquoting = Aliquoting(polyd, reference_genome, to_replace, ploidy)
    alq.get_result()

    # Distance between the polyploid genome and the computed ancestor.
    bpg: BPGDistance = BPGDistance(polyd, alq.ideal_ancestor)
    bpg.calculate_distance()

    print("\nd(Am, polyd) = " + str(bpg.distance) + "\n")
    print("\n-\nGenome ancestor_A(m):\n")

    for chromosome in alq.ideal_ancestor.chromosomes:
        print(str(chromosome))
Ejemplo n.º 27
0
 def apply(self, mutant_vector, target_vector, Cr):
     """Binomial crossover between a mutant and a target vector.

     For every gene position the mutant's gene is taken when a uniform
     draw from [0, 1] is <= ``Cr`` or when the position equals the forced
     index ``Jrand``; otherwise the target's gene is kept.

     ``Jrand`` is drawn once per call from the valid positions
     ``0 .. len - 1``, so a non-empty result always carries at least one
     gene from the mutant. Previously it was redrawn for every position
     with ``random.randint(0, len)``, whose inclusive upper bound is not a
     valid position, so that guarantee did not hold.

     Args:
         mutant_vector: object exposing ``get_genes()``; supplies the
             candidate genes.
         target_vector: object exposing ``get_genes()``; supplies the
             fallback genes (must be at least as long as the mutant's).
         Cr: crossover threshold compared against a draw from [0, 1].

     Returns:
         A new ``Genome`` built from the selected genes.
     """
     mutant_genes = mutant_vector.get_genes()
     target_genes = target_vector.get_genes()
     gene_count = len(mutant_genes)
     if gene_count == 0:
         # Nothing to cross over; randrange(0) would raise ValueError.
         return Genome([])

     # One forced position per trial vector.
     Jrand = random.randrange(gene_count)
     genes = []
     for j in range(gene_count):
         if random.uniform(0, 1) <= Cr or j == Jrand:
             genes.append(mutant_genes[j])
         else:
             genes.append(target_genes[j])
     return Genome(genes)
Ejemplo n.º 28
0
    def __init__(self, psize, bounds):
        """Fill ``self.population_list`` with ``psize`` random genomes.

        Each genome gets one gene per entry of ``bounds``, drawn with
        ``random.uniform(entry[0], entry[1])``.
        """
        self.population_list = []

        # The list starts empty and grows by exactly one genome per pass,
        # so its length doubles as the loop counter.
        while len(self.population_list) < psize:
            sampled = [random.uniform(limits[0], limits[1])
                       for limits in bounds]
            self.population_list.append(Genome(sampled))
Ejemplo n.º 29
0
	def from_gen_file ( file_name , old = False ):
		"""
			Read an (unaligned) gen_file and build a GenomeCompare object from it.

			Every CSV row yields one genome: column 0 is its name (int),
			column 1 its mutation rate (int) and the remaining columns are
			the values handed to Genome.from_mutated_loci.

			@params
				file_name: the name of the file containing the genomes
				old / boolean / False
					files created before June 23rd 2015 are likely to be in the float format;
					pass True for those only.
		"""

		import csv

		# Legacy files hold the loci as floats, current ones as ints.
		locus_type = float if old else int

		genomes = {}
		with open( file_name , 'r' ) as f:
			for row in csv.reader( f ):
				parsed = Genome.from_mutated_loci( map( locus_type , row[2:] ) , mutation_rate = int( row[1] ) , name = int( row[0] ) )
				genomes[ int( row[0] ) ] = parsed

		return GenomeCompare( genomes = genomes )
Ejemplo n.º 30
0
    def load(string) -> Specie:
        """Rebuild a ``Specie`` from its tagged string form.

        The top-level tags are consumed in a fixed order
        (representative, age, niche_fitness, max_fitness, genomes); the
        ``genomes`` payload is then split into individual ``genome``
        entries until nothing is left.
        """
        # remove_tag is used as returning a (tag content, remaining text)
        # pair, so the calls must stay in this order.
        raw = {}
        for tag in ("representative", "age", "niche_fitness", "max_fitness",
                    "genomes"):
            raw[tag], string = remove_tag(tag, string)

        representative = Genome.load(raw["representative"])
        age = int(raw["age"])
        niche_fitness = float(raw["niche_fitness"])
        max_fitness = float(raw["max_fitness"])

        members = []
        remaining = raw["genomes"]
        while remaining:
            chunk, remaining = remove_tag("genome", remaining)
            members.append(Genome.load(chunk))

        # niche_fitness is not a constructor argument; it is set afterwards.
        specie = Specie(representative, members, age, max_fitness)
        specie.niche_fitness = niche_fitness
        return specie
Ejemplo n.º 31
0
 def scanForRRNA(self):
     """Write the accepted, non-overlapping hits to '<sAdditionalName>.rrnas.fasta'.

     NOTE(review): ``sSequenceData`` and ``sAdditionalName`` are neither
     parameters nor locals of this method, so they must come from an
     enclosing or module scope -- confirm where they are defined.

     NOTE(review): the code that used to fill ``lHits`` (cmsearch runs and
     result parsing) is commented out below, so as written ``lHits`` stays
     empty, the loop body never runs and the output file is created empty.
     """
     # !!!!!!!!!!!!!!
     lHits = []
     #for (sDomain, sRfam) in dDomain2Rfam.items():
     #    dRfam2File[sRfam].close()
     #    sRfamModel = dDomain2Rfam[sDomain]
     #    sFileName = '%s.%s'%(sSequenceData.split('/')[-1], sRfamModel)
     #    if sAdditionalName != None:
     #        sFileName = '%s.%s.%s'%(sAdditionalName, sSequenceData.split('/')[-1], sRfamModel)
     #    os.system('cmsearch --cpu 4 --tblout %s.tblout  -o %s.o --notextw %s.rfam_14_3.cm %s'%(sFileName,  sFileName, sRfamModel, sSequenceData))
     #    lHits += ParseCm(open('%s.tblout'%(sFileName))).next()
     #fileOut.close()
     #os.system('cmpress -F rfam.ssu_rrnas.cm')
     #os.system('cmscan --cpu 4 --notextw rfam.ssu_rrnas.cm %s'%(sSequenceData))
     #dSeqs = SeqIO(open(sSequenceData)).getDict()
     genome = Genome(sSequenceData)
     # Highest score first; ties broken by the smaller eval.
     lHits = sorted(lHits, key=lambda x:(-x.score, x.eval))
     fileOut = open('%s.rrnas.fasta'%(sAdditionalName),'w')
     # hitId -> list of (iStart, iEnd) ranges already written for that sequence.
     dSeq2RangesUsed = {}
     for hit in lHits:
         # Skip hits whose eval is above the 1e-3 cutoff.
         if hit.eval > 1e-3:
             continue
         # Presumably hitStart/hitEnd are 1-based inclusive, turned here into
         # a Python slice -- TODO confirm against the hit parser.
         iStart, iEnd = hit.hitStart-1, hit.hitEnd
         sSeq = genome.seqs[genome.dSeqId2SeqIndex[hit.hitId]].seq[iStart:iEnd]
         if hit.strand == '-':
             # Minus strand: take the slice with start/end swapped and
             # reverse-complement it. NOTE(review): this assumes
             # hitStart > hitEnd for '-' hits -- confirm.
             iStart, iEnd = hit.hitEnd-1, hit.hitStart
             sSeq = genome.reverseComplement(genome.seqs[genome.dSeqId2SeqIndex[hit.hitId]].seq[iStart:iEnd])
         # True while the hit overlaps no range already accepted on the
         # same sequence.
         bOverlapCheck = True
         if hit.hitId in dSeq2RangesUsed:
             for (iStart2, iEnd2) in dSeq2RangesUsed[hit.hitId]:
                 iOverlap = min(iEnd, iEnd2)-max(iStart, iStart2)+1
                 if iOverlap >= 1:
                     bOverlapCheck = False
                     break
         if bOverlapCheck:
             if hit.hitId not in dSeq2RangesUsed:
                 dSeq2RangesUsed[hit.hitId] = []
             dSeq2RangesUsed[hit.hitId].append((iStart, iEnd))
             # FASTA record: the header carries names, coordinates, eval,
             # strand and gc; the sequence follows on the next line.
             fileOut.write('>%s.%s.%s.%i.%i %f %s %f\n%s\n'%(sAdditionalName, hit.queryId, hit.hitId, hit.hitStart, hit.hitEnd, hit.eval, hit.strand, hit.gc, sSeq))
     fileOut.close()
Ejemplo n.º 32
0
 def get_genome_sim_list(douban_id, movie_tf_idf_path, genome_tf_idf_path,
                         genome_num=10):
     """Rank every genome by its similarity to the movie ``douban_id``.

     Returns a list of ``(genome_id, similarity)`` pairs sorted by
     descending similarity, with one pair per key of
     ``Genome.load_genome_dict()``.

     ``genome_num`` is accepted but currently has no effect: the full
     ranking is returned rather than only the first ``genome_num`` entries.
     """
     scores = {
         gid: Tagging.movie_genome_sim(douban_id, gid, movie_tf_idf_path,
                                       genome_tf_idf_path)
         for gid in Genome.load_genome_dict()
     }
     # Negating the similarity sorts from most to least similar.
     return sorted(scores.items(), key=lambda pair: -pair[1])
Ejemplo n.º 33
0
    def from_aligned_gen_file(file_name):
        """
        Import the new format of aligned gen_file and return a GenomeCompare object.

        The first CSV row is the header: its columns from index 3 onward
        are mutation identifiers (parsed as ints). Every following row
        describes one genome: column 1 is its name, column 2 its mutation
        rate, and columns 3+ are per-mutation flags, where 1 means the
        mutation of the matching header column is present.

        An empty file yields a GenomeCompare with no genomes.

        @params
            file_name: the name of the aligned gen_file to read
        """
        # The call form prints the same text on Python 2 and Python 3; the
        # former print statement was a syntax error on Python 3.
        print('Please use GenomeCompare2 for better speeds with aligned genome files')
        import csv
        genomes = []
        with open(file_name, 'r') as f:
            reader = csv.reader(f)
            header = next(reader, None)
            if header is not None:
                # Flag-column offset -> Mutation object for that column.
                mapper = dict(
                    (key, Mutation(int(mutation)))
                    for key, mutation in enumerate(header[3:]))

                for row in reader:
                    mutation_states = [int(state) for state in row[3:]]
                    genome_mutations = [
                        mapper[key]
                        for key, state in enumerate(mutation_states)
                        if state == 1
                    ]

                    genome_to_create = Genome(mutation_rate=int(row[2]),
                                              name=str(row[1]))
                    genome_to_create.mutated_loci = genome_mutations
                    genomes.append(genome_to_create)

        return GenomeCompare(genomes=genomes)
Ejemplo n.º 34
0
	def from_aligned_gen_file ( file_name ):
		"""
			Import the new format of aligned gen_file and return a GenomeCompare object.

			The first CSV row is the header: its columns from index 3 onward
			are mutation identifiers (parsed as ints). Every following row
			describes one genome: column 1 is its name, column 2 its mutation
			rate, and columns 3+ are per-mutation flags, where 1 means the
			mutation of the matching header column is present.

			An empty file yields a GenomeCompare with no genomes.

			@params
				file_name: the name of the aligned gen_file to read
		"""
		# The call form prints the same text on Python 2 and Python 3; the
		# former print statement was a syntax error on Python 3.
		print( 'Please use GenomeCompare2 for better speeds with aligned genome files' )
		import csv
		genomes = []
		with open ( file_name , 'r' ) as f:
			reader = csv.reader( f )
			header = next( reader , None )
			if header is not None:
				# Flag-column offset -> Mutation object for that column.
				mapper = dict( ( key , Mutation( int( mutation ) ) ) for key , mutation in enumerate( header[3:] ) )

				for row in reader:
					mutation_states = [ int( state ) for state in row[3:] ]
					genome_mutations = [ mapper[ key ] for key , state in enumerate( mutation_states ) if state == 1 ]

					genome_to_create = Genome( mutation_rate = int( row[2] ) , name = str( row[1] ) )
					genome_to_create.mutated_loci = genome_mutations
					genomes.append( genome_to_create )

		return GenomeCompare( genomes = genomes )