def create_icd10_lookup():
    """Precomputes the mental and nervous system disorder columns of the ICD10
    dataset for faster subject comparison.

    See Chapters V and VI: http://biobank.ndph.ox.ac.uk/showcase/field.cgi?id=41270

    :return: dataframe row-indexed by subject ID, with boolean columns indexed
        by disease code.
    """
    icd10 = pd.read_csv(data_icd10, sep=',')
    icd10.index = ['UKB' + str(eid) for eid in icd10['eid']]

    subject_ids = np.load(SUBJECT_IDS, allow_pickle=True)
    biobank_uids = Phenotype.get_biobank_codes(Phenotype.ICD10)
    icd10 = icd10.loc[subject_ids, biobank_uids]

    icd10_lookup = pd.DataFrame(index=icd10.index)

    # Determine whether the patient has an occurrence of a particular disease.
    si = icd10.index.to_series()
    ci = np.concatenate((Phenotype.get_icd10_mental_disorder_codes(),
                         Phenotype.get_icd10_nervous_system_disorder_codes()))
    for c in ci:
        icd10_lookup[c] = pd.Series(
            si.apply(lambda s: np.any([
                k.startswith(c)
                for k in icd10.loc[s, :].to_numpy().astype('str')
            ])))

    icd10_lookup = icd10_lookup.sort_index()
    icd10_lookup.to_pickle(ICD10_LOOKUP)
    return icd10_lookup
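# Usage sketch for the lookup above. The 'F32' column is an illustrative
# assumption (ICD-10 F32 is a Chapter V depressive episode code);
# ICD10_LOOKUP is the module-level pickle path used by create_icd10_lookup.
import pandas as pd

icd10_lookup = pd.read_pickle(ICD10_LOOKUP)

# The boolean columns make cohort queries cheap, e.g. all subjects with any
# diagnosis starting with F32 -- assuming 'F32' is one of the precomputed
# mental disorder code columns.
if 'F32' in icd10_lookup.columns:
    depressed_subjects = icd10_lookup.index[icd10_lookup['F32']].tolist()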
class Genotype:
    # TODO: add the map as a genotype and phenotype.

    def __init__(self):
        self.chromosomes = np.zeros(0, dtype=int)

    def randomize(self, chromosomeSize, trajectory):
        self.level = Level(trajectory.level_width, trajectory.level_height)
        self.level.generate_from_trajectory(trajectory, random.uniform(0, 1))
        self.phenotype = Phenotype(self.level)
        self.chromosomes = self.level.cells.flatten()
        self.trajectory = trajectory
        # np.set_printoptions(threshold=np.nan)
        # print(self.chromosomes)
        # print("another genotype")
        """
        for i in range(chromosomeSize):
            self.chromosomes.append(random.randint(0, 1))
        """

    def getPhenotype(self):
        self.phenotype.levelFromChromosomes(self.chromosomes, self.trajectory,
                                            self.trajectory.level_width,
                                            self.trajectory.level_height)
        return self.phenotype
def evaluate_test_set_performance(model_dir):
    """Measures the test set performance of the model under the specified model directory.

    :param model_dir: directory containing the model state dictionaries for each fold
        and the model configuration (including the population graph parameterisation)
    :return: the test set performance for each fold.
    """
    with open(os.path.join(model_dir, 'config.yaml')) as file:
        cfg = yaml.full_load(file)

    graph_name = cfg['graph_name']['value']
    conv_type = cfg['model']['value']
    n_conv_layers = cfg['n_conv_layers']['value']
    layer_sizes = ast.literal_eval(cfg['layer_sizes']['value'])
    dropout_p = cfg['dropout']['value']

    similarity_feature_set = [Phenotype(i) for i in ast.literal_eval(cfg['similarity']['value'])[0]]
    similarity_threshold = ast.literal_eval(cfg['similarity']['value'])[1]

    if graph_name not in GRAPH_NAMES:
        graph_construct.construct_population_graph(similarity_feature_set=similarity_feature_set,
                                                   similarity_threshold=similarity_threshold,
                                                   functional=False, structural=True, euler=True)

    graph = graph_construct.load_population_graph(graph_root, graph_name)
    folds = brain_gnn_train.get_cv_subject_split(graph, n_folds=5)

    results = {}
    for i, fold in enumerate(folds):
        brain_gnn_train.set_training_masks(graph, *fold)
        graph_transform.graph_feature_transform(graph)

        if ConvTypes(conv_type) == ConvTypes.GCN:
            model = BrainGCN(graph.num_node_features, n_conv_layers, layer_sizes, dropout_p)
        else:
            model = BrainGAT(graph.num_node_features, n_conv_layers, layer_sizes, dropout_p)
        model.load_state_dict(torch.load(os.path.join(model_dir, 'fold-{}_state_dict.pt'.format(i))))
        model = model.to(device)
        model.eval()

        data = graph.to(device)
        out = model(data)
        predicted = out[data.test_mask].cpu()
        actual = graph.y[data.test_mask].cpu()

        r2 = r2_score(actual.detach().numpy(), predicted.detach().numpy())
        r = pearsonr(actual.detach().numpy().flatten(), predicted.detach().numpy().flatten())
        mse = mean_squared_error(actual.detach().numpy(), predicted.detach().numpy())

        # Record all three metrics per fold instead of overwriting the results
        # dictionary with the MSE and stopping after the first fold, so the
        # function matches its docstring.
        results['fold_{}'.format(i)] = {'r': [x.item() for x in r], 'r2': r2.item(), 'mse': mse.item()}

    return results
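# A minimal invocation sketch for the evaluation above, assuming a
# hypothetical run directory that contains config.yaml and the per-fold
# fold-{i}_state_dict.pt files:
results = evaluate_test_set_performance('runs/gcn_example')
for fold_name, metrics in results.items():
    print(fold_name, 'r={}'.format(metrics['r'][0]), 'r2={}'.format(metrics['r2']))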
def retrieveData():
    """Retrieves reports from the API and stores them in a dictionary,
    pairing each phenotype's display name with its summary score.
    """
    token = 'GENOMELINKTEST'
    headers = {'Authorization': 'Bearer {}'.format(token)}
    phenotypes = [
        'carbohydrate-intake', 'protein-intake', 'vitamin-a', 'vitamin-b12',
        'vitamin-d', 'vitamin-e', 'calcium', 'magnesium', 'iron',
        'endurance-performance'
    ]
    population = 'european'

    for phenotype in phenotypes:
        report_url = 'https://genomicexplorer.io/v1/reports/{}?population={}'.format(
            phenotype, population)
        response = requests.get(report_url, headers=headers)
        # response.json() already returns a dictionary; no need to round-trip
        # through json.dumps/json.loads.
        data_dict = response.json()
        p = Phenotype(data_dict["phenotype"]["display_name"],
                      data_dict["summary"]["text"],
                      data_dict["summary"]["score"])
        phenotypeDict[p._phenotype] = p._score
def average_distance(self, mc_samples=None):
    if mc_samples is None:
        # Exact mean over all ordered pairs of distinct plants.
        ret = 0.0
        for plant1 in self.plants.values():
            for plant2 in self.plants.values():
                ret += Phenotype.distance(plant1.phenotype, plant2.phenotype)
        numpl = len(self.plants)
        if numpl == 0:
            return 0.
        return ret / (numpl**2 - numpl)
    else:
        # Monte Carlo estimate: sample random pairs and average the distance
        # over the valid (distinct) pairs only.
        ret = 0.0
        tries_done = 0
        tries_valid = 0
        spk = self.plants.keys()
        if spk == []:
            return 0.
        while tries_done < mc_samples:
            p1 = random.choice(spk)
            p2 = random.choice(spk)
            tries_done += 1
            if p1 != p2:
                tries_valid += 1
                ret += self.phenotype_link.distance(self.plants[p1].phenotype,
                                                    self.plants[p2].phenotype)
        if tries_valid == 0:
            tries_valid += 1
        return ret / tries_valid
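# Self-contained sketch of the Monte Carlo branch above: estimate the mean
# pairwise distance by sampling index pairs instead of looping over all
# n*(n-1) ordered pairs. Plain floats and abs() stand in for plant phenotypes
# and Phenotype.distance (both are illustrative stand-ins).
import random

def mc_average_distance(values, mc_samples):
    total, tries_valid = 0.0, 0
    for _ in range(mc_samples):
        i = random.randrange(len(values))
        j = random.randrange(len(values))
        if i != j:  # mirror the p1 != p2 check: only distinct pairs count
            tries_valid += 1
            total += abs(values[i] - values[j])
    return total / max(tries_valid, 1)

print(mc_average_distance([random.random() for _ in range(1000)], mc_samples=500))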
def __crossover__(self, population, ori):
    aux = []
    for f, m in population:
        # Shuffle the attribute indices, then split at the crossover point so
        # each child inherits complementary slices of the parents' attributes.
        index = np.arange(int(len(f.__dict__)))
        np.random.shuffle(index)
        cut = int(len(f.__dict__) * self.crossover)

        son1 = dict(np.concatenate(
            (np.array(list(f.__dict__.items()))[index[:cut]],
             np.array(list(m.__dict__.items()))[index[cut:]])))
        son1 = Phenotype(son1)

        son2 = dict(np.concatenate(
            (np.array(list(f.__dict__.items()))[index[cut:]],
             np.array(list(m.__dict__.items()))[index[:cut]])))
        son2 = Phenotype(son2)

        aux += [son1, son2]
    ori.addPhenotype(aux)
    return ori
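# Illustrative, self-contained version of the index-shuffle crossover above,
# operating on plain dicts rather than Phenotype objects (the function name
# and sample dicts are hypothetical): each child receives complementary
# random slices of the parents' attribute lists, split at the crossover point.
import numpy as np

def dict_crossover(father, mother, rate):
    index = np.arange(len(father))
    np.random.shuffle(index)
    cut = int(len(father) * rate)
    f_items = np.array(list(father.items()), dtype=object)
    m_items = np.array(list(mother.items()), dtype=object)
    son1 = dict(np.concatenate((f_items[index[:cut]], m_items[index[cut:]])))
    son2 = dict(np.concatenate((f_items[index[cut:]], m_items[index[:cut]])))
    return son1, son2

s1, s2 = dict_crossover({'x': 1, 'y': 2, 'z': 3}, {'x': 9, 'y': 8, 'z': 7}, rate=0.5)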
def distance_to_opt(self):
    # Placeholder, immediately overwritten below ("magic trick" by @MKitlas).
    optimal_ph = 1410
    plant_ph = self.phenotype
    if p.location_mode:
        optimal_ph = Environment.default.optimal_phenotype_on_map(self.location)
    else:
        optimal_ph = Environment.default.optimal_phenotype_without_map()

    # Sum up the transposons' (TEs') effects on the phenotype.
    for t in self.aut_transposons_list:
        if p.multidim_changes:
            plant_ph = Phenotype.add(plant_ph, t.mutation_rate)
        else:
            plant_ph[t.trait_no] += t.mutation_rate
    for t in self.nonaut_transposons_list:
        if p.multidim_changes:
            # Accumulate onto plant_ph (the original restarted from
            # self.phenotype here, discarding the autonomous TEs' effects).
            plant_ph = Phenotype.add(plant_ph, t.mutation_rate)
        else:
            plant_ph[t.trait_no] += t.mutation_rate

    return Phenotype.distance(plant_ph, optimal_ph)
def __init__(self, no_transp=None, transp_activity=None):
    # debug.g("creating a new plant, cont. __init__()")  #, plant.fitness()))
    self.aut_transposons = no_transp
    if no_transp is None:
        self.aut_transposons = p.starting_transposons()
    self.nonaut_transposons = 0
    ##KG--begin
    ## sexual_mode reproduction:
    ## initialise the list of transposons in a new plant.
    if p.sexual_mode:
        self.sex = 1 if random.random() > 0.5 else 0
        self.nonaut_transposons_list = []
        self.aut_transposons_list = []
        for i in range(self.aut_transposons):
            te = Transposone(True)
            self.aut_transposons_list.append(te)
    else:
        # In asexual mode all plants have sex = 0.
        self.sex = 0
    ##KG--end
    self.phenotype = Phenotype.new()
    self.id = self.__class__.counter
    # Environment.default.register_new_plant(self.id, self)  # replaced, moved to new()
    self.environment = Environment.default
    self.dead = False
    self.__class__.counter += 1
    self.transposase_activity = transp_activity
    if transp_activity is None:
        self.transposase_activity = p.starting_transposase_activity(self.aut_transposons)
    self.inactive_transposons = 0
    self.transpositions = 0
    self.total_mutations = 0
    self.random_mutations = 0
    self.ord_counter = self.__class__.order_cnt
    self.__class__.order_cnt += 1
def create_similarity_lookup():
    """Precomputes the columns of the phenotype dataset for faster subject comparison.

    :return: dataframe containing the values used for similarity comparison,
        row-indexed by subject ID and column-indexed by phenotype code name
        (e.g. 'AGE', 'FTE' etc.)
    """
    phenotypes = pd.read_csv(data_phenotype, sep=',')
    phenotypes.index = ['UKB' + str(eid) for eid in phenotypes['eid']]

    biobank_feature_list = []
    for feature in Phenotype:
        biobank_feature_list.extend(Phenotype.get_biobank_codes(feature))
    phenotype_processed = phenotypes[biobank_feature_list]

    for feature in Phenotype:
        biobank_feature = Phenotype.get_biobank_codes(feature)
        if feature == Phenotype.MENTAL_HEALTH:
            mental_to_code = Phenotype.get_mental_to_code()
            # Column names for the summary (total number of conditions) plus
            # 18 possible conditions: MEN0, MEN1, ..., MEN18.
            mental_feature_codes = [Phenotype.MENTAL_HEALTH.value + str(i) for i in range(19)]

            # Replace string descriptions with their codes for consistency.
            phenotype_processed.loc[:, biobank_feature[0]] = phenotype_processed[
                biobank_feature[0]].apply(
                    lambda x: mental_to_code[x] if x in mental_to_code.keys() else None)

            # Determine whether the patient has an occurrence of a particular disease.
            si = phenotype_processed.index.to_series()
            for i in range(1, len(mental_feature_codes)):
                phenotype_processed.loc[:, Phenotype.MENTAL_HEALTH.value + str(i)] = si.apply(
                    lambda s: int(i in phenotype_processed.loc[s, biobank_feature].to_numpy().astype(bool)))
            phenotype_processed.loc[:, mental_feature_codes[0]] = si.apply(
                lambda s: int(np.sum(phenotype_processed.loc[s, mental_feature_codes[1:]])))

        elif len(biobank_feature) > 1:
            # Handle the more/less recent values.
            si = phenotype_processed.index.to_series().copy()
            phenotype_processed.loc[:, feature.value] = si.apply(
                lambda s: get_most_recent(biobank_feature, s, phenotype_processed))
        else:
            phenotype_processed.loc[:, feature.value] = phenotype_processed[biobank_feature[0]].copy()

    # Filter only the subjects used in the final dataset.
    phenotype_processed = phenotype_processed.loc[precompute_subject_ids()]

    # Return only the final feature columns (indexed by code names).
    phenotype_processed.drop(biobank_feature_list, axis=1, inplace=True)
    phenotype_processed = phenotype_processed.sort_index()

    phenotype_processed.to_pickle(SIMILARITY_LOOKUP)
    return phenotype_processed
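# Usage sketch: once precomputed, pairwise subject similarity can be scored as
# the fraction of matching phenotype columns. The feature subset below reuses
# code names from the '--similarity' default elsewhere in the project; the
# scoring function itself is an illustrative assumption, not the project's
# similarity metric.
import numpy as np
import pandas as pd

lookup = pd.read_pickle(SIMILARITY_LOOKUP)
features = ['SEX', 'FTE', 'NEU']

def feature_similarity(subject_i, subject_j):
    a = lookup.loc[subject_i, features].to_numpy()
    b = lookup.loc[subject_j, features].to_numpy()
    return float(np.mean(a == b))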
# Population graph parameters.
# Note: argparse applies the type callable to the raw string, and bool() is
# True for any non-empty string (bool('0') == True), so these flags are
# parsed as 0/1 integers rather than with type=bool.
parser.add_argument('--functional', default=0, type=int)
parser.add_argument('--structural', default=1, type=int)
parser.add_argument('--euler', default=1, type=int)
parser.add_argument('--similarity', default="(['SEX', 'ICD10', 'FTE', 'NEU'], 0.8)", type=str)

args = parser.parse_args()
functional = args.functional
structural = args.structural
euler = args.euler
similarity_feature_set = [Phenotype(i) for i in ast.literal_eval(args.similarity)[0]]
similarity_threshold = ast.literal_eval(args.similarity)[1]

graph_name = graph_construct.get_graph_name(
    functional=functional,
    structural=structural,
    euler=euler,
    similarity_feature_set=similarity_feature_set,
    similarity_threshold=similarity_threshold)

if graph_name not in GRAPH_NAMES:
    graph_construct.construct_population_graph(
        similarity_feature_set=similarity_feature_set,
        similarity_threshold=similarity_threshold,
        functional=functional,
class Individu():
    def __init__(self, nb_entrees, nb_sorties, idInd):
        self.nb_e = nb_entrees
        self.nb_s = nb_sorties
        self.id = idInd
        self.espece = None
        self.genome = Genome(self.nb_e, self.nb_s)
        self.phenotype = Phenotype(self.nb_e, self.nb_s)
        self.idToPos = {}  # This table interfaces between the genome and the individual.
        self.fitness = None
        self.sharedFitness = None

    def __repr__(self):
        s = "Ind " + str(self.id) + ":"
        s += "\n Fitness: " + str(self.fitness)
        s += "\n Shared Fitness: " + str(self.sharedFitness)
        s += "\n Espece: " + str(self.espece)
        return s

    def generer(self):
        # Start by adding the inputs and the outputs.
        self.idToPos = {i: (0, i) for i in range(self.nb_e)}
        self.idToPos.update({self.nb_e + j: (1, j) for j in range(self.nb_s)})
        # Copy the genome's weight values into the phenotype.
        self.genome.generer()
        self.phenotype.generer()
        for innov in self.genome.connexions:
            c = self.genome.connexions[innov]
            k, l = self.idToPos[c.sortie][1], self.idToPos[c.entree][1]
            self.phenotype.liens[0][1][k, l] = c.poids

    def calculateFitness(self):
        pass

    def output(self):
        return self.phenotype.couches[-1]

    def rawFitness(self):
        if self.fitness is None:
            self.fitness = 0
        return self.fitness

    def add_key(self, nouvid, couche, num):
        """Updates the idToPos table by adding a node in layer couche with
        index num."""
        assert nouvid not in self.idToPos, "The new identifier must not already exist"
        self.idToPos[nouvid] = (couche, num)

    def insertLayer(self, couche):
        """Inserts a layer after the layer given as parameter."""
        # Shift all positions one layer up.
        for i in self.idToPos:
            if self.idToPos[i][0] > couche:
                n, h = self.idToPos[i]
                self.idToPos[i] = (n + 1, h)
        # Add a new layer by inserting new link matrices.
        self.phenotype.insertLayer(couche)

    def posToId(self, pos):
        for i in self.idToPos:
            if self.idToPos[i] == pos:
                return i

    def estRecursive(self, con):
        ce, ne = self.idToPos[con.entree]
        cs, ns = self.idToPos[con.sortie]
        return ce >= cs

    def insertNoeudCouche(self, couche, idNouvNoeud):
        assert idNouvNoeud not in self.idToPos, "Node already exists"
        self.phenotype.insertNode(couche)
        self.idToPos[idNouvNoeud] = (couche, len(self.phenotype.couches[couche]) - 1)

    def insertNoeud(self, con, p1, p2, innov, idNouvNoeud):
        """Takes an existing connection and replaces it with two new
        connections and an intermediate node. The node occupies the middle
        layer if one exists; a new layer is created if the connection links
        two successive layers or the same layer."""
        # Deactivate the previous connection.
        idN1 = con.entree
        idN2 = con.sortie
        con.desactiver()
        # Get the phenotype positions of the two previously linked nodes.
        c1, n1 = self.idToPos[idN1]
        c2, n2 = self.idToPos[idN2]
        assert c2 - c1 > 0, "A recursive link cannot be split"
        # Case split on whether the two nodes sit in successive layers or not.
        if c2 - c1 >= 2:
            # If the two nodes are not in successive layers, put the new node
            # in a layer halfway between the two.
            m = (c1 + c2) // 2
            p = len(self.phenotype.couches[m])
            # Update the idToPos table.
            self.add_key(idNouvNoeud, m, p)
            # Insert the node into layer m.
            self.phenotype.insertNode(m)
            self.phenotype.modifierConnexion(idN1, idNouvNoeud, self.idToPos, p1)
            self.phenotype.modifierConnexion(idNouvNoeud, idN2, self.idToPos, p2)
            self.phenotype.modifierConnexion(idN1, idN2, self.idToPos, 0)
            self.genome.ajouterConnexion(idN1, idNouvNoeud, p1, innov)
            self.genome.ajouterConnexion(idNouvNoeud, idN2, p2, innov + 1)
        elif c2 - c1 == 1:
            # Add the new layer just above the lower of the two layers (the min).
            c = min(c1, c2)
            self.insertLayer(c)
            self.insertNoeudCouche(c + 1, idNouvNoeud)
            c1, n1 = self.idToPos[idN1]
            c2, n2 = self.idToPos[idN2]
            self.phenotype.modifierConnexion(idN1, idNouvNoeud, self.idToPos, p1)
            self.phenotype.modifierConnexion(idNouvNoeud, idN2, self.idToPos, p2)
            self.phenotype.modifierConnexion(idN1, idN2, self.idToPos, 0)
            self.genome.ajouterConnexion(idN1, idNouvNoeud, p1, innov)
            self.genome.ajouterConnexion(idNouvNoeud, idN2, p2, innov + 1)
        self.phenotype.reinit()

    def connexionPossible(self):
        if not self.phenotype.estComplet():
            tries = 0
            noeuds = self.idToPos.keys()
            noeudsSansEntree = [i for i in noeuds if i not in range(self.nb_e)]
            e = ut.randomPick(noeuds)
            s = ut.randomPick(noeudsSansEntree)
            c = self.genome.entreeSortieToCon(e, s)
            while tries < 10 and c is not None and c.activation:
                e = ut.randomPick(noeuds)
                s = ut.randomPick(noeudsSansEntree)
                c = self.genome.entreeSortieToCon(e, s)
                tries += 1
            if tries < 10:
                if c is not None:
                    return c
                else:
                    return Connexion(e, s, 1)

    def mutationPoids(self):
        for i in self.genome.connexions:
            c = self.genome.connexions[i]
            if rand.random() < prob.mutation.poids:
                if rand.random() < prob.mutation.poids_radical:
                    c.poids = 30 * rand.random() - 15
                else:
                    c.poids += 0.5 * rand.random()
                self.phenotype.modifierConnexion(c.entree, c.sortie, self.idToPos, c.poids)

    def insertLien(self, c, innov):
        self.phenotype.modifierConnexion(c.entree, c.sortie, self.idToPos, c.poids)
        if not c.activation:
            c.activer()
        else:
            self.genome.ajouter(c, innov)

    def draw(self, pos):
        self.phenotype.draw(pos, self.posToId)
class Environment:
    environments = 0

    def __init__(self):
        # ----- location part
        if parameters.location_mode:
            self.mesh_size = 1. / parameters.LD0range / 2
            f = lambda size: [[{} for x in xrange(int(size) + 1)]
                              for y in xrange(int(size) + 1)]
            self.mesh = f(self.mesh_size)
            map_path = parameters.map_phenotype_image(parameters.maps)
            self.load_terrain(map_path + ".info.tmp", map_path + ".tmp")
        # ------------------
        from plant import Plant
        from phenotype import Phenotype
        self.plants = {}
        self.allplantslist = []
        self.generation = 0
        self.__class__.default = self
        self.__class__.environments += 1
        debug.g("niche %d" % parameters.niche_size)
        for i in xrange(parameters.niche_size):
            if parameters.location_mode:
                Plant.new(parameters.get_start_point(parameters.maps))
            else:
                Plant.new((0, 0))
        debug.g("*** %d" % len(self.plants))
        self.optimal_global_phenotype = Phenotype()
        self.base_phenotype = Phenotype()
        self.survivors = parameters.niche_size
        self.randomkiller = selectors.KillerRandom()
        (self.killer, self.reproducer) = selectors.getSelectors()
        self.phenotype_link = Phenotype
        self.history = History(self)
        self.history.update()

    def load_terrain(self, info_filename, data_filename):
        size = (0, 0)
        with open(info_filename) as f:
            arrinfo = array.array('L')
            arrinfo.fromfile(f, 2)
            size = (arrinfo[0], arrinfo[1])
        self.map_size = size
        self.phenotype_map = [[(1, 1, 1) for y in xrange(size[1])]
                              for x in xrange(size[0])]
        with open(data_filename) as f:
            scale = lambda x: (float(x - 127) / 128) * parameters.map_phenotype_amplitude / 2
            arr = array.array('B')
            arr.fromfile(f, size[0] * size[1] * 3)
            for y in xrange(size[1]):
                for x in xrange(size[0]):
                    self.phenotype_map[x][y] = map(scale, (lambda nr: arr[nr:nr + 3])(3 * x + 3 * size[0] * y))
        # debug.g(size)
        # for x in xrange(63):
        #     debug.g(self.phenotype_map[x*10][0])

    def register_new_plant(self, number, plant):
        self.plants[number] = plant
        ##TODO
        if parameters.location_mode:
            try:
                self.mesh[plant.scalex()][plant.scaley()][number] = plant
            except:
                debug.g(plant.scalex())
                debug.g(plant.scaley())
                debug.g(number)
                0 / 0

    def unregister_plant(self, number):
        ##TODO
        if parameters.location_mode:
            plant = self.plants[number]
            del self.mesh[plant.scalex()][plant.scaley()][number]
        del self.plants[number]

    def advance_generation(self):
        from phenotype import Phenotype
        from plant import Plant
        from math import log, sqrt
        #self.avg_transpositions_in_this_generation = 0
        #if self.generation%10==0:
        #    debug.g("====")
        #    debug.g(self.optimal_global_phenotype.properties)
        #    debug.g(self.base_phenotype.properties)
        if parameters.random_pressure > 0.0:
            # for plant in self.plants.values():
            #     if happened(random_pressure):
            #         plant.die()
            self.randomkiller.eliminate(self.plants.values())
        self.killer.eliminate(self.plants.values())

        # ========= LOCATION
        if parameters.location_mode:
            r = parameters.LD0range
            r2 = r * r

            def important_fields_in_mesh(location):
                f = lambda (x, y): [(x, y), (x - r, y - r), (x, y - r), (x + r, y - r),
                                    (x - r, y), (x + r, y),
                                    (x - r, y + r), (x, y + r), (x + r, y + r)]
                g = lambda x: -1 < x and x < self.mesh_size
                h = lambda (x, y): (Plant.scale(x), Plant.scale(y))
                i = lambda (x, y): g(x) and g(y)
                #debug.g(location)
                #debug.g(r)
                fields = set()
                for x in filter(i, map(h, f(location))):
                    fields.add(x)
                return fields

            def shuffle(l):
                random.shuffle(l)
                return l

            def maybe_neighbour_kill(p1, p2):
                fitness_val = p1.fitness()
                d2_fun = lambda ((x1, y1), (x2, y2)): (x2 - x1)**2 + (y2 - y1)**2
                d2 = d2_fun((p1.location, p2.location))
                ##range_val = -2*log(d/r)
                #try: range_val = 2*logr-log(d2)
                #except: range_val = 1
                import math
                f = lambda (y, ymax, ymin): (y - ymin) / (ymax - ymin)
                range_val = f((math.e**(d2), math.e**(r2), 1.))
                if range_val <= 1.:
                    return not distributions.happened(range_val * fitness_val)
                else:
                    return False
                #debug.g("%f, %f" %(range_val, math.sqrt(d2)))

            for plant in shuffle(self.plants.values()):
                if not plant.dead:
                    for (x, y) in important_fields_in_mesh(plant.location):
                        if plant.dead:
                            break
                        for killer_plant in (self.mesh[x][y]).values():
                            if maybe_neighbour_kill(plant, killer_plant) and plant.id != killer_plant.id:
                                plant.die()
                                break
        # =================

        #transpositions_itg = 0
        #for plant in self.plants.values():
        #    transpositions_itg += plant.transpositions
        #self.avg_transpositions_in_this_generation = float(transpositions_itg) / float(len(self.plants.values()))
        self.history.update()
        v = self.plants.values()
        self.survivors = len(v)
        self.reproducer.reproduce(self.plants)
        ##KG
        # for plant in self.plants.values():
        #     print " >> after reproduction part" + str(plant.aut_transposons) + "==" + str(len(plant.aut_transposons_list))
        #     for x in plant.aut_transposons_list:
        #         print "TE #" + str(x.id) + ", parent: " + str(x.parent) + ", aut= " + str(x.is_aut)
        for plant in self.plants.values():
            plant.evolve()
        self.allplantslist = self.plants.values()
        if parameters.expected_horiz_transfers > 0.0:
            for plant in self.plants.values():
                plant.perform_horizontal_transfers()
        if self.generation >= parameters.stability_period:
            if parameters.is_drift_directed:
                for _unused in range(parameters.number_of_mutations):
                    self.base_phenotype[distributions.runifint(0, parameters.no_phenotype_properties - 1)] += parameters.expected_mutation_shift
            else:
                for _unused in range(parameters.number_of_mutations):
                    self.base_phenotype.mutate_once(stdev=parameters.expected_mutation_shift)
            self.optimal_global_phenotype = None
            if parameters.fluctuations_magnitude > 0.0:
                self.optimal_global_phenotype = self.base_phenotype.add(Phenotype.new_random(parameters.fluctuations_magnitude))
            else:
                self.optimal_global_phenotype = self.base_phenotype
        allpl = sorted(self.plants.values(), key=lambda p: p.ord_counter)
        i = 0
        for p in allpl:
            p.ord_counter = i
            i += 1
        #self.history.update()
        self.generation += 1

    def optimal_phenotype_on_map(self, (x, y)):
        scale = lambda (v, length): int(round((length - 1) * (v + 1) / 2))
        xp = scale((x, self.map_size[0]))
        yp = scale((y, self.map_size[1]))
        return self.optimal_global_phenotype.get_map_phenotype(self.phenotype_map[xp][yp])
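# Hypothetical driver loop for the simulation above. Environment() registers
# itself as Environment.default during construction; the generation count of
# 100 is an arbitrary illustration.
env = Environment()
for _ in xrange(100):
    env.advance_generation()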
def __init__(self, data_dict, nphenom):
    # Wrap every entry of data_dict in a Chromosome and expose the results
    # directly as instance attributes.
    self.__dict__ = dict(map(lambda x: (x, Chromosome(data_dict[x])), data_dict))
    # Build nphenom phenotypes, all seeded from the "J" entry.
    self.__phenotype__ = list(map(lambda x: Phenotype(data_dict["J"]), list(range(nphenom))))
def label_permutation_test(model_dir):
    """Permutation test measuring the performance of the model when the labels are shuffled.

    :param model_dir: directory containing the model state dictionaries for each fold
        and the model configuration (including the population graph parameterisation)
    :return: the test set performance for each permutation.
    """
    with open(os.path.join(model_dir, 'config.yaml')) as file:
        cfg = yaml.full_load(file)

    graph_name = cfg['graph_name']['value']
    conv_type = cfg['model']['value']
    n_conv_layers = cfg['n_conv_layers']['value']
    layer_sizes = ast.literal_eval(cfg['layer_sizes']['value'])
    dropout_p = cfg['dropout']['value']

    similarity_feature_set = [Phenotype(i) for i in ast.literal_eval(cfg['similarity']['value'])[0]]
    similarity_threshold = ast.literal_eval(cfg['similarity']['value'])[1]

    if graph_name not in GRAPH_NAMES:
        graph_construct.construct_population_graph(similarity_feature_set=similarity_feature_set,
                                                   similarity_threshold=similarity_threshold,
                                                   functional=False, structural=True, euler=True)

    graph = graph_construct.load_population_graph(graph_root, graph_name)
    folds = brain_gnn_train.get_cv_subject_split(graph, n_folds=5)
    fold = folds[0]
    brain_gnn_train.set_training_masks(graph, *fold)
    graph_transform.graph_feature_transform(graph)

    rs = []
    r2s = []
    mses = []
    for i in range(1000):
        graph.to('cpu')
        permute_population_graph_labels(graph, i)

        if ConvTypes(conv_type) == ConvTypes.GCN:
            model = BrainGCN(graph.num_node_features, n_conv_layers, layer_sizes, dropout_p)
        else:
            model = BrainGAT(graph.num_node_features, n_conv_layers, layer_sizes, dropout_p)
        model.load_state_dict(torch.load(os.path.join(model_dir, 'fold-{}_state_dict.pt'.format(0))))
        model = model.to(device)

        data = graph.to(device)
        model.eval()
        out = model(data)
        predicted = out[data.test_mask].cpu()
        actual = graph.y[data.test_mask].cpu()

        r2 = r2_score(actual.detach().numpy(), predicted.detach().numpy())
        r = pearsonr(actual.detach().numpy().flatten(), predicted.detach().numpy().flatten())
        mse = mean_squared_error(actual.detach().numpy(), predicted.detach().numpy())
        rs.append(r[0])
        r2s.append(r2)
        mses.append(mse)
        print(r[0], r2, mse)

    np.save(os.path.join('notebooks', 'permutations_{}_{}'.format(conv_type, 'r')), rs)
    np.save(os.path.join('notebooks', 'permutations_{}_{}'.format(conv_type, 'r2')), r2s)
    np.save(os.path.join('notebooks', 'permutations_{}_{}'.format(conv_type, 'mse')), mses)

    return [rs, r2s]
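# Follow-up sketch: turning the saved permutation distribution into an
# empirical p-value for the observed (unpermuted) test-set correlation. The
# file name assumes conv_type was 'gcn', and observed_r is a placeholder for
# the real model's score; both are illustrative assumptions.
import os
import numpy as np

rs = np.load(os.path.join('notebooks', 'permutations_gcn_r.npy'))
observed_r = 0.5  # hypothetical unpermuted test-set correlation
p_value = (np.sum(rs >= observed_r) + 1) / (len(rs) + 1)
print('empirical p-value:', p_value)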
from json import JSONEncoder

from phenotype import Phenotype

GFF = '/home/ethan/Documents/github/CoRNonCOB/corncob/killers/Lc20.fasta/prokka_results/PROKKA_03222020.gff'
GENOMES = '/home/ethan/Documents/ecoli_genome/putonti_seqs/nice'
RUN_DIR = '/home/ethan/Documents/phenotype_test'
PROKA = '/home/ethan/prokka/bin/./prokka'

p = Phenotype(GENOMES, RUN_DIR, phenotype='n')
p.pull_peptides(prokka_exec=PROKA)
p.get_conserved_sequences()
def translate_to_phenotype(self):
    return Phenotype(self)
data_timeseries = 'data/raw_ts'
data_phenotype = 'data/phenotype.csv'
data_similarity = 'data/similarity'
data_ct = 'data/CT.csv'
data_sa = 'data/SA.csv'
data_vol = 'data/Vol.csv'
data_euler = 'data/Euler.csv'
data_computed_fcms = 'data/processed_ts'

SUBJECT_IDS = 'data/subject_ids.npy'

# Exclude the following raw timeseries due to incorrect size.
EXCLUDED_UKB_IDS = ['UKB2203847', 'UKB2208238', 'UKB2697888']

# Graph construction phenotypic parameters.
AGE_UID = Phenotype.get_biobank_codes(Phenotype.AGE)[0]


def get_subject_ids(num_subjects=None, randomise=True, seed=0):
    """Gets the list of subject IDs for a specified number of subjects.

    :param num_subjects: number of subjects. Use the entire dataset when set to None.
    :param randomise: indicates whether to use a random seed for selection of subjects.
    :param seed: random seed value.
    :return: list of subject IDs.
    """
    if not os.path.isfile(os.path.join(data_root, 'subject_ids.npy')):
        ukb_preprocess.precompute_subject_ids()

    subject_ids = np.load(os.path.join(data_root, 'subject_ids.npy'),
                          allow_pickle=True)
def init(self, cartSpace, cartPrice, limit):
    for _ in range(self.populationSize):
        self.population.append(Phenotype(cartSpace, cartPrice, limit))
    self.bestSolution = self.population[0]
def evaluate_noise_performance(model_dir, noise_type='node'):
    """Measures the test set performance of the model under the specified model directory
    when noise is added.

    :param model_dir: directory containing the model state dictionaries for each fold
        and the model configuration (including the population graph parameterisation)
    :param noise_type: 'node', 'node-feature-permutation' or 'edge'.
    :return: the dictionary of results under several random seeds and increasing
        probabilities of added noise.
    """
    with open(os.path.join(model_dir, 'config.yaml')) as file:
        cfg = yaml.full_load(file)

    graph_name = cfg['graph_name']['value']
    conv_type = cfg['model']['value']
    n_conv_layers = cfg['n_conv_layers']['value']
    layer_sizes = ast.literal_eval(cfg['layer_sizes']['value'])
    dropout_p = cfg['dropout']['value']
    lr = cfg['learning_rate']['value']
    weight_decay = cfg['weight_decay']['value']

    similarity_feature_set = [Phenotype(i) for i in ast.literal_eval(cfg['similarity']['value'])[0]]
    similarity_threshold = ast.literal_eval(cfg['similarity']['value'])[1]

    if graph_name not in GRAPH_NAMES:
        graph_construct.construct_population_graph(similarity_feature_set=similarity_feature_set,
                                                   similarity_threshold=similarity_threshold,
                                                   functional=False, structural=True, euler=True)

    graph = graph_construct.load_population_graph(graph_root, graph_name)
    folds = brain_gnn_train.get_cv_subject_split(graph, n_folds=5)
    fold = folds[0]

    results = {}
    for i in range(1, 5):
        brain_gnn_train.set_training_masks(graph, *fold)
        results_fold = {}
        for p in [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 0.8, 0.95]:
            graph.to('cpu')
            graph_transform.graph_feature_transform(graph)
            if noise_type == 'node':
                add_population_graph_noise(graph, p, random_state=i)
            if noise_type == 'edge':
                remove_population_graph_edges(graph, p, random_state=i)
            if noise_type == 'node-feature-permutation':
                permute_population_graph_features(graph, p, random_state=i)
            data = graph.to(device)

            epochs = 10000
            model, _ = brain_gnn_train.train(conv_type, graph, device, n_conv_layers, layer_sizes,
                                             epochs, lr, dropout_p, weight_decay, patience=100)
            model.eval()
            out = model(data)
            predicted = out[data.test_mask].cpu()
            actual = data.y[data.test_mask].cpu()

            r2 = r2_score(actual.detach().numpy(), predicted.detach().numpy())
            r = pearsonr(actual.detach().numpy().flatten(), predicted.detach().numpy().flatten())
            results_fold['p={}_metric=r'.format(p)] = [x.item() for x in r][0]
            wandb.run.summary['{}_{}_{}_p={}_metric=r'.format(conv_type, noise_type, i, p)] = [x.item() for x in r][0]
            results_fold['p={}_metric=r2'.format(p)] = r2.item()
            wandb.run.summary['{}_{}_{}_p={}_metric=r2'.format(conv_type, noise_type, i, p)] = r2.item()
            gc.collect()
        results['{}_{}_{}'.format(conv_type, noise_type, i)] = results_fold

    return results