def __init__(self, n_range, subnets: NetworkSet):
    """Initialise the generator with a leaf-count range and a set of subnets.

    :param n_range: iterable of leaf counts the generator may produce.
    :param subnets: NetworkSet of subnetworks used as building blocks.
    """
    # Logging: every generator instance gets its own uniquely-named logger.
    self.uid = guid()
    self.logger_name = f"network generator.{self.uid}"
    self.logger = logging.getLogger(self.logger_name)
    self.logger.debug("Creating new network generator")

    # Parameters
    self.n = n_range
    self.subnets = subnets
    # Smallest leaf count found among the supplied subnetworks.
    self.min_leaves_per_subnet = min(
        subnet.network.number_of_leaves
        for subnet in subnets.per_network_info())
def iterator(self, trinet_set):
    """Yield (parameter-dict, distorted trinet set) pairs.

    Iterates over every combination of the configured tail-move, uniform
    and deletion probabilities and distorts ``trinet_set`` accordingly.
    """
    for tm, u, d in itertools.product(self.tail_move, self.uniform,
                                      self.deletion):
        self.logger.info("Generating next trinet set")
        distorted = NetworkSet.distort(trinet_set, u, tm, d,
                                       reference_networks.TRINET_LIST,
                                       self.max_replacement_level)
        yield {'tail_move': tm, 'uniform': u, 'deletion': d}, distorted
def iterator_2(self):
    """Yield one Experiment per (network params, trinet-set params) combination.

    For each combination a fresh network is generated, its induced trinet
    set is computed and distorted, and the resulting summary/parameters/
    input-output data are stored on the Experiment before yielding it.
    """
    pbar = tqdm(total=len(self))
    for network_params in self.ng.param_iterator():
        for trinet_set_params in self.tsg.param_iterator():
            self.logger.info("Next experiment")
            experiment = Experiment(self.name)

            # Build the problem instance.
            self.logger.info("Generate network")
            network = self.ng.generate(**network_params)
            self.logger.info("Generate trinet set from network")
            input_trinet_set = NetworkSet.induced_strict_network_set(
                network, 3, max_processes=self.max_processes,
                progress_bar=False)
            self.logger.info("Distort trinet set")
            distorted_trinet_set = self.tsg.generate(input_trinet_set,
                                                     **trinet_set_params)

            # Consistency score of the distorted set (intentionally disabled).
            # experiment['consistency scores']['distorted trinet set'] = distorted_trinet_set.consistency_score(input_trinet_set, self.consistency_method)

            # Summary of the distorted trinet set.
            self.logger.info("Compute summary")
            experiment['summaries']['distorted trinet set'] = \
                distorted_trinet_set.summary(depth=[1])

            # Parameters used for this experiment.
            experiment['parameters']['input network'] = network_params
            experiment['parameters']['max processes'] = self.max_processes
            experiment['parameters']['distortion'] = trinet_set_params

            # Inputs/outputs in eNewick form.
            experiment['input output']['input network'] = network.enewick()
            experiment['input output']['distorted trinet set'] = \
                distorted_trinet_set.to_enewick()

            pbar.update(1)
            yield experiment
def rerun(self, derive_network_sets=False):
    """Re-run this experiment from its stored inputs and record the results.

    Loads the saved input network and trinet set, optionally re-derives the
    triplet/cluster sets (``derive_network_sets=True``) or recreates them
    from storage, solves the distorted trinet set, and saves consistency
    scores, run times, outputs and summaries back onto the experiment.

    :param derive_network_sets: when True, recompute the input triplet and
        cluster sets instead of loading them from the stored eNewick data.
    :raises Exception: re-raises any error after saving a failure record.
    """
    self.logger.info("Running experiment ...")
    max_processes = self['parameters']['max processes']
    consistency_method = self['parameters']['consistency method']
    try:
        self.logger.info("Loading input network")
        network = RootedLevelKNetwork.from_enewick(
            self['input output']['input network'])
        network.visualize()
        input_trinet_set = self.recreate_network_set('input trinet set', 3)
        if derive_network_sets:
            input_triplet_set = NetworkSet.induced_strict_tree_set_of_network_set(
                input_trinet_set, 3, max_processes)
            input_cluster_set = NetworkSet.induced_cluster_set(
                network, max_processes)
            # BUG FIX: previously the *trinet* set was saved under the
            # 'input triplet set' key; save the derived triplet set instead.
            self['input output'][
                'input triplet set'] = input_triplet_set.to_enewick()
            self['input output'][
                'input cluster set'] = input_cluster_set.to_enewick()
        else:
            input_triplet_set = self.recreate_network_set(
                'input triplet set', 3)
            input_cluster_set = self.recreate_network_set(
                'input cluster set')
        distorted_trinet_set = self.recreate_network_set(
            'distorted trinet set', 3)

        # Solve; input_time is subtracted from the solving run time below.
        t6 = time.time()
        self.logger.info("Creating solver")
        solver_params = self['parameters']['solver']
        solver = Solver(reference_networks.TRINET_LIST, distorted_trinet_set,
                        **solver_params, is_main=True)
        output_network, solver_scores, input_time = solver.solve()
        t7 = time.time()

        # Derive output sets, timestamping after each step.
        output_trinet_set, t8 = NetworkSet.induced_strict_network_set(
            output_network, 3, max_processes=max_processes), time.time()
        self.logger.info("Computing output triplet set")
        output_triplet_set, t9 = NetworkSet.induced_strict_tree_set_of_network_set(
            output_trinet_set, 3, max_processes=max_processes), time.time()
        self.logger.info("Computing output cluster set")
        output_cluster_set, t10 = NetworkSet.induced_cluster_set(
            output_network, max_processes=max_processes), time.time()

        # Consistency scores between input and output sets.
        self.logger.info("Computing trinet consistency")
        IO_tn_cs, t11 = input_trinet_set.consistency_score(
            output_trinet_set, consistency_method), time.time()
        self.logger.info("Computing triplet consistency")
        IO_tp_cs, t12 = input_triplet_set.consistency_score(
            output_triplet_set, consistency_method), time.time()
        self.logger.info("Computing cluster consistency")
        IO_ct_cs, t13 = input_cluster_set.consistency_score(
            output_cluster_set, consistency_method), time.time()
        self.logger.info("Computing cut-arc set consistency")
        IO_cas_cs, t14 = network.cut_arc_set_consistency(
            output_network), time.time()
        self.logger.info(
            "Checking if output network is equal to input network")
        equal, t15 = network.equal_structure(
            output_network, equal_naming=True), time.time()

        # Save consistencies
        self['consistency scores']['trinet'] = IO_tn_cs
        self['consistency scores']['triplet'] = IO_tp_cs
        self['consistency scores']['cluster'] = IO_ct_cs
        self['consistency scores']['cut-arc set'] = IO_cas_cs
        self['consistency scores']['network'] = equal[0]

        # Save run times
        self['run times']['solving'] = t7 - t6 - input_time
        self['run times']['output trinet set creation'] = t8 - t7
        self['run times']['output triplet set creation'] = t9 - t8
        self['run times']['output cluster set creation'] = t10 - t9
        self['run times']['trinet consistency score'] = t11 - t10
        self['run times']['triplet consistency score'] = t12 - t11
        self['run times']['cluster consistency score'] = t13 - t12
        self['run times']['cut-arc set consistency score'] = t14 - t13
        self['run times']['equal'] = t15 - t14

        # Save input output
        self['input output']['output network'] = output_network.enewick()
        self['input output'][
            'output trinet set'] = output_trinet_set.to_enewick()
        self['input output'][
            'output triplet set'] = output_triplet_set.to_enewick()
        self['input output'][
            'output cluster set'] = output_cluster_set.to_enewick()

        # Save summaries
        self['summaries']['output network'] = output_network.summary()

        # Save finished
        self['finished']['full'] = True
    except Exception as e:
        self.logger.warning(f"An error occurred: {type(e)}: {str(e)}")
        self['finished']['error'] = f"{type(e)}: {str(e)}"
        self.save(extra_path="\\failures")
        raise e
    self.save()
def recreate_network_set(self, category, network_size=None):
    """Rebuild a stored NetworkSet from its saved eNewick representation.

    :param category: key under 'input output' where the eNewick data lives.
    :param network_size: optional fixed network size passed to the parser.
    """
    self.logger.info(f"Loading {category}")
    saved_enewick = self['input output'][category]
    return NetworkSet.from_enewick(saved_enewick, network_size)
def iterator(self, new_network=True):
    """Yield one fully-run Experiment per solver configuration.

    Walks the nested parameter spaces (network -> distortion -> solver),
    solving each distorted trinet set and recording parameters, run times,
    consistency scores, inputs/outputs and summaries on the Experiment.
    When ``new_network`` is True, a fresh network (with the same
    parameters) replaces the current one after each yield.
    Any exception is recorded on the experiment, saved under \\failures,
    and re-raised.
    """
    t0 = time.time()
    pbar = tqdm(total=len(self))
    for network_params, network in self.ng.iterator():
        self.logger.info("Generating next experiment")
        experiment = Experiment(self.name)
        try:
            # Run
            t1 = time.time()
            self.logger.info("Computing input trinet set")
            # Pattern used throughout: "value, tN = expr, time.time()"
            # captures a timestamp immediately after the expression runs.
            input_trinet_set, t2 = NetworkSet.induced_strict_network_set(
                network, 3, max_processes=self.max_processes,
                progress_bar=False), time.time()
            # Triplet/cluster derivations are disabled here (see the
            # commented-out saves below which would use them).
            # self.logger.info("Computing input triplet set")
            # input_triplet_set, t3 = NetworkSet.induced_strict_tree_set_of_network_set(input_trinet_set, 3, max_processes=self.max_processes), time.time()
            # self.logger.info("Computing input cluster set")
            # input_cluster_set, t4 = NetworkSet.induced_cluster_set(network, max_processes=self.max_processes), time.time()

            # Save parameters
            experiment['parameters']['input network'] = network_params
            experiment['parameters']['max processes'] = self.max_processes
            experiment['parameters'][
                'consistency method'] = self.consistency_method

            # Save run times
            experiment['run times']['input network creation'] = t1 - t0
            experiment['run times']['input trinet set creation'] = t2 - t1
            # experiment['run times']['input triplet set creation'] = t3 - t2
            # experiment['run times']['input cluster set creation'] = t4 - t3

            # Save input and output
            experiment['input output']['input network'] = network.enewick()
            experiment['input output'][
                'input trinet set'] = input_trinet_set.to_enewick()
            # experiment['input output']['input triplet set'] = input_triplet_set.to_enewick()
            # experiment['input output']['input cluster set'] = input_cluster_set.to_enewick()

            # Save summary
            experiment['summaries']['input network'] = network.summary()

            t4 = time.time()
            for trinet_set_params, distorted_trinet_set in self.tsg.iterator(
                    input_trinet_set):
                t5 = time.time()
                # Save consistency score
                experiment['consistency scores'][
                    'distorted trinet set'] = distorted_trinet_set.consistency_score(
                        input_trinet_set, self.consistency_method)
                # Save parameters
                experiment['parameters']['distortion'] = trinet_set_params
                # Save run time
                experiment['run times'][
                    'distorted trinet set creation'] = t5 - t4
                # Save input output
                experiment['input output'][
                    'distorted trinet set'] = distorted_trinet_set.to_enewick(
                    )
                # Save summary
                experiment['summaries'][
                    'distorted trinet set'] = distorted_trinet_set.summary(
                    )
                for solver_params, solver in self.sg.iterator(
                        distorted_trinet_set):
                    t6 = time.time()
                    experiment['finished']['init'] = True
                    # Checkpoint the experiment before the (long) solve.
                    experiment.save("\\temp\\")
                    output_network, solver_scores, input_time = solver.solve(
                    )
                    t7 = time.time()
                    self.logger.info("Computing output trinet set")
                    output_trinet_set, t8 = NetworkSet.induced_strict_network_set(
                        output_network, 3,
                        max_processes=self.max_processes), time.time()
                    self.logger.info("Computing output triplet set")
                    output_triplet_set, t9 = NetworkSet.induced_strict_tree_set_of_network_set(
                        output_trinet_set, 3,
                        max_processes=self.max_processes), time.time()
                    self.logger.info("Computing output cluster set")
                    # output_cluster_set, t10 = NetworkSet.induced_cluster_set(output_network, max_processes=self.max_processes), time.time()
                    self.logger.info("Computing trinet consistency score")
                    IO_tn_cs, t11 = input_trinet_set.consistency_score(
                        output_trinet_set,
                        self.consistency_method), time.time()
                    # self.logger.info("Computing triplet consistency score")
                    # IO_tp_cs, t12 = input_triplet_set.consistency_score(output_triplet_set, self.consistency_method), time.time()
                    # self.logger.info("Computing cluster consistency score")
                    # IO_ct_cs, t13 = input_cluster_set.consistency_score(output_cluster_set, self.consistency_method), time.time()
                    self.logger.info(
                        "Computing cut-arc set consistency score")
                    IO_cas_cs, t14 = network.cut_arc_set_consistency(
                        output_network), time.time()
                    self.logger.info(
                        "Checking if output network is equal to input network"
                    )
                    equal, t15 = network.equal_structure(
                        output_network, equal_naming=True), time.time()

                    # Save consistency scores
                    experiment['consistency scores']['trinet'] = IO_tn_cs
                    # experiment['consistency scores']['triplet'] = IO_tp_cs
                    # experiment['consistency scores']['cluster'] = IO_ct_cs
                    # NOTE(review): cut-arc set and network-equality scores
                    # are computed above but their saves are commented out.
                    # experiment['consistency scores']['cut-arc set'] = IO_cas_cs
                    # experiment['consistency scores']['network'] = equal[0]

                    # Solving parameters
                    experiment['parameters']['solver'] = solver_params

                    # Save run times
                    experiment['run times'][
                        'solving'] = t7 - t6 - input_time

                    # Save input output
                    experiment['input output'][
                        'output trinet set'] = output_trinet_set.to_enewick(
                        )
                    # experiment['input output']['output triplet set'] = output_triplet_set.to_enewick()
                    # experiment['input output']['output cluster set'] = output_cluster_set.to_enewick()
                    experiment['input output'][
                        'output network'] = output_network.enewick()

                    # Save summary
                    experiment['summaries'][
                        'output network'] = output_network.summary()

                    # Save finsished
                    experiment['finished']['full'] = True
                    pbar.update(1)
                    yield experiment

                    if new_network:
                        # Replace the current network with a freshly
                        # generated one using the same parameters.
                        # NOTE(review): here generate() is called with a
                        # positional dict and unpacked into two values,
                        # unlike iterator_2's generate(**network_params)
                        # — confirm the intended signature.
                        t0 = time.time()
                        _, network = self.ng.generate(network_params)
                        t1 = time.time()
                        self.logger.info("Computing input trinet set")
                        input_trinet_set, t2 = NetworkSet.induced_strict_network_set(
                            network, 3, max_processes=self.max_processes,
                            progress_bar=False), time.time()
                        # self.logger.info("Computing input triplet set")
                        # input_triplet_set, t3 = NetworkSet.induced_strict_tree_set_of_network_set(input_trinet_set, 3,
                        # max_processes=self.max_processes), time.time()
                        # self.logger.info("Computing input cluster set")
                        # input_cluster_set, t4 = NetworkSet.induced_cluster_set(network, max_processes=self.max_processes), time.time()

                        # Save input and output
                        experiment['input output'][
                            'input network'] = network.enewick()
                        experiment['input output'][
                            'input trinet set'] = input_trinet_set.to_enewick(
                            )
                        # experiment['input output']['input triplet set'] = input_triplet_set.to_enewick()
                        # experiment['input output']['input cluster set'] = input_cluster_set.to_enewick()

                        # Save summary
                        experiment['summaries'][
                            'input network'] = network.summary()
                        t4 = time.time()
        except Exception as e:
            self.logger.warning(f"An error occurred: {type(e)}: {str(e)}")
            experiment['finished']['error'] = f"{type(e)}: {str(e)}"
            experiment.save(extra_path="\\failures")
            raise e
        t0 = time.time()
def iterator_2(self, reuse_network, reuse_trinet_set, compute):
    """Yield one Experiment per (network, distortion, solver) combination.

    :param reuse_network: when True, reuse the same generated network for
        all distortion/solver parameter combinations of one network-param set.
    :param reuse_trinet_set: when True, reuse the distorted trinet set for
        all solver parameter combinations of one distortion-param set.
    :param compute: six flags selecting which scores to compute:
        [trinet, triplet, cluster, cut-arc set, network equality,
        distorted-set consistency]. Defaults to all enabled.
    """
    compute = coalesce(compute, [1, 1, 1, 1, 1, 1])
    pbar = tqdm(total=len(self))
    for network_params in self.ng.param_iterator():
        new_network = True
        for trinet_set_params in self.tsg.param_iterator():
            new_trinet_set = True
            for solver_params in self.sg.param_iterator():
                experiment = Experiment(self.name)

                # Create problem (regenerate only when required).
                if not reuse_network or new_network:
                    network = self.ng.generate(**network_params)
                    input_trinet_set = NetworkSet.induced_strict_network_set(
                        network, 3, max_processes=self.max_processes,
                        progress_bar=False)
                    new_network = False
                if not reuse_trinet_set or new_trinet_set:
                    distorted_trinet_set = self.tsg.generate(
                        input_trinet_set, **trinet_set_params)
                    new_trinet_set = False
                # NOTE(review): solver_params passed positionally here,
                # unlike the **kwargs style used elsewhere — confirm the
                # SolverGenerator.generate signature.
                solver = self.sg.generate(distorted_trinet_set,
                                          solver_params)

                # Solve
                t0 = time.time()
                output_network, solver_scores, input_time = solver.solve()
                t1 = time.time()

                # Derived sets
                if compute[0]:
                    output_trinet_set = NetworkSet.induced_strict_network_set(
                        output_network, 3, max_processes=self.max_processes)
                    IO_tn_cs = input_trinet_set.consistency_score(
                        output_trinet_set, self.consistency_method)
                    experiment['consistency scores']['trinet'] = IO_tn_cs
                    experiment['input output'][
                        'input trinet set'] = input_trinet_set.to_enewick(
                        )
                if compute[1]:
                    # BUG FIX: output_trinet_set used to be built only in
                    # the compute[0] branch, so enabling compute[1] without
                    # compute[0] raised a NameError (or silently used a
                    # stale set). Build it here when it is missing.
                    if not compute[0]:
                        output_trinet_set = NetworkSet.induced_strict_network_set(
                            output_network, 3,
                            max_processes=self.max_processes)
                    input_triplet_set = NetworkSet.induced_strict_tree_set_of_network_set(
                        input_trinet_set, 3,
                        max_processes=self.max_processes)
                    output_triplet_set = NetworkSet.induced_strict_tree_set_of_network_set(
                        output_trinet_set, 3,
                        max_processes=self.max_processes)
                    IO_tp_cs = input_triplet_set.consistency_score(
                        output_triplet_set, self.consistency_method)
                    experiment['consistency scores']['triplet'] = IO_tp_cs
                    experiment['input output'][
                        'input triplet set'] = input_triplet_set.to_enewick(
                        )
                if compute[2]:
                    input_cluster_set = NetworkSet.induced_cluster_set(
                        network, max_processes=self.max_processes)
                    output_cluster_set = NetworkSet.induced_cluster_set(
                        output_network, max_processes=self.max_processes)
                    IO_ct_cs = input_cluster_set.consistency_score(
                        output_cluster_set, self.consistency_method)
                    experiment['consistency scores']['cluster'] = IO_ct_cs
                    experiment['input output'][
                        'input cluster set'] = input_cluster_set.to_enewick(
                        )
                if compute[3]:
                    IO_cas_cs = network.cut_arc_set_consistency(
                        output_network)
                    experiment['consistency scores'][
                        'cut-arc set'] = IO_cas_cs
                if compute[4]:
                    equal = network.equal_structure(output_network,
                                                    equal_naming=True)
                    experiment['consistency scores']['network'] = equal[0]
                if compute[5]:
                    experiment['consistency scores'][
                        'distorted trinet set'] = distorted_trinet_set.consistency_score(
                            input_trinet_set, self.consistency_method)

                # Save run times (input_time is excluded from solving time).
                experiment['run times']['solving'] = t1 - t0 - input_time

                # Save parameters
                experiment['parameters']['input network'] = network_params
                experiment['parameters'][
                    'max processes'] = self.max_processes
                experiment['parameters'][
                    'consistency method'] = self.consistency_method
                experiment['parameters']['distortion'] = trinet_set_params
                experiment['parameters']['solver'] = solver_params

                # Save input and output
                experiment['input output'][
                    'input network'] = network.enewick()
                experiment['input output'][
                    'output network'] = output_network.enewick()
                experiment['input output'][
                    'distorted trinet set'] = distorted_trinet_set.to_enewick(
                    )

                pbar.update(1)
                yield experiment
def generate(self, trinet_set, uniform, tail_move, deletion):
    """Return a distorted copy of ``trinet_set``.

    :param trinet_set: the NetworkSet to distort.
    :param uniform: probability of uniform noise per trinet.
    :param tail_move: probability of a tail move per trinet.
    :param deletion: probability of deleting a trinet.
    """
    return NetworkSet.distort(trinet_set, uniform, tail_move, deletion,
                              reference_networks.TRINET_LIST,
                              self.max_replacement_level)
    # (continuation of the solver `parameters` dict — its opening brace
    # lies outside this chunk)
    'generator_count_method': settings.MAXIMUM_MULTIPLICITY,
    'symmetric_sides_set_count_method': settings.MAXIMUM_MULTIPLICITY,
    'leaf_order_count_method': settings.MAXIMUM_MULTIPLICITY,
    'leaf_order_method': settings.DEFAULT,
    'fill_gaps': settings.FALSE
}

# Root logger at INFO with two handlers: stdout and "<output_file>.log",
# both using the same timestamped format.
root = logging.getLogger()
root.setLevel(logging.INFO)
sh = logging.StreamHandler(sys.stdout)
sh.setLevel(logging.INFO)
sformatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
sh.setFormatter(sformatter)
root.addHandler(sh)
fh = logging.FileHandler(f"{output_file}.log")
fh.setLevel(logging.INFO)
fformatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(fformatter)
root.addHandler(fh)

# Solve the trinet set read from input_file (named-trinet format) and
# write the resulting network as "<output_file>.eNewick".
TS_input = NetworkSet.from_named_trinet_format(input_file)
solver = Solver(TRINET_LIST, TS_input, **parameters)
network, _, _ = solver.solve()
with open(f"{output_file}.eNewick", 'w') as f:
    f.write(network.enewick())
# kk # Processing power max_processes = 16 # ---------------------------------------------------------------------- 2 # Experiment settings experiment_name = 'runtimes\\level_2\\FULL' repeat = 4 generators = [ GENERATOR_DICT['2a'], GENERATOR_DICT['2b'], GENERATOR_DICT['2c'], GENERATOR_DICT['2d'] ] num_leaves = [15, 20, 25, 30, 35, 40] subnets = NetworkSet.subnets_from_generators(generators, 2) ng = NetworkGeneratorSubnets(num_leaves, subnets) # Trinet Set Generator tail_move_prob = [0.025] uni_prob = [0.025] del_prob = [0] max_replacement_level = 2 tsg = TrinetSetGenerator(tail_move_prob, uni_prob, del_prob, max_replacement_level) # ------------------------- MM parameters = { 'cut_arc_set_count_method': [settings.MAXIMUM_MULTIPLICITY], 'minimal_sink_set_method': [settings.EXPAND_FIRST_SCC], 'leaf_locator_method': [settings.DEFAULT],
# Processing power max_processes = 16 # Experiment settings experiment_name = 'consistency_scores\\tail_move\\level_2\\FULL' repeat = 5 # Network Generator generators = [ GENERATOR_DICT['2a'], GENERATOR_DICT['2b'], GENERATOR_DICT['2c'], GENERATOR_DICT['2d'] ] # generators = [GENERATOR_DICT['1']] num_leaves = [15, 20, 25] subnets = NetworkSet.subnets_from_generators( generators, 2) # TODO important change for level 1 vs 2 ng = NetworkGeneratorSubnets(num_leaves, subnets) # Trinet Set Generator tail_move_prob = [0.01, 0.05, 0.10, 0.20] uni_prob = [0] del_prob = [0] max_replacement_level = 2 tsg = TrinetSetGenerator(tail_move_prob, uni_prob, del_prob, max_replacement_level) # Solver Generator parameters = { 'cut_arc_set_count_method': [settings.MAXIMUM_MULTIPLICITY], 'minimal_sink_set_method': [settings.EXPAND_FIRST_SCC], 'leaf_locator_method': [settings.DEFAULT],
# (continuation of the solver `parameters` dict — its opening brace lies
# outside this chunk)
'leaf_order_count_method': settings.MAXIMUM_MULTIPLICITY,
'leaf_order_method': settings.DEFAULT,
'fill_gaps': settings.FALSE
}

# Root logger at INFO with a file handler only (stdout handler disabled).
root = logging.getLogger('')
root.setLevel(logging.INFO)
# sh = logging.StreamHandler(sys.stdout)
# sh.setLevel(logging.INFO)
# sformatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# sh.setFormatter(sformatter)
# root.addHandler(sh)
# Truncate any previous log file before attaching the handler.
with open(f"{output_file}.log", 'w+') as _:
    pass
fh = logging.FileHandler(f"{output_file}.log")
fh.setLevel(logging.INFO)
fformatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(fformatter)
root.addHandler(fh)

# Solve the network set read from input_file (eNewick format) and write
# the resulting network as "<output_file>.eNewick".
TS_input = NetworkSet.from_enewick_format(input_file)
solver = Solver(TRINET_LIST, TS_input, **parameters)
network, _, _ = solver.solve()
with open(f"{output_file}.eNewick", 'w+') as f:
    f.write(network.enewick())
def regenerate_standard_binets_trinets() -> None:
    """Regenerate and save all possible binets and trinets.

    Builds every biconnected binet/trinet from the level-0/1/2 generators,
    combines pairs of binets into two-component trinets, computes the
    optimization variables of every network, and pickles the collections
    to "data/all_networks_save.pickle".
    """
    logging.debug("Regenerating all possible trinets and saving them.")
    # All generators per level
    all_generators = {
        0: [generators.generator_level0],
        1: [generators.generator_level1],
        2: [
            generators.generator_A, generators.generator_B,
            generators.generator_C, generators.generator_D
        ]
    }

    # Get biconnected binets and trinets for each generator.
    # (Only the generator objects are needed here, not the level keys.)
    biconnected_trinet_list = NetworkSet(equal_naming=False)
    biconnected_binet_list = NetworkSet(equal_naming=False)
    for generator_list in all_generators.values():
        for generator in generator_list:
            generator_trinet_info_list = generator.build_trinets()
            generator_binet_info_list = generator.build_binets()
            biconnected_trinet_list.extend(generator_trinet_info_list)
            biconnected_binet_list.extend(generator_binet_info_list)
    biconnected_trinet_list.set_multiplicities_to_one()
    biconnected_binet_list.set_multiplicities_to_one()

    # From binets create trinets with two biconnected components: replace
    # each leaf of the first binet by a copy of the second binet.
    two_component_trinet_list = NetworkSet()
    two_binet_infos_iterator = itertools.product(
        biconnected_binet_list.per_network_info(), repeat=2)
    for binet_infos in two_binet_infos_iterator:
        for leaf_name in binet_infos[0].network.leaf_names:
            two_component_trinet = copy.deepcopy(binet_infos[0].network)
            two_component_trinet.replace_leaf_with_network(
                leaf_name, binet_infos[1].network, replace_names=True,
                char_type='ALPH')
            two_component_trinet_list.append(NetworkInfo(two_component_trinet))
    two_component_trinet_list.extend(biconnected_trinet_list)

    # Binets also count as (degenerate) trinets; refresh their
    # optimization variables before computing the collection info.
    biconnected_trinet_list.extend(biconnected_binet_list)
    for network_info in biconnected_binet_list.per_network_info():
        network_info.network.reset_optimization_variables()
        network_info.network.calculate_optimization_variables()
    biconnected_trinet_list.calculate_info()

    two_component_trinet_list.set_multiplicities_to_one()
    for network_info in two_component_trinet_list.per_network_info():
        network_info.network.reset_optimization_variables()
        network_info.network.calculate_optimization_variables()
    two_component_trinet_list.calculate_info()

    # Context manager guarantees the pickle file is closed even on error
    # (the original left the handle open if pickle.dump raised).
    with open("data/all_networks_save.pickle", 'wb+') as pickle_out:
        data = [
            all_generators, biconnected_trinet_list,
            two_component_trinet_list
        ]
        pickle.dump(data, pickle_out)
# Console handler at DEBUG; file handler at the externally configured
# `logging_level` (defined outside this chunk — TODO confirm).
console = logging.StreamHandler()
console.setLevel(logging.DEBUG)
fh = logging.FileHandler(filename='file_log.log')
fh.setLevel(logging_level)
# set a format which is simpler for console use
formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
# tell the handler to use this format
console.setFormatter(formatter)
fh.setFormatter(formatter)
# add the handler to the root logger
logging.getLogger('').addHandler(console)
# the file handler goes to the 'network' logger only, not the root
logging.getLogger('network').addHandler(fh)

# NOTE(review): imports placed after the logging setup — presumably so
# import-time logging already uses these handlers; confirm before moving.
import os
from datastructures.rooted_level_k_network import RootedLevelKNetwork, NetworkSet, Omega
from data.reference_networks import get_standard_binets_trinets, regenerate_standard_binets_trinets, pickle_save, pickle_read
from utils.help_functions import *
from data.generators import *
from config import settings

if __name__ == '__main__':
    # Smoke test: random network -> induced trinet set -> save/load
    # round trip -> consistency score of the round-tripped set.
    n = RootedLevelKNetwork.random(10, 0.4, 1)
    n.visualize()
    ts = NetworkSet.induced_strict_network_set(n, 3)
    ts.save_to_file('jojoj', frmt=settings.FORMAT_tnets)
    ts2 = NetworkSet.from_named_trinet_format('jojoj.tnet')
    print(ts.consistency_score(ts2, method=settings.WEIGHTED_AVERAGE))
if __name__ == '__main__':
    # --- configuration -------------------------------------------------
    # SUBNET SETUP
    subnet_level = 1
    number_of_subnets = 10
    number_of_leaves_per_internal_arc = 4
    # NETWORK SETUP
    number_of_species = 20
    # DISTORTION SETUP
    tail_move_fraction = 0
    uniform_noise_fraction = 0.1

    # --- build the input network ---------------------------------------
    generators = GENERATOR_LEVEL_LIST[subnet_level]
    subnets = NetworkSet.subnets_from_generators(
        generators, number_of_leaves_per_internal_arc)
    input_network = RootedLevelKNetwork.from_subnets(
        subnets, number_of_subnets, number_of_species)
    input_network.visualize()

    # --- derive and distort its trinet set -----------------------------
    trinet_set = NetworkSet.induced_strict_network_set(input_network, 3, 8)
    distorted_trinet_set = NetworkSet.distort(
        trinet_set, uniform_noise_fraction, tail_move_fraction, 0,
        TRINET_LIST, subnet_level)

    # --- solve ----------------------------------------------------------
    solver = Solver(TRINET_LIST, distorted_trinet_set)
    output_network = solver.solve()[0]