def init():
    """Initialise the dependent modules and validate the seed-tree settings.

    Runs ``SeedSequence_Virus.init()`` and
    ``SequenceEvolution_GTRGammaSeqGen.init()``, coerces ``GC.seed_height``
    to ``float`` (it must be positive), requires ``GC.check_eval_str`` to
    accept ``GC.seed_speciation_rate_func``, and finally calls
    ``GC.check_seqgen_executable()``.

    Raises:
        AssertionError: if one of the checks above fails.
    """
    SeedSequence_Virus.init()
    SequenceEvolution_GTRGammaSeqGen.init()

    height = float(GC.seed_height)
    GC.seed_height = height
    assert height > 0, "seed_height must be positive"

    rate_func_is_safe = GC.check_eval_str(GC.seed_speciation_rate_func)
    assert rate_func_is_safe, "Potentially dangerous seed_speciation_rate_func"

    GC.check_seqgen_executable()
def evolve_to_current_time(node, finalize=False):
    """Evolve every virus in ``node`` up to ``GC.time`` using the dualbirth executable.

    For each virus whose time is behind ``GC.time``, the virus is removed
    from ``node``, the executable at ``GC.dualbirth_path`` is run for the
    elapsed time, the virus' time is advanced by the root edge length of the
    returned tree, and the root's children are attached through
    ``GC.treenode_add_child``.

    Args:
        node: the contact-network node to evolve; ``None`` is a no-op.
        finalize: accepted for interface compatibility; not used here.

    Raises:
        AssertionError: if the dualbirth executable cannot be found.
    """
    if node is None:
        return
    # Snapshot first: viruses are removed from ``node`` while we iterate.
    for virus in list(node.viruses()):
        time = GC.time - virus.get_time()
        if not time > 0:
            continue
        node.remove_virus(virus)
        command = [
            GC.dualbirth_path, str(GC.rate_A), str(GC.rate_B), '-t', str(time)
        ]
        if GC.random_number_seed is not None:
            command += ['-s', str(GC.random_number_seed)]
            GC.random_number_seed += 1
        try:
            treestr = check_output(command).decode()
        except FileNotFoundError as err:
            from os import chdir
            chdir(GC.START_DIR)
            # Raise explicitly: ``assert False`` disappears under ``python -O``
            # and would leave ``treestr`` unbound below.
            raise AssertionError(
                "dualbirth executable was not found: %s" % GC.dualbirth_path
            ) from err
        tree = read_tree_newick(treestr)
        virus.set_time(virus.get_time() + tree.root.edge_length)
        for c in tree.root.children:
            GC.treenode_add_child(virus, c, node)
def init():
    """Initialise dependencies, validate ``GC.seed_population`` and load DendroPy.

    ``GC.seed_population`` is coerced to ``float`` and must be positive.
    ``TaxonNamespace`` and ``treesim`` are imported from DendroPy into the
    module globals.

    Raises:
        AssertionError: if ``seed_population`` is not positive or DendroPy
            cannot be loaded.
    """
    global TaxonNamespace, treesim
    SeedSequence_Virus.init()
    SequenceEvolution_GTRGammaSeqGen.init()
    GC.seed_population = float(GC.seed_population)
    assert GC.seed_population > 0, "seed_population must be positive"
    GC.check_seqgen_executable()
    try:
        from dendropy import TaxonNamespace
        from dendropy.simulate import treesim
    except Exception as err:
        # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still propagate.
        from os import chdir
        chdir(GC.START_DIR)
        # Explicit raise: ``assert False`` is stripped under ``python -O``.
        raise AssertionError(
            "Error loading DendroPy. Install with: pip3 install dendropy"
        ) from err
def init():
    """Initialise dependencies, validate the birth/death rates and load DendroPy.

    ``GC.seed_birth_rate`` is coerced to ``float`` and must be positive;
    ``GC.seed_death_rate`` is coerced to ``float`` and must be non-negative.
    ``treesim`` is imported from DendroPy into the module globals.

    Raises:
        AssertionError: if a rate is out of range or DendroPy cannot be loaded.
    """
    global treesim
    SeedSequence_Virus.init()
    SequenceEvolution_GTRGammaSeqGen.init()
    GC.seed_birth_rate = float(GC.seed_birth_rate)
    assert GC.seed_birth_rate > 0, "seed_birth_rate must be positive"
    GC.seed_death_rate = float(GC.seed_death_rate)
    assert GC.seed_death_rate >= 0, "seed_death_rate must be at least 0"
    GC.check_seqgen_executable()
    try:
        from dendropy.simulate import treesim
    except Exception as err:
        # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still propagate.
        from os import chdir
        chdir(GC.START_DIR)
        # Explicit raise: ``assert False`` is stripped under ``python -O``.
        raise AssertionError(
            "Error loading DendroPy. Install with: pip3 install dendropy"
        ) from err
def get_edge_list():
    """Build a barbell contact network, write it to disk and return its edge list.

    The graph comes from ``barbell_graph(GC.barbell_m1, GC.barbell_m2)`` and
    is converted with ``GC.nx2favites(cn, 'u')``. The edge list is written to
    ``<GC.out_dir>/contact_network.txt.gz`` and the two end communities to
    ``<GC.out_dir>/contact_network_partitions.txt.gz``. ``GC.cn_communities``
    is left holding the communities as sets of string labels.

    Returns:
        The value returned by ``GC.nx2favites``.
    """
    cn = barbell_graph(GC.barbell_m1, GC.barbell_m2)
    out = GC.nx2favites(cn, 'u')
    # ``with`` closes the gzip handle even if a write fails.
    with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb', 9) as f:
        f.write('\n'.join(out).encode())
        f.write(b'\n')

    # only left and right communities, not the path
    m1, m2 = GC.barbell_m1, GC.barbell_m2
    GC.cn_communities = [
        set(range(m1)),
        set(range(m1 + m2, 2 * m1 + m2)),
    ]
    with gopen(expanduser("%s/contact_network_partitions.txt.gz" % GC.out_dir), 'wb', 9) as f:
        f.write(str(GC.cn_communities).encode())
        f.write(b'\n')

    # Downstream code gets node labels as strings.
    GC.cn_communities = [{str(i) for i in c} for c in GC.cn_communities]
    return out
def generate():
    """Return one seed sequence, simulating the whole batch with Seq-Gen on first use.

    On the first call (``GC.seed_sequences`` not yet set) a root sequence is
    generated, a tree is obtained from ``GC.mean_kingman_tree``, converted by
    the ``TreeUnit`` module, and passed to the Seq-Gen executable. The
    sequences parsed from its output are stored in ``GC.seed_sequences``.
    Every call pops and returns one sequence from that list.

    Raises:
        AssertionError: if Seq-Gen exits with an error, or if no sequences
            are left to hand out.
    """
    if not hasattr(GC, "seed_sequences"):
        rootseq = SeedSequence_Virus.generate()
        treestr = GC.mean_kingman_tree(len(GC.seed_nodes), pop_size=GC.seed_population)
        makedirs(OUT_FOLDER, exist_ok=True)
        with open(OUT_FOLDER + '/time_tree.tre', 'w') as f:
            f.write(treestr)
        treestr = MF.modules['TreeUnit'].time_to_mutation_rate(treestr)
        seqgen_file = OUT_FOLDER + '/seed.txt'
        with open(seqgen_file, 'w') as f:
            f.write("1 %d\nROOT %s\n1\n%s" % (len(rootseq), rootseq, treestr))
        command = [GC.seqgen_path, '-or', '-k1']
        if GC.random_number_seed is not None:
            command += ['-z%d' % GC.random_number_seed]
            GC.random_number_seed += 1
        command += GC.seqgen_args.split()
        try:
            # Context managers so the stdin/stderr handles are always closed.
            with open(seqgen_file) as seqgen_in, \
                    open(OUT_FOLDER + '/log_seqgen.txt', 'w') as seqgen_log:
                seqgen_out = check_output(
                    command, stdin=seqgen_in, stderr=seqgen_log).decode('ascii')
            with open(OUT_FOLDER + '/seqgen.out', 'w') as f:
                f.write(seqgen_out)
        except CalledProcessError as e:
            with open('seqgen.err', 'w') as f:
                f.write(str(e))
            chdir(GC.START_DIR)
            # Explicit raise: ``assert False`` is stripped under ``python -O``
            # and would leave ``seqgen_out`` unbound below.
            raise AssertionError("Seq-Gen encountered an error") from e
        # Skip the first output line; the sequence is the last field of each line.
        GC.seed_sequences = [
            line.split()[-1].strip() for line in seqgen_out.splitlines()[1:]
        ]
    try:
        return GC.seed_sequences.pop()
    except IndexError as e:
        raise AssertionError("Late seeds are not supported at this time") from e
def sample_times(node, num_times):
    """Draw ``num_times`` sampling times uniformly from the windows in which ``node`` was infected.

    Windows are rebuilt from ``GC.transmissions``: an event ``(node, node, t)``
    closes the current window at ``t``, and the next event targeting ``node``
    opens a new one. A window still open at the end is closed at ``GC.time``.
    A window is picked with ``GC.roll`` weighted by its length, then a time is
    drawn with ``uniform`` inside it.

    Args:
        node: the contact-network node to sample.
        num_times: number of sampling times to draw.

    Returns:
        A list of ``num_times`` times, or ``[]`` if the node was never infected.

    Raises:
        AssertionError: if ``GC.transmissions`` has not been set yet.
    """
    assert hasattr(
        GC, 'transmissions'
    ), "No transmission network found in global context! Run this after the transmission network simulation is done"
    first_time = node.get_first_infection_time()
    if first_time is None:
        return []

    windows = []
    last_time = first_time  # start of the currently open window, or None
    for u, v, t in GC.transmissions:
        if u == node and v == node:
            if last_time is not None and t > last_time:
                windows.append((last_time, t))
            # The event ends the infection whether or not a window was recorded.
            last_time = None
        elif last_time is None and v == node:
            last_time = t

    # Close the trailing window against the global end time. The previous
    # code compared the leftover loop variable ``t`` instead, which dropped
    # the window when the node's last infection was the final event and
    # failed on an empty transmission list.
    if last_time is not None and GC.time > last_time:
        windows.append((last_time, GC.time))
    if len(windows) == 0:
        windows.append((first_time, GC.time))

    weighted_die = {(start, end): end - start for start, end in windows}
    if len(weighted_die) == 1:
        # A lone window may have zero length; give it a usable weight.
        weighted_die[next(iter(weighted_die))] = 1

    out = []
    for _ in range(num_times):
        start, end = GC.roll(weighted_die)
        out.append(uniform(start, end))
    return out
def get_edge_list():
    """Build a complete contact network, write it to disk and return its edge list.

    The graph comes from ``complete_graph(GC.num_cn_nodes)`` and is converted
    with ``GC.nx2favites(cn, 'u')``. The result is written, newline-joined,
    to ``<GC.out_dir>/contact_network.txt.gz``.

    Returns:
        The value returned by ``GC.nx2favites``.
    """
    cn = complete_graph(GC.num_cn_nodes)
    out = GC.nx2favites(cn, 'u')
    # ``with`` closes the gzip handle even if a write fails.
    with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb', 9) as f:
        f.write('\n'.join(out).encode())
        f.write(b'\n')
    return out
def init():
    """Load Pyvolve, validate the ECM settings and build ``GC.pyvolve_model``.

    ``GC.ecm_type`` must be ``"restricted"`` or ``"unrestricted"`` and is
    replaced by ``'ECMrest'`` / ``'ECMunrest'``. ``GC.ecm_alpha``,
    ``GC.ecm_beta`` and ``GC.ecm_omega`` are passed to the model as floats
    unless they are empty strings. A non-empty
    ``GC.ecm_codon_frequencies_dictionary`` must use only non-STOP codons and
    sum to 1; it is passed as ``state_freqs``.

    Raises:
        AssertionError: if Pyvolve cannot be loaded or a setting is invalid.
    """
    global pyvolve
    try:
        import pyvolve
    except Exception as err:
        # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still propagate.
        from os import chdir
        chdir(GC.START_DIR)
        # Explicit raise: ``assert False`` is stripped under ``python -O``.
        raise AssertionError(
            "Error loading Pyvolve. Install with: pip3 install pyvolve"
        ) from err

    # config validity checks
    custom_model_params = {}
    GC.ecm_type = GC.ecm_type.strip()
    if GC.ecm_type == 'restricted':
        GC.ecm_type = 'ECMrest'
    elif GC.ecm_type == 'unrestricted':
        GC.ecm_type = 'ECMunrest'
    else:
        raise AssertionError('ecm_type must be "restricted" or "unrestricted"')

    # alpha / beta / omega share one rule: a blank string means "not set".
    for param in ('alpha', 'beta', 'omega'):
        attr = 'ecm_' + param
        value = getattr(GC, attr)
        if isinstance(value, str):
            value = value.strip()
            setattr(GC, attr, value)
            if len(value) == 0:
                continue
        custom_model_params[param] = float(value)

    assert isinstance(
        GC.ecm_codon_frequencies_dictionary, dict
    ), "Specified ecm_codon_frequencies_dictionary is not a dictionary"
    if len(GC.ecm_codon_frequencies_dictionary) != 0:
        codons = set(GC.generate_all_kmers(3, 'ACGT'))
        codons.difference_update({'TGA', 'TAA', 'TAG'})  # remove STOP codons
        for key in GC.ecm_codon_frequencies_dictionary:
            # The message is now interpolated; it used to print a literal "%s".
            assert key in codons, (
                "%s is not a valid codon for ecm_codon_frequencies_dictionary. Only include 3-mers of the DNA alphabet, excluding the STOP codons (TGA, TAA, and TAG)"
                % key
            )
        assert abs(
            sum(GC.ecm_codon_frequencies_dictionary.values()) - 1
        ) < 0.000000001, "Frequencies in ecm_codon_frequencies_dictionary must sum to 1"
        custom_model_params['state_freqs'] = GC.ecm_codon_frequencies_dictionary

    # set up Pyvolve
    if len(custom_model_params) == 0:
        GC.pyvolve_model = pyvolve.Model(GC.ecm_type)
    else:
        GC.pyvolve_model = pyvolve.Model(GC.ecm_type, custom_model_params)
def init():
    """Validate the Seq-Gen settings and load TreeSwift.

    ``GC.seqgen_path`` is stripped and user-expanded and ``GC.seqgen_args``
    is stripped. The arguments must not contain ``-d``, ``-k``, ``-l``,
    ``-n``, ``-o``, ``-p`` or ``-s``, and must contain ``-m`` followed by a
    model listed in ``SEQGEN_MODES``. ``read_tree_newick`` is imported from
    TreeSwift into the module globals.

    Raises:
        AssertionError: if an argument check fails or TreeSwift cannot be loaded.
    """
    global read_tree_newick
    GC.seqgen_path = expanduser(GC.seqgen_path.strip())
    GC.seqgen_args = GC.seqgen_args.strip()
    # Substring test, as before: these flags are rejected anywhere in the string.
    for flag in ('-d', '-k', '-l', '-n', '-o', '-p', '-s'):
        assert flag not in GC.seqgen_args, "Do not use the Seq-Gen %s argument" % flag
    assert '-m' in GC.seqgen_args, "Must specify a Seq-Gen model using the -m argument"
    # The model name is the first token after the first "-m".
    mode = GC.seqgen_args.split('-m')[1].strip().split(' ')[0]
    assert mode in SEQGEN_MODES.split(', '), "Invalid Seq-Gen model (%s). Options: %s" % (mode, SEQGEN_MODES)
    GC.check_seqgen_executable()
    try:
        from treeswift import read_tree_newick
    except Exception as err:
        # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still propagate.
        from os import chdir
        chdir(GC.START_DIR)
        # Explicit raise: ``assert False`` is stripped under ``python -O``.
        raise AssertionError(
            "Error loading TreeSwift. Install with: pip3 install treeswift"
        ) from err
def get_edge_list():
    """Build a Barabasi-Albert contact network, write it to disk and return its edge list.

    The graph comes from ``barabasi_albert_graph(GC.num_cn_nodes,
    GC.num_edges_from_new)`` seeded with ``GC.random_number_seed``; a
    non-``None`` seed is incremented afterwards. The converted edge list is
    written to ``<GC.out_dir>/contact_network.txt.gz``.

    Returns:
        The value returned by ``GC.nx2favites``.
    """
    cn = barabasi_albert_graph(GC.num_cn_nodes,
                               GC.num_edges_from_new,
                               seed=GC.random_number_seed)
    if GC.random_number_seed is not None:
        GC.random_number_seed += 1
    out = GC.nx2favites(cn, 'u')
    # ``with`` closes the gzip handle even if a write fails.
    with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb', 9) as f:
        f.write('\n'.join(out).encode())
        f.write(b'\n')
    return out
def evolve_to_current_time(node, finalize=False):
    """Evolve every virus in ``node`` up to ``GC.time`` with a birth-death tree.

    For each virus whose time is behind ``GC.time``, the virus is removed
    from ``node`` and ``birth_death_tree`` is called (up to 100 times) until
    the tree's seed node has more than one child. The virus' time is then
    advanced by the seed node's edge length and the seed node's children are
    attached through ``GC.treenode_add_child``.

    Args:
        node: the contact-network node to evolve; ``None`` is a no-op (the
            driver calls this with ``None`` before finalising).
        finalize: accepted for interface compatibility; not used here.

    Raises:
        AssertionError: if no suitable tree is produced in 100 attempts.
    """
    if node is None:
        return
    # Snapshot first: viruses are removed from ``node`` while we iterate.
    for virus in list(node.viruses()):
        time = GC.time - virus.get_time()
        if not time > 0:
            continue
        node.remove_virus(virus)
        success = False
        for _ in range(100):
            tree = birth_death_tree(GC.bd_birth,
                                    GC.bd_death,
                                    birth_rate_sd=GC.bd_birth_sd,
                                    death_rate_sd=GC.bd_death_sd,
                                    max_time=time,
                                    repeat_until_success=True,
                                    rng=rng)
            if tree.seed_node.num_child_nodes() > 1:
                success = True
                break
        assert success, "Failed to create non-empty Birth-Death tree after 100 attempts. Perhaps try a higher birth rate or lower death rate?"
        virus.set_time(virus.get_time() + tree.seed_node.edge_length)
        for c in tree.seed_node.child_node_iter():
            GC.treenode_add_child(virus, c, node)
def get_edge_list():
    """Build a relaxed-caveman contact network, write it and its partitions, and return its edge list.

    The graph comes from ``relaxed_caveman_graph(GC.cave_num_cliques,
    GC.cave_clique_size, GC.cave_prob)`` seeded with
    ``GC.random_number_seed``; a non-``None`` seed is incremented afterwards.
    The edge list goes to ``<GC.out_dir>/contact_network.txt.gz``; the
    communities (consecutive blocks of ``GC.cave_clique_size`` node indices)
    go to ``<GC.out_dir>/contact_network_partitions.txt.gz``.
    ``GC.cn_communities`` is left holding the communities as sets of string
    labels.

    Returns:
        The value returned by ``GC.nx2favites``.
    """
    cn = relaxed_caveman_graph(GC.cave_num_cliques,
                               GC.cave_clique_size,
                               GC.cave_prob,
                               seed=GC.random_number_seed)
    if GC.random_number_seed is not None:
        GC.random_number_seed += 1
    out = GC.nx2favites(cn, 'u')
    # ``with`` closes the gzip handle even if a write fails.
    with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb', 9) as f:
        f.write('\n'.join(out).encode())
        f.write(b'\n')

    size = GC.cave_clique_size
    GC.cn_communities = [
        set(range(c * size, (c + 1) * size)) for c in range(GC.cave_num_cliques)
    ]
    with gopen(expanduser("%s/contact_network_partitions.txt.gz" % GC.out_dir), 'wb', 9) as f:
        f.write(str(GC.cn_communities).encode())
        f.write(b'\n')

    # Downstream code gets node labels as strings.
    GC.cn_communities = [{str(i) for i in c} for c in GC.cn_communities]
    return out
def get_edge_list():
    """Build a random-partition contact network, write it and its partitions, and return its edge list.

    The graph comes from ``random_partition_graph(GC.rpg_sizes, GC.rpg_p_in,
    GC.rpg_p_out)``, directed when ``GC.d_or_u == 'd'`` and seeded with
    ``GC.random_number_seed``; a non-``None`` seed is incremented afterwards.
    The edge list goes to ``<GC.out_dir>/contact_network.txt.gz`` and
    ``cn.graph['partition']`` to
    ``<GC.out_dir>/contact_network_partitions.txt.gz``.
    ``GC.cn_communities`` is set to the partition as sets of string labels.

    Returns:
        The value returned by ``GC.nx2favites``.
    """
    du = GC.d_or_u == 'd'
    cn = random_partition_graph(GC.rpg_sizes,
                                GC.rpg_p_in,
                                GC.rpg_p_out,
                                directed=du,
                                seed=GC.random_number_seed)
    if GC.random_number_seed is not None:
        GC.random_number_seed += 1
    out = GC.nx2favites(cn, GC.d_or_u)
    # ``with`` closes the gzip handle even if a write fails.
    with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb', 9) as f:
        f.write('\n'.join(out).encode())
        f.write(b'\n')
    partition = cn.graph['partition']
    with gopen(expanduser("%s/contact_network_partitions.txt.gz" % GC.out_dir), 'wb', 9) as f:
        f.write(str(partition).encode())
        f.write(b'\n')
    GC.cn_communities = [{str(n) for n in c} for c in partition]
    return out
def get_edge_list():
    """Build a Newman-Watts-Strogatz contact network, write it to disk and return its edge list.

    The graph comes from ``newman_watts_strogatz_graph(GC.num_cn_nodes,
    GC.nws_k, GC.nws_prob)`` seeded with ``GC.random_number_seed``; a
    non-``None`` seed is incremented afterwards. The converted edge list is
    written to ``<GC.out_dir>/contact_network.txt.gz``.

    Returns:
        The value returned by ``GC.nx2favites``.
    """
    cn = newman_watts_strogatz_graph(GC.num_cn_nodes,
                                     GC.nws_k,
                                     GC.nws_prob,
                                     seed=GC.random_number_seed)
    if GC.random_number_seed is not None:
        GC.random_number_seed += 1
    out = GC.nx2favites(cn, 'u')
    # ``with`` closes the gzip handle even if a write fails.
    with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb', 9) as f:
        f.write('\n'.join(out).encode())
        f.write(b'\n')
    return out
def init():
    """Validate the community-seed settings and initialise the dependent modules.

    Checks performed, in order:
      * the active ``ContactNetworkGenerator`` is listed in
        ``GC.COMMUNITY_GENERATORS``;
      * ``GC.community_seed_scaled_mutation_rate`` (coerced to float) is positive;
      * ``GC.community_seed_populations`` is a list whose entries (coerced to
        int) are positive;
      * ``GC.community_seed_migration_rates`` is a dict of dicts with no
        non-zero self-migration rate and non-negative rates (coerced to
        float) elsewhere.

    Raises:
        AssertionError: if any check fails or the migration dictionary is
            missing an expected key.
    """
    GC.msms_path = expanduser(GC.msms_path.strip())
    assert MF.modules['ContactNetworkGenerator'].__name__ in GC.COMMUNITY_GENERATORS, \
        "Must use a ContactNetworkGenerator that creates communities (%s)" % ', '.join(
            sorted(GC.COMMUNITY_GENERATORS))
    SeedSequence_Virus.init()
    SequenceEvolution_GTRGammaSeqGen.init()

    GC.community_seed_scaled_mutation_rate = float(
        GC.community_seed_scaled_mutation_rate)
    assert GC.community_seed_scaled_mutation_rate > 0, "community_seed_scaled_mutation_rate must be positive"

    assert isinstance(
        GC.community_seed_populations, list
    ), "community_seed_populations must be a list of positive integers"
    for i, size in enumerate(GC.community_seed_populations):
        GC.community_seed_populations[i] = int(size)
        assert GC.community_seed_populations[i] > 0, "community_seed_populations must be a list of positive integers"

    assert isinstance(
        GC.community_seed_migration_rates, dict
    ), "community_seed_migration_rates must be a dictionary of dictionaries of floats"
    rates = GC.community_seed_migration_rates
    try:
        for i in range(len(GC.community_seed_populations)):
            for j in range(len(rates)):
                if i == j:
                    assert i not in rates[i] or float(rates[i][i]) == 0., \
                        "Non-zero self-migration rate found in community_seed_migration_rates"
                else:
                    rates[i][j] = float(rates[i][j])
                    assert rates[i][j] >= 0, "Migration rates in community_seed_migration_rates must be at least 0"
    except KeyError as err:
        # Explicit raise: ``assert False`` is stripped under ``python -O``,
        # which would silently accept a malformed dictionary.
        raise AssertionError(
            "Malformed community_seed_migration_rates dictionary. See FAVITES Wiki for usage information"
        ) from err
    GC.check_seqgen_executable()
def get_edge_list():
    """Build a G(n, p) random contact network, write it to disk and return its edge list.

    The graph comes from ``fast_gnp_random_graph(GC.num_cn_nodes,
    GC.er_prob)``, directed when ``GC.d_or_u == 'd'`` and seeded with
    ``GC.random_number_seed``; a non-``None`` seed is incremented afterwards.
    The converted edge list is written to
    ``<GC.out_dir>/contact_network.txt.gz``.

    Returns:
        The value returned by ``GC.nx2favites``.
    """
    du = GC.d_or_u == 'd'
    cn = fast_gnp_random_graph(GC.num_cn_nodes,
                               GC.er_prob,
                               directed=du,
                               seed=GC.random_number_seed)
    if GC.random_number_seed is not None:
        GC.random_number_seed += 1
    out = GC.nx2favites(cn, GC.d_or_u)
    # ``with`` closes the gzip handle even if a write fails.
    with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb', 9) as f:
        f.write('\n'.join(out).encode())
        f.write(b'\n')
    return out
def sample_time():
    """Return the time of the next transmission event, or ``None`` when the simulation is over.

    Side effects: sets ``GC.next_trans`` to the chosen ``(u, v, t)`` tuple
    (or ``None``), sets ``GC.end_time`` to ``GC.time`` when there is nothing
    left to simulate, and maintains the queue state in ``GC.trans_pq``,
    ``GC.trans_pq_v2trans`` and ``GC.trans_susceptible``.

    NOTE(review): block nesting was reconstructed from a flattened source;
    the drain loop over ``GC.trans_susceptible`` is placed at function level
    here -- confirm against the original file.
    """
    # no more nodes to infect
    if GC.contact_network.num_uninfected_nodes() == 0:
        GC.next_trans = None
        GC.end_time = GC.time
        return None

    # create priority queue
    if GC.trans_pq is None:
        GC.trans_pq = GC.SortedLinkedList()
        GC.trans_pq_v2trans = dict()
        GC.trans_susceptible = set()

    # attempt to fill priority queue
    if len(GC.trans_pq) == 0:
        # collect every uninfected node reachable by one edge from an infected node
        for node in GC.contact_network.get_infected_nodes():
            for edge in GC.contact_network.get_edges_from(node):
                neighbor = edge.get_to()
                if not neighbor.is_infected():
                    GC.trans_susceptible.add(neighbor)
    # schedule one candidate infection per pending susceptible node
    while len(GC.trans_susceptible) > 0:
        v = GC.trans_susceptible.pop()
        infected_neighbors = [
            edge.get_from() for edge in GC.contact_network.get_edges_to(v)
            if edge.get_from().is_infected()
        ]
        if len(infected_neighbors) > 0:
            # the infector is picked first, then the waiting time is drawn
            u = choice(infected_neighbors)
            t = GC.time + exponential(
                scale=1 / (GC.infection_rate * len(infected_neighbors))
            )  # min of exponentials is exponential with sum of rates
            GC.trans_pq.put(v, t)
            GC.trans_pq_v2trans[v] = (u, v, t)

    # if failed to fill priority queue, simulation is done
    if len(GC.trans_pq) == 0:
        GC.next_trans = None
        GC.end_time = GC.time
        return None

    # get next transmission event
    v = GC.trans_pq.getFront()
    u, v, t = GC.trans_pq_v2trans[v]
    GC.next_trans = (u, v, t)
    # the event is consumed: drop its bookkeeping entry
    del GC.trans_pq_v2trans[v]
    return t
def subsample_transmission_network():
    """Pick a subset of sequenced nodes, weighted by how often they appear in transmissions.

    Each node with an entry in ``GC.final_sequences`` is weighted by the
    number of times it occurs as either endpoint of an event returned by
    ``GC.contact_network.get_transmissions()``. Nodes are then drawn without
    replacement via ``GC.roll`` until ``GC.node_sample_fraction`` times the
    number of weighted nodes have been selected.

    Returns:
        The selected nodes, in the order they were drawn.
    """
    sequenced = {GC.contact_network.get_node(label) for label in GC.final_sequences}

    # Count appearances as either endpoint of a transmission event.
    appearances = {}
    for source, target, _ in GC.contact_network.get_transmissions():
        for endpoint in (source, target):
            appearances[endpoint] = appearances.get(endpoint, 0) + 1

    die = {n: appearances[n] for n in sequenced if n in appearances}
    quota = GC.node_sample_fraction * len(die)

    chosen = []
    while die and len(chosen) < quota:
        pick = GC.roll(die)
        chosen.append(pick)
        die.pop(pick)  # sampling without replacement
    return chosen
def finalize():
    """Load sequences from ``GC.errorfree_sequence_file`` into ``GC.final_sequences``.

    The file is read as gzip when its name ends in ``.gz`` (case-insensitive)
    and as plain text otherwise. Blank lines are dropped and the rest is
    parsed with ``GC.parseFASTA``. Each identifier is split on ``|`` into
    ``(v, n, t)``; entries whose first field is ``DUMMY`` are skipped, and
    the others are appended as ``(v, seq)`` to
    ``GC.final_sequences[n][float(t)]``.

    NOTE(review): nesting was reconstructed from a flattened source; the
    ``hasattr`` check is taken to guard only the dictionary initialisation.
    """
    if not hasattr(GC, 'final_sequences'):
        # GC.final_sequences[cn_node][t] = list of (label,seq) tuples
        GC.final_sequences = {}

    path = GC.errorfree_sequence_file
    # ``with`` so the input handle is closed instead of left to the GC.
    if path.lower().endswith('.gz'):
        from gzip import open as gopen
        with gopen(path) as f:
            lines = [l.decode().strip() for l in f]
    else:
        with open(path) as f:
            lines = [l.strip() for l in f]
    lines = [l for l in lines if len(l) != 0]
    if len(lines) == 0:
        return

    seqs = GC.parseFASTA(lines)
    for ID, seq in seqs.items():
        v, n, t = ID.split('|')
        t = float(t)
        if v == 'DUMMY':
            continue
        GC.final_sequences.setdefault(n, {}).setdefault(t, []).append((v, seq))
def get_edge_list():
    """Build a contact network with a given degree sequence, write it to disk and return its edge list.

    The graph comes from ``random_degree_sequence_graph(GC.cn_degree_sequence,
    tries=GC.cn_tries)`` seeded with ``GC.random_number_seed``; a
    non-``None`` seed is incremented afterwards. The converted edge list is
    written to ``<GC.out_dir>/contact_network.txt.gz``.

    Returns:
        The value returned by ``GC.nx2favites``.

    Raises:
        AssertionError: if NetworkX raises ``NetworkXUnfeasible`` or
            ``NetworkXError`` while generating the graph.
    """
    try:
        cn = random_degree_sequence_graph(GC.cn_degree_sequence,
                                          tries=GC.cn_tries,
                                          seed=GC.random_number_seed)
    except NetworkXUnfeasible as err:
        from os import chdir
        chdir(GC.START_DIR)
        # Explicit raise: ``assert False`` is stripped under ``python -O``
        # and would leave ``cn`` unbound below.
        raise AssertionError(
            "Contact network degree sequence is not graphical") from err
    except NetworkXError as err:
        from os import chdir
        chdir(GC.START_DIR)
        raise AssertionError(
            "NetworkX failed to produce graph after %d tries" % GC.cn_tries
        ) from err
    if GC.random_number_seed is not None:
        GC.random_number_seed += 1
    out = GC.nx2favites(cn, 'u')
    # ``with`` closes the gzip handle even if a write fails.
    with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb', 9) as f:
        f.write('\n'.join(out).encode())
        f.write(b'\n')
    return out
def run(path, ORIG_CONFIG):
    '''
    Simulation driver. Even if you add your own modules, you probably shouldn't
    need to modify this function. The one clear exception would be if your
    module requires additional user input (e.g. custom evolution model modules),
    which would then require you to call it with the required arguments.

    Stages, in order: initialise every module, create/enter the output
    directory, build the contact network, select and infect seed nodes, run
    the transmission loop, sample patients in time, evolve trees/sequences to
    the end time, write trees and sequences, subsample nodes, apply sequencing
    error, and finally log output size, run time and citations.

    path: stored as GC.FAVITES_DIR.
    ORIG_CONFIG: text written verbatim to CONFIG.json in the output directory.

    NOTE(review): block nesting was reconstructed from a flattened source;
    in particular the overwrite decision is placed after the prompt loop --
    confirm against the original file.
    '''

    # store starting directory
    GC.FAVITES_START_TIME = time()
    GC.FAVITES_DIR = path
    if GC.VERBOSE:
        print('[%s] FAVITES Driver starting' % datetime.now(), file=stderr)
    GC.START_DIR = getcwd()

    # load modules
    for module in MF.modules:
        MF.modules[module].init()
    LOG = MF.modules['Logging']

    # set up environment
    orig_dir = getcwd()
    try:
        makedirs(GC.out_dir)
    except:
        if 'FAVITES_DOCKER' not in environ:  # bypass error (Docker makes the folder automatically)
            if isdir(abspath(expanduser(GC.out_dir))):
                if GC.VERBOSE:
                    print('[%s] Output directory exists: %s' % (datetime.now(), environ['out_dir_print']), file=stderr)
                # keep prompting until the answer starts with 'y' or 'n'
                response = 'x'
                while len(response) == 0 or response[0] not in {'y', 'n'}:
                    response = input(
                        "ERROR: Output directory exists. Overwrite? All contents will be deleted. (y/n) "
                    ).strip().lower()
                if response[0] == 'y':
                    from shutil import rmtree
                    rmtree(GC.out_dir)
                    makedirs(GC.out_dir)
                else:
                    exit(-1)
            else:
                LOG.writeln("ERROR: Unable to create the output directory")
                exit(-1)
    # everything below uses paths relative to the output directory
    chdir(GC.out_dir)
    f = open('CONFIG.json', 'w')
    f.write(ORIG_CONFIG)
    f.close()

    # begin simulation
    printMessage(LOG)
    LOG.writeln(
        "======================== Simulation Process ========================"
    )
    if GC.VERBOSE:
        print('[%s] Starting simulation' % datetime.now(), file=stderr)
    makedirs("error_free_files", exist_ok=True)
    makedirs("error_free_files/phylogenetic_trees", exist_ok=True)
    makedirs("error_prone_files", exist_ok=True)

    # create ContactNetwork object
    LOG.write("Loading contact network...")
    if GC.VERBOSE:
        print('[%s] Loading contact network' % datetime.now(), file=stderr)
    GC.cn_edge_list = MF.modules['ContactNetworkGenerator'].get_edge_list()
    LOG.writeln(" done")
    LOG.write("Creating ContactNetwork object...")
    if GC.VERBOSE:
        print('[%s] Initializing ContactNetwork object...' % datetime.now(), file=stderr)
    contact_network = MF.modules['ContactNetwork'](GC.cn_edge_list)
    assert isinstance(contact_network, MF.module_abstract_classes['ContactNetwork']
                      ), "contact_network is not a ContactNetwork object"
    assert contact_network.num_nodes(
    ) > 1, "ContactNetwork must have at least 2 nodes"
    assert contact_network.num_edges(
    ) > 0, "ContactNetwork must have at least 1 edge"
    MF.modules['TransmissionNodeSample'].check_contact_network(
        contact_network)
    GC.contact_network = contact_network
    LOG.writeln(" done")

    # select seed nodes
    LOG.write("Selecting seed nodes...")
    if GC.VERBOSE:
        print('[%s] Selecting seed nodes' % datetime.now(), file=stderr)
    GC.seed_nodes = MF.modules['SeedSelection'].select_seeds()
    assert isinstance(GC.seed_nodes, list) or isinstance(
        GC.seed_nodes, set), "seed_nodes is not a list nor a set"
    for node in GC.seed_nodes:
        if GC.VERBOSE:
            print('[%s] Seed\tTime 0\tNode %s' % (datetime.now(), str(node)), file=stderr)
        assert isinstance(
            node,
            MF.module_abstract_classes['ContactNetworkNode']
        ), "seed_nodes contains items that are not ContactNetworkNode objects"
    LOG.writeln(" done")

    # infect seed nodes
    LOG.write("Infecting seed nodes...")
    if GC.VERBOSE:
        print('[%s] Infecting seed nodes' % datetime.now(), file=stderr)
    GC.root_viruses = []
    GC.seed_to_first_virus = {}
    f = gopen('seed_sequences.tsv.gz', 'wb', 9)
    for node in GC.seed_nodes:
        seq = MF.modules['SeedSequence'].generate()
        virus = MF.modules['TreeNode'](time=0.0, seq=seq, contact_network_node=node)
        f.write(('%s\t%s\n' % (virus.get_label(), seq)).encode())
        GC.root_viruses.append(virus)
        node.infect(0.0, virus)
        # seed infections are recorded with source None at time 0
        GC.contact_network.add_transmission(None, node, 0.0)
        GC.seed_to_first_virus[node] = virus
    f.write(b'\n')
    f.close()
    # if a 'seed_sequences' directory exists, the TSV is moved into it
    if isdir('seed_sequences'):
        rename('seed_sequences.tsv.gz', 'seed_sequences/seed_sequences.tsv.gz')
    LOG.writeln(" done")

    # iterative step of transmissions
    LOG.write("Performing transmission simulations...")
    if GC.VERBOSE:
        print('[%s] Performing transmission iterations' % datetime.now(), file=stderr)
    GC.first_time_transmitting = {}
    while True:
        t = MF.modules['TransmissionTimeSample'].sample_time()
        if t is None or MF.modules['EndCriteria'].done():
            break
        assert t >= GC.time, "Transmission cannot go back in time!"
        u, v = MF.modules['TransmissionNodeSample'].sample_nodes(t)
        if u is None and v is None:
            break
        GC.time = t
        if u == v:  # u = v implies uninfection (recovery or death)
            u.uninfect()
            GC.contact_network.add_transmission(u, u, GC.time)
            continue
        elif u is None:  # u = None implies seed infection at time t > 0
            seq = MF.modules['SeedSequence'].generate()
            virus = MF.modules['TreeNode'](time=GC.time, seq=seq, contact_network_node=v)
            GC.root_viruses.append(virus)
            v.infect(GC.time, virus)
            GC.contact_network.add_transmission(None, v, GC.time)
            GC.seed_to_first_virus[v] = virus
            continue
        # regular transmission u -> v: bring both ends up to date, then move one virus
        MF.modules['NodeEvolution'].evolve_to_current_time(u)
        MF.modules['NodeEvolution'].evolve_to_current_time(v)
        virus = MF.modules['SourceSample'].sample_virus(u)
        u.remove_virus(virus)
        if not u.is_infected():
            GC.contact_network.remove_from_infected(u)
        v.infect(GC.time, virus)
        GC.contact_network.add_to_infected(v)
        GC.contact_network.add_transmission(u, v, GC.time)
        if u not in GC.first_time_transmitting:
            GC.first_time_transmitting[u] = GC.time
    GC.transmissions = GC.contact_network.get_transmissions()
    assert isinstance(GC.transmissions, list), "get_transmissions() did not return a list!"
    LOG.writeln(" done")

    # finalize global time
    LOG.write("Finalizing transmission simulations...")
    if GC.VERBOSE:
        print('[%s] Finalizing transmissions/evolution' % datetime.now(), file=stderr)
    MF.modules['EndCriteria'].finalize_time()
    LOG.writeln(" done")

    # write transmission network as edge list
    LOG.write("Writing true transmission network to file...")
    f = gopen('error_free_files/transmission_network.txt.gz', 'wb', 9)
    f.write(
        ('\n'.join("%s\t%s\t%f" % e for e in GC.transmissions)).encode())
    f.write(b'\n')
    f.close()
    LOG.writeln(" done")
    LOG.writeln(
        "True transmission network was written to: %s/error_free_files/transmission_network.txt"
        % environ['out_dir_print'])
    if GC.VERBOSE:
        print('[%s] Wrote transmission network to file' % datetime.now(), file=stderr)

    # perform patient sampling in time (on all infected nodes; will subsample from this later)
    LOG.write("Sampling patients in time...")
    GC.cn_sample_times = {}
    if GC.VERBOSE:
        print('[%s] Performing person sampling (sequencing)' % datetime.now(), file=stderr)
    for node in GC.contact_network.nodes_iter():
        num_times = MF.modules['NumTimeSample'].sample_num_times(node)
        assert num_times >= 0, "Encountered negative number of sampling events"
        times = MF.modules['TimeSample'].sample_times(node, num_times)
        for t in times:
            assert t <= GC.time, "Encountered a patient sampling time larger than the global end time"
        if len(times) != 0:
            GC.cn_sample_times[node] = times
            if GC.VERBOSE:
                print('[%s] Node %s sampled at times %s' % (datetime.now(), str(node), str(times)), file=stderr)
        elif GC.VERBOSE:
            print('[%s] Node %s not sampled' % (datetime.now(), str(node)), file=stderr)
    LOG.writeln(" done")

    # evolve to end time
    LOG.write("Evolving trees and sequences to end time...")
    # snapshot of the infected nodes, taken before any evolution call
    nodes = [node for node in GC.contact_network.get_infected_nodes()]
    # NodeEvolution is invoked once with node=None before the per-node pass
    MF.modules['NodeEvolution'].evolve_to_current_time(None, finalize=True)
    for node in nodes:
        MF.modules['NodeEvolution'].evolve_to_current_time(node, finalize=True)
        MF.modules['SequenceEvolution'].evolve_to_current_time(node)
    LOG.writeln(" done")

    # prune sampled trees
    LOG.write("Pruning sampled trees...")
    if GC.PRUNE_TREES:
        if GC.VERBOSE:
            print('[%s] Pruning sampled trees' % datetime.now(), file=stderr)
        GC.prune_sampled_trees()
    GC.pruned_newick_trees_time = [e for e in GC.sampled_trees
                                   ]  # (rootvirus,treestr) tuples
    LOG.writeln(" done")

    # convert trees from unit of time to unit of mutation rate
    LOG.write("Converting trees from time to mutation rate...")
    if GC.VERBOSE:
        print('[%s] Converting sampled trees from time to mutation rate' % datetime.now(), file=stderr)
    GC.pruned_newick_trees = [
        (e[0], MF.modules['TreeUnit'].time_to_mutation_rate(e[1]))
        for e in GC.pruned_newick_trees_time
    ]
    LOG.writeln(" done")

    # merge cluster trees with seed tree (if applicable)
    LOG.write(
        "Merging true phylogenetic trees with true seed tree (if applicable)..."
    )
    GC.final_tree_to_root_seq = [
        e[0].get_seq() for i, e in enumerate(GC.pruned_newick_trees)
    ]
    GC.merged_trees, GC.merged_trees_time = MF.modules[
        'SeedSequence'].merge_trees()
    LOG.writeln(" done")

    # write phylogenetic trees (expected number of mutations) as Newick files
    LOG.write("Writing true phylogenetic tree(s) to file(s)...")
    # with no merged trees, write the per-root pruned trees; otherwise the merged ones
    if len(GC.merged_trees) == 0:
        for i in range(len(GC.pruned_newick_trees)):
            f = gopen(
                'error_free_files/phylogenetic_trees/tree_%d.tre.gz' % i,
                'wb', 9)
            f.write(GC.pruned_newick_trees[i][1].strip().encode())
            f.write(b'\n')
            f.close()
            f = gopen(
                'error_free_files/phylogenetic_trees/tree_%d.time.tre.gz' % i,
                'wb', 9)
            f.write(GC.pruned_newick_trees_time[i][1].strip().encode())
            f.write(b'\n')
            f.close()
    else:
        for i in range(len(GC.merged_trees)):
            f = gopen(
                'error_free_files/phylogenetic_trees/merged_tree_%d.tre.gz' % i,
                'wb', 9)
            f.write(GC.merged_trees[i].strip().encode())
            f.write(b'\n')
            f.close()
            f = gopen(
                'error_free_files/phylogenetic_trees/merged_tree_%d.time.tre.gz' % i,
                'wb', 9)
            f.write(GC.merged_trees_time[i].strip().encode())
            f.write(b'\n')
            f.close()
    LOG.writeln(" done")
    LOG.writeln(
        "True phylogenetic trees were written to: %s/error_free_files/phylogenetic_trees"
        % environ['out_dir_print'])
    if GC.VERBOSE:
        print(
            '[%s] Wrote phylogenetic trees (expected number of mutations)' % datetime.now(),
            file=stderr)

    # finalize sequence data
    LOG.write("Finalizing sequence simulations...")
    if GC.VERBOSE:
        print('[%s] Finalizing sequences' % datetime.now(), file=stderr)
    MF.modules['SequenceEvolution'].finalize(
    )  # in case the module creates all sequences at the end
    LOG.writeln(" done")

    # write error-free sequence data
    LOG.write("Writing final sequence data to file...")
    f = gopen('error_free_files/sequence_data.fasta.gz', 'wb', 9)
    for cn_label in GC.final_sequences:
        for t in GC.final_sequences[cn_label]:
            for l, s in GC.final_sequences[cn_label][t]:
                f.write((">%s\n%s\n" % (l, s)).encode())
    f.close()
    LOG.writeln(" done")
    LOG.writeln("True sequence data were written to: %s/error_free_files" % environ['out_dir_print'])
    LOG.writeln()
    if GC.VERBOSE:
        print('[%s] Wrote true sequence data' % datetime.now(), file=stderr)

    # introduce real data artifacts
    LOG.writeln(
        "\n======================= Real Data Artifacts ======================="
    )

    # subsample the contact network nodes and write sequences to file
    LOG.write("Subsampling contact network nodes...")
    if GC.VERBOSE:
        print('[%s] Subsampling contact network nodes' % datetime.now(), file=stderr)
    GC.subsampled_nodes = MF.modules[
        'NodeAvailability'].subsample_transmission_network()
    if len(GC.subsampled_nodes) != 0:
        tmp = []  # encoded FASTA records of the subsampled nodes
        rmv = []  # subsampled nodes that have no sequences
        for node in GC.subsampled_nodes:
            cn_label = node.get_name()
            if cn_label in GC.final_sequences:
                for t in GC.final_sequences[cn_label]:
                    for l, s in GC.final_sequences[cn_label][t]:
                        tmp.append((">%s\n%s\n" % (l, s)).encode())
            else:
                rmv.append(node)
        # removal is deferred so the collection is not mutated while iterated
        for n in rmv:
            GC.subsampled_nodes.remove(n)
        if len(tmp) != 0:
            f = gopen(
                'error_prone_files/sequence_data_subsampled_errorfree.fasta.gz',
                'wb', 9)
            for e in tmp:
                f.write(e)
            f.close()
    LOG.writeln(" done")

    # introduce sequencing error
    LOG.write("Simulating sequencing error...")
    for node in GC.subsampled_nodes:
        if GC.VERBOSE:
            print('[%s] Sequencing error for Node %s' % (datetime.now(), str(node)), file=stderr)
        MF.modules['Sequencing'].introduce_sequencing_error(node)
    MF.modules['Sequencing'].finalize()
    LOG.writeln(" done")
    LOG.writeln(
        "Error prone sequence data were written to: %s/error_prone_files" % environ['out_dir_print'])
    LOG.writeln()

    # return to original directory and finish
    chdir(orig_dir)
    if GC.VERBOSE:
        print('[%s] Outputting simulation information' % datetime.now(), file=stderr)
    LOG.writeln(
        "\n=========================== Information ==========================="
    )
    # total size of every file under the output directory
    GC.FAVITES_OUTPUT_SIZE = 0
    for dirpath, dirnames, filenames in walk(GC.out_dir):
        for f in filenames:
            fp = join(dirpath, f)
            GC.FAVITES_OUTPUT_SIZE += getsize(fp)
    LOG.writeln("Output Size (bytes): %d" % GC.FAVITES_OUTPUT_SIZE)
    LOG.writeln("Execution Time (seconds): %d" % (time() - GC.FAVITES_START_TIME))
    if GC.VERBOSE:
        print('[%s] Outputting list of citations' % datetime.now(), file=stderr)
    LOG.writeln(
        "\n\n============================ Citations ============================"
    )
    # each module's cite() may return one string or a set/list of strings
    citations = set()
    for module in MF.modules:
        cite = MF.modules[module].cite()
        if isinstance(cite, str):
            citations.add(cite.strip())
        elif isinstance(cite, set) or isinstance(cite, list):
            for c in cite:
                citations.add(c.strip())
    for citation in sorted(citations):
        LOG.writeln(citation)
    LOG.close()
    if GC.VERBOSE:
        print('[%s] FAVITES Driver finished' % datetime.now(), file=stderr)
def get_edge_list():
    """Build a multi-community contact network, write it to disk and return its lines.

    One Barabasi-Albert graph is generated per entry of ``GC.cng_N``. Its
    ``m`` parameter is drawn from a multinomial over ``1..GC.cng_m`` with
    probabilities ``2a / (i * (i + 1))`` where ``a = (cng_m + 1) / (2 *
    cng_m)``. Node names are prefixed with ``COM<i>-``. Random undirected
    edges between nodes of different communities are then added, their number
    drawn with ``binomial(..., GC.cng_p)``. The result is written to
    ``<GC.out_dir>/contact_network.txt.gz``.

    Compared with the previous implementation, a node pair drawn a second
    time no longer emits a duplicate ``EDGE`` line, and the requested number
    of inter-community edges is capped at the number of available pairs so
    the sampling loop always terminates.

    Returns:
        The ``NODE`` lines followed by the ``EDGE`` lines.

    Raises:
        AssertionError: if a generated edge-list line is neither ``NODE``
            nor ``EDGE``.
    """
    # set things up
    a = (GC.cng_m + 1) / (2 * GC.cng_m)
    # prepend 0 to make non-zero probabilities be indices 1 through M
    probs = [0] + [2 * a / (i * (i + 1)) for i in range(1, GC.cng_m + 1)]
    # one m value per community (the multinomial counts sum to len(GC.cng_N))
    M = [
        m for m, n in enumerate(multinomial(len(GC.cng_N), probs, size=1)[0])
        for _ in range(n)
    ]
    com = []
    for size, m in zip(GC.cng_N, M):
        com.append(
            GC.nx2favites(
                barabasi_albert_graph(size, m, seed=GC.random_number_seed),
                'u'))
        if GC.random_number_seed is not None:
            GC.random_number_seed += 1

    # process disconnected BA graphs (one per community)
    nodes = []
    node_lines = []
    edge_lines = []
    for i, g in enumerate(com):
        nodes.append([])
        node_prefix = 'COM%d' % i
        for l in g:
            if len(l) == 0 or l[0] == '#':
                continue
            parts = l.split()
            assert parts[0] in {'NODE', 'EDGE'}, "Invalid FAVITES edge list encountered"
            if parts[0] == 'NODE':
                name = "%s-%s" % (node_prefix, parts[1])
                nodes[-1].append(name)
                node_lines.append("NODE\t%s\t%s" % (name, parts[2]))
            else:
                u = "%s-%s" % (node_prefix, parts[1])
                v = "%s-%s" % (node_prefix, parts[2])
                edge_lines.append("EDGE\t%s\t%s\t%s\t%s" %
                                  (u, v, parts[3], parts[4]))

    # add edges between communities
    # number of ordered (u, v) pairs whose endpoints lie in different communities
    possible_num_er_edges = sum(len(c) for c in nodes)**2 - sum(
        len(c)**2 for c in nodes)
    if len(nodes) == 1:
        num_er_edges = 0  # only 1 community
    else:
        num_er_edges = 2 * binomial(
            possible_num_er_edges,
            GC.cng_p)  # multiply by 2 for bidirectionality
    # NOTE(review): the doubling above combined with the 2x target below looks
    # like it over-counts relative to a plain G(n, p) between communities;
    # the scaling is left unchanged here -- confirm the intended density.
    # ``er_edges`` can never hold more than every ordered pair, so cap the
    # target; without the cap a large ``cng_p`` made the loop spin forever.
    target = min(2 * num_er_edges, possible_num_er_edges)
    er_edges = set()
    er_choice_indices = list(range(len(nodes)))
    while len(er_edges) < target:
        i = choice(er_choice_indices)
        j = choice(er_choice_indices)
        while i == j:
            j = choice(er_choice_indices)
        u = choice(nodes[i])
        v = choice(nodes[j])
        if (u, v) in er_edges:
            continue  # pair already connected: do not emit it twice
        er_edges.add((u, v))
        er_edges.add((v, u))
        edge_lines.append("EDGE\t%s\t%s\t.\tu" % (u, v))

    # output final graph
    out = node_lines + edge_lines
    # ``with`` closes the gzip handle even if a write fails.
    with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb', 9) as f:
        f.write('\n'.join(out).encode())
        f.write(b'\n')
    return out
def prep_GEMF():
    """Run GEMF for the HIV-ART model and load its events into ``GC``.

    Steps, in order:

    1. Write the GEMF inputs into ``GC.gemf_out_dir``: ``para.txt``
       (nodal and edge-induced transition matrices plus run settings),
       ``network.txt`` (one ``from<TAB>to`` line per contact-network edge,
       using 1-based GEMF node numbers), ``gemf2orig.json`` (GEMF number ->
       node name, written as a Python dict repr) and ``status.txt`` (one
       initial state number per node).
    2. Run the executable at ``GC.gemf_path`` from inside that directory
       with its standard output redirected to ``log.txt``.
    3. Re-read the matrices from ``para.txt`` and convert every line of
       ``output.txt`` into ``(source name or None, target name, time)``
       tuples appended to ``GC.transmission_file``.

    Side effects: ``GC.gemf_path`` is user-expanded and stripped; every node
    that appears in an edge gets a ``gemf_state`` attribute that is kept up
    to date while the output is replayed; ``GC.transmission_num``,
    ``GC.transmission_state``, ``GC.transmission_file``, ``GC.gemf_ready``
    and ``GC.gemf_num2node`` are set. The working directory is changed to
    ``GC.gemf_out_dir`` for the run and restored afterwards.

    Raises:
        AssertionError: if the ``hiv_freq_*`` settings for the infectious
            states do not add up to ``len(GC.seed_nodes)``, if the GEMF
            executable cannot be found, or if no event was recorded.
    """
    # Row/column positions of the states inside the matrices written below
    # (NS = 0, S = 1, I1 = 2, I2 = 3, I3 = 4, I4 = 5, A1 = 6, A2 = 7,
    #  A3 = 8, A4 = 9, D = 10)
    S, I1, I2, I3, I4, A1, A2, A3, A4, D = range(1, 11)
    num_states = 11
    infectious = ['I1','I2','I3','I4','A1','A2','A3','A4']

    def matrix_row(rates=None):
        # one tab-separated matrix row: `rates` maps column -> rate, every
        # other cell is the literal "0"
        cells = ["0"] * num_states
        for column, rate in (rates or {}).items():
            cells[column] = str(rate)
        return '\t'.join(cells)

    # write GEMF parameter file
    orig_dir = getcwd()
    GC.gemf_path = expanduser(GC.gemf_path.strip())
    makedirs(GC.gemf_out_dir, exist_ok=True)
    with open(GC.gemf_out_dir + "/para.txt",'w') as f:
        # HIV-ART-specific: spontaneous (nodal) transitions, one row per
        # source state in the order NS, S, I1..I4, A1..A4, D
        nodal_rows = [
            {S: GC.hiv_ns_to_s, D: GC.hiv_ns_to_d},                         # NS
            {I1: GC.hiv_s_to_i1_seed, D: GC.hiv_s_to_d},                    # S
            {I2: GC.hiv_i1_to_i2, A1: GC.hiv_i1_to_a1, D: GC.hiv_i1_to_d},  # I1
            {I3: GC.hiv_i2_to_i3, A2: GC.hiv_i2_to_a2, D: GC.hiv_i2_to_d},  # I2
            {I4: GC.hiv_i3_to_i4, A3: GC.hiv_i3_to_a3, D: GC.hiv_i3_to_d},  # I3
            {A4: GC.hiv_i4_to_a4, D: GC.hiv_i4_to_d},                       # I4
            {I1: GC.hiv_a1_to_i1, A2: GC.hiv_a1_to_a2, D: GC.hiv_a1_to_d},  # A1
            {I2: GC.hiv_a2_to_i2, A3: GC.hiv_a2_to_a3, D: GC.hiv_a2_to_d},  # A2
            {I3: GC.hiv_a3_to_i3, A4: GC.hiv_a3_to_a4, D: GC.hiv_a3_to_d},  # A3
            {I4: GC.hiv_a4_to_i4, D: GC.hiv_a4_to_d},                       # A4
            {},                                                             # D
        ]
        f.write("[NODAL_TRAN_MATRIX]\n" + '\n'.join(matrix_row(r) for r in nodal_rows) + "\n\n")

        # one edge-induced matrix per infectious state; the only non-zero
        # entry is the S -> I1 rate caused by a neighbour in that state
        f.write("[EDGED_TRAN_MATRIX]\n")
        for state in infectious:
            rate = getattr(GC, "hiv_s_to_i1_by_%s" % state.lower())
            rows = [matrix_row({I1: rate}) if r == S else matrix_row() for r in range(num_states)]
            f.write('\n'.join(rows) + "\n\n")

        f.write("[STATUS_BEGIN]\n0\n\n")
        f.write("[INDUCER_LIST]\n" + ' '.join([str(GC.gemf_state_to_num[i]) for i in infectious]) + "\n\n")
        f.write("[SIM_ROUNDS]\n1\n\n")
        f.write("[INTERVAL_NUM]\n1\n\n")
        f.write("[MAX_TIME]\n" + str(GC.end_time) + "\n\n")
        f.write("[MAX_EVENTS]\n" + str(GC.C_INT_MAX) + "\n\n")
        f.write("[DIRECTED]\n" + str(int(GC.contact_network.is_directed())) + "\n\n")
        f.write("[SHOW_INDUCER]\n1\n\n")
        # the same network file is listed once per inducer state
        f.write("[DATA_FILE]\n" + '\n'.join(["network.txt"]*len(infectious)) + "\n\n")
        f.write("[STATUS_FILE]\nstatus.txt\n\n")
        if GC.random_number_seed is not None:
            f.write("[RANDOM_SEED]\n%d\n\n"%GC.random_number_seed)
        f.write("[OUT_FILE]\noutput.txt")

    # write GEMF network file; nodes are numbered from 1 in order of first
    # appearance, so only nodes that occur in at least one edge get a number
    num2node = {}
    node2num = {}
    with open(GC.gemf_out_dir + "/network.txt",'w') as f:
        for edge in GC.contact_network.edges_iter():
            u = edge.get_from()
            v = edge.get_to()
            for endpoint in (u, v):
                if endpoint not in node2num:
                    num = len(node2num) + 1
                    node2num[endpoint] = num
                    num2node[num] = endpoint
            f.write(str(node2num[u]) + '\t' + str(node2num[v]) + '\n')

    # write GEMF to original mapping (Python dict repr, not strict JSON)
    with open(GC.gemf_out_dir + "/gemf2orig.json",'w') as f:
        f.write(str({num:num2node[num].get_name() for num in num2node}))

    def initial_count(state):
        # hiv_freq_<state> is either a fraction of the numbered nodes
        # (float) or an absolute count (anything else)
        freq = getattr(GC, "hiv_freq_%s" % state.lower())
        if isinstance(freq, float):
            return int(len(num2node) * freq)
        return freq

    # write GEMF status file (NS = 0, S = 1, I1 = 2, I2 = 3, I3 = 4, I4 = 5, A1 = 6, A2 = 7, A3 = 8, A4 = 9, D = 10)
    leftover = len(num2node)
    start_states = {'seed':[], 'other':[]}
    for s in infectious:
        n = initial_count(s)
        start_states['seed'] += [GC.gemf_state_to_num[s]]*n
        leftover -= n
    assert len(start_states['seed']) == len(GC.seed_nodes), "At time 0, A1+A2+A3+A4+I1+I2+I3+I4 = %d, but there are %d seed nodes. Fix hiv_freq_* parameters accordingly" % (len(start_states['seed']),len(GC.seed_nodes))
    for s in ['NS','S']:
        n = initial_count(s)
        start_states['other'] += [GC.gemf_state_to_num[s]]*n
        leftover -= n
    # whatever is not accounted for by the hiv_freq_* settings starts in D
    start_states['other'] += [GC.gemf_state_to_num['D']]*leftover
    shuffle(start_states['seed']); shuffle(start_states['other'])
    seeds = set(GC.seed_nodes)
    with open(GC.gemf_out_dir + "/status.txt",'w') as f:
        for num in sorted(num2node.keys()):
            node = num2node[num]
            # seed nodes draw from the infectious pool, all others from the rest
            pool = start_states['seed'] if node in seeds else start_states['other']
            s = pool.pop()
            f.write("%d\n"%s)
            node.gemf_state = s

    # run GEMF (it reads its inputs relative to the current directory)
    chdir(GC.gemf_out_dir)
    try:
        with open("log.txt",'w') as log:
            call([GC.gemf_path], stdout=log)
    except FileNotFoundError:
        chdir(GC.START_DIR)
        assert False, "GEMF executable was not found: %s" % GC.gemf_path
    chdir(orig_dir)

    # reload edge-based matrices for ease of use
    with open(GC.gemf_out_dir + '/para.txt') as para_file:
        para = para_file.read().strip()
    nodal_text = para[para.index('[NODAL_TRAN_MATRIX]'):para.index('\n\n[EDGED_TRAN_MATRIX]')].replace('[NODAL_TRAN_MATRIX]\n','')
    outside_infection_matrix = [[float(e) for e in l.split()] for l in nodal_text.splitlines()]
    edged_text = para[para.index('[EDGED_TRAN_MATRIX]'):para.index('\n\n[STATUS_BEGIN]')].replace('[EDGED_TRAN_MATRIX]\n','')
    edge_matrices = [[[float(e) for e in l.split()] for l in m.splitlines()] for m in edged_text.split('\n\n')]
    # matrices[state number of the inducer][pre][post] -> rate
    matrices = {GC.gemf_state_to_num[infectious[i]]:edge_matrices[i] for i in range(len(infectious))}
    matrices[GC.gemf_state_to_num['S']] = outside_infection_matrix

    # convert GEMF output to FAVITES transmission network format
    GC.transmission_num = 0
    GC.transmission_state = set() # 'node' and 'time'
    num_infected = len(seeds)
    GC.transmission_file = []
    with open(GC.gemf_out_dir + "/output.txt") as events:
        for line in events:
            # fixed 17-column layout; the tuple unpacking rejects any other width
            t,rate,vNum,pre,post,num0,num1,num2,num3,num4,num5,num6,num7,num8,num9,num10,lists = line.split()
            pre,post = int(pre),int(post)
            vName = num2node[int(vNum)].get_name()

            # the last column is a comma-joined sequence of bracketed inducer
            # lists; re-attach the brackets removed by the split
            lists = lists.split('],[')
            lists[0] += ']'
            lists[-1] = '[' + lists[-1]
            for i in range(1,len(lists)-1):
                if '[' not in lists[i]:
                    lists[i] = '[' + lists[i] + ']'
            # NOTE: eval() is applied to text produced by the GEMF executable;
            # ast.literal_eval would be the safer choice if these lists only
            # ever contain integers
            lists = [eval(l) for l in lists]
            uNums = []
            for l in lists:
                uNums.extend(l)

            if post == GC.gemf_state_to_num['D']:
                # recorded as a self-edge (node, node, time)
                num_infected -= 1
                GC.transmission_file.append((vName,vName,float(t)))
                if GC.VERBOSE:
                    print('[%s] Uninfection\tTime %s\tNode %s (%s->%s)\tTotal Infected: %d\tTotal Uninfected: %d' % (datetime.now(),t,vName,GC.gemf_num_to_state[pre],GC.gemf_num_to_state[post],num_infected,len(num2node)-num_infected), file=stderr)
            elif GC.gemf_num_to_state[pre] == 'S' and GC.gemf_num_to_state[post] == 'I1':
                num_infected += 1
                v = num2node[int(vNum)]
                uNodes = [num2node[num] for num in uNums]
                uRates = [matrices[uNode.gemf_state][pre][post] for uNode in uNodes]
                die = {uNodes[i]:GC.prob_exp_min(i, uRates) for i in range(len(uNodes))}
                if len(die) != 0:
                    u = GC.roll(die) # roll die weighted by exponential infectious rates
                    uName = u.get_name()
                    if GC.VERBOSE:
                        print('[%s] Infection\tTime %s\tFrom Node %s (%s)\tTo Node %s (%s->%s)\tTotal Infected: %d\tTotal Uninfected: %d' % (datetime.now(),t,uName,GC.gemf_num_to_state[u.gemf_state],vName,GC.gemf_num_to_state[pre],GC.gemf_num_to_state[post],num_infected,len(num2node)-num_infected), file=stderr)
                else: # no inducer listed: new seed
                    uName = None
                    if GC.VERBOSE:
                        print('[%s] Seed\tTime %s\tNode %s\tTotal Infected: %d\tTotal Uninfected: %d' % (datetime.now(),t,vName,num_infected,len(num2node)-num_infected), file=stderr)
                GC.transmission_file.append((uName,v.get_name(),float(t)))
            elif GC.VERBOSE:
                print('[%s] Transition\tTime %s\tNode %s (%s->%s)' % (datetime.now(),t,vName,GC.gemf_num_to_state[pre],GC.gemf_num_to_state[post]), file=stderr)
            # keep the node's state current so later inducer rates are looked
            # up in the right matrix
            num2node[int(vNum)].gemf_state = post
    assert len(GC.transmission_file) != 0, "GEMF didn't output any transmissions"
    GC.gemf_ready = True
    GC.gemf_num2node = num2node
def prep_GEMF():
    """Run GEMF for the PopART HIV model and load its events into ``GC``.

    Steps, in order:

    1. Check that every contact-network node carries consistent
       MALE/FEMALE and CIRCUMCISED/UNCIRCUMCISED attributes.
    2. Write the GEMF inputs into ``GC.gemf_out_dir``: ``para.txt``
       (41x41 nodal matrix, one 41x41 edge-induced matrix per infectious
       state, and the run settings), ``network.txt`` (one ``from<TAB>to``
       line per edge, using 1-based GEMF node numbers), ``gemf2orig.json``
       (GEMF number -> node name, written as a Python dict repr) and
       ``status.txt`` (one initial state number per node).
    3. Run the executable at ``GC.gemf_path`` from inside that directory
       with its standard output redirected to ``log.txt``.
    4. Re-read the edge-induced matrices from ``para.txt`` and convert the
       lines of ``output.txt`` into ``(source name or None, target name,
       time)`` tuples appended to ``GC.transmission_file``.

    Side effects: ``GC.gemf_path`` is user-expanded and stripped; every node
    that appears in an edge gets a ``gemf_state`` attribute that is kept up
    to date while the output is replayed; ``GC.transmission_num``,
    ``GC.transmission_state``, ``GC.transmission_file`` and
    ``GC.gemf_ready`` are set. The working directory is changed to
    ``GC.gemf_out_dir`` for the run and restored afterwards.

    Raises:
        AssertionError: if a node has inconsistent attributes, if the GEMF
            executable cannot be found, or if no event was recorded.
    """
    # check for attributes in contact network nodes
    # NOTE(review): the `in` tests below are only reliable if get_attribute()
    # returns a collection of attribute strings; on a plain string 'MALE'
    # would also match 'FEMALE' -- confirm against the node class
    for node in GC.contact_network.nodes_iter():
        attr = node.get_attribute()
        assert 'MALE' in attr or 'FEMALE' in attr, "All nodes must have MALE or FEMALE in their attributes"
        assert not ('MALE' in attr and 'FEMALE' in attr), "Nodes cannot be both MALE and FEMALE"
        if 'MALE' in attr:
            assert 'CIRCUMCISED' in attr or 'UNCIRCUMCISED' in attr, "MALE nodes must be either CIRCUMCISED or UNCIRCUMCISED"
        else:
            assert 'CIRCUMCISED' not in attr, "FEMALE nodes cannot be CIRCUMCISED"

    male_infectious = [
        'MIAH', 'MIA', 'MI1', 'MI2', 'MI3', 'MI4', 'MJ1', 'MJ2', 'MJ3', 'MJ4',
        'MT1', 'MT2', 'MT3', 'MT4', 'MA1', 'MA2', 'MA3', 'MA4',
    ]
    female_infectious = [
        'FIA', 'FI1', 'FI2', 'FI3', 'FI4', 'FJ1', 'FJ2', 'FJ3', 'FJ4',
        'FT1', 'FT2', 'FT3', 'FT4', 'FA1', 'FA2', 'FA3', 'FA4',
    ]
    infectious = male_infectious + female_infectious

    # Row/column order of the 41 states inside every matrix written below.
    # Presumably identical to GC.gemf_state_to_num -- TODO confirm.
    state_order = (['MSU', 'MSPC', 'MSCH', 'MSC'] + male_infectious
                   + ['FS'] + female_infectious + ['D'])
    column = {name: i for i, name in enumerate(state_order)}

    # Spontaneous transitions: source state -> target states. The rate of
    # each one is read from GC.hiv_<source>_to_<target> (lower-cased).
    nodal_targets = {
        'MSU': ['MSPC', 'D'],
        'MSPC': ['MSCH', 'D'],
        'MSCH': ['MSC', 'D'],
        'MSC': ['D'],
        'MIAH': ['MIA', 'D'],
        'MIA': ['MI1', 'MI2', 'MI3', 'MI4', 'D'],
        'MI1': ['MI2', 'MJ1', 'D'],
        'MI2': ['MI3', 'MJ2', 'D'],
        'MI3': ['MI4', 'MJ3', 'D'],
        'MI4': ['MJ4', 'D'],
        'MJ1': ['MJ2', 'MT1', 'D'],
        'MJ2': ['MJ3', 'MT2', 'D'],
        'MJ3': ['MJ4', 'MT3', 'D'],
        'MJ4': ['MT4', 'D'],
        'MT1': ['MI1', 'MT2', 'MA1', 'D'],
        'MT2': ['MI2', 'MT3', 'MA2', 'D'],
        'MT3': ['MI3', 'MT4', 'MA3', 'D'],
        'MT4': ['MI4', 'MA4', 'D'],
        'MA1': ['MI1', 'MA2', 'D'],
        'MA2': ['MI2', 'MA3', 'D'],
        'MA3': ['MI3', 'MA4', 'D'],
        'MA4': ['MI4', 'D'],
        'FS': ['D'],
        'FIA': ['FI1', 'FI2', 'FI3', 'FI4', 'D'],
        'FI1': ['FI2', 'FJ1', 'D'],
        'FI2': ['FI3', 'FJ2', 'D'],
        'FI3': ['FI4', 'FJ3', 'D'],
        'FI4': ['FJ4', 'D'],
        'FJ1': ['FJ2', 'FT1', 'D'],
        'FJ2': ['FJ3', 'FT2', 'D'],
        'FJ3': ['FJ4', 'FT3', 'D'],
        'FJ4': ['FT4', 'D'],
        'FT1': ['FI1', 'FT2', 'FA1', 'D'],
        'FT2': ['FI2', 'FT3', 'FA2', 'D'],
        'FT3': ['FI3', 'FT4', 'FA3', 'D'],
        'FT4': ['FI4', 'FA4', 'D'],
        'FA1': ['FI1', 'FA2', 'D'],
        'FA2': ['FI2', 'FA3', 'D'],
        'FA3': ['FI3', 'FA4', 'D'],
        'FA4': ['FI4', 'D'],
        'D': [],
    }
    # Edge-induced transitions: susceptible source -> state entered on
    # infection. Rates come from GC.hiv_<source>_to_<target>_by_<inducer>.
    induced_target = {
        'MSU': 'MIA',
        'MSPC': 'MIA',
        'MSCH': 'MIAH',
        'MSC': 'MIA',
        'FS': 'FIA',
    }

    def matrix_row(source, targets, suffix=''):
        # one newline-terminated, tab-separated matrix row for `source`:
        # the listed targets get their GC rate, every other cell is "0"
        cells = ['0'] * len(state_order)
        for target in targets:
            rate = getattr(GC, 'hiv_%s_to_%s%s' % (source.lower(), target.lower(), suffix))
            cells[column[target]] = str(rate)
        return '\t'.join(cells) + '\n'

    # write GEMF parameter file
    orig_dir = getcwd()
    GC.gemf_path = expanduser(GC.gemf_path.strip())
    makedirs(GC.gemf_out_dir, exist_ok=True)
    with open(GC.gemf_out_dir + "/para.txt", 'w') as f:
        f.write("[NODAL_TRAN_MATRIX]\n")
        for source in state_order:
            f.write(matrix_row(source, nodal_targets[source]))
        f.write("\n")

        f.write("[EDGED_TRAN_MATRIX]\n")
        for inducer in infectious:
            suffix = '_by_' + inducer.lower()
            for source in state_order:
                targets = [induced_target[source]] if source in induced_target else []
                f.write(matrix_row(source, targets, suffix))
            # blank line separates consecutive matrices
            f.write('\n')

        f.write("[STATUS_BEGIN]\n0\n\n")
        f.write("[INDUCER_LIST]\n" + ' '.join([str(GC.gemf_state_to_num[i]) for i in infectious]) + "\n\n")
        f.write("[SIM_ROUNDS]\n1\n\n")
        f.write("[INTERVAL_NUM]\n1\n\n")
        f.write("[MAX_TIME]\n" + str(GC.end_time) + "\n\n")
        f.write("[MAX_EVENTS]\n" + str(GC.C_INT_MAX) + "\n\n")
        f.write("[DIRECTED]\n" + str(int(GC.contact_network.is_directed())) + "\n\n")
        f.write("[SHOW_INDUCER]\n1\n\n")
        # the same network file is listed once per inducer state
        f.write("[DATA_FILE]\n" + '\n'.join(["network.txt"] * len(infectious)) + "\n\n")
        f.write("[STATUS_FILE]\nstatus.txt\n\n")
        if GC.random_number_seed is not None:
            f.write("[RANDOM_SEED]\n%d\n\n" % GC.random_number_seed)
        f.write("[OUT_FILE]\noutput.txt")

    # write GEMF network file; nodes are numbered from 1 in order of first
    # appearance, so only nodes that occur in at least one edge get a number
    num2node = {}
    node2num = {}
    with open(GC.gemf_out_dir + "/network.txt", 'w') as f:
        for edge in GC.contact_network.edges_iter():
            u = edge.get_from()
            v = edge.get_to()
            for endpoint in (u, v):
                if endpoint not in node2num:
                    num = len(node2num) + 1
                    node2num[endpoint] = num
                    num2node[num] = endpoint
            f.write(str(node2num[u]) + '\t' + str(node2num[v]) + '\n')

    # write GEMF to original mapping (Python dict repr, not strict JSON)
    with open(GC.gemf_out_dir + "/gemf2orig.json", 'w') as f:
        f.write(str({num: num2node[num].get_name() for num in num2node}))

    # write GEMF status file (PopART-specific initial states)
    seeds = set(GC.seed_nodes)
    with open(GC.gemf_out_dir + "/status.txt", 'w') as f:
        for num in sorted(num2node.keys()):
            node = num2node[num]
            attr = node.get_attribute()
            if node in seeds:
                # seed nodes are assumed to be in acute infection
                state_name = 'MIA' if 'MALE' in attr else 'FIA'
            elif 'MALE' in attr:
                state_name = 'MSC' if 'CIRCUMCISED' in attr else 'MSU'
            else:
                state_name = 'FS'
            state_num = GC.gemf_state_to_num[state_name]
            f.write(str(state_num) + "\n")
            node.gemf_state = state_num

    # run GEMF (it reads its inputs relative to the current directory)
    chdir(GC.gemf_out_dir)
    try:
        with open("log.txt", 'w') as log:
            call([GC.gemf_path], stdout=log)
    except FileNotFoundError:
        chdir(GC.START_DIR)
        assert False, "GEMF executable was not found: %s" % GC.gemf_path
    chdir(orig_dir)

    # reload edge-based matrices for ease of use
    with open(GC.gemf_out_dir + '/para.txt') as para_file:
        para = para_file.read().strip()
    edged_text = para[para.index('[EDGED_TRAN_MATRIX]'):para.index('\n\n[STATUS_BEGIN]')].replace('[EDGED_TRAN_MATRIX]\n', '')
    edge_matrices = [[[float(e) for e in l.split()] for l in m.splitlines()] for m in edged_text.split('\n\n')]
    # matrices[state number of the inducer][pre][post] -> rate
    matrices = {
        GC.gemf_state_to_num[infectious[i]]: edge_matrices[i]
        for i in range(len(infectious))
    }

    # convert GEMF output to FAVITES transmission network format
    GC.transmission_num = 0
    GC.transmission_state = set()  # 'node' and 'time'
    GC.transmission_file = []
    with open(GC.gemf_out_dir + "/output.txt") as events:
        for line in events:
            fields = line.split()
            t = fields[0]
            vNum = fields[2]
            pre = int(fields[3])
            post = int(fields[4])

            # the last column is a comma-joined sequence of bracketed inducer
            # lists; re-attach the brackets removed by the split
            lists = fields[-1].split('],[')
            lists[0] += ']'
            lists[-1] = '[' + lists[-1]
            for i in range(1, len(lists) - 1):
                if '[' not in lists[i]:
                    lists[i] = '[' + lists[i] + ']'
            # NOTE: eval() is applied to text produced by the GEMF executable;
            # ast.literal_eval would be the safer choice if these lists only
            # ever contain integers
            lists = [eval(l) for l in lists]
            uNums = []
            for l in lists:
                uNums.extend(l)

            if post == GC.gemf_state_to_num['D']:
                # recorded as a self-edge (node, node, time)
                vName = num2node[int(vNum)].get_name()
                GC.transmission_file.append((vName, vName, float(t)))
            elif len(lists[0]) == 0:
                # NOTE(review): this branch is taken for every non-D event
                # whose first inducer list is empty -- confirm that this is
                # the intended test for an infection event
                v = num2node[int(vNum)]
                uNodes = [num2node[num] for num in uNums]
                uRates = [matrices[uNode.gemf_state][pre][post] for uNode in uNodes]
                die = {uNodes[i]: GC.prob_exp_min(i, uRates) for i in range(len(uNodes))}
                if len(die) != 0:
                    # roll die weighted by exponential infectious rates
                    uName = GC.roll(die).get_name()
                else:  # no inducer listed: new seed
                    uName = None
                GC.transmission_file.append((uName, v.get_name(), float(t)))
            # keep the node's state current so later inducer rates are looked
            # up in the right matrix
            num2node[int(vNum)].gemf_state = post
    assert len(GC.transmission_file) != 0, "GEMF didn't output any transmissions"
    GC.gemf_ready = True
def prep_GEMF():
    """Run a GEMF SEIR simulation and load its events into ``GC``.

    Everything is written to / read from ``GC.gemf_out_dir``:

    1. Write ``para.txt`` (GEMF parameters), ``network.txt`` (edge list in
       1-based GEMF node numbers), ``gemf2orig.json`` (GEMF number -> node
       name, stored as the ``str()`` of a dict rather than strict JSON) and
       ``status.txt`` (initial state of every numbered node).
    2. Run the executable at ``GC.gemf_path`` from inside that directory,
       redirecting its standard output to ``log.txt``.
    3. Re-read the transition matrices from ``para.txt`` and convert
       ``output.txt`` into ``GC.transmission_file``, a list of
       ``(infector_name_or_None, infectee_name, time)`` tuples. A transition
       into state ``R`` is recorded as ``(name, name, time)``.

    Side effects on ``GC``: ``gemf_path`` is stripped and user-expanded;
    ``transmission_num`` is reset to 0, ``transmission_state`` to an empty
    set, ``transmission_file`` is rebuilt, and ``gemf_ready`` is set to True.
    Every numbered node gets a ``gemf_state`` attribute that tracks its
    current GEMF state number.

    Raises:
        AssertionError: if the GEMF executable cannot be found, or if no
            transmission was recorded.
    """
    # write GEMF parameter file
    orig_dir = getcwd()
    GC.gemf_path = expanduser(GC.gemf_path.strip())
    makedirs(GC.gemf_out_dir, exist_ok=True)
    with open(GC.gemf_out_dir + "/para.txt", 'w') as f:
        f.write("[NODAL_TRAN_MATRIX]\n0\t" + str(GC.seir_beta_seed) +
                "\t0\t0\n0\t0\t" + str(GC.seir_lambda) +
                "\t0\n0\t0\t0\t" + str(GC.seir_delta) +
                "\n0\t0\t0\t0\n\n")  # SEIR-specific
        f.write("[EDGED_TRAN_MATRIX]\n0\t" + str(GC.seir_beta_by_i) +
                "\t0\t0\n0\t0\t0\t0\n0\t0\t0\t0\n0\t0\t0\t0\n\n")  # SEIR-specific
        f.write("[STATUS_BEGIN]\n0\n\n")
        f.write("[INDUCER_LIST]\n" + str(GC.gemf_state_to_num['I']) + "\n\n")
        f.write("[SIM_ROUNDS]\n1\n\n")
        f.write("[INTERVAL_NUM]\n1\n\n")
        f.write("[MAX_TIME]\n" + str(GC.end_time) + "\n\n")
        f.write("[MAX_EVENTS]\n" + str(GC.C_INT_MAX) + "\n\n")
        f.write("[DIRECTED]\n" + str(int(GC.contact_network.is_directed())) + "\n\n")
        f.write("[SHOW_INDUCER]\n1\n\n")
        f.write("[DATA_FILE]\nnetwork.txt\n\n")
        f.write("[STATUS_FILE]\nstatus.txt\n\n")
        if GC.random_number_seed is not None:
            f.write("[RANDOM_SEED]\n%d\n\n" % GC.random_number_seed)
        f.write("[OUT_FILE]\noutput.txt")

    # write GEMF network file; nodes are numbered 1, 2, ... in the order in
    # which they are first seen as an edge endpoint, so only nodes that
    # appear in at least one edge receive a number
    num2node = {}
    node2num = {}
    with open(GC.gemf_out_dir + "/network.txt", 'w') as f:
        for edge in GC.contact_network.edges_iter():
            src = edge.get_from()
            dst = edge.get_to()
            for endpoint in (src, dst):
                if endpoint not in node2num:
                    num = len(node2num) + 1
                    node2num[endpoint] = num
                    num2node[num] = endpoint
            f.write(str(node2num[src]) + '\t' + str(node2num[dst]) + '\n')

    # write GEMF to original mapping
    with open(GC.gemf_out_dir + "/gemf2orig.json", 'w') as f:
        f.write(str({num: num2node[num].get_name() for num in num2node}))

    # write GEMF status file (0 = S, 1 = E, 2 = I, 3 = R)
    seeds = set(GC.seed_nodes)
    with open(GC.gemf_out_dir + "/status.txt", 'w') as f:
        for num in sorted(num2node):
            node = num2node[num]
            # seed nodes start infectious, everyone else susceptible
            state = GC.gemf_state_to_num['I' if node in seeds else 'S']  # SEIR-specific
            f.write(str(state) + "\n")
            node.gemf_state = state

    # run GEMF from inside its output directory
    chdir(GC.gemf_out_dir)
    try:
        with open("log.txt", 'w') as log:
            call([GC.gemf_path], stdout=log)
    except FileNotFoundError:
        chdir(GC.START_DIR)
        assert False, "GEMF executable was not found: %s" % GC.gemf_path
    chdir(orig_dir)

    # reload the transition matrices from the parameter file for ease of use
    with open(GC.gemf_out_dir + '/para.txt') as f:
        para = f.read().strip()
    nodal_text = para[para.index('[NODAL_TRAN_MATRIX]'):
                      para.index('\n\n[EDGED_TRAN_MATRIX]')]
    outside_infection_matrix = [
        [float(e) for e in row.split()]
        for row in nodal_text.replace('[NODAL_TRAN_MATRIX]\n', '').splitlines()
    ]
    edged_text = para[para.index('[EDGED_TRAN_MATRIX]'):
                      para.index('\n\n[STATUS_BEGIN]')]
    edge_matrices = [
        [[float(e) for e in row.split()] for row in block.splitlines()]
        for block in edged_text.replace('[EDGED_TRAN_MATRIX]\n', '').split('\n\n')
    ]
    # rate matrix to use for an inducer node, keyed by that node's state
    matrices = {
        GC.gemf_state_to_num['S']: outside_infection_matrix,
        GC.gemf_state_to_num['I']: edge_matrices[0],
    }

    # convert GEMF output to FAVITES transmission network format
    GC.transmission_num = 0
    GC.transmission_state = set()  # 'node' and 'time'
    num_infected = len(seeds)
    GC.transmission_file = []
    recovered_num = GC.gemf_state_to_num['R']
    exposed_num = GC.gemf_state_to_num['E']
    with open(GC.gemf_out_dir + "/output.txt") as output:
        for line in output:
            # exactly 10 whitespace-separated columns are expected; the
            # unpacking raises ValueError on anything else
            t, rate, vNum, pre, post, num0, num1, num2, num3, lists = line.split()
            pre, post = int(pre), int(post)
            vName = num2node[int(vNum)].get_name()

            # the last column holds bracketed inducer lists; splitting on
            # '],[' eats the brackets at the seams, so put them back
            chunks = lists.split('],[')
            chunks[0] += ']'
            chunks[-1] = '[' + chunks[-1]
            for i in range(1, len(chunks) - 1):
                if '[' not in chunks[i]:
                    chunks[i] = '[' + chunks[i] + ']'
            # NOTE(review): eval() executes whatever text GEMF wrote; if this
            # column only ever holds lists of integers, ast.literal_eval
            # would be the safer parser -- confirm before switching.
            inducer_lists = [eval(chunk) for chunk in chunks]
            uNums = [num for inducers in inducer_lists for num in inducers]

            if post == recovered_num:
                num_infected -= 1
                GC.transmission_file.append((vName, vName, float(t)))
                if GC.VERBOSE:
                    print(
                        '[%s] Uninfection\tTime %s\tNode %s (%s->%s)\tTotal Infected: %d\tTotal Uninfected: %d'
                        % (datetime.now(), t, vName,
                           GC.gemf_num_to_state[pre],
                           GC.gemf_num_to_state[post], num_infected,
                           len(num2node) - num_infected),
                        file=stderr)
            elif post == exposed_num:
                num_infected += 1
                v = num2node[int(vNum)]
                uNodes = [num2node[num] for num in uNums]
                uRates = [
                    matrices[uNode.gemf_state][pre][post] for uNode in uNodes
                ]
                die = {
                    uNodes[i]: GC.prob_exp_min(i, uRates)
                    for i in range(len(uNodes))
                }
                if len(die) != 0:
                    # roll die weighted by exponential infectious rates
                    u = GC.roll(die)
                    uName = u.get_name()
                    if GC.VERBOSE:
                        print(
                            '[%s] Infection\tTime %s\tFrom Node %s (%s)\tTo Node %s (%s->%s)\tTotal Infected: %d\tTotal Uninfected: %d'
                            % (datetime.now(), t, uName,
                               GC.gemf_num_to_state[u.gemf_state], vName,
                               GC.gemf_num_to_state[pre],
                               GC.gemf_num_to_state[post], num_infected,
                               len(num2node) - num_infected),
                            file=stderr)
                else:
                    # no inducers listed: new seed. (The original condition
                    # `len(die) == 0 or u == v` could only be reached with an
                    # empty die, so its `u == v` part never ran.)
                    uName = None
                    if GC.VERBOSE:
                        print(
                            '[%s] Seed\tTime %s\tNode %s\tTotal Infected: %d\tTotal Uninfected: %d'
                            % (datetime.now(), t, vName, num_infected,
                               len(num2node) - num_infected),
                            file=stderr)
                GC.transmission_file.append((uName, v.get_name(), float(t)))
            elif GC.VERBOSE:
                print('[%s] Transition\tTime %s\tNode %s (%s->%s)' %
                      (datetime.now(), t, vName, GC.gemf_num_to_state[pre],
                       GC.gemf_num_to_state[post]),
                      file=stderr)
            # update the node's state only after the event was processed, so
            # the rate lookups above see the states before this transition
            num2node[int(vNum)].gemf_state = post
    assert len(
        GC.transmission_file) != 0, "GEMF didn't output any transmissions"
    GC.gemf_ready = True
def init():
    """Initialize this module by delegating to ``GC.pangea_module_check()``."""
    GC.pangea_module_check()
def merge_trees():
    """Return the result of ``GC.merge_trees_seqgen()`` unchanged."""
    merged = GC.merge_trees_seqgen()
    return merged
def prep_GEMF():
    """Run a GEMF gonorrhea simulation and load its events into ``GC``.

    Every contact-network node must carry exactly one of ``MALE`` /
    ``FEMALE`` in its attributes. Everything else is written to / read from
    ``GC.gemf_out_dir``:

    1. Write ``para.txt`` (GEMF parameters with one edge-based matrix per
       infectious state MIS, MIA, FIS, FIA), ``network.txt`` (edge list in
       1-based GEMF node numbers), ``gemf2orig.json`` (GEMF number -> node
       name, stored as the ``str()`` of a dict rather than strict JSON) and
       ``status.txt`` (initial state of every numbered node; seeds start in
       the asymptomatic infected state of their gender).
    2. Run the executable at ``GC.gemf_path`` from inside that directory,
       redirecting its standard output to ``log.txt``.
    3. Re-read the transition matrices from ``para.txt`` and convert
       ``output.txt`` into ``GC.transmission_file``, a list of
       ``(infector_name_or_None, infectee_name, time)`` tuples. A transition
       from an infected state to an uninfected one is recorded as
       ``(name, name, time)``.

    Side effects on ``GC``: ``gemf_path`` is stripped and user-expanded;
    ``transmission_num`` is reset to 0, ``transmission_state`` to an empty
    set, ``transmission_file`` is rebuilt, and ``gemf_ready`` is set to True.
    Every numbered node gets a ``gemf_state`` attribute that tracks its
    current GEMF state number.

    Raises:
        AssertionError: if a node has neither or both gender attributes, if
            the GEMF executable cannot be found, or if no transmission was
            recorded.
    """
    # check for gender attribute in contact network nodes
    for node in GC.contact_network.nodes_iter():
        attr = node.get_attribute()
        assert 'MALE' in attr or 'FEMALE' in attr, "All nodes must have MALE or FEMALE in their attributes"
        assert not ('MALE' in attr and 'FEMALE' in attr), "Nodes cannot be both MALE and FEMALE"

    # write GEMF parameter file
    orig_dir = getcwd()
    GC.gemf_path = expanduser(GC.gemf_path.strip())
    makedirs(GC.gemf_out_dir, exist_ok=True)
    infectious = ['MIS', 'MIA', 'FIS', 'FIA']
    with open(GC.gemf_out_dir + "/para.txt", 'w') as f:
        f.write("[NODAL_TRAN_MATRIX]\n0\t" + str(GC.gon_ma_to_ms) +
                "\t0\t0\t0\t0\t0\t0\n" + str(GC.gon_ms_to_ma) +
                "\t0\t" + str(GC.gon_ms_to_mis_seed) +
                "\t" + str(GC.gon_ms_to_mia_seed) +
                "\t0\t0\t0\t0\n" + str(GC.gon_mis_to_ma) +
                "\t" + str(GC.gon_mis_to_ms) +
                "\t0\t0\t0\t0\t0\t0\n" + str(GC.gon_mia_to_ma) +
                "\t" + str(GC.gon_mia_to_ms) +
                "\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t" + str(GC.gon_fa_to_fs) +
                "\t0\t0\n0\t0\t0\t0\t" + str(GC.gon_fs_to_fa) +
                "\t0\t" + str(GC.gon_fs_to_fis_seed) +
                "\t" + str(GC.gon_fs_to_fia_seed) +
                "\n0\t0\t0\t0\t" + str(GC.gon_fis_to_fa) +
                "\t" + str(GC.gon_fis_to_fs) +
                "\t0\t0\n0\t0\t0\t0\t" + str(GC.gon_fia_to_fa) +
                "\t" + str(GC.gon_fia_to_fs) +
                "\t0\t0\n\n")  # Gonorrhea-specific
        f.write("[EDGED_TRAN_MATRIX]\n")
        # one 8x8 matrix per inducer state, in the order MIS, MIA, FIS, FIA
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t" + str(GC.gon_ms_to_mis_by_mis) +
            "\t" + str(GC.gon_ms_to_mia_by_mis) +
            "\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t" +
            str(GC.gon_fs_to_fis_by_mis) +
            "\t" + str(GC.gon_fs_to_fia_by_mis) +
            "\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t" + str(GC.gon_ms_to_mis_by_mia) +
            "\t" + str(GC.gon_ms_to_mia_by_mia) +
            "\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t" +
            str(GC.gon_fs_to_fis_by_mia) +
            "\t" + str(GC.gon_fs_to_fia_by_mia) +
            "\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t" + str(GC.gon_ms_to_mis_by_fis) +
            "\t" + str(GC.gon_ms_to_mia_by_fis) +
            "\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t" +
            str(GC.gon_fs_to_fis_by_fis) +
            "\t" + str(GC.gon_fs_to_fia_by_fis) +
            "\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t" + str(GC.gon_ms_to_mis_by_fia) +
            "\t" + str(GC.gon_ms_to_mia_by_fia) +
            "\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t" +
            str(GC.gon_fs_to_fis_by_fia) +
            "\t" + str(GC.gon_fs_to_fia_by_fia) +
            "\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n\n")
        f.write("[STATUS_BEGIN]\n0\n\n")
        f.write("[INDUCER_LIST]\n" +
                ' '.join([str(GC.gemf_state_to_num[e]) for e in infectious]) +
                "\n\n")
        f.write("[SIM_ROUNDS]\n1\n\n")
        f.write("[INTERVAL_NUM]\n1\n\n")
        f.write("[MAX_TIME]\n" + str(GC.end_time) + "\n\n")
        f.write("[MAX_EVENTS]\n" + str(GC.C_INT_MAX) + "\n\n")
        f.write("[DIRECTED]\n1\n\n")
        f.write("[SHOW_INDUCER]\n1\n\n")
        # the same network file is listed four times -- presumably once per
        # inducer state; confirm against the GEMF parameter format
        f.write(
            "[DATA_FILE]\nnetwork.txt\nnetwork.txt\nnetwork.txt\nnetwork.txt\n\n"
        )
        f.write("[STATUS_FILE]\nstatus.txt\n\n")
        if GC.random_number_seed is not None:
            f.write("[RANDOM_SEED]\n%d\n\n" % GC.random_number_seed)
        f.write("[OUT_FILE]\noutput.txt")

    # write GEMF network file; nodes are numbered 1, 2, ... in the order in
    # which they are first seen as an edge endpoint, so only nodes that
    # appear in at least one edge receive a number
    num2node = {}
    node2num = {}
    with open(GC.gemf_out_dir + "/network.txt", 'w') as f:
        for edge in GC.contact_network.edges_iter():
            src = edge.get_from()
            dst = edge.get_to()
            for endpoint in (src, dst):
                if endpoint not in node2num:
                    num = len(node2num) + 1
                    node2num[endpoint] = num
                    num2node[num] = endpoint
            f.write(str(node2num[src]) + '\t' + str(node2num[dst]) + '\n')

    # write GEMF to original mapping
    with open(GC.gemf_out_dir + "/gemf2orig.json", 'w') as f:
        f.write(str({num: num2node[num].get_name() for num in num2node}))

    # write GEMF status file (MA = 0, MS = 1, MIS = 2, MIA = 3, FA = 4, FS = 5, FIS = 6, FIA = 7)
    seeds = set(GC.seed_nodes)  # seed nodes are assumed to be asymptomatic
    with open(GC.gemf_out_dir + "/status.txt", 'w') as f:
        for num in sorted(num2node):
            node = num2node[num]
            attr = node.get_attribute()
            if node in seeds:
                state_name = 'MIA' if 'MALE' in attr else 'FIA'
            else:
                state_name = 'MS' if 'MALE' in attr else 'FS'
            state = GC.gemf_state_to_num[state_name]  # Gonorrhea-specific
            f.write(str(state) + "\n")
            node.gemf_state = state

    # run GEMF from inside its output directory
    chdir(GC.gemf_out_dir)
    try:
        with open("log.txt", 'w') as log:
            call([GC.gemf_path], stdout=log)
    except FileNotFoundError:
        chdir(GC.START_DIR)
        assert False, "GEMF executable was not found: %s" % GC.gemf_path
    chdir(orig_dir)

    # reload the transition matrices from the parameter file for ease of use
    with open(GC.gemf_out_dir + '/para.txt') as f:
        para = f.read().strip()
    nodal_text = para[para.index('[NODAL_TRAN_MATRIX]'):
                      para.index('\n\n[EDGED_TRAN_MATRIX]')]
    outside_infection_matrix = [
        [float(e) for e in row.split()]
        for row in nodal_text.replace('[NODAL_TRAN_MATRIX]\n', '').splitlines()
    ]
    edged_text = para[para.index('[EDGED_TRAN_MATRIX]'):
                      para.index('\n\n[STATUS_BEGIN]')]
    edge_matrices = [
        [[float(e) for e in row.split()] for row in block.splitlines()]
        for block in edged_text.replace('[EDGED_TRAN_MATRIX]\n', '').split('\n\n')
    ]
    # rate matrix to use for an inducer node, keyed by that node's state
    matrices = {
        GC.gemf_state_to_num[state]: edge_matrices[i]
        for i, state in enumerate(infectious)
    }
    matrices[GC.gemf_state_to_num['MS']] = outside_infection_matrix
    matrices[GC.gemf_state_to_num['FS']] = outside_infection_matrix

    # convert GEMF output to FAVITES transmission network format
    GC.transmission_num = 0
    GC.transmission_state = set()  # 'node' and 'time'
    num_infected = len(seeds)
    GC.transmission_file = []
    # loop-invariant state-number sets, built once instead of once per line
    uninfected_nums = {GC.gemf_state_to_num[s] for s in ['MA', 'MS', 'FA', 'FS']}
    infected_nums = {GC.gemf_state_to_num[s] for s in ['MIS', 'MIA', 'FIS', 'FIA']}
    with open(GC.gemf_out_dir + "/output.txt") as output:
        for line in output:
            # exactly 14 whitespace-separated columns are expected; the
            # unpacking raises ValueError on anything else
            (t, rate, vNum, pre, post, num0, num1, num2, num3, num4, num5,
             num6, num7, lists) = line.split()
            pre, post = int(pre), int(post)
            vName = num2node[int(vNum)].get_name()

            # the last column holds bracketed inducer lists; splitting on
            # '],[' eats the brackets at the seams, so put them back
            chunks = lists.split('],[')
            chunks[0] += ']'
            chunks[-1] = '[' + chunks[-1]
            for i in range(1, len(chunks) - 1):
                if '[' not in chunks[i]:
                    chunks[i] = '[' + chunks[i] + ']'
            # NOTE(review): eval() executes whatever text GEMF wrote; if this
            # column only ever holds lists of integers, ast.literal_eval
            # would be the safer parser -- confirm before switching.
            inducer_lists = [eval(chunk) for chunk in chunks]
            uNums = [num for inducers in inducer_lists for num in inducers]

            if post in uninfected_nums and pre in infected_nums:
                num_infected -= 1
                GC.transmission_file.append((vName, vName, float(t)))
                if GC.VERBOSE:
                    print(
                        '[%s] Uninfection\tTime %s\tNode %s (%s->%s)\tTotal Infected: %d\tTotal Uninfected: %d'
                        % (datetime.now(), t, vName,
                           GC.gemf_num_to_state[pre],
                           GC.gemf_num_to_state[post], num_infected,
                           len(num2node) - num_infected),
                        file=stderr)
            elif GC.gemf_num_to_state[pre] in [
                    'MS', 'FS'
            ] and GC.gemf_num_to_state[post] in ['MIS', 'MIA', 'FIS', 'FIA']:
                num_infected += 1
                v = num2node[int(vNum)]
                uNodes = [num2node[num] for num in uNums]
                uRates = [
                    matrices[uNode.gemf_state][pre][post] for uNode in uNodes
                ]
                die = {
                    uNodes[i]: GC.prob_exp_min(i, uRates)
                    for i in range(len(uNodes))
                }
                if len(die) != 0:
                    # roll die weighted by exponential infectious rates
                    u = GC.roll(die)
                    uName = u.get_name()
                    if GC.VERBOSE:
                        print(
                            '[%s] Infection\tTime %s\tFrom Node %s (%s)\tTo Node %s (%s->%s)\tTotal Infected: %d\tTotal Uninfected: %d'
                            % (datetime.now(), t, uName,
                               GC.gemf_num_to_state[u.gemf_state], vName,
                               GC.gemf_num_to_state[pre],
                               GC.gemf_num_to_state[post], num_infected,
                               len(num2node) - num_infected),
                            file=stderr)
                else:
                    # no inducers listed: new seed. (The original condition
                    # `len(die) == 0 or u == v` could only be reached with an
                    # empty die, so its `u == v` part never ran.)
                    uName = None
                    if GC.VERBOSE:
                        print(
                            '[%s] Seed\tTime %s\tNode %s\tTotal Infected: %d\tTotal Uninfected: %d'
                            % (datetime.now(), t, vName, num_infected,
                               len(num2node) - num_infected),
                            file=stderr)
                GC.transmission_file.append((uName, v.get_name(), float(t)))
            elif GC.VERBOSE:
                print('[%s] Transition\tTime %s\tNode %s (%s->%s)' %
                      (datetime.now(), t, vName, GC.gemf_num_to_state[pre],
                       GC.gemf_num_to_state[post]),
                      file=stderr)
            # update the node's state only after the event was processed, so
            # the rate lookups above see the states before this transition
            num2node[int(vNum)].gemf_state = post
    assert len(
        GC.transmission_file) != 0, "GEMF didn't output any transmissions"
    GC.gemf_ready = True
def init():
    """Initialize dependencies and validate the ``seed_height`` setting.

    Calls ``SeedSequence_Virus.init()`` and
    ``SequenceEvolution_GTRGammaSeqGen.init()``, stores ``GC.seed_height``
    back as a float, asserts that it is strictly positive, and finally runs
    ``GC.check_seqgen_executable()``.
    """
    SeedSequence_Virus.init()
    SequenceEvolution_GTRGammaSeqGen.init()
    height = float(GC.seed_height)
    GC.seed_height = height
    assert height > 0, "seed_height must be positive"
    GC.check_seqgen_executable()