def init():
    """Initialize the modules this one builds on and validate its settings.

    Calls ``SeedSequence_Virus.init()`` and
    ``SequenceEvolution_GTRGammaSeqGen.init()``, stores ``GC.seed_height``
    as a float, and finishes with ``GC.check_seqgen_executable()``.

    Raises:
        AssertionError: if ``GC.seed_height`` is not positive, or if
            ``GC.check_eval_str`` rejects ``GC.seed_speciation_rate_func``.
    """
    SeedSequence_Virus.init()
    SequenceEvolution_GTRGammaSeqGen.init()

    height = float(GC.seed_height)
    GC.seed_height = height
    assert height > 0, "seed_height must be positive"

    # Reject a speciation-rate expression that GC.check_eval_str does not accept.
    assert GC.check_eval_str(
        GC.seed_speciation_rate_func
    ), "Potentially dangerous seed_speciation_rate_func"

    GC.check_seqgen_executable()
 def evolve_to_current_time(node, finalize=False):
     """Bring every virus held by ``node`` up to ``GC.time`` using dualbirth.

     Each virus whose time is behind ``GC.time`` is removed from the node,
     the executable at ``GC.dualbirth_path`` is run for the elapsed time, and
     the children of the resulting tree's root are attached to the virus via
     ``GC.treenode_add_child``. When ``GC.random_number_seed`` is set it is
     passed with ``-s`` and then incremented.

     Args:
         node: node whose viruses are evolved; ``None`` is a no-op.
         finalize: accepted but not read by this implementation.

     Raises:
         AssertionError: if the dualbirth executable cannot be found.
     """
     if node is None:
         return
     # Snapshot the viruses first: the loop removes them from the node.
     for virus in list(node.viruses()):
         elapsed = GC.time - virus.get_time()
         if not elapsed > 0:
             continue
         node.remove_virus(virus)
         try:
             command = [
                 GC.dualbirth_path,
                 str(GC.rate_A),
                 str(GC.rate_B),
                 '-t',
                 str(elapsed),
             ]
             if GC.random_number_seed is not None:
                 command.extend(['-s', str(GC.random_number_seed)])
                 GC.random_number_seed += 1
             treestr = check_output(command).decode()
         except FileNotFoundError:
             from os import chdir
             chdir(GC.START_DIR)
             assert False, "dualbirth executable was not found: %s" % GC.dualbirth_path
         tree = read_tree_newick(treestr)
         # The root's own edge length is absorbed into the existing virus.
         virus.set_time(virus.get_time() + tree.root.edge_length)
         for child in tree.root.children:
             GC.treenode_add_child(virus, child, node)
Exemple #3
0
 def init():
     """Initialize dependencies, validate ``seed_population``, load DendroPy.

     Calls ``SeedSequence_Virus.init()`` and
     ``SequenceEvolution_GTRGammaSeqGen.init()``, stores
     ``GC.seed_population`` as a float, runs
     ``GC.check_seqgen_executable()``, and binds DendroPy's
     ``TaxonNamespace`` and ``treesim`` as module-level globals.

     Raises:
         AssertionError: if ``GC.seed_population`` is not positive or
             DendroPy cannot be loaded.
     """
     global TaxonNamespace, treesim
     SeedSequence_Virus.init()
     SequenceEvolution_GTRGammaSeqGen.init()
     GC.seed_population = float(GC.seed_population)
     assert GC.seed_population > 0, "seed_population must be positive"
     GC.check_seqgen_executable()
     try:
         from dendropy import TaxonNamespace
         from dendropy.simulate import treesim
     except Exception:
         # ``Exception`` instead of a bare ``except`` so that Ctrl-C and
         # SystemExit are not misreported as a missing DendroPy install.
         from os import chdir
         chdir(GC.START_DIR)
         assert False, "Error loading DendroPy. Install with: pip3 install dendropy"
 def init():
     """Initialize dependencies, validate birth/death rates, load DendroPy.

     Calls ``SeedSequence_Virus.init()`` and
     ``SequenceEvolution_GTRGammaSeqGen.init()``, stores
     ``GC.seed_birth_rate`` and ``GC.seed_death_rate`` as floats, runs
     ``GC.check_seqgen_executable()``, and binds DendroPy's ``treesim`` as a
     module-level global.

     Raises:
         AssertionError: if ``GC.seed_birth_rate`` is not positive, if
             ``GC.seed_death_rate`` is negative, or if DendroPy cannot be
             loaded.
     """
     global treesim
     SeedSequence_Virus.init()
     SequenceEvolution_GTRGammaSeqGen.init()
     GC.seed_birth_rate = float(GC.seed_birth_rate)
     assert GC.seed_birth_rate > 0, "seed_birth_rate must be positive"
     GC.seed_death_rate = float(GC.seed_death_rate)
     assert GC.seed_death_rate >= 0, "seed_death_rate must be at least 0"
     GC.check_seqgen_executable()
     try:
         from dendropy.simulate import treesim
     except Exception:
         # ``Exception`` instead of a bare ``except`` so that Ctrl-C and
         # SystemExit are not misreported as a missing DendroPy install.
         from os import chdir
         chdir(GC.START_DIR)
         assert False, "Error loading DendroPy. Install with: pip3 install dendropy"
Exemple #5
0
 def get_edge_list():
     """Generate a barbell contact network, save it, and return it.

     Builds ``barbell_graph(GC.barbell_m1, GC.barbell_m2)``, converts it with
     ``GC.nx2favites(cn, 'u')``, and writes the resulting lines to
     ``<GC.out_dir>/contact_network.txt.gz``. The two communities (node
     indices ``0 .. m1-1`` and ``m1+m2 .. 2*m1+m2-1``) are written to
     ``<GC.out_dir>/contact_network_partitions.txt.gz`` as integer sets, and
     ``GC.cn_communities`` is finally left holding the same sets with the
     indices converted to strings.

     Returns:
         The value returned by ``GC.nx2favites`` (an iterable of strings,
         since it is joined with newlines).
     """
     cn = barbell_graph(GC.barbell_m1, GC.barbell_m2)
     out = GC.nx2favites(cn, 'u')
     # Context managers close the gzip handles even if a write raises.
     with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb',
                9) as f:
         f.write('\n'.join(out).encode())
         f.write(b'\n')
     left_start = 0
     right_start = GC.barbell_m1 + GC.barbell_m2
     GC.cn_communities = [
         {i for i in range(left_start, GC.barbell_m1)},
         {i for i in range(right_start, 2 * GC.barbell_m1 + GC.barbell_m2)},
     ]  # only left and right communities, not the path
     with gopen(
             expanduser("%s/contact_network_partitions.txt.gz" % GC.out_dir),
             'wb', 9) as f:
         f.write(str(GC.cn_communities).encode())
         f.write(b'\n')
     # The file stores integer indices; the in-memory copy uses strings.
     GC.cn_communities = [{str(i) for i in c} for c in GC.cn_communities]
     return out
Exemple #6
0
 def generate():
     """Return one seed sequence, simulating the whole batch on first use.

     On the first call (when ``GC.seed_sequences`` does not exist yet) a root
     sequence is obtained from ``SeedSequence_Virus.generate()``, a tree for
     ``len(GC.seed_nodes)`` leaves is obtained from
     ``GC.mean_kingman_tree``, and Seq-Gen (``GC.seqgen_path``) is run on it.
     The time tree, the Seq-Gen input, its stderr log and its output are
     written under ``OUT_FOLDER``. Every call then pops one sequence from
     ``GC.seed_sequences``.

     Returns:
         str: the next unused seed sequence.

     Raises:
         AssertionError: if Seq-Gen exits with an error, or if no sequences
             are left to hand out.
     """
     if not hasattr(GC, "seed_sequences"):
         rootseq = SeedSequence_Virus.generate()
         treestr = GC.mean_kingman_tree(len(GC.seed_nodes), pop_size=GC.seed_population)
         makedirs(OUT_FOLDER, exist_ok=True)
         with open(OUT_FOLDER + '/time_tree.tre', 'w') as f:
             f.write(treestr)
         treestr = MF.modules['TreeUnit'].time_to_mutation_rate(treestr)
         seqgen_file = OUT_FOLDER + '/seed.txt'
         with open(seqgen_file, 'w') as f:
             f.write("1 %d\nROOT %s\n1\n%s" % (len(rootseq), rootseq, treestr))
         command = [GC.seqgen_path, '-or', '-k1']
         if GC.random_number_seed is not None:
             command += ['-z%d' % GC.random_number_seed]
             GC.random_number_seed += 1
         command += GC.seqgen_args.split()
         try:
             # Open stdin/stderr under ``with`` so both handles are closed
             # whether or not Seq-Gen succeeds.
             with open(seqgen_file) as seqgen_in, \
                     open(OUT_FOLDER + '/log_seqgen.txt', 'w') as seqgen_log:
                 seqgen_out = check_output(
                     command, stdin=seqgen_in, stderr=seqgen_log
                 ).decode('ascii')
             with open(OUT_FOLDER + '/seqgen.out', 'w') as f:
                 f.write(seqgen_out)
         except CalledProcessError as e:
             with open('seqgen.err', 'w') as f:
                 f.write(str(e))
             chdir(GC.START_DIR)
             assert False, "Seq-Gen encountered an error"
         # Skip the first output line; the sequence is the last token of
         # each remaining line.
         GC.seed_sequences = [line.split()[-1].strip() for line in seqgen_out.splitlines()[1:]]
     try:
         return GC.seed_sequences.pop()
     except IndexError:
         assert False, "Late seeds are not supported at this time"
 def sample_times(node, num_times):
     """Draw ``num_times`` sampling times from the windows in which ``node`` was infected.

     ``GC.transmissions`` is scanned as ``(u, v, t)`` tuples. An event with
     ``u == v == node`` closes the current window; a later event with
     ``v == node`` opens a new one. A window still open after the scan ends
     at ``GC.time``. If no window was collected, the single window
     ``(first infection time, GC.time)`` is used. Each draw picks a window
     with ``GC.roll`` using the window lengths as weights (presumably a
     weighted random choice; confirm against ``GC.roll``) and then a uniform
     time inside it.

     Args:
         node: node to sample; must provide ``get_first_infection_time()``.
         num_times: number of times to draw.

     Returns:
         list: ``num_times`` sampled times, or ``[]`` if the node was never
         infected.

     Raises:
         AssertionError: if ``GC.transmissions`` has not been set yet.
     """
     assert hasattr(
         GC, 'transmissions'
     ), "No transmission network found in global context! Run this after the transmission network simulation is done"
     first_time = node.get_first_infection_time()
     if first_time is None:
         return []
     windows = []
     window_start = first_time
     for u, v, t in GC.transmissions:
         if u == node and v == node:
             if window_start is not None and t > window_start:
                 windows.append((window_start, t))
             window_start = None
         elif window_start is None and v == node:
             window_start = t
     # A window still open here runs until the global end time. Compare with
     # GC.time, not the loop variable ``t``: ``t`` is merely the time of the
     # last event in the whole network, so using it dropped the final window
     # whenever that last event was this node's own (re)infection.
     if window_start is not None and GC.time > window_start:
         windows.append((window_start, GC.time))
     if len(windows) == 0:
         windows.append((first_time, GC.time))
     # ``windows`` is never empty at this point, so neither is the die.
     weighted_die = {(start, end): end - start for start, end in windows}
     if len(weighted_die) == 1:
         # A lone window is chosen with certainty, whatever its length.
         weighted_die[next(iter(weighted_die))] = 1
     out = []
     for _ in range(num_times):
         start, end = GC.roll(weighted_die)
         out.append(uniform(start, end))
     return out
Exemple #8
0
 def get_edge_list():
     """Generate a complete-graph contact network, save it, and return it.

     Builds ``complete_graph(GC.num_cn_nodes)``, converts it with
     ``GC.nx2favites(cn, 'u')``, and writes the resulting lines (newline
     separated, with a trailing newline) to
     ``<GC.out_dir>/contact_network.txt.gz``.

     Returns:
         The value returned by ``GC.nx2favites`` (an iterable of strings,
         since it is joined with newlines).
     """
     cn = complete_graph(GC.num_cn_nodes)
     out = GC.nx2favites(cn, 'u')
     # Context manager closes the gzip handle even if a write raises.
     with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb',
                9) as f:
         f.write('\n'.join(out).encode())
         f.write(b'\n')
     return out
Exemple #9
0
    def init():
        """Load Pyvolve and build ``GC.pyvolve_model`` from the ECM settings.

        ``GC.ecm_type`` must be ``"restricted"`` or ``"unrestricted"`` and is
        rewritten to ``'ECMrest'`` / ``'ECMunrest'``. ``GC.ecm_alpha``,
        ``GC.ecm_beta`` and ``GC.ecm_omega`` are optional: a blank string
        leaves the parameter unset, anything else is converted to float.
        A non-empty ``GC.ecm_codon_frequencies_dictionary`` must contain only
        non-STOP codons and sum to 1, and is passed as ``state_freqs``.

        Raises:
            AssertionError: if Pyvolve cannot be loaded or any setting is
                invalid.
            ValueError: if a rate parameter cannot be converted to float.
        """
        try:
            global pyvolve
            import pyvolve
        except Exception:
            # ``Exception`` instead of a bare ``except`` so that Ctrl-C and
            # SystemExit are not misreported as a missing Pyvolve install.
            from os import chdir
            chdir(GC.START_DIR)
            assert False, "Error loading Pyvolve. Install with: pip3 install pyvolve"
        # config validity checks
        custom_model_params = {}
        GC.ecm_type = GC.ecm_type.strip()
        if GC.ecm_type == 'restricted':
            GC.ecm_type = 'ECMrest'
        elif GC.ecm_type == 'unrestricted':
            GC.ecm_type = 'ECMunrest'
        else:
            assert False, 'ecm_type must be "restricted" or "unrestricted"'
        # alpha, beta and omega share one rule: string values are stripped
        # (and stored back on GC); an empty string means "not specified".
        for param in ('alpha', 'beta', 'omega'):
            attr = 'ecm_' + param
            value = getattr(GC, attr)
            if isinstance(value, str):
                value = value.strip()
                setattr(GC, attr, value)
                if len(value) == 0:
                    continue
            custom_model_params[param] = float(value)
        assert isinstance(
            GC.ecm_codon_frequencies_dictionary, dict
        ), "Specified ecm_codon_frequencies_dictionary is not a dictionary"
        if len(GC.ecm_codon_frequencies_dictionary) != 0:
            codons = set(GC.generate_all_kmers(3, 'ACGT'))
            codons.difference_update({'TGA', 'TAA',
                                      'TAG'})  # remove STOP codons
            for key in GC.ecm_codon_frequencies_dictionary:
                # The message was previously never formatted, so users saw a
                # literal "%s" instead of the offending key.
                assert key in codons, "%s is not a valid codon for ecm_codon_frequencies_dictionary. Only include 3-mers of the DNA alphabet, excluding the STOP codons (TGA, TAA, and TAG)" % (key,)
            assert abs(
                sum(GC.ecm_codon_frequencies_dictionary.values()) - 1
            ) < 0.000000001, "Frequencies in ecm_codon_frequencies_dictionary must sum to 1"
            custom_model_params[
                'state_freqs'] = GC.ecm_codon_frequencies_dictionary

        # set up Pyvolve
        if len(custom_model_params) == 0:
            GC.pyvolve_model = pyvolve.Model(GC.ecm_type)
        else:
            GC.pyvolve_model = pyvolve.Model(GC.ecm_type, custom_model_params)
 def init():
     """Validate the Seq-Gen configuration and load TreeSwift.

     Expands ``GC.seqgen_path``, strips ``GC.seqgen_args``, rejects the
     Seq-Gen options ``-d -k -l -n -o -p -s``, requires a ``-m`` model that
     is listed in ``SEQGEN_MODES``, runs ``GC.check_seqgen_executable()``,
     and binds TreeSwift's ``read_tree_newick`` as a module-level global.

     Raises:
         AssertionError: if a rejected option is present, the model is
             missing or invalid, or TreeSwift cannot be loaded.
     """
     global read_tree_newick
     GC.seqgen_path = expanduser(GC.seqgen_path.strip())
     GC.seqgen_args = GC.seqgen_args.strip()
     # NOTE(review): these are substring checks on the whole argument string,
     # so e.g. "-s" also matches inside a longer token that contains it.
     for flag in 'dklnops':
         assert ('-%s' % flag) not in GC.seqgen_args, "Do not use the Seq-Gen -%s argument" % flag
     assert '-m' in GC.seqgen_args, "Must specify a Seq-Gen model using the -m argument"
     # The model is the first space-delimited token after the first "-m".
     mode = GC.seqgen_args.split('-m')[1].strip().split(' ')[0]
     assert mode in SEQGEN_MODES.split(', '), "Invalid Seq-Gen model (%s). Options: %s" % (mode, SEQGEN_MODES)
     GC.check_seqgen_executable()
     try:
         from treeswift import read_tree_newick
     except Exception:
         # ``Exception`` instead of a bare ``except`` so that Ctrl-C and
         # SystemExit are not misreported as a missing TreeSwift install.
         from os import chdir
         chdir(GC.START_DIR)
         assert False, "Error loading TreeSwift. Install with: pip3 install treeswift"
 def get_edge_list():
     """Generate a Barabasi-Albert contact network, save it, and return it.

     Builds ``barabasi_albert_graph(GC.num_cn_nodes, GC.num_edges_from_new)``
     seeded with ``GC.random_number_seed`` (which is then incremented when it
     is not ``None``), converts it with ``GC.nx2favites(cn, 'u')``, and
     writes the resulting lines to ``<GC.out_dir>/contact_network.txt.gz``.

     Returns:
         The value returned by ``GC.nx2favites`` (an iterable of strings,
         since it is joined with newlines).
     """
     cn = barabasi_albert_graph(GC.num_cn_nodes,
                                GC.num_edges_from_new,
                                seed=GC.random_number_seed)
     if GC.random_number_seed is not None:
         GC.random_number_seed += 1
     out = GC.nx2favites(cn, 'u')
     # Context manager closes the gzip handle even if a write raises.
     with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb',
                9) as f:
         f.write('\n'.join(out).encode())
         f.write(b'\n')
     return out
Exemple #12
0
 def evolve_to_current_time(node, finalize=False):
     """Bring every virus held by ``node`` up to ``GC.time`` with birth-death trees.

     Each virus whose time is behind ``GC.time`` is removed from the node and
     a tree is simulated with ``birth_death_tree`` for the elapsed time,
     retrying up to 100 times until the seed node has more than one child.
     The children of the seed node are then attached to the virus via
     ``GC.treenode_add_child``.

     Args:
         node: node whose viruses are evolved. ``None`` is a no-op, because
             the simulation driver also invokes the NodeEvolution module
             once with ``None``.
         finalize: accepted but not read by this implementation.

     Raises:
         AssertionError: if no usable tree is produced in 100 attempts.
     """
     if node is None:
         return
     # Snapshot the viruses first: the loop removes them from the node.
     for virus in list(node.viruses()):
         time = GC.time - virus.get_time()
         if not time > 0:
             continue
         node.remove_virus(virus)
         for _ in range(100):
             tree = birth_death_tree(GC.bd_birth,
                                     GC.bd_death,
                                     birth_rate_sd=GC.bd_birth_sd,
                                     death_rate_sd=GC.bd_death_sd,
                                     max_time=time,
                                     repeat_until_success=True,
                                     rng=rng)
             if tree.seed_node.num_child_nodes() > 1:
                 break
         else:
             assert False, "Failed to create non-empty Birth-Death tree after 100 attempts. Perhaps try a higher birth rate or lower death rate?"
         # The seed node's own edge length is absorbed into the existing virus.
         virus.set_time(virus.get_time() + tree.seed_node.edge_length)
         for c in tree.seed_node.child_node_iter():
             GC.treenode_add_child(virus, c, node)
Exemple #13
0
 def get_edge_list():
     """Generate a relaxed-caveman contact network, save it, and return it.

     Builds ``relaxed_caveman_graph(GC.cave_num_cliques,
     GC.cave_clique_size, GC.cave_prob)`` seeded with
     ``GC.random_number_seed`` (which is then incremented when it is not
     ``None``), converts it with ``GC.nx2favites(cn, 'u')``, and writes the
     resulting lines to ``<GC.out_dir>/contact_network.txt.gz``. One
     community per clique (consecutive blocks of ``GC.cave_clique_size``
     indices) is written to
     ``<GC.out_dir>/contact_network_partitions.txt.gz`` as integer sets, and
     ``GC.cn_communities`` is finally left holding the same sets with the
     indices converted to strings.

     Returns:
         The value returned by ``GC.nx2favites`` (an iterable of strings,
         since it is joined with newlines).
     """
     cn = relaxed_caveman_graph(GC.cave_num_cliques, GC.cave_clique_size, GC.cave_prob, seed=GC.random_number_seed)
     if GC.random_number_seed is not None:
         GC.random_number_seed += 1
     out = GC.nx2favites(cn, 'u')
     # Context managers close the gzip handles even if a write raises.
     with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb', 9) as f:
         f.write('\n'.join(out).encode())
         f.write(b'\n')
     GC.cn_communities = [
         {c * GC.cave_clique_size + i for i in range(GC.cave_clique_size)}
         for c in range(GC.cave_num_cliques)
     ]
     with gopen(expanduser("%s/contact_network_partitions.txt.gz" % GC.out_dir), 'wb', 9) as f:
         f.write(str(GC.cn_communities).encode())
         f.write(b'\n')
     # The file stores integer indices; the in-memory copy uses strings.
     GC.cn_communities = [{str(i) for i in c} for c in GC.cn_communities]
     return out
 def get_edge_list():
     """Generate a random-partition contact network, save it, and return it.

     Builds ``random_partition_graph(GC.rpg_sizes, GC.rpg_p_in,
     GC.rpg_p_out)``, directed when ``GC.d_or_u == 'd'`` and seeded with
     ``GC.random_number_seed`` (which is then incremented when it is not
     ``None``). The graph is converted with
     ``GC.nx2favites(cn, GC.d_or_u)`` and written to
     ``<GC.out_dir>/contact_network.txt.gz``; the graph's ``'partition'``
     attribute is written to
     ``<GC.out_dir>/contact_network_partitions.txt.gz`` and stored, with
     members converted to strings, in ``GC.cn_communities``.

     Returns:
         The value returned by ``GC.nx2favites`` (an iterable of strings,
         since it is joined with newlines).
     """
     du = GC.d_or_u == 'd'
     cn = random_partition_graph(GC.rpg_sizes, GC.rpg_p_in, GC.rpg_p_out, directed=du, seed=GC.random_number_seed)
     if GC.random_number_seed is not None:
         GC.random_number_seed += 1
     out = GC.nx2favites(cn, GC.d_or_u)
     # Context managers close the gzip handles even if a write raises.
     with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb', 9) as f:
         f.write('\n'.join(out).encode())
         f.write(b'\n')
     with gopen(expanduser("%s/contact_network_partitions.txt.gz" % GC.out_dir), 'wb', 9) as f:
         f.write(str(cn.graph['partition']).encode())
         f.write(b'\n')
     GC.cn_communities = [{str(n) for n in c} for c in cn.graph['partition']]
     return out
 def get_edge_list():
     """Generate a Newman-Watts-Strogatz contact network, save it, and return it.

     Builds ``newman_watts_strogatz_graph(GC.num_cn_nodes, GC.nws_k,
     GC.nws_prob)`` seeded with ``GC.random_number_seed`` (which is then
     incremented when it is not ``None``), converts it with
     ``GC.nx2favites(cn, 'u')``, and writes the resulting lines to
     ``<GC.out_dir>/contact_network.txt.gz``.

     Returns:
         The value returned by ``GC.nx2favites`` (an iterable of strings,
         since it is joined with newlines).
     """
     cn = newman_watts_strogatz_graph(GC.num_cn_nodes,
                                      GC.nws_k,
                                      GC.nws_prob,
                                      seed=GC.random_number_seed)
     if GC.random_number_seed is not None:
         GC.random_number_seed += 1
     out = GC.nx2favites(cn, 'u')
     # Context manager closes the gzip handle even if a write raises.
     with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb',
                9) as f:
         f.write('\n'.join(out).encode())
         f.write(b'\n')
     return out
 def init():
     """Validate the community-seed settings and initialize dependencies.

     Expands ``GC.msms_path``, requires the configured
     ContactNetworkGenerator to be listed in ``GC.COMMUNITY_GENERATORS``,
     runs ``SeedSequence_Virus.init()`` and
     ``SequenceEvolution_GTRGammaSeqGen.init()``, and then normalizes the
     settings in place: the scaled mutation rate becomes a float, every
     entry of ``GC.community_seed_populations`` becomes an int, and every
     off-diagonal entry of ``GC.community_seed_migration_rates`` becomes a
     float.

     Raises:
         AssertionError: if any of the checks above fails, including a
             ``KeyError`` while walking the migration-rate dictionary.
     """
     GC.msms_path = expanduser(GC.msms_path.strip())
     assert MF.modules['ContactNetworkGenerator'].__name__ in GC.COMMUNITY_GENERATORS, "Must use a ContactNetworkGenerator that creates communities (%s)" % ', '.join(sorted(GC.COMMUNITY_GENERATORS))
     SeedSequence_Virus.init()
     SequenceEvolution_GTRGammaSeqGen.init()

     mutation_rate = float(GC.community_seed_scaled_mutation_rate)
     GC.community_seed_scaled_mutation_rate = mutation_rate
     assert mutation_rate > 0, "community_seed_scaled_mutation_rate must be positive"

     assert isinstance(GC.community_seed_populations, list), "community_seed_populations must be a list of positive integers"
     for idx, raw_size in enumerate(GC.community_seed_populations):
         GC.community_seed_populations[idx] = int(raw_size)
         assert GC.community_seed_populations[idx] > 0, "community_seed_populations must be a list of positive integers"

     assert isinstance(GC.community_seed_migration_rates, dict), "community_seed_migration_rates must be a dictionary of dictionaries of floats"
     try:
         # Rows follow the populations; columns follow the number of keys in
         # the rate dictionary. A missing entry surfaces as KeyError below.
         for i in range(len(GC.community_seed_populations)):
             for j in range(len(GC.community_seed_migration_rates)):
                 if i != j:
                     GC.community_seed_migration_rates[i][j] = float(GC.community_seed_migration_rates[i][j])
                     assert GC.community_seed_migration_rates[i][j] >= 0, "Migration rates in community_seed_migration_rates must be at least 0"
                 else:
                     # A self-rate may be absent, or present and equal to 0.
                     assert i not in GC.community_seed_migration_rates[i] or float(GC.community_seed_migration_rates[i][i]) == 0., "Non-zero self-migration rate found in community_seed_migration_rates"
     except KeyError:
         assert False, "Malformed community_seed_migration_rates dictionary. See FAVITES Wiki for usage information"
     GC.check_seqgen_executable()
Exemple #17
0
 def get_edge_list():
     """Generate an Erdos-Renyi (G(n, p)) contact network, save it, and return it.

     Builds ``fast_gnp_random_graph(GC.num_cn_nodes, GC.er_prob)``, directed
     when ``GC.d_or_u == 'd'`` and seeded with ``GC.random_number_seed``
     (which is then incremented when it is not ``None``), converts it with
     ``GC.nx2favites(cn, GC.d_or_u)``, and writes the resulting lines to
     ``<GC.out_dir>/contact_network.txt.gz``.

     Returns:
         The value returned by ``GC.nx2favites`` (an iterable of strings,
         since it is joined with newlines).
     """
     du = GC.d_or_u == 'd'
     cn = fast_gnp_random_graph(GC.num_cn_nodes,
                                GC.er_prob,
                                directed=du,
                                seed=GC.random_number_seed)
     if GC.random_number_seed is not None:
         GC.random_number_seed += 1
     out = GC.nx2favites(cn, GC.d_or_u)
     # Context manager closes the gzip handle even if a write raises.
     with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb',
                9) as f:
         f.write('\n'.join(out).encode())
         f.write(b'\n')
     return out
    def sample_time():
        """Return the time of the next transmission and stage it in ``GC.next_trans``.

        When the queue ``GC.trans_pq`` is empty it is refilled: every
        uninfected out-neighbor of an infected node is added to
        ``GC.trans_susceptible``, and each susceptible node ``v`` that has
        infected in-neighbors is scheduled at ``GC.time`` plus an
        exponential waiting time whose rate is ``GC.infection_rate`` times
        the number of those neighbors, with the source ``u`` picked by
        ``choice``.

        Returns:
            The time ``t`` of the staged ``(u, v, t)`` transmission, or
            ``None`` (after setting ``GC.next_trans = None`` and
            ``GC.end_time = GC.time``) when there is nothing left to infect
            or nothing could be scheduled.
        """
        network = GC.contact_network

        # no more nodes to infect
        if network.num_uninfected_nodes() == 0:
            GC.next_trans = None
            GC.end_time = GC.time
            return None

        # create priority queue
        if GC.trans_pq is None:
            GC.trans_pq = GC.SortedLinkedList()
            GC.trans_pq_v2trans = dict()
            GC.trans_susceptible = set()

        # attempt to fill priority queue
        if len(GC.trans_pq) == 0:
            for infected in network.get_infected_nodes():
                for out_edge in network.get_edges_from(infected):
                    target = out_edge.get_to()
                    if not target.is_infected():
                        GC.trans_susceptible.add(target)
            while len(GC.trans_susceptible) > 0:
                v = GC.trans_susceptible.pop()
                sources = [
                    in_edge.get_from()
                    for in_edge in network.get_edges_to(v)
                    if in_edge.get_from().is_infected()
                ]
                if len(sources) == 0:
                    continue
                u = choice(sources)
                # min of exponentials is exponential with sum of rates
                t = GC.time + exponential(
                    scale=1 / (GC.infection_rate * len(sources)))
                GC.trans_pq.put(v, t)
                GC.trans_pq_v2trans[v] = (u, v, t)

        # if failed to fill priority queue, simulation is done
        if len(GC.trans_pq) == 0:
            GC.next_trans = None
            GC.end_time = GC.time
            return None

        # get next transmission event
        front = GC.trans_pq.getFront()
        u, v, t = GC.trans_pq_v2trans[front]
        GC.next_trans = (u, v, t)
        del GC.trans_pq_v2trans[v]
        return t
 def subsample_transmission_network():
     """Pick a random subset of the sequenced nodes, weighted by transmission count.

     The nodes looked up from the keys of ``GC.final_sequences`` are each
     weighted by the number of times they appear as either endpoint of an
     event in ``GC.contact_network.get_transmissions()``; nodes that appear
     in no event are left out. Nodes are then drawn without replacement
     with ``GC.roll`` (presumably a weighted draw; confirm against its
     definition) until ``GC.node_sample_fraction`` of the weighted nodes
     have been chosen.

     Returns:
         list: the chosen nodes, in the order they were drawn.
     """
     sequenced = {GC.contact_network.get_node(n) for n in GC.final_sequences}
     appearances = {}
     for u, v, t in GC.contact_network.get_transmissions():
         for endpoint in (u, v):
             appearances[endpoint] = appearances.get(endpoint, 0) + 1
     die = {n: appearances[n] for n in sequenced if n in appearances}
     quota = GC.node_sample_fraction * len(die)
     out = []
     while die and len(out) < quota:
         picked = GC.roll(die)
         out.append(picked)
         # Drop the drawn node so it cannot be picked again.
         del die[picked]
     return out
 def finalize():
     """Load the error-free sequences into ``GC.final_sequences``.

     Reads the FASTA file ``GC.errorfree_sequence_file`` (gzip-compressed
     when its name ends in ``.gz``, case-insensitively), parses it with
     ``GC.parseFASTA``, and files every record under its node and time.
     Record IDs have the form ``<virus>|<node>|<time>``; records whose virus
     label is ``DUMMY`` are skipped. Creates ``GC.final_sequences`` if it
     does not exist yet, and returns without further changes if the file
     holds no non-blank lines.
     """
     if not hasattr(GC, 'final_sequences'):  # GC.final_sequences[cn_node][t] = list of (label,seq) tuples
         GC.final_sequences = {}
     # Read under ``with`` so the input handle is closed promptly.
     if GC.errorfree_sequence_file.lower().endswith('.gz'):
         from gzip import open as gopen
         with gopen(GC.errorfree_sequence_file) as infile:
             lines = [l.decode().strip() for l in infile]
     else:
         with open(GC.errorfree_sequence_file) as infile:
             lines = [l.strip() for l in infile]
     lines = [l for l in lines if len(l) != 0]
     if len(lines) == 0:
         return
     seqs = GC.parseFASTA(lines)
     for ID, seq in seqs.items():
         v, n, t = ID.split('|')
         t = float(t)
         if v == 'DUMMY':
             continue
         GC.final_sequences.setdefault(n, {}).setdefault(t, []).append((v, seq))
 def get_edge_list():
     """Generate a contact network with a given degree sequence, save it, and return it.

     Builds ``random_degree_sequence_graph(GC.cn_degree_sequence)`` with
     ``GC.cn_tries`` tries, seeded with ``GC.random_number_seed`` (which is
     then incremented when it is not ``None``), converts it with
     ``GC.nx2favites(cn, 'u')``, and writes the resulting lines to
     ``<GC.out_dir>/contact_network.txt.gz``.

     Returns:
         The value returned by ``GC.nx2favites`` (an iterable of strings,
         since it is joined with newlines).

     Raises:
         AssertionError: if graph generation raises ``NetworkXUnfeasible``
             or ``NetworkXError``.
     """
     try:
         cn = random_degree_sequence_graph(GC.cn_degree_sequence,
                                           tries=GC.cn_tries,
                                           seed=GC.random_number_seed)
     except NetworkXUnfeasible:
         from os import chdir
         chdir(GC.START_DIR)
         assert False, "Contact network degree sequence is not graphical"
     except NetworkXError:
         from os import chdir
         chdir(GC.START_DIR)
         assert False, "NetworkX failed to produce graph after %d tries" % GC.cn_tries
     if GC.random_number_seed is not None:
         GC.random_number_seed += 1
     out = GC.nx2favites(cn, 'u')
     # Context manager closes the gzip handle even if a write raises.
     with gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb',
                9) as f:
         f.write('\n'.join(out).encode())
         f.write(b'\n')
     return out
Exemple #22
0
    def run(path, ORIG_CONFIG):
        '''
        Simulation driver. Even if you add your own modules, you probably shouldn't
        need to modify this function. The one clear exception would be if your
        module requires additional user input (e.g. custom evolution model modules),
        which would then require you to call it with the required arguments.
        '''

        # store starting directory
        GC.FAVITES_START_TIME = time()
        GC.FAVITES_DIR = path
        if GC.VERBOSE:
            print('[%s] FAVITES Driver starting' % datetime.now(), file=stderr)
        GC.START_DIR = getcwd()

        # load modules
        for module in MF.modules:
            MF.modules[module].init()
        LOG = MF.modules['Logging']

        # set up environment
        orig_dir = getcwd()
        try:
            makedirs(GC.out_dir)
        except:
            if 'FAVITES_DOCKER' not in environ:  # bypass error (Docker makes the folder automatically)
                if isdir(abspath(expanduser(GC.out_dir))):
                    if GC.VERBOSE:
                        print('[%s] Output directory exists: %s' %
                              (datetime.now(), environ['out_dir_print']),
                              file=stderr)
                    response = 'x'
                    while len(response) == 0 or response[0] not in {'y', 'n'}:
                        response = input(
                            "ERROR: Output directory exists. Overwrite? All contents will be deleted. (y/n) "
                        ).strip().lower()
                    if response[0] == 'y':
                        from shutil import rmtree
                        rmtree(GC.out_dir)
                        makedirs(GC.out_dir)
                    else:
                        exit(-1)
                else:
                    LOG.writeln("ERROR: Unable to create the output directory")
                    exit(-1)
        chdir(GC.out_dir)
        f = open('CONFIG.json', 'w')
        f.write(ORIG_CONFIG)
        f.close()

        # begin simulation
        printMessage(LOG)
        LOG.writeln(
            "========================   Simulation Process  ========================"
        )
        if GC.VERBOSE:
            print('[%s] Starting simulation' % datetime.now(), file=stderr)
        makedirs("error_free_files", exist_ok=True)
        makedirs("error_free_files/phylogenetic_trees", exist_ok=True)
        makedirs("error_prone_files", exist_ok=True)

        # create ContactNetwork object
        LOG.write("Loading contact network...")
        if GC.VERBOSE:
            print('[%s] Loading contact network' % datetime.now(), file=stderr)
        GC.cn_edge_list = MF.modules['ContactNetworkGenerator'].get_edge_list()
        LOG.writeln(" done")
        LOG.write("Creating ContactNetwork object...")
        if GC.VERBOSE:
            print('[%s] Initializing ContactNetwork object...' %
                  datetime.now(),
                  file=stderr)
        contact_network = MF.modules['ContactNetwork'](GC.cn_edge_list)
        assert isinstance(contact_network,
                          MF.module_abstract_classes['ContactNetwork']
                          ), "contact_network is not a ContactNetwork object"
        assert contact_network.num_nodes(
        ) > 1, "ContactNetwork must have at least 2 nodes"
        assert contact_network.num_edges(
        ) > 0, "ContactNetwork must have at least 1 edge"
        MF.modules['TransmissionNodeSample'].check_contact_network(
            contact_network)
        GC.contact_network = contact_network
        LOG.writeln(" done")

        # select seed nodes
        LOG.write("Selecting seed nodes...")
        if GC.VERBOSE:
            print('[%s] Selecting seed nodes' % datetime.now(), file=stderr)
        GC.seed_nodes = MF.modules['SeedSelection'].select_seeds()
        assert isinstance(GC.seed_nodes, list) or isinstance(
            GC.seed_nodes, set), "seed_nodes is not a list nor a set"
        for node in GC.seed_nodes:
            if GC.VERBOSE:
                print('[%s] Seed\tTime 0\tNode %s' %
                      (datetime.now(), str(node)),
                      file=stderr)
            assert isinstance(
                node, MF.module_abstract_classes['ContactNetworkNode']
            ), "seed_nodes contains items that are not ContactNetworkNode objects"
        LOG.writeln(" done")

        # infect seed nodes
        LOG.write("Infecting seed nodes...")
        if GC.VERBOSE:
            print('[%s] Infecting seed nodes' % datetime.now(), file=stderr)
        GC.root_viruses = []
        GC.seed_to_first_virus = {}
        f = gopen('seed_sequences.tsv.gz', 'wb', 9)
        for node in GC.seed_nodes:
            seq = MF.modules['SeedSequence'].generate()
            virus = MF.modules['TreeNode'](time=0.0,
                                           seq=seq,
                                           contact_network_node=node)
            f.write(('%s\t%s\n' % (virus.get_label(), seq)).encode())
            GC.root_viruses.append(virus)
            node.infect(0.0, virus)
            GC.contact_network.add_transmission(None, node, 0.0)
            GC.seed_to_first_virus[node] = virus
        f.write(b'\n')
        f.close()
        if isdir('seed_sequences'):
            rename('seed_sequences.tsv.gz',
                   'seed_sequences/seed_sequences.tsv.gz')
        LOG.writeln(" done")

        # iterative step of transmissions
        LOG.write("Performing transmission simulations...")
        if GC.VERBOSE:
            print('[%s] Performing transmission iterations' % datetime.now(),
                  file=stderr)
        GC.first_time_transmitting = {}
        while True:
            t = MF.modules['TransmissionTimeSample'].sample_time()
            if t is None or MF.modules['EndCriteria'].done():
                break
            assert t >= GC.time, "Transmission cannot go back in time!"
            u, v = MF.modules['TransmissionNodeSample'].sample_nodes(t)
            if u is None and v is None:
                break
            GC.time = t
            if u == v:  # u = v implies uninfection (recovery or death)
                u.uninfect()
                GC.contact_network.add_transmission(u, u, GC.time)
                continue
            elif u is None:  # u = None implies seed infection at time t > 0
                seq = MF.modules['SeedSequence'].generate()
                virus = MF.modules['TreeNode'](time=GC.time,
                                               seq=seq,
                                               contact_network_node=v)
                GC.root_viruses.append(virus)
                v.infect(GC.time, virus)
                GC.contact_network.add_transmission(None, v, GC.time)
                GC.seed_to_first_virus[v] = virus
                continue
            MF.modules['NodeEvolution'].evolve_to_current_time(u)
            MF.modules['NodeEvolution'].evolve_to_current_time(v)
            virus = MF.modules['SourceSample'].sample_virus(u)
            u.remove_virus(virus)
            if not u.is_infected():
                GC.contact_network.remove_from_infected(u)
            v.infect(GC.time, virus)
            GC.contact_network.add_to_infected(v)
            GC.contact_network.add_transmission(u, v, GC.time)
            if u not in GC.first_time_transmitting:
                GC.first_time_transmitting[u] = GC.time
        GC.transmissions = GC.contact_network.get_transmissions()
        assert isinstance(GC.transmissions,
                          list), "get_transmissions() did not return a list!"
        LOG.writeln(" done")

        # finalize global time
        LOG.write("Finalizing transmission simulations...")
        if GC.VERBOSE:
            print('[%s] Finalizing transmissions/evolution' % datetime.now(),
                  file=stderr)
        MF.modules['EndCriteria'].finalize_time()
        LOG.writeln(" done")

        # write transmission network as edge list
        LOG.write("Writing true transmission network to file...")
        f = gopen('error_free_files/transmission_network.txt.gz', 'wb', 9)
        f.write(
            ('\n'.join("%s\t%s\t%f" % e for e in GC.transmissions)).encode())
        f.write(b'\n')
        f.close()
        LOG.writeln(" done")
        LOG.writeln(
            "True transmission network was written to: %s/error_free_files/transmission_network.txt"
            % environ['out_dir_print'])
        if GC.VERBOSE:
            print('[%s] Wrote transmission network to file' % datetime.now(),
                  file=stderr)

        # perform patient sampling in time (on all infected nodes; will subsample from this later)
        LOG.write("Sampling patients in time...")
        GC.cn_sample_times = {}
        if GC.VERBOSE:
            print('[%s] Performing person sampling (sequencing)' %
                  datetime.now(),
                  file=stderr)
        for node in GC.contact_network.nodes_iter():
            num_times = MF.modules['NumTimeSample'].sample_num_times(node)
            assert num_times >= 0, "Encountered negative number of sampling events"
            times = MF.modules['TimeSample'].sample_times(node, num_times)
            for t in times:
                assert t <= GC.time, "Encountered a patient sampling time larger than the global end time"
            if len(times) != 0:
                GC.cn_sample_times[node] = times
                if GC.VERBOSE:
                    print('[%s] Node %s sampled at times %s' %
                          (datetime.now(), str(node), str(times)),
                          file=stderr)
            elif GC.VERBOSE:
                print('[%s] Node %s not sampled' % (datetime.now(), str(node)),
                      file=stderr)
        LOG.writeln(" done")

        # evolve to end time
        LOG.write("Evolving trees and sequences to end time...")
        nodes = [node for node in GC.contact_network.get_infected_nodes()]
        MF.modules['NodeEvolution'].evolve_to_current_time(None, finalize=True)
        for node in nodes:
            MF.modules['NodeEvolution'].evolve_to_current_time(node,
                                                               finalize=True)
            MF.modules['SequenceEvolution'].evolve_to_current_time(node)
        LOG.writeln(" done")

        # prune sampled trees
        LOG.write("Pruning sampled trees...")
        if GC.PRUNE_TREES:
            if GC.VERBOSE:
                print('[%s] Pruning sampled trees' % datetime.now(),
                      file=stderr)
            GC.prune_sampled_trees()
        GC.pruned_newick_trees_time = [e for e in GC.sampled_trees
                                       ]  # (rootvirus,treestr) tuples
        LOG.writeln(" done")

        # convert trees from unit of time to unit of mutation rate
        LOG.write("Converting trees from time to mutation rate...")
        if GC.VERBOSE:
            print('[%s] Converting sampled trees from time to mutation rate' %
                  datetime.now(),
                  file=stderr)
        GC.pruned_newick_trees = [
            (e[0], MF.modules['TreeUnit'].time_to_mutation_rate(e[1]))
            for e in GC.pruned_newick_trees_time
        ]
        LOG.writeln(" done")

        # merge cluster trees with seed tree (if applicable)
        LOG.write(
            "Merging true phylogenetic trees with true seed tree (if applicable)..."
        )
        GC.final_tree_to_root_seq = [
            e[0].get_seq() for i, e in enumerate(GC.pruned_newick_trees)
        ]
        GC.merged_trees, GC.merged_trees_time = MF.modules[
            'SeedSequence'].merge_trees()
        LOG.writeln(" done")

        # write phylogenetic trees (expected number of mutations) as Newick files
        LOG.write("Writing true phylogenetic tree(s) to file(s)...")
        if len(GC.merged_trees) == 0:
            for i in range(len(GC.pruned_newick_trees)):
                f = gopen(
                    'error_free_files/phylogenetic_trees/tree_%d.tre.gz' % i,
                    'wb', 9)
                f.write(GC.pruned_newick_trees[i][1].strip().encode())
                f.write(b'\n')
                f.close()
                f = gopen(
                    'error_free_files/phylogenetic_trees/tree_%d.time.tre.gz' %
                    i, 'wb', 9)
                f.write(GC.pruned_newick_trees_time[i][1].strip().encode())
                f.write(b'\n')
                f.close()
        else:
            for i in range(len(GC.merged_trees)):
                f = gopen(
                    'error_free_files/phylogenetic_trees/merged_tree_%d.tre.gz'
                    % i, 'wb', 9)
                f.write(GC.merged_trees[i].strip().encode())
                f.write(b'\n')
                f.close()
                f = gopen(
                    'error_free_files/phylogenetic_trees/merged_tree_%d.time.tre.gz'
                    % i, 'wb', 9)
                f.write(GC.merged_trees_time[i].strip().encode())
                f.write(b'\n')
                f.close()
        LOG.writeln(" done")
        LOG.writeln(
            "True phylogenetic trees were written to: %s/error_free_files/phylogenetic_trees"
            % environ['out_dir_print'])
        if GC.VERBOSE:
            print(
                '[%s] Wrote phylogenetic trees (expected number of mutations)'
                % datetime.now(),
                file=stderr)

        # finalize sequence data
        LOG.write("Finalizing sequence simulations...")
        if GC.VERBOSE:
            print('[%s] Finalizing sequences' % datetime.now(), file=stderr)
        MF.modules['SequenceEvolution'].finalize(
        )  # in case the module creates all sequences at the end
        LOG.writeln(" done")

        # write error-free sequence data
        LOG.write("Writing final sequence data to file...")
        f = gopen('error_free_files/sequence_data.fasta.gz', 'wb', 9)
        for cn_label in GC.final_sequences:
            for t in GC.final_sequences[cn_label]:
                for l, s in GC.final_sequences[cn_label][t]:
                    f.write((">%s\n%s\n" % (l, s)).encode())
        f.close()
        LOG.writeln(" done")
        LOG.writeln("True sequence data were written to: %s/error_free_files" %
                    environ['out_dir_print'])
        LOG.writeln()
        if GC.VERBOSE:
            print('[%s] Wrote true sequence data' % datetime.now(),
                  file=stderr)

        # introduce real data artifacts
        LOG.writeln(
            "\n=======================   Real Data Artifacts   ======================="
        )

        # subsample the contact network nodes and write sequences to file
        LOG.write("Subsampling contact network nodes...")
        if GC.VERBOSE:
            print('[%s] Subsampling contact network nodes' % datetime.now(),
                  file=stderr)
        GC.subsampled_nodes = MF.modules[
            'NodeAvailability'].subsample_transmission_network()
        if len(GC.subsampled_nodes) != 0:
            tmp = []
            rmv = []
            for node in GC.subsampled_nodes:
                cn_label = node.get_name()
                if cn_label in GC.final_sequences:
                    for t in GC.final_sequences[cn_label]:
                        for l, s in GC.final_sequences[cn_label][t]:
                            tmp.append((">%s\n%s\n" % (l, s)).encode())
                else:
                    rmv.append(node)
            for n in rmv:
                GC.subsampled_nodes.remove(n)
            if len(tmp) != 0:
                f = gopen(
                    'error_prone_files/sequence_data_subsampled_errorfree.fasta.gz',
                    'wb', 9)
                for e in tmp:
                    f.write(e)
                f.close()
        LOG.writeln(" done")

        # introduce sequencing error
        LOG.write("Simulating sequencing error...")
        for node in GC.subsampled_nodes:
            if GC.VERBOSE:
                print('[%s] Sequencing error for Node %s' %
                      (datetime.now(), str(node)),
                      file=stderr)
            MF.modules['Sequencing'].introduce_sequencing_error(node)
        MF.modules['Sequencing'].finalize()
        LOG.writeln(" done")
        LOG.writeln(
            "Error prone sequence data were written to: %s/error_prone_files" %
            environ['out_dir_print'])
        LOG.writeln()

        # return to original directory and finish
        chdir(orig_dir)
        if GC.VERBOSE:
            print('[%s] Outputting simulation information' % datetime.now(),
                  file=stderr)
        LOG.writeln(
            "\n===========================   Information   ==========================="
        )
        GC.FAVITES_OUTPUT_SIZE = 0
        for dirpath, dirnames, filenames in walk(GC.out_dir):
            for f in filenames:
                fp = join(dirpath, f)
                GC.FAVITES_OUTPUT_SIZE += getsize(fp)
        LOG.writeln("Output Size (bytes): %d" % GC.FAVITES_OUTPUT_SIZE)
        LOG.writeln("Execution Time (seconds): %d" %
                    (time() - GC.FAVITES_START_TIME))
        if GC.VERBOSE:
            print('[%s] Outputting list of citations' % datetime.now(),
                  file=stderr)
        LOG.writeln(
            "\n\n============================   Citations   ============================"
        )
        citations = set()
        for module in MF.modules:
            cite = MF.modules[module].cite()
            if isinstance(cite, str):
                citations.add(cite.strip())
            elif isinstance(cite, set) or isinstance(cite, list):
                for c in cite:
                    citations.add(c.strip())
        for citation in sorted(citations):
            LOG.writeln(citation)
        LOG.close()
        if GC.VERBOSE:
            print('[%s] FAVITES Driver finished' % datetime.now(), file=stderr)
    def get_edge_list():
        """Build a contact network of Barabasi-Albert communities joined by random edges.

        One Barabasi-Albert graph is generated per entry of ``GC.cng_N`` (the
        community sizes), each with an attachment parameter drawn from a
        multinomial over ``1..GC.cng_m``. Node names are prefixed with
        ``COM<i>-`` so that they are unique across communities, and random
        undirected edges are then added between nodes of different
        communities.

        The resulting edge list is also written, gzip-compressed, to
        ``<GC.out_dir>/contact_network.txt.gz``.

        Returns:
            list of str: the ``NODE`` lines followed by the ``EDGE`` lines.

        Raises:
            AssertionError: if a community edge list contains a line that is
                neither a ``NODE`` nor an ``EDGE`` line.
        """
        # draw one attachment parameter per community
        a = (GC.cng_m + 1) / (2 * GC.cng_m)
        probs = [0] + [
            2 * a / (i * (i + 1)) for i in range(1, GC.cng_m + 1)
        ]  # prepend 0 to make non-zero probabilities be indices 1 through M
        counts = multinomial(len(GC.cng_N), probs, size=1)[0]
        M = [m for m, n in enumerate(counts) for _ in range(n)]

        # generate one BA graph per community
        com = []
        for i, m in enumerate(M):
            com.append(
                GC.nx2favites(
                    barabasi_albert_graph(GC.cng_N[i],
                                          m,
                                          seed=GC.random_number_seed), 'u'))
            # advance the seed so that communities are not identical copies
            if GC.random_number_seed is not None:
                GC.random_number_seed += 1

        # process disconnected BA graphs (one per community)
        nodes = []  # nodes[i] = prefixed node names of community i
        node_lines = []
        edge_lines = []
        for i, g in enumerate(com):
            nodes.append([])
            node_prefix = 'COM%d' % i
            for l in g:
                if len(l) == 0 or l[0] == '#':
                    continue
                parts = l.split()
                assert parts[0] in {'NODE', 'EDGE'
                                    }, "Invalid FAVITES edge list encountered"
                if parts[0] == 'NODE':
                    name = "%s-%s" % (node_prefix, parts[1])
                    nodes[-1].append(name)
                    node_lines.append("NODE\t%s\t%s" % (name, parts[2]))
                else:
                    u = "%s-%s" % (node_prefix, parts[1])
                    v = "%s-%s" % (node_prefix, parts[2])
                    edge_lines.append("EDGE\t%s\t%s\t%s\t%s" %
                                      (u, v, parts[3], parts[4]))

        # add edges between communities
        # possible_num_er_edges = number of ordered (u, v) pairs whose
        # endpoints lie in different communities (always an even number)
        possible_num_er_edges = sum(len(c) for c in nodes)**2 - sum(
            len(c)**2 for c in nodes)
        if len(nodes) == 1:
            num_er_edges = 0  # only 1 community
        else:
            num_er_edges = 2 * binomial(
                possible_num_er_edges,
                GC.cng_p)  # multiply by 2 for bidirectionality
        # NOTE(review): the pair count above is already ordered and the draw
        # is doubled both here and in the loop target below; confirm that the
        # resulting edge density is the intended one.
        # er_edges stores both orientations of every chosen pair, so it can
        # never grow beyond possible_num_er_edges; cap the target so that the
        # loop below cannot spin forever when the draw is too large.
        target = min(2 * num_er_edges, possible_num_er_edges)
        er_edges = set()
        er_choice_indices = list(range(len(nodes)))
        while len(er_edges) != target:
            i = choice(er_choice_indices)
            j = choice(er_choice_indices)
            while i == j:
                j = choice(er_choice_indices)
            u = choice(nodes[i])
            v = choice(nodes[j])
            if (u, v) in er_edges:
                continue  # pair already connected: don't emit a duplicate line
            er_edges.add((u, v))
            er_edges.add((v, u))
            edge_lines.append("EDGE\t%s\t%s\t.\tu" % (u, v))

        # output final graph
        out = node_lines + edge_lines
        f = gopen(expanduser("%s/contact_network.txt.gz" % GC.out_dir), 'wb',
                  9)
        try:
            f.write('\n'.join(out).encode())
            f.write(b'\n')
        finally:
            f.close()  # close the archive even if a write fails
        return out
    def prep_GEMF():
        """Write the GEMF inputs for the HIV-ART model, run GEMF, and parse its output.

        Files created under ``GC.gemf_out_dir``: ``para.txt`` (parameters),
        ``network.txt`` (numbered edge list), ``gemf2orig.json`` (GEMF number
        to original node name), ``status.txt`` (initial states) and
        ``log.txt`` (GEMF's standard output). GEMF's ``output.txt`` is then
        read back and converted into ``GC.transmission_file``, a list of
        ``(infector name or None, infected name, time)`` tuples in which a
        transition into state D is recorded as ``(name, name, time)``.

        Also sets ``GC.gemf_path``, ``GC.transmission_num``,
        ``GC.transmission_state``, ``GC.gemf_ready``, ``GC.gemf_num2node`` and
        the ``gemf_state`` attribute of every numbered node.

        Raises:
            AssertionError: if the ``hiv_freq_*`` counts of the infectious
                states do not add up to the number of seed nodes, if the GEMF
                executable cannot be found, or if GEMF produced no
                transmissions.
        """
        orig_dir = getcwd()
        GC.gemf_path = expanduser(GC.gemf_path.strip())
        makedirs(GC.gemf_out_dir, exist_ok=True)
        infectious = ['I1','I2','I3','I4','A1','A2','A3','A4']

        # write GEMF parameter file
        with open(GC.gemf_out_dir + "/para.txt",'w') as f:
            f.write("[NODAL_TRAN_MATRIX]\n0\t" + str(GC.hiv_ns_to_s) + "\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_ns_to_d) + "\n0\t0\t" + str(GC.hiv_s_to_i1_seed) + "\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_s_to_d) + "\n0\t0\t0\t" + str(GC.hiv_i1_to_i2) + "\t0\t0\t" + str(GC.hiv_i1_to_a1) + "\t0\t0\t0\t" + str(GC.hiv_i1_to_d) + "\n0\t0\t0\t0\t" + str(GC.hiv_i2_to_i3) + "\t0\t0\t" + str(GC.hiv_i2_to_a2) + "\t0\t0\t" + str(GC.hiv_i2_to_d) + "\n0\t0\t0\t0\t0\t" + str(GC.hiv_i3_to_i4) + "\t0\t0\t" + str(GC.hiv_i3_to_a3) + "\t0\t" + str(GC.hiv_i3_to_d) + "\n0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_i4_to_a4) + "\t" + str(GC.hiv_i4_to_d) + "\n0\t0\t" + str(GC.hiv_a1_to_i1) + "\t0\t0\t0\t0\t" + str(GC.hiv_a1_to_a2) + "\t0\t0\t" + str(GC.hiv_a1_to_d) + "\n0\t0\t0\t" + str(GC.hiv_a2_to_i2) + "\t0\t0\t0\t0\t" + str(GC.hiv_a2_to_a3) + "\t0\t" + str(GC.hiv_a2_to_d) + "\n0\t0\t0\t0\t" + str(GC.hiv_a3_to_i3) + "\t0\t0\t0\t0\t" + str(GC.hiv_a3_to_a4) + "\t" + str(GC.hiv_a3_to_d) + "\n0\t0\t0\t0\t0\t" + str(GC.hiv_a4_to_i4) + "\t0\t0\t0\t0\t" + str(GC.hiv_a4_to_d) + "\n0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n\n") # HIV-ART-specific
            f.write("[EDGED_TRAN_MATRIX]\n")
            # one 11x11 matrix per inducer state, in the order of `infectious`;
            # the only non-zero entry is row 1, column 2, which holds the
            # matching hiv_s_to_i1_by_<state> rate
            zero_row = '\t'.join(['0']*11)
            for s in infectious:
                rows = [zero_row]*11
                rows[1] = "0\t0\t" + str(getattr(GC, "hiv_s_to_i1_by_%s" % s.lower())) + "\t0"*8
                f.write('\n'.join(rows) + "\n\n")
            f.write("[STATUS_BEGIN]\n0\n\n")
            f.write("[INDUCER_LIST]\n" + ' '.join([str(GC.gemf_state_to_num[i]) for i in infectious]) + "\n\n")
            f.write("[SIM_ROUNDS]\n1\n\n")
            f.write("[INTERVAL_NUM]\n1\n\n")
            f.write("[MAX_TIME]\n" + str(GC.end_time) + "\n\n")
            f.write("[MAX_EVENTS]\n" + str(GC.C_INT_MAX) + "\n\n")
            f.write("[DIRECTED]\n" + str(int(GC.contact_network.is_directed())) + "\n\n")
            f.write("[SHOW_INDUCER]\n1\n\n")
            f.write("[DATA_FILE]\n" + '\n'.join(["network.txt"]*len(infectious)) + "\n\n")
            f.write("[STATUS_FILE]\nstatus.txt\n\n")
            if GC.random_number_seed is not None:
                f.write("[RANDOM_SEED]\n%d\n\n"%GC.random_number_seed)
            f.write("[OUT_FILE]\noutput.txt")

        # write GEMF network file; nodes are numbered from 1 in order of first
        # appearance in an edge, so nodes without any edge are never numbered
        num2node = {}
        node2num = {}
        with open(GC.gemf_out_dir + "/network.txt",'w') as f:
            for edge in GC.contact_network.edges_iter():
                u = edge.get_from()
                v = edge.get_to()
                for node in (u, v):
                    if node not in node2num:
                        num = len(node2num) + 1
                        node2num[node] = num
                        num2node[num] = node
                f.write(str(node2num[u]) + '\t' + str(node2num[v]) + '\n')

        # write GEMF to original mapping
        # NOTE: despite the extension, this is the str() of a Python dict
        # (integer keys), not strict JSON
        with open(GC.gemf_out_dir + "/gemf2orig.json",'w') as f:
            f.write(str({num:num2node[num].get_name() for num in num2node}))

        # write GEMF status file (NS = 0, S = 1, I1 = 2, I2 = 3, I3 = 4, I4 = 5, A1 = 6, A2 = 7, A3 = 8, A4 = 9, D = 10)
        def _num_starting_in(state):
            # a float hiv_freq_* is a fraction of the numbered nodes; any other value is used as a count
            freq = getattr(GC, "hiv_freq_%s" % state.lower())
            if isinstance(freq, float):
                return int(len(num2node)*freq)
            return freq

        leftover = len(num2node)
        start_states = {'seed':[], 'other':[]}
        for s in infectious:
            n = _num_starting_in(s)
            start_states['seed'] += [GC.gemf_state_to_num[s]]*n
            leftover -= n
        assert len(start_states['seed']) == len(GC.seed_nodes), "At time 0, A1+A2+A3+A4+I1+I2+I3+I4 = %d, but there are %d seed nodes. Fix hiv_freq_* parameters accordingly" % (len(start_states['seed']),len(GC.seed_nodes))
        for s in ['NS','S']:
            n = _num_starting_in(s)
            start_states['other'] += [GC.gemf_state_to_num[s]]*n
            leftover -= n
        start_states['other'] += [GC.gemf_state_to_num['D']]*leftover # whatever remains starts in D
        shuffle(start_states['seed']); shuffle(start_states['other'])
        seeds = set(GC.seed_nodes)
        with open(GC.gemf_out_dir + "/status.txt",'w') as f:
            for num in sorted(num2node.keys()):
                node = num2node[num]
                # seed nodes draw from the shuffled infectious states, all other nodes from NS/S/D
                s = start_states['seed' if node in seeds else 'other'].pop()
                f.write("%d\n"%s)
                node.gemf_state = s

        # run GEMF
        chdir(GC.gemf_out_dir)
        try:
            with open("log.txt",'w') as log:
                call([GC.gemf_path], stdout=log)
        except FileNotFoundError:
            chdir(GC.START_DIR)
            assert False, "GEMF executable was not found: %s" % GC.gemf_path
        chdir(orig_dir)

        # reload edge-based matrices for ease of use
        with open(GC.gemf_out_dir + '/para.txt') as f:
            para = f.read().strip()
        nodal_text = para[para.index('[NODAL_TRAN_MATRIX]'):para.index('\n\n[EDGED_TRAN_MATRIX]')].replace('[NODAL_TRAN_MATRIX]\n','')
        outside_infection_matrix = [[float(e) for e in l.split()] for l in nodal_text.splitlines()]
        edged_text = para[para.index('[EDGED_TRAN_MATRIX]'):para.index('\n\n[STATUS_BEGIN]')].replace('[EDGED_TRAN_MATRIX]\n','')
        edged = [[[float(e) for e in l.split()] for l in m.splitlines()] for m in edged_text.split('\n\n')]
        # matrices[state number of the inducer][pre][post] = transition rate
        matrices = {GC.gemf_state_to_num[s]:edged[i] for i,s in enumerate(infectious)}
        matrices[GC.gemf_state_to_num['S']] = outside_infection_matrix

        # convert GEMF output to FAVITES transmission network format
        GC.transmission_num = 0
        GC.transmission_state = set() # 'node' and 'time'
        NUM_INFECTED = len(seeds)
        GC.transmission_file = []
        with open(GC.gemf_out_dir + "/output.txt") as gemf_output:
            for line in gemf_output:
                # every line must have exactly 17 whitespace-separated fields (ValueError otherwise)
                t,rate,vNum,pre,post,num0,num1,num2,num3,num4,num5,num6,num7,num8,num9,num10,lists = line.split()
                pre,post = int(pre),int(post)
                vName = num2node[int(vNum)].get_name()
                # the last field is a comma-separated run of bracketed lists;
                # restore the brackets removed by the split
                lists = lists.split('],[')
                lists[0] += ']'
                lists[-1] = '[' + lists[-1]
                for i in range(1,len(lists)-1):
                    if '[' not in lists[i]:
                        lists[i] = '[' + lists[i] + ']'
                # NOTE(security): eval() executes whatever text GEMF wrote to
                # output.txt; only run this on output from a trusted GEMF binary
                lists = [eval(l) for l in lists]
                uNums = []
                for l in lists:
                    uNums.extend(l)
                if post == GC.gemf_state_to_num['D']:
                    NUM_INFECTED -= 1
                    GC.transmission_file.append((vName,vName,float(t)))
                    if GC.VERBOSE:
                        print('[%s] Uninfection\tTime %s\tNode %s (%s->%s)\tTotal Infected: %d\tTotal Uninfected: %d' % (datetime.now(),t,vName,GC.gemf_num_to_state[pre],GC.gemf_num_to_state[post],NUM_INFECTED,len(num2node)-NUM_INFECTED), file=stderr)
                elif GC.gemf_num_to_state[pre] == 'S' and GC.gemf_num_to_state[post] == 'I1':
                    NUM_INFECTED += 1
                    v = num2node[int(vNum)]
                    uNodes = [num2node[num] for num in uNums]
                    uRates = [matrices[uNode.gemf_state][pre][post] for uNode in uNodes]
                    die = {uNodes[i]:GC.prob_exp_min(i, uRates) for i in range(len(uNodes))}
                    if len(die) != 0:
                        u = GC.roll(die) # roll die weighted by exponential infectious rates
                        uName = u.get_name()
                        if GC.VERBOSE:
                            print('[%s] Infection\tTime %s\tFrom Node %s (%s)\tTo Node %s (%s->%s)\tTotal Infected: %d\tTotal Uninfected: %d' % (datetime.now(),t,uName,GC.gemf_num_to_state[u.gemf_state],vName,GC.gemf_num_to_state[pre],GC.gemf_num_to_state[post],NUM_INFECTED,len(num2node)-NUM_INFECTED), file=stderr)
                    else: # no inducers listed: new seed
                        uName = None
                        if GC.VERBOSE:
                            print('[%s] Seed\tTime %s\tNode %s\tTotal Infected: %d\tTotal Uninfected: %d' % (datetime.now(),t,vName,NUM_INFECTED,len(num2node)-NUM_INFECTED), file=stderr)
                    GC.transmission_file.append((uName,v.get_name(),float(t)))
                elif GC.VERBOSE:
                    print('[%s] Transition\tTime %s\tNode %s (%s->%s)' % (datetime.now(),t,vName,GC.gemf_num_to_state[pre],GC.gemf_num_to_state[post]), file=stderr)
                num2node[int(vNum)].gemf_state = post
        assert len(GC.transmission_file) != 0, "GEMF didn't output any transmissions"
        GC.gemf_ready = True
        GC.gemf_num2node = num2node
    def prep_GEMF():
        # check for attributes in contact network nodes
        for node in GC.contact_network.nodes_iter():
            attr = node.get_attribute()
            assert 'MALE' in attr or 'FEMALE' in attr, "All nodes must have MALE or FEMALE in their attributes"
            assert not ('MALE' in attr and 'FEMALE'
                        in attr), "Nodes cannot be both MALE and FEMALE"
            if 'MALE' in attr:
                assert 'CIRCUMCISED' in attr or 'UNCIRCUMCISED' in attr, "MALE nodes must be either CIRCUMCISED or UNCIRCUMCISED"
            else:
                assert 'CIRCUMCISED' not in attr, "FEMALE nodes cannot be CIRCUMCISED"

        # write GEMF parameter file
        orig_dir = getcwd()
        GC.gemf_path = expanduser(GC.gemf_path.strip())
        makedirs(GC.gemf_out_dir, exist_ok=True)
        f = open(GC.gemf_out_dir + "/para.txt", 'w')
        f.write("[NODAL_TRAN_MATRIX]\n")
        f.write(
            "0\t" + str(GC.hiv_msu_to_mspc) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_msu_to_d) + "\n")
        f.write(
            "0\t0\t" + str(GC.hiv_mspc_to_msch) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_mspc_to_d) + "\n")
        f.write(
            "0\t0\t0\t" + str(GC.hiv_msch_to_msc) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_msch_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_msc_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t" + str(GC.hiv_miah_to_mia) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_miah_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t" + str(GC.hiv_mia_to_mi1) + "\t" +
            str(GC.hiv_mia_to_mi2) + "\t" + str(GC.hiv_mia_to_mi3) + "\t" +
            str(GC.hiv_mia_to_mi4) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_mia_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_mi1_to_mi2) + "\t0\t0\t" +
            str(GC.hiv_mi1_to_mj1) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_mi1_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_mi2_to_mi3) + "\t0\t0\t" +
            str(GC.hiv_mi2_to_mj2) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_mi2_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_mi3_to_mi4) +
            "\t0\t0\t" + str(GC.hiv_mi3_to_mj3) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_mi3_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" +
            str(GC.hiv_mi4_to_mj4) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_mi4_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_mj1_to_mj2) +
            "\t0\t0\t" + str(GC.hiv_mj1_to_mt1) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_mj1_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_mj2_to_mj3) +
            "\t0\t0\t" + str(GC.hiv_mj2_to_mt2) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_mj2_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" +
            str(GC.hiv_mj3_to_mj4) + "\t0\t0\t" + str(GC.hiv_mj3_to_mt3) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_mj3_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" +
            str(GC.hiv_mj4_to_mt4) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_mj4_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t" + str(GC.hiv_mt1_to_mi1) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_mt1_to_mt2) +
            "\t0\t0\t" + str(GC.hiv_mt1_to_ma1) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_mt1_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_mt2_to_mi2) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_mt2_to_mt3) +
            "\t0\t0\t" + str(GC.hiv_mt2_to_ma2) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" +
            str(GC.hiv_mt2_to_d) + "\n")
        f.write("0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_mt3_to_mi3) +
                "\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_mt3_to_mt4) +
                "\t0\t0\t" + str(GC.hiv_mt3_to_ma3) +
                "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" +
                str(GC.hiv_mt3_to_d) + "\n")
        f.write("0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_mt4_to_mi4) +
                "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" +
                str(GC.hiv_mt4_to_ma4) +
                "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" +
                str(GC.hiv_mt4_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t" + str(GC.hiv_ma1_to_mi1) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_ma1_to_ma2) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" +
            str(GC.hiv_ma1_to_d) + "\n")
        f.write("0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_ma2_to_mi2) +
                "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" +
                str(GC.hiv_ma2_to_ma3) +
                "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" +
                str(GC.hiv_ma2_to_d) + "\n")
        f.write("0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_ma3_to_mi3) +
                "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" +
                str(GC.hiv_ma3_to_ma4) +
                "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" +
                str(GC.hiv_ma3_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_ma4_to_mi4) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_ma4_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fs_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fia_to_fi1) + "\t" + str(GC.hiv_fia_to_fi2) + "\t" +
            str(GC.hiv_fia_to_fi3) + "\t" + str(GC.hiv_fia_to_fi4) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_fia_to_d) +
            "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fi1_to_fi2) + "\t0\t0\t" + str(GC.hiv_fi1_to_fj1) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_fi1_to_d) +
            "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fi2_to_fi3) + "\t0\t0\t" + str(GC.hiv_fi2_to_fj2) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_fi2_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fi3_to_fi4) + "\t0\t0\t" + str(GC.hiv_fi3_to_fj3) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_fi3_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fi4_to_fj4) + "\t0\t0\t0\t0\t0\t0\t0\t0\t" +
            str(GC.hiv_fi4_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fj1_to_fj2) + "\t0\t0\t" + str(GC.hiv_fj1_to_ft1) +
            "\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_fj1_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fj2_to_fj3) + "\t0\t0\t" + str(GC.hiv_fj2_to_ft2) +
            "\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_fj2_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fj3_to_fj4) + "\t0\t0\t" + str(GC.hiv_fj3_to_ft3) +
            "\t0\t0\t0\t0\t0\t" + str(GC.hiv_fj3_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fj4_to_ft4) + "\t0\t0\t0\t0\t" +
            str(GC.hiv_fj4_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_ft1_to_fi1) + "\t0\t0\t0\t0\t0\t0\t0\t0\t" +
            str(GC.hiv_ft1_to_ft2) + "\t0\t0\t" + str(GC.hiv_ft1_to_fa1) +
            "\t0\t0\t0\t" + str(GC.hiv_ft1_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_ft2_to_fi2) + "\t0\t0\t0\t0\t0\t0\t0\t0\t" +
            str(GC.hiv_ft2_to_ft3) + "\t0\t0\t" + str(GC.hiv_ft2_to_fa2) +
            "\t0\t0\t" + str(GC.hiv_ft2_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_ft3_to_fi3) + "\t0\t0\t0\t0\t0\t0\t0\t0\t" +
            str(GC.hiv_ft3_to_ft4) + "\t0\t0\t" + str(GC.hiv_ft3_to_fa3) +
            "\t0\t" + str(GC.hiv_ft3_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_ft4_to_fi4) + "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" +
            str(GC.hiv_ft4_to_fa4) + "\t" + str(GC.hiv_ft4_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fa1_to_fi1) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_fa1_to_fa2) +
            "\t0\t0\t" + str(GC.hiv_fa1_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fa2_to_fi2) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_fa2_to_fa3) +
            "\t0\t" + str(GC.hiv_fa2_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fa3_to_fi3) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_fa3_to_fa4) +
            "\t" + str(GC.hiv_fa3_to_d) + "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
            + str(GC.hiv_fa4_to_fi4) +
            "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t" + str(GC.hiv_fa4_to_d) +
            "\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
        )
        f.write("\n")
        infectious = [
            'MIAH', 'MIA', 'MI1', 'MI2', 'MI3', 'MI4', 'MJ1', 'MJ2', 'MJ3',
            'MJ4', 'MT1', 'MT2', 'MT3', 'MT4', 'MA1', 'MA2', 'MA3', 'MA4',
            'FIA', 'FI1', 'FI2', 'FI3', 'FI4', 'FJ1', 'FJ2', 'FJ3', 'FJ4',
            'FT1', 'FT2', 'FT3', 'FT4', 'FA1', 'FA2', 'FA3', 'FA4'
        ]
        f.write("[EDGED_TRAN_MATRIX]\n")
        for _ in infectious:
            by = _.lower()
            f.write(
                "0\t0\t0\t0\t0\t" +
                str(getattr(GC, 'hiv_msu_to_mia_by_' + by)) +
                "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t" +
                str(getattr(GC, 'hiv_mspc_to_mia_by_' + by)) +
                "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t" +
                str(getattr(GC, 'hiv_msch_to_miah_by_' + by)) +
                "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t" +
                str(getattr(GC, 'hiv_msc_to_mia_by_' + by)) +
                "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t"
                + str(getattr(GC, 'hiv_fs_to_fia_by_' + by)) +
                "\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n")
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write(
                "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n"
            )
            f.write('\n')
        f.write("[STATUS_BEGIN]\n0\n\n")
        f.write("[INDUCER_LIST]\n" +
                ' '.join([str(GC.gemf_state_to_num[i])
                          for i in infectious]) + "\n\n")
        f.write("[SIM_ROUNDS]\n1\n\n")
        f.write("[INTERVAL_NUM]\n1\n\n")
        f.write("[MAX_TIME]\n" + str(GC.end_time) + "\n\n")
        f.write("[MAX_EVENTS]\n" + str(GC.C_INT_MAX) + "\n\n")
        f.write("[DIRECTED]\n" + str(int(GC.contact_network.is_directed())) +
                "\n\n")
        f.write("[SHOW_INDUCER]\n1\n\n")
        f.write("[DATA_FILE]\n" +
                '\n'.join(["network.txt"] * len(infectious)) + "\n\n")
        f.write("[STATUS_FILE]\nstatus.txt\n\n")
        if GC.random_number_seed is not None:
            f.write("[RANDOM_SEED]\n%d\n\n" % GC.random_number_seed)
        f.write("[OUT_FILE]\noutput.txt")
        f.close()

        # write GEMF network file
        f = open(GC.gemf_out_dir + "/network.txt", 'w')
        num2node = {}
        node2num = {}
        for edge in GC.contact_network.edges_iter():
            u = edge.get_from()
            v = edge.get_to()
            if u not in node2num:
                num = len(node2num) + 1
                node2num[u] = num
                num2node[num] = u
            if v not in node2num:
                num = len(node2num) + 1
                node2num[v] = num
                num2node[num] = v
            f.write(str(node2num[u]) + '\t' + str(node2num[v]) + '\n')
        f.close()

        # write GEMF to original mapping
        f = open(GC.gemf_out_dir + "/gemf2orig.json", 'w')
        f.write(str({num: num2node[num].get_name() for num in num2node}))
        f.close()

        # write GEMF status file (see above for the states)
        f = open(GC.gemf_out_dir + "/status.txt", 'w')
        seeds = {seed
                 for seed in GC.seed_nodes
                 }  # seed nodes are assumed to be in acute infection
        for num in sorted(num2node.keys()):
            node = num2node[num]
            attr = node.get_attribute()
            if node in seeds:
                if 'MALE' in attr:
                    f.write(str(GC.gemf_state_to_num['MIA']) +
                            "\n")  # PopART-specific
                    node.gemf_state = GC.gemf_state_to_num['MIA']
                else:
                    f.write(str(GC.gemf_state_to_num['FIA']) +
                            "\n")  # PopART-specific
                    node.gemf_state = GC.gemf_state_to_num['FIA']
            else:
                if 'MALE' in attr:
                    if 'CIRCUMCISED' in attr:
                        f.write(str(GC.gemf_state_to_num['MSC']) +
                                "\n")  # PopART-specific
                        node.gemf_state = GC.gemf_state_to_num['MSC']
                    else:
                        f.write(str(GC.gemf_state_to_num['MSU']) +
                                "\n")  # PopART-specific
                        node.gemf_state = GC.gemf_state_to_num['MSU']
                else:
                    f.write(str(GC.gemf_state_to_num['FS']) +
                            "\n")  # PopART-specific
                    node.gemf_state = GC.gemf_state_to_num['FS']
        f.close()

        # run GEMF
        chdir(GC.gemf_out_dir)
        try:
            call([GC.gemf_path], stdout=open("log.txt", 'w'))
        except FileNotFoundError:
            chdir(GC.START_DIR)
            assert False, "GEMF executable was not found: %s" % GC.gemf_path
        chdir(orig_dir)

        # reload edge-based matrices for ease of use
        matrices = open(GC.gemf_out_dir + '/para.txt').read().strip()
        matrices = [[
            [float(e) for e in l.split()] for l in m.splitlines()
        ] for m in matrices[matrices.index('[EDGED_TRAN_MATRIX]'):matrices.
                            index('\n\n[STATUS_BEGIN]')].replace(
                                '[EDGED_TRAN_MATRIX]\n', '').split('\n\n')]
        matrices = {
            GC.gemf_state_to_num[infectious[i]]: matrices[i]
            for i in range(len(infectious))
        }

        # convert GEMF output to FAVITES transmission network format
        GC.transmission_num = 0
        GC.transmission_state = set()  # 'node' and 'time'
        GC.transmission_file = []
        for line in open(GC.gemf_out_dir + "/output.txt"):
            parts = [i.strip() for i in line.split()]
            t = parts[0]
            rate = parts[1]
            vNum = parts[2]
            pre = int(parts[3])
            post = int(parts[4])
            lists = parts[-1]
            lists = lists.split('],[')
            lists[0] += ']'
            lists[-1] = '[' + lists[-1]
            for i in range(1, len(lists) - 1):
                if '[' not in lists[i]:
                    lists[i] = '[' + lists[i] + ']'
            lists = [eval(l) for l in lists]
            uNums = []
            for l in lists:
                uNums.extend(l)
            if post == GC.gemf_state_to_num['D']:
                vName = num2node[int(vNum)].get_name()
                GC.transmission_file.append((vName, vName, float(t)))
            elif len(lists[0]) == 0:
                v = num2node[int(vNum)]
                uNodes = [num2node[num] for num in uNums]
                uRates = [
                    matrices[uNode.gemf_state][pre][post] for uNode in uNodes
                ]
                die = {
                    uNodes[i]: GC.prob_exp_min(i, uRates)
                    for i in range(len(uNodes))
                }
                if len(die) != 0:
                    uName = GC.roll(die).get_name(
                    )  # roll die weighted by exponential infectious rates
                elif len(die) == 0 or u == v:  # new seed
                    uName = None
                GC.transmission_file.append((uName, v.get_name(), float(t)))
            num2node[int(vNum)].gemf_state = post
        assert len(
            GC.transmission_file) != 0, "GEMF didn't output any transmissions"
        GC.gemf_ready = True
    def prep_GEMF():
        """Run GEMF for the SEIR model and load its output as transmissions.

        Steps, in order:

        1. Write the GEMF inputs into ``GC.gemf_out_dir``: ``para.txt``
           (parameters), ``network.txt`` (edge list with 1-based node
           numbers), ``gemf2orig.json`` (node number -> node name) and
           ``status.txt`` (initial state per node: seeds start in ``I``,
           everyone else in ``S``).
        2. Run the executable at ``GC.gemf_path`` from inside
           ``GC.gemf_out_dir``, sending its stdout to ``log.txt``.
        3. Re-read the transition matrices from ``para.txt``.
        4. Parse ``output.txt`` and fill ``GC.transmission_file`` with
           ``(source_name, target_name, time)`` tuples. A transition into
           ``R`` is stored as ``(name, name, time)``; a transition into ``E``
           with no inducer is stored with ``None`` as the source.

        Side effects: rewrites ``GC.gemf_path`` (stripped, ``~`` expanded),
        resets ``GC.transmission_num`` and ``GC.transmission_state``, sets
        ``gemf_state`` on every numbered node, and sets ``GC.gemf_ready``.
        Only nodes that appear in at least one edge receive a GEMF number.

        Raises:
            AssertionError: if the GEMF executable cannot be found, or if
                GEMF's output yields no transmission events.
        """
        # write GEMF parameter file
        orig_dir = getcwd()
        GC.gemf_path = expanduser(GC.gemf_path.strip())
        makedirs(GC.gemf_out_dir, exist_ok=True)
        with open(GC.gemf_out_dir + "/para.txt", 'w') as f:
            f.write("[NODAL_TRAN_MATRIX]\n0\t" + str(GC.seir_beta_seed) +
                    "\t0\t0\n0\t0\t" + str(GC.seir_lambda) + "\t0\n0\t0\t0\t" +
                    str(GC.seir_delta) + "\n0\t0\t0\t0\n\n")  # SEIR-specific
            f.write(
                "[EDGED_TRAN_MATRIX]\n0\t" + str(GC.seir_beta_by_i) +
                "\t0\t0\n0\t0\t0\t0\n0\t0\t0\t0\n0\t0\t0\t0\n\n")  # SEIR-specific
            f.write("[STATUS_BEGIN]\n0\n\n")
            f.write("[INDUCER_LIST]\n" + str(GC.gemf_state_to_num['I']) +
                    "\n\n")
            f.write("[SIM_ROUNDS]\n1\n\n")
            f.write("[INTERVAL_NUM]\n1\n\n")
            f.write("[MAX_TIME]\n" + str(GC.end_time) + "\n\n")
            f.write("[MAX_EVENTS]\n" + str(GC.C_INT_MAX) + "\n\n")
            f.write("[DIRECTED]\n" +
                    str(int(GC.contact_network.is_directed())) + "\n\n")
            f.write("[SHOW_INDUCER]\n1\n\n")
            f.write("[DATA_FILE]\nnetwork.txt\n\n")
            f.write("[STATUS_FILE]\nstatus.txt\n\n")
            if GC.random_number_seed is not None:
                f.write("[RANDOM_SEED]\n%d\n\n" % GC.random_number_seed)
            f.write("[OUT_FILE]\noutput.txt")

        # write GEMF network file; nodes are numbered from 1 in the order in
        # which they are first seen while iterating over the edges
        num2node = {}
        node2num = {}
        with open(GC.gemf_out_dir + "/network.txt", 'w') as f:
            for edge in GC.contact_network.edges_iter():
                src = edge.get_from()
                dst = edge.get_to()
                for endpoint in (src, dst):
                    if endpoint not in node2num:
                        num = len(node2num) + 1
                        node2num[endpoint] = num
                        num2node[num] = endpoint
                f.write(str(node2num[src]) + '\t' + str(node2num[dst]) + '\n')

        # write GEMF to original mapping
        # NOTE: despite the ".json" name this is the str() of a Python dict
        # (integer keys), not strict JSON; kept as-is for existing readers.
        with open(GC.gemf_out_dir + "/gemf2orig.json", 'w') as f:
            f.write(str({num: num2node[num].get_name() for num in num2node}))

        # write GEMF status file (0 = S, 1 = E, 2 = I, 3 = R)
        seeds = set(GC.seed_nodes)
        with open(GC.gemf_out_dir + "/status.txt", 'w') as f:
            for num in sorted(num2node):
                node = num2node[num]
                state = 'I' if node in seeds else 'S'  # SEIR-specific
                f.write(str(GC.gemf_state_to_num[state]) + "\n")
                node.gemf_state = GC.gemf_state_to_num[state]

        # run GEMF from inside its working directory (it reads para.txt and
        # writes output.txt relative to the current directory)
        chdir(GC.gemf_out_dir)
        try:
            with open("log.txt", 'w') as log:
                call([GC.gemf_path], stdout=log)
        except FileNotFoundError:
            chdir(GC.START_DIR)
            assert False, "GEMF executable was not found: %s" % GC.gemf_path
        chdir(orig_dir)

        # reload node-based and edge-based matrices for ease of use
        with open(GC.gemf_out_dir + '/para.txt') as f:
            para = f.read().strip()
        nodal_text = para[para.index('[NODAL_TRAN_MATRIX]'):para.index(
            '\n\n[EDGED_TRAN_MATRIX]')].replace('[NODAL_TRAN_MATRIX]\n', '')
        outside_infection_matrix = [[float(e) for e in row.split()]
                                    for row in nodal_text.splitlines()]
        edged_text = para[para.index('[EDGED_TRAN_MATRIX]'):para.index(
            '\n\n[STATUS_BEGIN]')].replace('[EDGED_TRAN_MATRIX]\n', '')
        edged_matrices = [[[float(e) for e in row.split()]
                           for row in m.splitlines()]
                          for m in edged_text.split('\n\n')]
        # keyed by the state of the inducing node
        matrices = {
            GC.gemf_state_to_num['S']: outside_infection_matrix,
            GC.gemf_state_to_num['I']: edged_matrices[0]
        }

        # convert GEMF output to FAVITES transmission network format
        GC.transmission_num = 0
        GC.transmission_state = set()  # 'node' and 'time'
        NUM_INFECTED = len(seeds)
        GC.transmission_file = []
        with open(GC.gemf_out_dir + "/output.txt") as gemf_output:
            for line in gemf_output:
                # exactly ten whitespace-separated fields are expected; the
                # rate and the four count columns are not used here
                t, _rate, vNum, pre, post, _n0, _n1, _n2, _n3, lists = (
                    line.split())
                pre, post = int(pre), int(post)
                vName = num2node[int(vNum)].get_name()

                # rebuild the bracketed inducer lists from the last column
                lists = lists.split('],[')
                lists[0] += ']'
                lists[-1] = '[' + lists[-1]
                for i in range(1, len(lists) - 1):
                    if '[' not in lists[i]:
                        lists[i] = '[' + lists[i] + ']'
                # NOTE(security): eval() runs on text produced by the external
                # GEMF process; ast.literal_eval would be a safer parser if
                # this file could ever come from an untrusted source.
                lists = [eval(l) for l in lists]
                uNums = []
                for l in lists:
                    uNums.extend(l)

                if post == GC.gemf_state_to_num['R']:
                    NUM_INFECTED -= 1
                    GC.transmission_file.append((vName, vName, float(t)))
                    if GC.VERBOSE:
                        print(
                            '[%s] Uninfection\tTime %s\tNode %s (%s->%s)\tTotal Infected: %d\tTotal Uninfected: %d'
                            % (datetime.now(), t, vName,
                               GC.gemf_num_to_state[pre],
                               GC.gemf_num_to_state[post], NUM_INFECTED,
                               len(num2node) - NUM_INFECTED),
                            file=stderr)
                elif post == GC.gemf_state_to_num['E']:
                    NUM_INFECTED += 1
                    uNodes = [num2node[num] for num in uNums]
                    uRates = [
                        matrices[uNode.gemf_state][pre][post]
                        for uNode in uNodes
                    ]
                    die = {
                        uNodes[i]: GC.prob_exp_min(i, uRates)
                        for i in range(len(uNodes))
                    }
                    if len(die) != 0:
                        # roll die weighted by exponential infectious rates
                        u = GC.roll(die)
                        uName = u.get_name()
                        if GC.VERBOSE:
                            print(
                                '[%s] Infection\tTime %s\tFrom Node %s (%s)\tTo Node %s (%s->%s)\tTotal Infected: %d\tTotal Uninfected: %d'
                                % (datetime.now(), t, uName,
                                   GC.gemf_num_to_state[u.gemf_state], vName,
                                   GC.gemf_num_to_state[pre],
                                   GC.gemf_num_to_state[post], NUM_INFECTED,
                                   len(num2node) - NUM_INFECTED),
                                file=stderr)
                    else:  # no inducer listed: new seed
                        # (the former "elif len(die) == 0 or u == v" was
                        # always true here, so the "u == v" half never ran)
                        uName = None
                        if GC.VERBOSE:
                            print(
                                '[%s] Seed\tTime %s\tNode %s\tTotal Infected: %d\tTotal Uninfected: %d'
                                % (datetime.now(), t, vName, NUM_INFECTED,
                                   len(num2node) - NUM_INFECTED),
                                file=stderr)
                    GC.transmission_file.append((uName, vName, float(t)))
                elif GC.VERBOSE:
                    print('[%s] Transition\tTime %s\tNode %s (%s->%s)' %
                          (datetime.now(), t, vName,
                           GC.gemf_num_to_state[pre],
                           GC.gemf_num_to_state[post]),
                          file=stderr)
                # record the node's new state so later rate lookups see it
                num2node[int(vNum)].gemf_state = post
        assert len(
            GC.transmission_file) != 0, "GEMF didn't output any transmissions"
        GC.gemf_ready = True
Exemple #27
0
 def init():
     """Initialize this module by calling ``GC.pangea_module_check()``.

     NOTE(review): presumably this verifies that the PANGEA-related
     prerequisites are in place; confirm against the helper's definition.
     """
     GC.pangea_module_check()
Exemple #28
0
 def merge_trees():
     """Return whatever ``GC.merge_trees_seqgen()`` produces, unchanged."""
     merged = GC.merge_trees_seqgen()
     return merged
    def prep_GEMF():
        # check for gender attribute in contact network nodes
        for node in GC.contact_network.nodes_iter():
            attr = node.get_attribute()
            assert 'MALE' in attr or 'FEMALE' in attr, "All nodes must have MALE or FEMALE in their attributes"
            assert not ('MALE' in attr and 'FEMALE'
                        in attr), "Nodes cannot be both MALE and FEMALE"

        # write GEMF parameter file
        orig_dir = getcwd()
        GC.gemf_path = expanduser(GC.gemf_path.strip())
        makedirs(GC.gemf_out_dir, exist_ok=True)
        f = open(GC.gemf_out_dir + "/para.txt", 'w')
        f.write("[NODAL_TRAN_MATRIX]\n0\t" + str(GC.gon_ma_to_ms) +
                "\t0\t0\t0\t0\t0\t0\n" + str(GC.gon_ms_to_ma) + "\t0\t" +
                str(GC.gon_ms_to_mis_seed) + "\t" +
                str(GC.gon_ms_to_mia_seed) + "\t0\t0\t0\t0\n" +
                str(GC.gon_mis_to_ma) + "\t" + str(GC.gon_mis_to_ms) +
                "\t0\t0\t0\t0\t0\t0\n" + str(GC.gon_mia_to_ma) + "\t" +
                str(GC.gon_mia_to_ms) + "\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t" +
                str(GC.gon_fa_to_fs) + "\t0\t0\n0\t0\t0\t0\t" +
                str(GC.gon_fs_to_fa) + "\t0\t" + str(GC.gon_fs_to_fis_seed) +
                "\t" + str(GC.gon_fs_to_fia_seed) + "\n0\t0\t0\t0\t" +
                str(GC.gon_fis_to_fa) + "\t" + str(GC.gon_fis_to_fs) +
                "\t0\t0\n0\t0\t0\t0\t" + str(GC.gon_fia_to_fa) + "\t" +
                str(GC.gon_fia_to_fs) + "\t0\t0\n\n")  # Gonorrhea-specific
        f.write("[EDGED_TRAN_MATRIX]\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t" + str(GC.gon_ms_to_mis_by_mis) +
            "\t" + str(GC.gon_ms_to_mia_by_mis) +
            "\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t"
            + str(GC.gon_fs_to_fis_by_mis) + "\t" +
            str(GC.gon_fs_to_fia_by_mis) +
            "\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t" + str(GC.gon_ms_to_mis_by_mia) +
            "\t" + str(GC.gon_ms_to_mia_by_mia) +
            "\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t"
            + str(GC.gon_fs_to_fis_by_mia) + "\t" +
            str(GC.gon_fs_to_fia_by_mia) +
            "\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t" + str(GC.gon_ms_to_mis_by_fis) +
            "\t" + str(GC.gon_ms_to_mia_by_fis) +
            "\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t"
            + str(GC.gon_fs_to_fis_by_fis) + "\t" +
            str(GC.gon_fs_to_fia_by_fis) +
            "\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n\n")
        f.write(
            "0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t" + str(GC.gon_ms_to_mis_by_fia) +
            "\t" + str(GC.gon_ms_to_mia_by_fia) +
            "\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t"
            + str(GC.gon_fs_to_fis_by_fia) + "\t" +
            str(GC.gon_fs_to_fia_by_fia) +
            "\n0\t0\t0\t0\t0\t0\t0\t0\n0\t0\t0\t0\t0\t0\t0\t0\n\n")
        f.write("[STATUS_BEGIN]\n0\n\n")
        infectious = ['MIS', 'MIA', 'FIS', 'FIA']
        f.write("[INDUCER_LIST]\n" +
                ' '.join([str(GC.gemf_state_to_num[e])
                          for e in infectious]) + "\n\n")
        f.write("[SIM_ROUNDS]\n1\n\n")
        f.write("[INTERVAL_NUM]\n1\n\n")
        f.write("[MAX_TIME]\n" + str(GC.end_time) + "\n\n")
        f.write("[MAX_EVENTS]\n" + str(GC.C_INT_MAX) + "\n\n")
        f.write("[DIRECTED]\n1\n\n")
        f.write("[SHOW_INDUCER]\n1\n\n")
        f.write(
            "[DATA_FILE]\nnetwork.txt\nnetwork.txt\nnetwork.txt\nnetwork.txt\n\n"
        )
        f.write("[STATUS_FILE]\nstatus.txt\n\n")
        if GC.random_number_seed is not None:
            f.write("[RANDOM_SEED]\n%d\n\n" % GC.random_number_seed)
        f.write("[OUT_FILE]\noutput.txt")
        f.close()

        # write GEMF network file
        f = open(GC.gemf_out_dir + "/network.txt", 'w')
        num2node = {}
        node2num = {}
        for edge in GC.contact_network.edges_iter():
            u = edge.get_from()
            v = edge.get_to()
            if u not in node2num:
                num = len(node2num) + 1
                node2num[u] = num
                num2node[num] = u
            if v not in node2num:
                num = len(node2num) + 1
                node2num[v] = num
                num2node[num] = v
            f.write(str(node2num[u]) + '\t' + str(node2num[v]) + '\n')
        f.close()

        # write GEMF to original mapping
        f = open(GC.gemf_out_dir + "/gemf2orig.json", 'w')
        f.write(str({num: num2node[num].get_name() for num in num2node}))
        f.close()

        # write GEMF status file (MA = 0, MS = 1, MIS = 2, MIA = 3, FA = 4, FS = 5, FIS = 6, FIA = 7)
        f = open(GC.gemf_out_dir + "/status.txt", 'w')
        seeds = {seed
                 for seed in GC.seed_nodes
                 }  # seed nodes are assumed to be asymptomatic
        for num in sorted(num2node.keys()):
            node = num2node[num]
            attr = node.get_attribute()
            if node in seeds:
                if 'MALE' in attr:
                    f.write(str(GC.gemf_state_to_num['MIA']) +
                            "\n")  # Gonorrhea-specific
                    node.gemf_state = GC.gemf_state_to_num['MIA']
                else:
                    f.write(str(GC.gemf_state_to_num['FIA']) +
                            "\n")  # Gonorrhea-specific
                    node.gemf_state = GC.gemf_state_to_num['FIA']
            else:
                if 'MALE' in attr:
                    f.write(str(GC.gemf_state_to_num['MS']) +
                            "\n")  # Gonorrhea-specific
                    node.gemf_state = GC.gemf_state_to_num['MS']
                else:
                    f.write(str(GC.gemf_state_to_num['FS']) +
                            "\n")  # Gonorrhea-specific
                    node.gemf_state = GC.gemf_state_to_num['FS']
        f.close()

        # run GEMF
        chdir(GC.gemf_out_dir)
        try:
            call([GC.gemf_path], stdout=open("log.txt", 'w'))
        except FileNotFoundError:
            chdir(GC.START_DIR)
            assert False, "GEMF executable was not found: %s" % GC.gemf_path
        chdir(orig_dir)

        # reload edge-based matrices for ease of use
        matrices = open(GC.gemf_out_dir + '/para.txt').read().strip()
        outside_infection_matrix = [[
            float(e) for e in l.split()
        ] for l in matrices[matrices.index('[NODAL_TRAN_MATRIX]'):matrices.
                            index('\n\n[EDGED_TRAN_MATRIX]')].replace(
                                '[NODAL_TRAN_MATRIX]\n', '').splitlines()]
        matrices = [[
            [float(e) for e in l.split()] for l in m.splitlines()
        ] for m in matrices[matrices.index('[EDGED_TRAN_MATRIX]'):matrices.
                            index('\n\n[STATUS_BEGIN]')].replace(
                                '[EDGED_TRAN_MATRIX]\n', '').split('\n\n')]
        matrices = {
            GC.gemf_state_to_num[infectious[i]]: matrices[i]
            for i in range(len(infectious))
        }
        matrices[GC.gemf_state_to_num['MS']] = outside_infection_matrix
        matrices[GC.gemf_state_to_num['FS']] = outside_infection_matrix

        # convert GEMF output to FAVITES transmission network format
        GC.transmission_num = 0
        GC.transmission_state = set()  # 'node' and 'time'
        NUM_INFECTED = len(seeds)
        GC.transmission_file = []
        for line in open(GC.gemf_out_dir + "/output.txt"):
            t, rate, vNum, pre, post, num0, num1, num2, num3, num4, num5, num6, num7, lists = [
                i.strip() for i in line.split()
            ]
            pre, post = int(pre), int(post)
            vName = num2node[int(vNum)].get_name()
            lists = lists.split('],[')
            lists[0] += ']'
            lists[-1] = '[' + lists[-1]
            for i in range(1, len(lists) - 1):
                if '[' not in lists[i]:
                    lists[i] = '[' + lists[i] + ']'
            lists = [eval(l) for l in lists]
            uNums = []
            for l in lists:
                uNums.extend(l)
            if post in {
                    GC.gemf_state_to_num[i]
                    for i in ['MA', 'MS', 'FA', 'FS']
            } and pre in {
                    GC.gemf_state_to_num[i]
                    for i in ['MIS', 'MIA', 'FIS', 'FIA']
            }:
                NUM_INFECTED -= 1
                GC.transmission_file.append((vName, vName, float(t)))
                if GC.VERBOSE:
                    print(
                        '[%s] Uninfection\tTime %s\tNode %s (%s->%s)\tTotal Infected: %d\tTotal Uninfected: %d'
                        % (datetime.now(), t, vName, GC.gemf_num_to_state[pre],
                           GC.gemf_num_to_state[post], NUM_INFECTED,
                           len(num2node) - NUM_INFECTED),
                        file=stderr)
            elif GC.gemf_num_to_state[pre] in [
                    'MS', 'FS'
            ] and GC.gemf_num_to_state[post] in ['MIS', 'MIA', 'FIS', 'FIA']:
                NUM_INFECTED += 1
                v = num2node[int(vNum)]
                uNodes = [num2node[num] for num in uNums]
                uRates = [
                    matrices[uNode.gemf_state][pre][post] for uNode in uNodes
                ]
                die = {
                    uNodes[i]: GC.prob_exp_min(i, uRates)
                    for i in range(len(uNodes))
                }
                if len(die) != 0:
                    u = GC.roll(
                        die
                    )  # roll die weighted by exponential infectious rates
                    uName = u.get_name()
                    if GC.VERBOSE:
                        print(
                            '[%s] Infection\tTime %s\tFrom Node %s (%s)\tTo Node %s (%s->%s)\tTotal Infected: %d\tTotal Uninfected: %d'
                            % (datetime.now(), t, uName,
                               GC.gemf_num_to_state[u.gemf_state], vName,
                               GC.gemf_num_to_state[pre],
                               GC.gemf_num_to_state[post], NUM_INFECTED,
                               len(num2node) - NUM_INFECTED),
                            file=stderr)
                elif len(die) == 0 or u == v:  # new seed
                    uName = None
                    if GC.VERBOSE:
                        print(
                            '[%s] Seed\tTime %s\tNode %s\tTotal Infected: %d\tTotal Uninfected: %d'
                            % (datetime.now(), t, vName, NUM_INFECTED,
                               len(num2node) - NUM_INFECTED),
                            file=stderr)
                GC.transmission_file.append((uName, v.get_name(), float(t)))
            elif GC.VERBOSE:
                print('[%s] Transition\tTime %s\tNode %s (%s->%s)' %
                      (datetime.now(), t, vName, GC.gemf_num_to_state[pre],
                       GC.gemf_num_to_state[post]),
                      file=stderr)
            num2node[int(vNum)].gemf_state = post
        assert len(
            GC.transmission_file) != 0, "GEMF didn't output any transmissions"
        GC.gemf_ready = True
 def init():
     """Initialize the modules this one builds on and validate ``GC.seed_height``.

     Runs ``SeedSequence_Virus.init()`` and
     ``SequenceEvolution_GTRGammaSeqGen.init()``, converts ``GC.seed_height``
     to ``float`` and stores the converted value back on ``GC``, asserts that
     it is strictly positive, and calls ``GC.check_seqgen_executable()``.

     Raises:
         AssertionError: if the converted ``GC.seed_height`` is not > 0.
         ValueError / TypeError: if ``GC.seed_height`` cannot be converted by
             ``float()``.
     """
     # Initialize the seed-sequence and sequence-evolution modules first.
     SeedSequence_Virus.init()
     SequenceEvolution_GTRGammaSeqGen.init()

     # Store the numeric form on GC before validating it, so the global is
     # already a float even when the positivity check below fails.
     height = float(GC.seed_height)
     GC.seed_height = height
     assert height > 0, "seed_height must be positive"

     GC.check_seqgen_executable()