chrom_dict[('2', 0)] = chpr.ChrmProf("RRRRRSSSSSTTTTTUUUUUVVVVV") chrom_dict[('2', 1)] = chpr.ChrmProf("RRRRRSSSSSTTTTTUUUUUVVVVV") chrom_dict[('3', 0)] = chpr.ChrmProf("XXXXYYYYZZZZ") chrom_dict[('3', 1)] = chpr.ChrmProf("XXXXYYYYZZZZ") constants_dict = dict() constants_dict['mut_types'] = ['amp', 'rem', 'inv'] constants_dict['mut_size_mean'] = 5 constants_dict['mut_size_var'] = 2 constants_dict['mut_count_mean'] = 3 constants_dict['mut_count_var'] = 1 constants_dict['cov'] = 20 constants_dict['read_len'] = 5 gp = gnpr.GeneProf(chrom_dict, constants_dict) t = sm.Tree(edge_list, gp) # t.print_tree_info() t.print_node_relation() print('root node index:', t.rootNode.index) print('inorder traversal:', t.in_order_traversal(t.rootNode)) print('preorder traversal:', t.pre_order_traversal(t.rootNode)) print('\n\n') print('test add mutations:') t.add_mutations_along_edges(t.rootNode) # t.print_tree_info() t.print_node_info()
def main(argv):
    """Simulate one data set per patient and write the results to disk.

    For each of ``constants_dict['num_patients']`` patients this builds a
    random tree, adds mutations along its edges, derives the U, C and F
    matrices plus the breakpoint/segment index tables, and writes ``T.dot``,
    ``U.tsv``, ``C.tsv``, ``F.tsv`` and the ``generate_s`` output into
    ``<output_folder>/patient<idx>``.

    Args:
        argv: raw command-line arguments, handed to ``get_args``.  The parsed
            result must provide the keys ``'m'``, ``'n'``, ``'num_mutes'``,
            ``'size_mutes'``, ``'meta_file'`` and ``'output_folder'``.

    Raises:
        ValueError: if ``n`` is smaller than 2, because the expected number
            of mutations per edge is ``num_mutes / (2 * n - 2)`` and the
            divisor would be zero (or negative).

    Warning:
        An existing ``<output_folder>/patient<idx>`` directory is deleted
        recursively before new files are written.
    """
    args = get_args(argv)

    # input arguments
    m = args['m']
    n = args['n']
    num_mutes = args['num_mutes']
    size_mutes = args['size_mutes']
    metaFile = args['meta_file']
    output_folder = args['output_folder']

    # Fail early with a readable message instead of a bare ZeroDivisionError
    # from the exp_mut_count computation below.
    if n < 2:
        raise ValueError(
            'n must be at least 2 (got {!r}): exp_mut_count divides by '
            '2 * n - 2'.format(n))

    constants_dict = {
        'mut_types': ['amp', 'rem', 'inv'],
        # default exp_mut_size is 5745000
        'exp_mut_size': size_mutes,
        # num_mutes spread over 2 * n - 2 (presumably the number of tree
        # edges -- confirm against Tree).  NOTE(review): this is true
        # division on Python 3 but floor division for ints on Python 2
        # unless `from __future__ import division` is active.
        'exp_mut_count': num_mutes / (2 * n - 2),
        'cov': 20,
        'read_len': 300,
        'num_patients': 5,
    }

    # TODO: remove chrom_dict later.
    # Keys look like (chromosome name, copy index); the lengths appear to be
    # those of human chromosomes 1-3 (GRCh38) -- confirm.
    # NOTE(review): the same ChrmProf objects are passed to every patient's
    # GeneProf below; verify that GeneProf/Tree copy them before mutating.
    chrom_dict = {
        ('1', 0): chpr.ChrmProf(248956422),
        ('1', 1): chpr.ChrmProf(248956422),
        ('2', 0): chpr.ChrmProf(242193529),
        ('2', 1): chpr.ChrmProf(242193529),
        ('3', 0): chpr.ChrmProf(198295559),
        ('3', 1): chpr.ChrmProf(198295559),
    }

    for patient_idx in range(1, 1 + constants_dict['num_patients']):
        outputFolder = os.path.join(output_folder,
                                    'patient' + str(patient_idx))

        # clean up existing files under outputFolder
        if os.path.exists(outputFolder):
            shutil.rmtree(outputFolder)
        os.makedirs(outputFolder)

        # The calls below stay in their original order so that any random
        # state they consume is drawn in the same sequence as before.
        tree_list = random_get_tree(n)
        edge_list = get_edges(tree_list)
        gp = gnpr.GeneProf(chrom_dict, constants_dict)
        t = Tree(edge_list, gp)

        geneprof_list = list()
        t.add_mutations_along_edges(t.rootNode, geneprof_list)

        generate_t(t, 'T.dot', outputFolder)

        U = random_get_usages(m, 2 * n - 1)
        l, sv_cn_idx_dict = get_bp_copy_num_idx_dict(t, n, constants_dict)
        r, seg_cn_idx_dict, seg_bgn_idx_dict, seg_end_idx_dict = \
            get_seg_copy_num_idx_dict(t, n)
        C = generate_c(t, n, constants_dict)
        c_p, c_m = generate_seg_cp_paternal(t, n)
        F = generate_f(U, C)
        a, h, mate_dict = get_a_h_mate_dict(t, n, constants_dict)

        output_tsv(U, '/U.tsv', outputFolder)
        output_tsv(C, '/C.tsv', outputFolder)
        output_tsv(F, '/F.tsv', outputFolder)

        generate_s(metaFile, t, l, sv_cn_idx_dict, r, seg_cn_idx_dict,
                   seg_bgn_idx_dict, seg_end_idx_dict, F, U, C, c_p, c_m,
                   a, h, mate_dict, outputFolder)