def main():
    """Command-line entry point for the per-state TPR/FPR calculation.

    Expects exactly six command-line arguments, in this order:
        1. directory of predictions (checked with helper.check_dir_exist)
        2. directory of true segmentations (checked with helper.check_dir_exist)
        3. cell type identifier, forwarded unchanged
        4. output directory (handed to helper.make_dir)
        5. number of chromHMM states (parsed by helper.get_command_line_integer)
        6. number of score bins (parsed by helper.get_command_line_integer)

    The path '<output dir>/tpr_fpr_all_states.txt.gz' is passed to
    calculate_tpr_fpr as its save location.
    """
    argv = sys.argv
    if len(argv) != 7:
        # NOTE(review): usage() presumably terminates the program -- confirm.
        usage()

    prediction_folder = argv[1]
    helper.check_dir_exist(prediction_folder)
    truth_folder = argv[2]
    helper.check_dir_exist(truth_folder)
    cell_type = argv[3]
    output_folder = argv[4]
    helper.make_dir(output_folder)
    n_states = helper.get_command_line_integer(argv[5])
    n_bins = helper.get_command_line_integer(argv[6])
    print("Done getting command line arguments")

    # get_score_bins returns two lists; only the first one (the reversed
    # lower bounds) is consumed below, the second is not used in this function.
    lower_bounds_reversed, _upper_bounds = get_score_bins(n_bins)
    print("Get the bounds of posterior probabilities that we will set for each of the bin")

    # Accumulate true/false positive counts over every region of the genome.
    tp_fp_totals = get_tp_fp_data_all_regions(
        truth_folder,
        prediction_folder,
        lower_bounds_reversed,
        cell_type,
        n_states,
    )
    print("Done processing all the files corresponding to all the regions in the genome")

    # Turn the counts into per-state true/false positive rates.
    result_path = os.path.join(output_folder, 'tpr_fpr_all_states.txt.gz')
    calculate_tpr_fpr(tp_fp_totals, n_states, result_path)
    print("Done calculating true positive rates and false positive rates in all bins")
def main():
    """Command-line entry point: summary statistics across cell types.

    Expects exactly five command-line arguments, in this order:
        1. cell-group directory (checked with helper.check_dir_exist)
        2. output directory (handed to helper.make_dir)
        3. number of chromHMM models (parsed by helper.get_command_line_integer)
        4. number of score bins (parsed by helper.get_command_line_integer)
        5. file listing the cell types, one per line

    All parsed values are forwarded to calculate_summary_staistics_across_ct.
    """
    if len(sys.argv) != 6:
        # NOTE(review): usage() presumably terminates the program -- confirm.
        usage()

    cg_dir = sys.argv[1]
    helper.check_dir_exist(cg_dir)
    out_dir = sys.argv[2]
    helper.make_dir(out_dir)
    num_chromHMM_model = helper.get_command_line_integer(sys.argv[3])
    num_score_bins = helper.get_command_line_integer(sys.argv[4])

    cell_type_list_fn = sys.argv[5]
    # Validate the list file BEFORE reading it; previously the check ran only
    # after the file had already been read, so it could never guard the read.
    helper.check_file_exist(cell_type_list_fn)
    ct_list = helper.get_list_from_line_seperated_file(cell_type_list_fn)
    print("Done getting command line arguments")

    calculate_summary_staistics_across_ct(cg_dir, out_dir, num_chromHMM_model, num_score_bins, ct_list)
    print("Done!")
def main():
    """Command-line entry point for the parallel histogram calculation.

    Expects exactly three command-line arguments, in this order:
        1. directory of averaged state data (checked with helper.check_dir_exist)
        2. output directory (handed to helper.make_dir)
        3. number of chromHMM states (parsed by helper.get_command_line_integer)

    The three values are forwarded to calculate_hist_parallel.
    """
    argv = sys.argv
    if len(argv) != 4:
        # NOTE(review): usage() presumably terminates the program -- confirm.
        usage()

    input_folder = argv[1]
    helper.check_dir_exist(input_folder)

    output_folder = argv[2]
    helper.make_dir(output_folder)

    n_states = helper.get_command_line_integer(argv[3])
    print("Done getting command line arguments")

    calculate_hist_parallel(input_folder, output_folder, n_states)
def main():
    """Command-line entry point: average histograms across all cell types.

    Expects exactly three command-line arguments, in this order:
        1. directory holding the per-cell-type histograms
           (checked with helper.check_dir_exist)
        2. output directory (handed to helper.make_dir)
        3. number of chromHMM states (parsed by helper.get_command_line_integer)

    The three values are forwarded to average_histogram_across_all_ct.
    """
    argv = sys.argv
    if len(argv) != 4:
        # NOTE(review): usage() presumably terminates the program -- confirm.
        usage()

    hist_folder = argv[1]
    helper.check_dir_exist(hist_folder)

    output_folder = argv[2]
    helper.make_dir(output_folder)

    n_states = helper.get_command_line_integer(argv[3])
    print("Done getting command line arguments")

    average_histogram_across_all_ct(hist_folder, output_folder, n_states)
    print("Done!")
def main():
    """Command-line entry point for the per-cell-type histogram calculation.

    Expects exactly six command-line arguments, in this order:
        1. directory of the cell type's position files
           (checked with helper.check_dir_exist)
        2. output directory (handed to helper.make_dir)
        3. number of chromHMM states (parsed by helper.get_command_line_integer)
        4. cell type name, forwarded unchanged
        5. prefix of the position file names, forwarded unchanged
        6. suffix of the position file names, forwarded unchanged

    All six values are forwarded to calculate_hist_parallel.
    """
    argv = sys.argv
    if len(argv) != 7:
        # NOTE(review): usage() presumably terminates the program -- confirm.
        usage()

    input_folder = argv[1]
    helper.check_dir_exist(input_folder)

    output_folder = argv[2]
    helper.make_dir(output_folder)

    n_states = helper.get_command_line_integer(argv[3])
    cell_type = argv[4]
    fn_prefix = argv[5]
    fn_suffix = argv[6]
    print("Done getting command line arguments")

    calculate_hist_parallel(
        input_folder,
        output_folder,
        n_states,
        cell_type,
        fn_prefix,
        fn_suffix,
    )
def main():
    """Command-line entry point: average state assignment for a cell group.

    Expects exactly six command-line arguments, in this order:
        1. cell-group directory (checked with helper.check_dir_exist)
        2. output directory (handed to helper.make_dir)
        3. state annotation file (checked with helper.check_file_exist, then
           loaded with read_state_annot_fn)
        4. file listing the cell types, one per line
           (checked with helper.check_file_exist)
        5. number of chromHMM states (parsed by helper.get_command_line_integer)
        6. IGV track name

    Only get_average_state_assign_matrix is actually run. The annotation
    table, the IGV track name and the list of positions to draw are prepared
    but feed only the IGV export call, which is currently commented out.
    """
    argv = sys.argv
    if len(argv) != 7:
        # NOTE(review): usage() presumably terminates the program -- confirm.
        usage()

    cg_dir = argv[1]
    helper.check_dir_exist(cg_dir)

    out_dir = argv[2]
    helper.make_dir(out_dir)

    state_annotation_fn = argv[3]
    helper.check_file_exist(state_annotation_fn)
    state_annot_df = read_state_annot_fn(state_annotation_fn)

    cell_type_file = argv[4]
    helper.check_file_exist(cell_type_file)
    cell_types = helper.get_list_from_line_seperated_file(cell_type_file)

    n_states = helper.get_command_line_integer(argv[5])
    igv_track_name = argv[6]
    print("Done getting command line arguments")

    get_average_state_assign_matrix(cg_dir, cell_types, n_states, out_dir)
    print("Done getting the representative state semgentation for the cellg group")

    # Inputs for the disabled IGV export below.
    draw_genome_pos_list = ['chr5_15']
    # create_igv_format_bed(out_dir, state_annot_df, draw_genome_pos_list, igv_track_name)
    print("Done!")
def main():
    """Command-line entry point for cross-validation on posterior data.

    Expects exactly six command-line arguments, in this order:
        1. sampled training data file (checked with helper.check_file_exist)
        2. output directory (handed to helper.make_dir)
        3. folder with the posteriors of all cell types
           (checked with helper.check_dir_exist)
        4. number of chromHMM states (parsed by helper.get_command_line_integer)
        5. the cell type used for validation
        6. file listing all cell types (checked with helper.check_file_exist)

    The training cell-type list comes from get_all_train_ct_list and is
    printed before being handed to call_cross_validation_functions.
    """
    if len(sys.argv) != 7:
        # NOTE(review): usage() presumably terminates the program -- confirm.
        usage()

    train_sampled_data_fn = sys.argv[1]
    helper.check_file_exist(train_sampled_data_fn)
    outDir = sys.argv[2]
    helper.make_dir(outDir)
    all_ct_posterior_folder = sys.argv[3]
    helper.check_dir_exist(all_ct_posterior_folder)
    num_chromHMM_state = helper.get_command_line_integer(sys.argv[4])
    validate_ct = sys.argv[5]
    all_ct_list_fn = sys.argv[6]
    # Validate the cell-type list file like the other input paths, so a wrong
    # path is reported up front instead of failing inside get_all_train_ct_list.
    helper.check_file_exist(all_ct_list_fn)
    print("Done getting command line arguments")

    # get all cell types
    ct_list = get_all_train_ct_list(all_ct_list_fn, validate_ct)
    print(ct_list)

    # call all cell types
    call_cross_validation_functions(
        validate_ct,
        ct_list,
        outDir,
        train_sampled_data_fn,
        all_ct_posterior_folder,
        num_chromHMM_state,
    )
def main():
    """Command-line entry point for cross-validation on segmentation data.

    Expects exactly seven command-line arguments, in this order:
        1. sampled training data file (checked with helper.check_file_exist)
        2. output directory (handed to helper.make_dir)
        3. folder with the segmentations of all cell types
           (checked with helper.check_dir_exist)
        4. number of chromHMM states (parsed by helper.get_command_line_integer)
        5. the cell type used for validation
        6. training mode, forwarded unchanged
        7. file listing all cell types (checked with helper.check_file_exist)

    The genomic position list (from get_genomic_positions_list) and the
    training cell-type list (from get_all_train_ct_list) are handed to
    call_cross_validation_functions together with the parsed arguments.
    """
    if len(sys.argv) != 8:
        # NOTE(review): usage() presumably terminates the program -- confirm.
        usage()

    train_sampled_data_fn = sys.argv[1]
    helper.check_file_exist(train_sampled_data_fn)
    outDir = sys.argv[2]
    helper.make_dir(outDir)
    all_ct_segment_folder = sys.argv[3]
    helper.check_dir_exist(all_ct_segment_folder)
    num_chromHMM_state = helper.get_command_line_integer(sys.argv[4])
    validate_ct = sys.argv[5]
    train_mode = sys.argv[6]
    all_ct_list_fn = sys.argv[7]
    # Validate the cell-type list file like the other input paths, and do it
    # before the genomic-position scan so a wrong path is caught first.
    helper.check_file_exist(all_ct_list_fn)
    print("Done getting command line arguments")

    # get the list of all genomic positions used to segment the genome for our
    # model training (we exclude chromosome Y in all analysis)
    gen_pos_list = get_genomic_positions_list(all_ct_segment_folder)

    # get all cell types
    ct_list = get_all_train_ct_list(all_ct_list_fn, validate_ct)

    # call all cell types
    call_cross_validation_functions(
        validate_ct,
        ct_list,
        outDir,
        train_sampled_data_fn,
        all_ct_segment_folder,
        num_chromHMM_state,
        gen_pos_list,
        train_mode,
    )