Ejemplo n.º 1
0
def main():
    """Command-line driver: bin scores, count TP/FP, then compute TPR/FPR.

    Expects exactly six arguments after the script name, in this order:
    prediction directory, true-segmentation directory, ``ct``
    (presumably a cell-type name -- confirm against usage()), output
    directory, number of ChromHMM states, and number of score bins.

    When the argument count is wrong, ``usage()`` is called (defined
    elsewhere; presumably it prints help and exits).
    """
    argv = sys.argv
    if len(argv) != 7:
        usage()

    # Both input directories are validated right after being read.
    pred_dir = argv[1]
    helper.check_dir_exist(pred_dir)
    true_segment_dir = argv[2]
    helper.check_dir_exist(true_segment_dir)

    cell_type = argv[3]
    out_dir = argv[4]
    helper.make_dir(out_dir)

    n_states = helper.get_command_line_integer(argv[5])
    n_score_bins = helper.get_command_line_integer(argv[6])
    print("Done getting command line arguments")

    # Score-bin bounds; only the first returned list is used below.
    score_bounds = get_score_bins(n_score_bins)
    reverse_lower_bound_list, _upper_bound_score_list = score_bounds
    print("Get the bounds of posterior probabilities that we will set for each of the bin")

    # Counts of true positives, false positives, etc. across all regions
    # in the genome.
    total_tp_fp_df = get_tp_fp_data_all_regions(
        true_segment_dir,
        pred_dir,
        reverse_lower_bound_list,
        cell_type,
        n_states,
    )
    print("Done processing all the files corresponding to all the regions in the genome")

    # TPR and FPR values for each state; the output path is handed to
    # calculate_tpr_fpr as its save location.
    save_fn = os.path.join(out_dir, 'tpr_fpr_all_states.txt.gz')
    calculate_tpr_fpr(total_tp_fp_df, n_states, save_fn)
    print("Done calculating true positive rates and false positive rates in all bins")
def main():
    """Command-line driver for summary statistics across cell types.

    Expects exactly five arguments after the script name, in this order:
    ``cg_dir`` (an existing directory), output directory, number of
    ChromHMM models, number of score bins, and the path of a
    line-separated file listing cell types.

    When the argument count is wrong, ``usage()`` is called (defined
    elsewhere; presumably it prints help and exits).
    """
    if len(sys.argv) != 6:
        usage()
    cg_dir = sys.argv[1]
    helper.check_dir_exist(cg_dir)
    out_dir = sys.argv[2]
    helper.make_dir(out_dir)
    num_chromHMM_model = helper.get_command_line_integer(sys.argv[3])
    num_score_bins = helper.get_command_line_integer(sys.argv[4])
    cell_type_list_fn = sys.argv[5]
    # Validate the list file BEFORE reading it; checking afterwards (as
    # the code previously did) can never catch a missing file, because
    # the read would already have failed.
    helper.check_file_exist(cell_type_list_fn)
    ct_list = helper.get_list_from_line_seperated_file(cell_type_list_fn)
    print("Done getting command line arguments")
    calculate_summary_staistics_across_ct(
        cg_dir, out_dir, num_chromHMM_model, num_score_bins, ct_list
    )
    print("Done!")
Ejemplo n.º 3
0
def main():
    """Command-line driver that hands its inputs to calculate_hist_parallel.

    Expects exactly three arguments after the script name: an existing
    ``avg_state_dir`` directory, an output directory, and the number of
    ChromHMM states.

    When the argument count is wrong, ``usage()`` is called (defined
    elsewhere; presumably it prints help and exits).
    """
    argv = sys.argv
    if len(argv) != 4:
        usage()

    # Input directory is validated as soon as it is read.
    avg_state_dir = argv[1]
    helper.check_dir_exist(avg_state_dir)

    out_dir = argv[2]
    helper.make_dir(out_dir)

    n_states = helper.get_command_line_integer(argv[3])
    print("Done getting command line arguments")

    calculate_hist_parallel(avg_state_dir, out_dir, n_states)
Ejemplo n.º 4
0
def main():
    """Command-line driver for average_histogram_across_all_ct.

    Expects exactly three arguments after the script name: an existing
    ``all_ct_hist_dir`` directory, an output directory, and the number
    of ChromHMM states.

    When the argument count is wrong, ``usage()`` is called (defined
    elsewhere; presumably it prints help and exits).
    """
    argv = sys.argv
    if len(argv) != 4:
        usage()

    # Input directory is validated as soon as it is read.
    all_ct_hist_dir = argv[1]
    helper.check_dir_exist(all_ct_hist_dir)

    out_dir = argv[2]
    helper.make_dir(out_dir)

    n_states = helper.get_command_line_integer(argv[3])
    print("Done getting command line arguments")

    average_histogram_across_all_ct(all_ct_hist_dir, out_dir, n_states)
    print("Done!")
Ejemplo n.º 5
0
def main():
    """Command-line driver that hands its inputs to calculate_hist_parallel.

    Expects exactly six arguments after the script name, in this order:
    an existing ``ct_pos_dir`` directory, output directory, number of
    ChromHMM states, ``ct_name``, ``prefix_pos_fn`` and
    ``suffix_pos_fn`` (the last three are passed through unmodified).

    When the argument count is wrong, ``usage()`` is called (defined
    elsewhere; presumably it prints help and exits).
    """
    argv = sys.argv
    if len(argv) != 7:
        usage()

    # Input directory is validated as soon as it is read.
    ct_pos_dir = argv[1]
    helper.check_dir_exist(ct_pos_dir)

    out_dir = argv[2]
    helper.make_dir(out_dir)

    n_states = helper.get_command_line_integer(argv[3])
    ct_name = argv[4]
    prefix_pos_fn = argv[5]
    suffix_pos_fn = argv[6]
    print("Done getting command line arguments")

    calculate_hist_parallel(
        ct_pos_dir,
        out_dir,
        n_states,
        ct_name,
        prefix_pos_fn,
        suffix_pos_fn,
    )
Ejemplo n.º 6
0
def main():
    """Command-line driver for get_average_state_assign_matrix.

    Expects exactly six arguments after the script name, in this order:
    an existing ``cg_dir`` directory, output directory, state-annotation
    file, cell-type list file, number of ChromHMM states, and an IGV
    track name.

    When the argument count is wrong, ``usage()`` is called (defined
    elsewhere; presumably it prints help and exits).

    NOTE(review): the IGV BED export at the end is currently disabled,
    so ``state_annot_df``, ``igv_track_name`` and
    ``draw_genome_pos_list`` are computed but not otherwise used here.
    """
    argv = sys.argv
    if len(argv) != 7:
        usage()

    cg_dir = argv[1]
    helper.check_dir_exist(cg_dir)

    out_dir = argv[2]
    helper.make_dir(out_dir)

    # The annotation file is still checked and parsed even though its
    # only consumer below is disabled.
    state_annotation_fn = argv[3]
    helper.check_file_exist(state_annotation_fn)
    state_annot_df = read_state_annot_fn(state_annotation_fn)

    ct_list_fn = argv[4]
    helper.check_file_exist(ct_list_fn)
    ct_list = helper.get_list_from_line_seperated_file(ct_list_fn)

    n_states = helper.get_command_line_integer(argv[5])
    igv_track_name = argv[6]
    print("Done getting command line arguments")

    get_average_state_assign_matrix(cg_dir, ct_list, n_states, out_dir)
    print("Done getting the representative state semgentation for the cellg group")

    draw_genome_pos_list = ['chr5_15']
    # Disabled IGV export, kept for reference:
    # create_igv_format_bed(out_dir, state_annot_df, draw_genome_pos_list, igv_track_name)
    print("Done!")
def main():
    """Command-line driver for cross-validation on posterior data.

    Expects exactly six arguments after the script name, in this order:
    sampled training-data file, output directory, an existing
    ``all_ct_posterior_folder`` directory, number of ChromHMM states,
    ``validate_ct`` and the path of the file listing all cell types.

    When the argument count is wrong, ``usage()`` is called (defined
    elsewhere; presumably it prints help and exits).
    """
    if len(sys.argv) != 7:
        usage()
    train_sampled_data_fn = sys.argv[1]
    helper.check_file_exist(train_sampled_data_fn)
    outDir = sys.argv[2]
    helper.make_dir(outDir)
    all_ct_posterior_folder = sys.argv[3]
    helper.check_dir_exist(all_ct_posterior_folder)
    num_chromHMM_state = helper.get_command_line_integer(sys.argv[4])
    validate_ct = sys.argv[5]
    all_ct_list_fn = sys.argv[6]
    # Validate the cell-type list path up front, the same way the other
    # input paths are validated, so a bad path is reported during
    # argument parsing rather than later in the run.
    helper.check_file_exist(all_ct_list_fn)
    print("Done getting command line arguments")
    # Cell-type list derived from the list file and validate_ct.
    ct_list = get_all_train_ct_list(all_ct_list_fn, validate_ct)
    print(ct_list)
    # Run the cross-validation functions with the resulting list.
    call_cross_validation_functions(validate_ct, ct_list, outDir,
                                    train_sampled_data_fn,
                                    all_ct_posterior_folder,
                                    num_chromHMM_state)
Ejemplo n.º 8
0
def main():
    """Command-line driver for cross-validation on segmentation data.

    Expects exactly seven arguments after the script name, in this
    order: sampled training-data file, output directory, an existing
    ``all_ct_segment_folder`` directory, number of ChromHMM states,
    ``validate_ct``, ``train_mode`` and the path of the file listing all
    cell types.

    When the argument count is wrong, ``usage()`` is called (defined
    elsewhere; presumably it prints help and exits).
    """
    if len(sys.argv) != 8:
        usage()
    train_sampled_data_fn = sys.argv[1]
    helper.check_file_exist(train_sampled_data_fn)
    outDir = sys.argv[2]
    helper.make_dir(outDir)
    all_ct_segment_folder = sys.argv[3]
    helper.check_dir_exist(all_ct_segment_folder)
    num_chromHMM_state = helper.get_command_line_integer(sys.argv[4])
    validate_ct = sys.argv[5]
    train_mode = sys.argv[6]
    all_ct_list_fn = sys.argv[7]
    # Validate the cell-type list path up front, the same way the other
    # input paths are validated, so a bad path is reported before the
    # genomic-position scan below instead of after it.
    helper.check_file_exist(all_ct_list_fn)
    print("Done getting command line arguments")
    # List of all genomic positions used to segment the genome for model
    # training (chromosome Y is excluded in all analysis).
    gen_pos_list = get_genomic_positions_list(all_ct_segment_folder)
    # Cell-type list derived from the list file and validate_ct.
    ct_list = get_all_train_ct_list(all_ct_list_fn, validate_ct)
    # Run the cross-validation functions with the resulting lists.
    call_cross_validation_functions(validate_ct, ct_list, outDir,
                                    train_sampled_data_fn,
                                    all_ct_segment_folder, num_chromHMM_state,
                                    gen_pos_list, train_mode)