def main(species, outPath, base_path, samples):
    """Build the count tables for ``samples`` and run the R plotting scripts.

    The sample paths are resolved against 'TAB_lenDist_summary.txt', the raw
    counts table is written, and four normalised CSV tables are written to
    ``outPath``.  The last table written ('RPMMM_mirs_over_50.csv') is then
    passed to ``create_boxcox_trans`` and to the ``sample_correlation.R`` and
    ``pca.R`` scripts located next to this file.  When
    ``<base_path>/conditions.csv`` exists, its path is appended as a second
    argument to both R scripts.

    NOTE(review): the helpers called here are defined outside this block;
    their exact semantics (and whether they mutate their inputs) should be
    confirmed before reordering any of the calls below.
    """
    summary_files = f_utils.set_path_to_files_glob(
        samples, 'TAB_lenDist_summary.txt')
    (counts, windows, totals,
     sub_counts, sub_windows, sub_totals) = get_data_from_file(
         summary_files, species)
    write_raw_counts_table(counts, windows, outPath)

    # First pair of tables: everything, then entries passing the 100 cut-off.
    norm_all = windows_to_norm_counts(counts, windows, totals)
    norm_over_100 = mirs_over_thresh(norm_all, 100, windows, species)
    write_output(norm_all, 'RPMM_all.csv', outPath)
    write_output(norm_over_100, 'RPMM_mirs_over_100.csv', outPath)

    # Second pair of tables, built from the second triple returned above.
    norm_sub = windows_to_norm_counts(sub_counts, sub_windows, sub_totals)
    norm_sub_over_50 = mirs_over_thresh(norm_sub, 50, sub_windows, species)
    write_output(norm_sub, 'RPMMM_all.csv', outPath)
    # Only the name of this final table feeds the downstream steps.
    final_table = write_output(
        norm_sub_over_50, 'RPMMM_mirs_over_50.csv', outPath)
    print('DONE!\n')

    create_boxcox_trans(final_table)

    # Both R scripts take the same arguments: the table, plus the conditions
    # file when one is present under base_path.
    conditions_csv = '{}/conditions.csv'.format(base_path)
    if os.path.exists(conditions_csv):
        r_args = '{} {}'.format(final_table, conditions_csv)
    else:
        r_args = '{}'.format(final_table)
    script_dir = os.path.dirname(__file__)

    print('Generating sample correlation heatmap...')
    os.system('Rscript {}/sample_correlation.R {}'.format(script_dir, r_args))
    print('DONE!\n')
    print('Generating PCA...')
    os.system('Rscript {}/pca.R {}'.format(script_dir, r_args))
def main(basePath, outPath, samples):
    """Write the mapping-statistics file for ``samples`` and render its image.

    Sample paths are resolved against 'stats', a statistics dictionary is
    built and extended, and the result is written to ``outPath`` together
    with the line headers and whatever ``check_for_conditions_file`` returns
    for ``basePath``.  Finally ``create_map_stats_image`` is invoked with
    this file's directory and ``outPath``.
    """
    stats_files = f_utils.set_path_to_files_glob(samples, 'stats')
    stats = calculate_additional_stats(mapping_stats_dict(stats_files))
    headers = output_line_headers()
    conditions = check_for_conditions_file(basePath)
    write_mapping_file(stats, outPath, headers, conditions)

    script_dir = os.path.dirname(__file__)
    create_map_stats_image(script_dir, outPath)
def main(species, outPath, base_path, name, threshold, samples):
    """Write normalised count tables for ``samples`` under the prefix ``name``.

    Sample paths are resolved against 'TAB_3p_summary_yRNA.txt'.  The table
    '<name>_all.csv' is always written to ``outPath``; when ``threshold`` is
    truthy, '<name>_over_<threshold>.csv' is written as well.

    ``base_path`` is accepted but not used by this function.
    """
    summary_files = f_utils.set_path_to_files_glob(
        samples, 'TAB_3p_summary_yRNA.txt')
    counts, windows, totals = get_data_from_file(summary_files, species)

    normalised = windows_to_norm_counts(counts, windows, totals)
    write_output(normalised, '{}_all.csv'.format(name), outPath)

    # A falsy threshold (None, 0, '') means no filtered table is requested.
    if not threshold:
        return

    filtered = windows_over_thresh(normalised, threshold, windows, species)
    write_output(filtered, '{}_over_{}.csv'.format(name, threshold), outPath)
def main(species, outPath, samples):
    """Write four normalised count tables and run the correlation R script.

    Sample paths are resolved against 'TAB_lenDist_summary.txt'.  Four CSV
    tables are written to ``outPath``; the name returned for the last one
    ('RPMMM_mirs_over_50.csv') is passed to ``sample_correlation.R``, which
    is expected to sit in the same directory as this file.

    NOTE(review): the helpers are defined outside this block; the call order
    below is kept as-is in case any of them mutates its inputs.
    """
    summary_files = f_utils.set_path_to_files_glob(
        samples, 'TAB_lenDist_summary.txt')
    (counts, windows, totals,
     sub_counts, sub_windows, sub_totals) = get_data_from_file(
         summary_files, species)

    # First pair of tables: everything, then entries passing the 100 cut-off.
    norm_all = windows_to_norm_counts(counts, windows, totals)
    norm_over_100 = mirs_over_thresh(norm_all, 100, windows, species)
    write_output(norm_all, 'RPMM_all.csv', outPath)
    write_output(norm_over_100, 'RPMM_mirs_over_100.csv', outPath)

    # Second pair of tables, built from the second triple returned above.
    norm_sub = windows_to_norm_counts(sub_counts, sub_windows, sub_totals)
    norm_sub_over_50 = mirs_over_thresh(norm_sub, 50, sub_windows, species)
    write_output(norm_sub, 'RPMMM_all.csv', outPath)
    final_table = write_output(
        norm_sub_over_50, 'RPMMM_mirs_over_50.csv', outPath)

    script_dir = os.path.dirname(__file__)
    os.system(
        'Rscript {}/sample_correlation.R {}'.format(script_dir, final_table))
def main(outPath, samples):
    """Write the mapping-statistics file for ``samples`` to ``outPath``.

    Sample paths are resolved against "stats", a statistics dictionary is
    built from them and extended by ``calculate_additional_stats``, and the
    result is written out together with the line headers.
    """
    stats_files = f_utils.set_path_to_files_glob(samples, "stats")
    stats = calculate_additional_stats(mapping_stats_dict(stats_files))
    headers = output_line_headers()
    write_mapping_file(stats, outPath, headers)
def main(outPath, samples):
    """Write the length-distribution table for ``samples`` and render its image.

    The lengths read from the sample files are written to ``outPath``; the
    name returned by the writer is then handed to
    ``create_length_dist_image`` along with this file's directory.
    """
    # NOTE(review): 'ead_length_histo' has no leading 'r' -- possibly so the
    # pattern matches both 'Read...' and 'read...'; confirm before changing.
    histo_files = f_utils.set_path_to_files_glob(samples, 'ead_length_histo')
    length_counts, all_lengths = read_lengths_dict(histo_files)
    table_path = write_length_distribution(
        outPath, length_counts, all_lengths)

    script_dir = os.path.dirname(__file__)
    create_length_dist_image(script_dir, table_path)