def read_pickle_write_to_file(summary_fullpath, chr_list, fnames_dic,
                              filter_events=False):
    '''
    Write one tab-separated summary row per pickle file.

    A header row is written first, then every pickle listed in
    fnames_dic (visited in chr_list order) is loaded and flattened
    into a single row.

    Parameters:
        summary_fullpath: path of the summary text file to create
            (an existing file is overwritten).
        chr_list: iterable of keys to look up in fnames_dic.
        fnames_dic: maps each entry of chr_list to a list of pickle
            file paths.
        filter_events: when true, pickles whose p-value entry
            contains 'NA' are skipped.

    Returns:
        Number of data rows written (the header is not counted).

    Raises:
        KeyError: if an entry of chr_list is missing from fnames_dic,
            or a loaded dic lacks one of the header keys.
    '''
    # Local import: only needed to pick the file mode below.
    import sys

    # Keynames found in each pickled dic. Each keyname becomes one
    # column of the summary file. The first returned name is unused.
    (_, psi_median_str, log_score_str, sample_name_str,
     counts_00_str, counts_10_str, counts_01_str, counts_11_str,
     assigned_counts_0_str, assigned_counts_1_str,
     percent_accepted_str, group_str, pval_str, event_str) = \
        get_psi_dic_keynames(full_keynames=True)

    header = [event_str, pval_str, sample_name_str, group_str,
              counts_00_str, counts_10_str, counts_01_str, counts_11_str,
              assigned_counts_0_str, assigned_counts_1_str,
              psi_median_str, percent_accepted_str, log_score_str]

    # csv.writer wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3 (where 'wb' raises TypeError on write).
    if sys.version_info[0] >= 3:
        writefile = open(summary_fullpath, 'w', newline='')
    else:
        writefile = open(summary_fullpath, 'wb')

    writecount = 0
    with writefile:
        writer = csv.writer(writefile, delimiter='\t')
        writer.writerow(header)
        for chromo in chr_list:
            for pickle_path in fnames_dic[chromo]:
                psi_info_dic = read_pickle(pickle_path)
                # Filter events: a pickle whose pval entry contains 'NA'
                # is skipped entirely.
                if filter_events and 'NA' in psi_info_dic[pval_str]:
                    continue
                row = []
                for key in header:
                    values = psi_info_dic[key]
                    if len(values) == 1:
                        # Single element: write it as-is.
                        row.append(values[0])
                    else:
                        # One column per key: collapse multiple elements
                        # into a comma separated string. An empty value
                        # yields an empty cell so that the remaining
                        # columns stay aligned with the header.
                        row.append(','.join([str(i) for i in values]))
                writer.writerow(row)
                writecount += 1
    return writecount
def read_pickle_write_to_file(summary_fullpath, chr_list, fnames_dic,
                              filter_events=False):
    '''
    Write one tab-separated summary row per pickle file.

    A header row is written first, then every pickle listed in
    fnames_dic (visited in chr_list order) is loaded and flattened
    into a single row.

    Parameters:
        summary_fullpath: path of the summary text file to create
            (an existing file is overwritten).
        chr_list: iterable of keys to look up in fnames_dic.
        fnames_dic: maps each entry of chr_list to a list of pickle
            file paths.
        filter_events: when true, pickles whose p-value entry
            contains 'NA' are skipped.

    Returns:
        Number of data rows written (the header is not counted).

    Raises:
        KeyError: if an entry of chr_list is missing from fnames_dic,
            or a loaded dic lacks one of the header keys.
    '''
    # Local import: only needed to pick the file mode below.
    import sys

    # Keynames found in each pickled dic. Each keyname becomes one
    # column of the summary file. The first returned name is unused.
    (_, psi_median_str, log_score_str, sample_name_str,
     counts_00_str, counts_10_str, counts_01_str, counts_11_str,
     assigned_counts_0_str, assigned_counts_1_str,
     percent_accepted_str, group_str, pval_str, event_str) = \
        get_psi_dic_keynames(full_keynames=True)

    header = [event_str, pval_str, sample_name_str, group_str,
              counts_00_str, counts_10_str, counts_01_str, counts_11_str,
              assigned_counts_0_str, assigned_counts_1_str,
              psi_median_str, percent_accepted_str, log_score_str]

    # csv.writer wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3 (where 'wb' raises TypeError on write).
    if sys.version_info[0] >= 3:
        writefile = open(summary_fullpath, 'w', newline='')
    else:
        writefile = open(summary_fullpath, 'wb')

    writecount = 0
    with writefile:
        writer = csv.writer(writefile, delimiter='\t')
        writer.writerow(header)
        for chromo in chr_list:
            for pickle_path in fnames_dic[chromo]:
                psi_info_dic = read_pickle(pickle_path)
                # Filter events: a pickle whose pval entry contains 'NA'
                # is skipped entirely.
                if filter_events and 'NA' in psi_info_dic[pval_str]:
                    continue
                row = []
                for key in header:
                    values = psi_info_dic[key]
                    if len(values) == 1:
                        # Single element: write it as-is.
                        row.append(values[0])
                    else:
                        # One column per key: collapse multiple elements
                        # into a comma separated string. An empty value
                        # yields an empty cell so that the remaining
                        # columns stay aligned with the header.
                        row.append(','.join([str(i) for i in values]))
                writer.writerow(row)
                writecount += 1
    return writecount
def main():
    '''
    Command-line entry point: rewrite a summary file from pickles.

    Expects two positional arguments:
        sys.argv[1]: pickle file holding the filenames dic.
        sys.argv[2]: path of the summary text file to write.

    Prints a usage message and exits when either is missing.
    '''
    # Both argv[1] and argv[2] are read below, so at least three
    # entries (script name + two arguments) must be present.
    if len(sys.argv) < 3:
        print('Pickle file from t_test_miso_output.py and output '\
              '.txt filename must be specified in command line.')
        sys.exit()
    pickle_path = sys.argv[1]
    writefile_path = sys.argv[2]

    # NOTE(review): the chromosome names returned here are used as keys
    # into fnames_dic -- confirm the default prefix matches the one used
    # when the pickle was produced.
    chr_list = create_chromo_list()

    # Read pickle file to get fnames_dic.
    # This only contains filenames, no data.
    fnames_dic = read_pickle(pickle_path)

    # Read each listed pickle and write its contents to the summary
    # file, without filtering any events.
    read_pickle_write_to_file(writefile_path, chr_list, fnames_dic,
                              filter_events=False)
    print('Summary file saved in: %s' % writefile_path)
def main():
    '''
    Command-line entry point: rewrite a summary file from pickles.

    Expects two positional arguments:
        sys.argv[1]: pickle file holding the filenames dic.
        sys.argv[2]: path of the summary text file to write.

    Prints a usage message and exits when either is missing.
    '''
    # Both argv[1] and argv[2] are read below, so at least three
    # entries (script name + two arguments) must be present.
    if len(sys.argv) < 3:
        print('Pickle file from t_test_miso_output.py and output '\
              '.txt filename must be specified in command line.')
        sys.exit()
    pickle_path = sys.argv[1]
    writefile_path = sys.argv[2]

    # NOTE(review): the chromosome names returned here are used as keys
    # into fnames_dic -- confirm the default prefix matches the one used
    # when the pickle was produced.
    chr_list = create_chromo_list()

    # Read pickle file to get fnames_dic.
    # This only contains filenames, no data.
    fnames_dic = read_pickle(pickle_path)

    # Read each listed pickle and write its contents to the summary
    # file, without filtering any events.
    read_pickle_write_to_file(writefile_path, chr_list, fnames_dic,
                              filter_events=False)
    print('Summary file saved in: %s' % writefile_path)
def main():
    '''
    Command-line entry point: run the per-chromosome t-test jobs in
    parallel, pickle the resulting filenames dic, then write a
    filtered summary text file.

    Options -1, -2, -d, -o and -O are required; a usage error is
    reported (exit status 2) when any of them is missing. -m is
    passed through to the jobs unchanged.
    '''
    parser = OptionParser()
    parser.add_option('-1', '--group1_file', dest='group_1_samplenames_file',
                      help='Filename containing group 1 sample names (PCa)')
    parser.add_option('-2', '--group2_file', dest='group_2_samplenames_file',
                      help='Filename containing group 2 sample names (NEPC)')
    parser.add_option('-d', '--main_directory', dest='main_dir',
                      help='Main directory containing miso output results.')
    parser.add_option('-o', '--output_directory', dest='output_dir',
                      help='Output directory of t-test results.')
    parser.add_option('-O', '--output_filename', dest='output_fname',
                      help='Output filename of the t-test results.')
    parser.add_option('-m', '--min_counts', type='int', dest='min_counts',
                      help='Minimum junction read counts to be considered '\
                           'into the t-test. Best practices says 10.')
    # Parse options
    (options, _) = parser.parse_args()

    # These options have no default and are all consumed unconditionally
    # below; report a missing one as a usage error instead of letting a
    # None value fail later (e.g. inside os.path.join).
    required = (('-1/--group1_file', options.group_1_samplenames_file),
                ('-2/--group2_file', options.group_2_samplenames_file),
                ('-d/--main_directory', options.main_dir),
                ('-o/--output_directory', options.output_dir),
                ('-O/--output_filename', options.output_fname))
    missing = [flag for flag, value in required if value is None]
    if missing:
        parser.error('Missing required option(s): %s' % ', '.join(missing))

    # Define constants from options
    group_1_samplenames_file = options.group_1_samplenames_file
    group_2_samplenames_file = options.group_2_samplenames_file
    main_dir = options.main_dir
    output_dir = options.output_dir
    output_fname = options.output_fname
    min_counts = options.min_counts

    # Define constants
    summary_fullpath = os.path.join(output_dir, output_fname)

    # Get sample names from textfile.
    group_1_samples = get_sample_names_from_file(group_1_samplenames_file)
    group_2_samples = get_sample_names_from_file(group_2_samplenames_file)

    # Create list of chromosomes.
    chr_list = create_chromo_list(prefix='chr')

    # Subset list for only those that contain miso outputs.
    group_1_samples = check_if_empty_dir(main_dir, group_1_samples, chr_list)
    group_2_samples = check_if_empty_dir(main_dir, group_2_samples, chr_list)

    # Init fnames dic
    fnames_dic = {}

    # Run one process per chromosome; results come back through q.
    q = Queue()
    process_list = []
    for chromo in chr_list:
        print('Sending %s job to core...' % chromo)
        p = Process(target=t_test_and_pickle,
                    args=(fnames_dic, chromo, output_dir,
                          group_1_samples, group_2_samples,
                          main_dir, q, min_counts))
        process_list.append(p)
        p.start()

    # Collect one result per chromosome before joining, so the queue is
    # drained while the children are still running.
    # Assumes each job puts exactly one dict on q -- TODO confirm.
    for chromo in chr_list:
        fnames_dic.update(q.get())

    # Wait for all processes to be done before continuing.
    for p in process_list:
        p.join()
    print('Completed %s jobs.' % len(chr_list))

    # Write fnames_dic as pickle file.
    pickle_filename = ''.join([output_fname, '_filenames_dic.pickle'])
    fnames_savepath = os.path.join(output_dir, pickle_filename)
    print('Saving filenames_dic.pickle to %s' % fnames_savepath)
    pickle_path = save_dic_as_pickle(fnames_dic, fnames_savepath)

    # Write information from pickle to textfile.
    print('Writing information from pickle to textfile.')
    # Read pickle file to get fnames_dic
    fnames_dic = read_pickle(pickle_path)

    # Read and write to file, skipping filtered events.
    read_pickle_write_to_file(summary_fullpath, chr_list, fnames_dic,
                              filter_events=True)
    print('Summary file saved in: %s' % summary_fullpath)
def main():
    '''
    Command-line entry point: run the per-chromosome t-test jobs in
    parallel, pickle the resulting filenames dic, then write a
    filtered summary text file.

    Options -1, -2, -d, -o and -O are required; a usage error is
    reported (exit status 2) when any of them is missing. -m is
    passed through to the jobs unchanged.
    '''
    parser = OptionParser()
    parser.add_option('-1', '--group1_file', dest='group_1_samplenames_file',
                      help='Filename containing group 1 sample names (PCa)')
    parser.add_option('-2', '--group2_file', dest='group_2_samplenames_file',
                      help='Filename containing group 2 sample names (NEPC)')
    parser.add_option('-d', '--main_directory', dest='main_dir',
                      help='Main directory containing miso output results.')
    parser.add_option('-o', '--output_directory', dest='output_dir',
                      help='Output directory of t-test results.')
    parser.add_option('-O', '--output_filename', dest='output_fname',
                      help='Output filename of the t-test results.')
    parser.add_option('-m', '--min_counts', type='int', dest='min_counts',
                      help='Minimum junction read counts to be considered '\
                           'into the t-test. Best practices says 10.')
    # Parse options
    (options, _) = parser.parse_args()

    # These options have no default and are all consumed unconditionally
    # below; report a missing one as a usage error instead of letting a
    # None value fail later (e.g. inside os.path.join).
    required = (('-1/--group1_file', options.group_1_samplenames_file),
                ('-2/--group2_file', options.group_2_samplenames_file),
                ('-d/--main_directory', options.main_dir),
                ('-o/--output_directory', options.output_dir),
                ('-O/--output_filename', options.output_fname))
    missing = [flag for flag, value in required if value is None]
    if missing:
        parser.error('Missing required option(s): %s' % ', '.join(missing))

    # Define constants from options
    group_1_samplenames_file = options.group_1_samplenames_file
    group_2_samplenames_file = options.group_2_samplenames_file
    main_dir = options.main_dir
    output_dir = options.output_dir
    output_fname = options.output_fname
    min_counts = options.min_counts

    # Define constants
    summary_fullpath = os.path.join(output_dir, output_fname)

    # Get sample names from textfile.
    group_1_samples = get_sample_names_from_file(group_1_samplenames_file)
    group_2_samples = get_sample_names_from_file(group_2_samplenames_file)

    # Create list of chromosomes.
    chr_list = create_chromo_list(prefix='chr')

    # Subset list for only those that contain miso outputs.
    group_1_samples = check_if_empty_dir(main_dir, group_1_samples, chr_list)
    group_2_samples = check_if_empty_dir(main_dir, group_2_samples, chr_list)

    # Init fnames dic
    fnames_dic = {}

    # Run one process per chromosome; results come back through q.
    q = Queue()
    process_list = []
    for chromo in chr_list:
        print('Sending %s job to core...' % chromo)
        p = Process(target=t_test_and_pickle,
                    args=(fnames_dic, chromo, output_dir,
                          group_1_samples, group_2_samples,
                          main_dir, q, min_counts))
        process_list.append(p)
        p.start()

    # Collect one result per chromosome before joining, so the queue is
    # drained while the children are still running.
    # Assumes each job puts exactly one dict on q -- TODO confirm.
    for chromo in chr_list:
        fnames_dic.update(q.get())

    # Wait for all processes to be done before continuing.
    for p in process_list:
        p.join()
    print('Completed %s jobs.' % len(chr_list))

    # Write fnames_dic as pickle file.
    pickle_filename = ''.join([output_fname, '_filenames_dic.pickle'])
    fnames_savepath = os.path.join(output_dir, pickle_filename)
    print('Saving filenames_dic.pickle to %s' % fnames_savepath)
    pickle_path = save_dic_as_pickle(fnames_dic, fnames_savepath)

    # Write information from pickle to textfile.
    print('Writing information from pickle to textfile.')
    # Read pickle file to get fnames_dic
    fnames_dic = read_pickle(pickle_path)

    # Read and write to file, skipping filtered events.
    read_pickle_write_to_file(summary_fullpath, chr_list, fnames_dic,
                              filter_events=True)
    print('Summary file saved in: %s' % summary_fullpath)