def t_test_and_pickle(fnames_dic, chromo, output_dir, group_1_samples,
                      group_2_samples, main_dir, queue_obj, min_counts):
    '''
    T-test every event file of one chromosome and pickle each result.

    Combines several steps into one call so that the work for a single
    chromosome can be handed to a worker and the result collected
    through a queue.

    Parameters:
    fnames_dic -- ignored. The result is always built from scratch; the
        parameter is kept only so existing callers keep working.
    chromo -- chromosome name; used as the sub-directory of output_dir
        and as the key of the result dictionary.
    output_dir -- base directory; pickles go to output_dir/chromo/.
    group_1_samples, group_2_samples -- the two sample groups, passed
        to get_all_fnames and get_psi_dic_across_samples.
    main_dir -- passed through to the same two helpers.
    queue_obj -- object with a put() method that receives the result.
    min_counts -- passed through to get_psi_dic_across_samples.

    Returns None. The result, a dictionary {chromo: list of the values
    returned by save_dic_as_pickle}, is delivered via queue_obj.put().
    '''
    # Keys added to every per-event dictionary before it is pickled.
    pval_str = 'pval'
    event_str = 'event'

    # Create directory to store the pickled dictionaries.
    make_dir(os.path.join(output_dir, chromo))

    # Collect the event filenames of each group separately, then merge
    # them and drop the repeats.
    # NOTE(review): the original comment states that calling
    # get_all_fnames per group guarantees at least one sample in each
    # group; that depends on get_all_fnames -- confirm.
    group_1_fnames_list = get_all_fnames(group_1_samples, main_dir, chromo)
    group_2_fnames_list = get_all_fnames(group_2_samples, main_dir, chromo)
    master_fnames_list = list(set(group_1_fnames_list + group_2_fnames_list))

    # Do t-test between the two groups, one event file at a time.
    fnames_pickled_list = []
    for fname in master_fnames_list:
        # Dictionary containing psi information for all samples; the
        # second element of the returned pair is not needed here.
        psi_info_dic, _ = get_psi_dic_across_samples(fname, group_1_samples,
                                                     group_2_samples, main_dir,
                                                     chromo, output_dir,
                                                     min_counts)
        # Add pval and event to dic, each wrapped in a one-element list.
        psi_info_dic[pval_str] = [t_test_psi_info(psi_info_dic)]
        # The event name is the part of fname before the first '.'.
        psi_info_dic[event_str] = [fname.split('.')[0]]
        # Save dictionary as output_dir/chromo/<fname>.pickle and keep
        # whatever save_dic_as_pickle returns (presumably the path
        # written -- confirm against that helper).
        output_fullpath = os.path.join(output_dir, chromo,
                                       fname + '.pickle')
        fnames_pickled_list.append(
            save_dic_as_pickle(psi_info_dic, output_fullpath))

    print('T-tested %s events in %s' % (len(master_fnames_list), chromo))
    # The output dictionary only ever holds this one chromosome, so it
    # is built directly instead of checking for an existing key.
    queue_obj.put({chromo: fnames_pickled_list})  # For multithreading
# Example #2
# 0
def consolidate_miso_across_samples(main_dir, sample_dir_names_list, 
                                    chromo, master_fnames_list, 
                                    output_path):
    '''
    Write one combined, tab-delimited file per entry of master_fnames_list.

    For each file in master_fnames_list:
    Read inside main_dir/sample_dir/chromo/misofile for all sample_dirs.
    1) read the header and write a combined header to the output file
    (meaning adding up all the counts).
    2) write sampled_psi and log_score to the output file for each
    sample.

    Both steps are delegated to write_combined_miso_header and
    write_combined_psi_logscore, which receive the csv writer object.
    The output file is output_path/chromo/<filename>.

    Returns None.

    TODO: is it weird to put csv writer obj into a function?
    '''
    import sys  # local import: only needed to choose the file mode

    # csv.writer needs a binary-mode file on Python 2, but a text-mode
    # file opened with newline='' on Python 3 (writing rows to a 'wb'
    # file raises TypeError there).
    if sys.version_info[0] >= 3:
        open_mode, open_kwargs = 'w', {'newline': ''}
    else:
        open_mode, open_kwargs = 'wb', {}

    # This will be inefficient, because each input file is opened twice
    # (once by each of the two write helpers, per the original note).
    for f in master_fnames_list:
        # Construct fullpath for output file. make_dir is called with
        # output_path/chromo and its return value is used as the
        # directory path (presumably it creates the directory when it
        # does not exist -- confirm against make_dir).
        chr_out_path = make_dir(os.path.join(output_path, chromo))
        out_fullpath = os.path.join(chr_out_path, f)
        with open(out_fullpath, open_mode, **open_kwargs) as writefile:
            writer = csv.writer(writefile, delimiter='\t')
            # Write summary header of file (loops through all samples)
            write_combined_miso_header(sample_dir_names_list, main_dir, 
                                       chromo, f, writer)
            # Write sampled_psi and log_score for each sample:
            write_combined_psi_logscore(sample_dir_names_list,
                                        main_dir,
                                        chromo,
                                        f, writer)
def t_test_and_pickle(fnames_dic, chromo, output_dir, group_1_samples, group_2_samples, 
                      main_dir, queue_obj, min_counts):
    '''
    T-test every event file of one chromosome and pickle each result.

    Combines several steps into one call so that the work for a single
    chromosome can be handed to a worker and the result collected
    through a queue.

    Parameters:
    fnames_dic -- ignored. The result is always built from scratch; the
        parameter is kept only so existing callers keep working.
    chromo -- chromosome name; used as the sub-directory of output_dir
        and as the key of the result dictionary.
    output_dir -- base directory; pickles go to output_dir/chromo/.
    group_1_samples, group_2_samples -- the two sample groups, passed
        to get_all_fnames and get_psi_dic_across_samples.
    main_dir -- passed through to the same two helpers.
    queue_obj -- object with a put() method that receives the result.
    min_counts -- passed through to get_psi_dic_across_samples.

    Returns None. The result, a dictionary {chromo: list of the values
    returned by save_dic_as_pickle}, is delivered via queue_obj.put().
    '''
    # Keys added to every per-event dictionary before it is pickled.
    pval_str = 'pval'
    event_str = 'event'

    # Create directory to store the pickled dictionaries.
    make_dir(os.path.join(output_dir, chromo))

    # Collect the event filenames of each group separately, then merge
    # them and drop the repeats.
    # NOTE(review): the original comment states that calling
    # get_all_fnames per group guarantees at least one sample in each
    # group; that depends on get_all_fnames -- confirm.
    group_1_fnames_list = get_all_fnames(group_1_samples, main_dir, chromo)
    group_2_fnames_list = get_all_fnames(group_2_samples, main_dir, chromo)
    master_fnames_list = list(set(group_1_fnames_list + group_2_fnames_list))

    # Do t-test between the two groups, one event file at a time.
    fnames_pickled_list = []
    for fname in master_fnames_list:
        # Dictionary containing psi information for all samples; the
        # second element of the returned pair is not needed here.
        psi_info_dic, _ = get_psi_dic_across_samples(fname,
                                                     group_1_samples,
                                                     group_2_samples,
                                                     main_dir, chromo,
                                                     output_dir,
                                                     min_counts)
        # Add pval and event to dic, each wrapped in a one-element list.
        psi_info_dic[pval_str] = [t_test_psi_info(psi_info_dic)]
        # The event name is the part of fname before the first '.'.
        psi_info_dic[event_str] = [fname.split('.')[0]]
        # Save dictionary as output_dir/chromo/<fname>.pickle and keep
        # whatever save_dic_as_pickle returns (presumably the path
        # written -- confirm against that helper).
        output_fullpath = os.path.join(output_dir, chromo,
                                       fname + '.pickle')
        fnames_pickled_list.append(save_dic_as_pickle(psi_info_dic,
                                                      output_fullpath))

    print('T-tested %s events in %s' %(len(master_fnames_list), chromo))
    # The output dictionary only ever holds this one chromosome, so it
    # is built directly instead of checking for an existing key.
    queue_obj.put({chromo: fnames_pickled_list})    # For multithreading