Ejemplo n.º 1
0
def main():
    """Start one MISO-consolidation worker process per chromosome.

    Reads three positional command-line arguments from ``sys.argv``:

    1. the main directory, passed through to the helpers and workers,
    2. the path handed to ``get_sample_names_from_file`` to obtain the
       sample directory names,
    3. the output path handed to every worker.

    The sample list is filtered with ``check_if_empty_dir`` and then a
    ``Process`` targeting ``get_fnames_consolidate_miso`` is started for
    each of chr1..chr22, chrX and chrY. The processes are started but are
    not explicitly joined in this function.
    """
    main_dir = sys.argv[1]
    sample_dir_fullpath = sys.argv[2]
    output_path = sys.argv[3]

    # Define constants
    chr_str = 'chr'
    # Create list of chromosome names corresponding to folders within
    # sample dir. range() is wrapped in list() because on Python 3 a range
    # object cannot be concatenated with a list (TypeError).
    chr_list = [''.join([chr_str, str(c)])
                for c in list(range(1, 23)) + ['X', 'Y']]

    # Create list of sample directory names.
    sample_dir_names_list = get_sample_names_from_file(sample_dir_fullpath)

    # Subset list for only those that contain miso outputs.
    sample_dir_names_list = check_if_empty_dir(main_dir,
                                               sample_dir_names_list,
                                               chr_list)

    # Run one worker process per chromosome.
    for chromo in chr_list:
        print('Sending %s job to core...' % chromo)
        Process(target=get_fnames_consolidate_miso,
                args=(main_dir, sample_dir_names_list, chromo,
                      output_path)).start()
def main():
    """Run the per-chromosome t-test jobs and write the summary file.

    Command-line options (parsed with ``OptionParser``) supply the two
    sample-name files, the main directory, the output directory and
    filename, and the minimum read count. One ``Process`` running
    ``t_test_and_pickle`` is started per chromosome; each result taken
    from the shared queue is merged into a single dictionary, which is
    saved as a pickle, read back, and written out as a text summary.
    """
    # Option table: (flags, keyword arguments), registered in this order.
    option_table = [
        (('-1', '--group1_file'),
         dict(dest='group_1_samplenames_file',
              help='Filename containing group 1 sample names (PCa)')),
        (('-2', '--group2_file'),
         dict(dest='group_2_samplenames_file',
              help='Filename containing group 2 sample names (NEPC)')),
        (('-d', '--main_directory'),
         dict(dest='main_dir',
              help='Main directory containing miso output results.')),
        (('-o', '--output_directory'),
         dict(dest='output_dir',
              help='Output directory of t-test results.')),
        (('-O', '--output_filename'),
         dict(dest='output_fname',
              help='Output filename of the t-test results.')),
        (('-m', '--min_counts'),
         dict(type='int', dest='min_counts',
              help='Minimum junction read counts to be considered '
                   'into the t-test. Best practices says 10.')),
    ]
    parser = OptionParser()
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    # Positional leftovers are ignored.
    opts, _ = parser.parse_args()
    main_dir = opts.main_dir
    output_dir = opts.output_dir
    output_fname = opts.output_fname
    min_counts = opts.min_counts

    # Destination of the final text summary.
    summary_fullpath = os.path.join(output_dir, output_fname)

    # Sample names for each group, read from their text files.
    group_1_samples, group_2_samples = [
        get_sample_names_from_file(names_file)
        for names_file in (opts.group_1_samplenames_file,
                           opts.group_2_samplenames_file)
    ]

    # Chromosome names with the 'chr' prefix.
    chr_list = create_chromo_list(prefix='chr')

    # Keep only the samples that contain miso outputs.
    group_1_samples = check_if_empty_dir(main_dir, group_1_samples, chr_list)
    group_2_samples = check_if_empty_dir(main_dir, group_2_samples, chr_list)

    # Filled from the queue once the workers report back.
    fnames_dic = {}

    # Launch one worker process per chromosome.
    q = Queue()
    workers = []
    for chromo in chr_list:
        print('Sending %s job to core...' % chromo)
        worker = Process(
            target=t_test_and_pickle,
            args=(fnames_dic, chromo, output_dir, group_1_samples,
                  group_2_samples, main_dir, q, min_counts),
        )
        workers.append(worker)
        worker.start()

    # Take one queue item per chromosome and merge it in.
    for _ in chr_list:
        fnames_dic.update(q.get())

    # Block until every worker process has finished.
    for worker in workers:
        worker.join()

    print('Completed %s jobs.' % len(chr_list))

    # Persist the merged dictionary as a pickle file.
    fnames_savepath = os.path.join(output_dir,
                                   output_fname + '_filenames_dic.pickle')
    print('Saving filenames_dic.pickle to %s' % fnames_savepath)
    pickle_path = save_dic_as_pickle(fnames_dic, fnames_savepath)

    # Reload the pickle and write its information to the text summary.
    print('Writing information from pickle to textfile.')
    fnames_dic = read_pickle(pickle_path)
    read_pickle_write_to_file(summary_fullpath, chr_list, fnames_dic,
                              filter_events=True)

    print('Summary file saved in: %s' % summary_fullpath)
def _build_ttest_option_parser():
    """Return an OptionParser carrying the six t-test command-line options."""
    parser = OptionParser()
    parser.add_option('-1', '--group1_file', dest='group_1_samplenames_file',
                      help='Filename containing group 1 sample names (PCa)')
    parser.add_option('-2', '--group2_file', dest='group_2_samplenames_file',
                      help='Filename containing group 2 sample names (NEPC)')
    parser.add_option('-d', '--main_directory', dest='main_dir',
                      help='Main directory containing miso output results.')
    parser.add_option('-o', '--output_directory', dest='output_dir',
                      help='Output directory of t-test results.')
    parser.add_option('-O', '--output_filename', dest='output_fname',
                      help='Output filename of the t-test results.')
    parser.add_option('-m', '--min_counts', type='int', dest='min_counts',
                      help='Minimum junction read counts to be considered '
                           'into the t-test. Best practices says 10.')
    return parser


def main():
    """Dispatch a t-test worker per chromosome, then summarise the results.

    Steps, in order: parse the command-line options; read both groups'
    sample names; build the chromosome list; filter each group with
    ``check_if_empty_dir``; start a ``Process`` running
    ``t_test_and_pickle`` for every chromosome; merge one queue item per
    chromosome into a dictionary; join the workers; save the dictionary
    as a pickle; read it back and write the text summary.
    """
    options, _ = _build_ttest_option_parser().parse_args()

    # Pull the parsed values into locals.
    group_1_file = options.group_1_samplenames_file
    group_2_file = options.group_2_samplenames_file
    main_dir = options.main_dir
    output_dir = options.output_dir
    output_fname = options.output_fname
    min_counts = options.min_counts

    # Full path of the summary text file.
    summary_fullpath = os.path.join(output_dir, output_fname)

    # Read the sample names of both groups.
    group_1_samples = get_sample_names_from_file(group_1_file)
    group_2_samples = get_sample_names_from_file(group_2_file)

    # Chromosome names, prefixed with 'chr'.
    chr_list = create_chromo_list(prefix='chr')

    # Restrict each group to samples that contain miso outputs.
    group_1_samples = check_if_empty_dir(main_dir, group_1_samples, chr_list)
    group_2_samples = check_if_empty_dir(main_dir, group_2_samples, chr_list)

    # Accumulates what the workers put on the queue.
    merged_fnames = {}

    # Start a separate process for every chromosome.
    result_queue = Queue()
    running = []
    for chromo in chr_list:
        print('Sending %s job to core...' % chromo)
        job_args = (merged_fnames, chromo, output_dir, group_1_samples,
                    group_2_samples, main_dir, result_queue, min_counts)
        job = Process(target=t_test_and_pickle, args=job_args)
        running.append(job)
        job.start()

    # One queue item is consumed for each chromosome that was dispatched.
    for _ in chr_list:
        item = result_queue.get()
        merged_fnames.update(item)

    # Do not continue until all worker processes are done.
    for job in running:
        job.join()

    print('Completed %s jobs.' % len(chr_list))

    # Save the merged dictionary as a pickle next to the other outputs.
    pickle_filename = output_fname + '_filenames_dic.pickle'
    fnames_savepath = os.path.join(output_dir, pickle_filename)
    print('Saving filenames_dic.pickle to %s' % fnames_savepath)
    pickle_path = save_dic_as_pickle(merged_fnames, fnames_savepath)

    # Load the pickle again and write its contents to the summary file.
    print('Writing information from pickle to textfile.')
    merged_fnames = read_pickle(pickle_path)
    read_pickle_write_to_file(summary_fullpath, chr_list, merged_fnames,
                              filter_events=True)

    print('Summary file saved in: %s' % summary_fullpath)