Example #1
import copy
import multiprocessing as mp
import os
import shutil
import sys
import tempfile

# Project-local SICER helpers; these import paths assume the SICER2-style
# package layout and may need adjusting to the actual module locations.
from sicer.main import run_SICER
from sicer.src import compare_two_libraries_on_islands
from sicer.src import filter_islands_by_significance
from sicer.src import find_union_islands

# Launch directory, referenced in the error message below
curr_path = os.getcwd()


def main(args):
    # Checks if there is a control library
    control_lib_exists = args.control_file is not None

    # Create deep copy of the 'args' object for each treatment
    args_1 = copy.deepcopy(args)
    args_2 = copy.deepcopy(args)

    # Format each args for SICER run
    args_1.treatment_file = str(args.treatment_file[0])
    args_2.treatment_file = str(args.treatment_file[1])
    args_1.df = False
    args_2.df = False

    if control_lib_exists:
        args_1.control_file = str(args.control_file[0])
        args_2.control_file = str(args.control_file[1])

    # Execute run_SICER for each treatment library
    temp_dir_1, library_size_file1 = run_SICER.main(args_1, True)
    temp_dir_2, library_size_file2 = run_SICER.main(args_2, True)
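    # With the second argument (df_run) set to True, run_SICER leaves its
    # temporary directory in place and returns it along with library-size
    # information for the comparison steps below (see the analogous return
    # path at the end of Example #2).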

    try:
        temp_dir = tempfile.mkdtemp()
        # Change current working directory to temp_dir
        os.chdir(temp_dir)
    except OSError:
        sys.exit(
            "Temporary directory required for SICER_df cannot be created. Check if directories can be created in %s."
            % curr_path)
    try:
        num_chroms = len(args.species_chroms)
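        # Cap the pool at one worker per chromosome, since the helper steps
        # parallelize their work by chromosome.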
        pool = mp.Pool(processes=min(args.cpu, num_chroms))

        # Find the union island between two treatment files. It will generate a summary file
        print("\n")
        args.treatment_file[0] = os.path.basename(args.treatment_file[0])
        args.treatment_file[1] = os.path.basename(args.treatment_file[1])
        print("Finding all the union islands of ", args.treatment_file[0], "and ", args.treatment_file[1], "...")
        find_union_islands.main(args, temp_dir_1, temp_dir_2, pool)
        print("\n")

        # Compare two treatment libraries
        print("Comparing two treatment libraries...")
        compare_two_libraries_on_islands.main(args, temp_dir_1, temp_dir_2, library_size_file1, library_size_file2, pool)
        print("\n")

        print("Identifying significantly increased islands using BH corrected p-value cutoff...")
        filter_islands_by_significance.main(args, 9, pool)
        print("\n")

        print("Identifying significantly decreased islands using BH-corrected p-value cutoff...")
        filter_islands_by_significance.main(args, 12, pool)
        print("\n")

        pool.close()
        pool.join()

    finally:
        print("Removing all temporary directories and all files in it.")
        shutil.rmtree(temp_dir)
        shutil.rmtree(temp_dir_1)
        shutil.rmtree(temp_dir_2)
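
# Usage sketch (illustrative only): a hypothetical driver for the differential
# run above. The attribute names mirror what main() reads; the values are
# placeholders, and downstream helpers will expect further fields (e.g.
# window_size, gap_size, false_discovery_rate) that are not shown here.
if __name__ == "__main__":
    from argparse import Namespace

    args = Namespace(
        treatment_file=["treatment_rep1.bed", "treatment_rep2.bed"],
        control_file=["control_rep1.bed", "control_rep2.bed"],  # or None for no control
        species_chroms=["chr1", "chr2", "chr3"],
        cpu=4,
        df=True,
    )
    main(args)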
Example #2
import multiprocessing as mp
import os
import shutil
import sys
import tempfile

# Project-local helpers; these import paths assume the SICER2-style package
# layout and may need adjusting to the actual module locations.
from sicer.src import associate_tags_with_chip_and_control_w_fc_q
from sicer.src import coarsegraining
from sicer.src import filter_islands_by_significance
from sicer.src import filter_raw_tags_by_islands
from sicer.src import make_normalized_wig
from sicer.src import remove_redundant_reads
from sicer.src import run_make_graph_file_by_chrom

# Launch directory, referenced in the error message below
curr_path = os.getcwd()


# df_run indicates whether run_RECOGNICER is being called by the
# run_RECOGNICER_df driver.
def main(args, df_run=False):

    # Checks if there is a control library
    control_lib_exists = args.control_file is not None

    try:
        temp_dir = tempfile.mkdtemp()
        # Change current working directory to temp_dir
        os.chdir(temp_dir)
    except OSError:
        sys.exit(
            "Temporary directory required for RECOGNICER cannot be created. Check if directories can be created in %s."
            % curr_path)
    try:
        # Step 0: create Pool object for parallel-Processing
        num_chroms = len(args.species_chroms)
        pool = mp.Pool(processes=min(args.cpu, num_chroms))

        # Step 1: Remove redundancy reads in input file according to input threshold
        treatment_file_name = os.path.basename(args.treatment_file)
        print("Preprocess the", treatment_file_name,
              "file to remove redundancy with threshold of",
              args.redundancy_threshold, "\n")
        total_treatment_read_count = remove_redundant_reads.main(
            args, args.treatment_file, pool)
        args.treatment_file = treatment_file_name
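        # Rebased to the bare file name: subsequent steps run inside temp_dir
        # (the cwd set above), where the deduplicated copy is presumably written.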
        print('\n')

        # Step 2: Remove redundancy reads in control library according to input threshold
        if control_lib_exists:
            control_file_name = os.path.basename(args.control_file)
            print("Preprocess the", control_file_name,
                  "file to remove redundancy with threshold of",
                  args.redundancy_threshold, "\n")
            total_control_read_count = remove_redundant_reads.main(
                args, args.control_file, pool)
            args.control_file = control_file_name
            print('\n')

        # Step 3: Partition the genome into windows and generate graph files for each chromosome
        print(
            "Partitioning the genome into windows and generating summary files... \n"
        )
        total_tag_in_windows = run_make_graph_file_by_chrom.main(args, pool)
        print("\n")

        # Steps 4-5: Normalize and generate WIG file
        print(
            "Normalizing graphs by total island-filtered reads per million and generating summary WIG file...\n"
        )
        output_WIG_name = (treatment_file_name.replace('.bed', '') + "-W" +
                           str(args.window_size) + "-normalized.wig")
        make_normalized_wig.main(args, output_WIG_name, pool)

        # Step 6: Find candidate islands exhibiting clustering
        print("Finding candidate islands exhibiting clustering... \n")
        coarsegraining.main(args, total_tag_in_windows, pool)
        print("\n")

        # Running SICER with a control library
        if control_lib_exists:
            # Step 7
            print(
                "Calculating significance of candidate islands using the control library... \n"
            )
            associate_tags_with_chip_and_control_w_fc_q.main(
                args, total_treatment_read_count, total_control_read_count,
                pool)

            # Step 8:
            print("Identifying significant islands using FDR criterion...")
            significant_read_count = filter_islands_by_significance.main(
                args, 7,
                pool)  # 7 is the index of the summary-file column we filter by
            print("Out of the ", total_treatment_read_count, " reads in ",
                  treatment_file_name, ", ", significant_read_count,
                  " reads are in significant islands")

        # Optional Outputs
        if args.significant_reads:
            # Step 9: Filter treatment reads by the significant islands found in step 8
            print("Filtering reads by the identified significant islands...\n")
            filter_raw_tags_by_islands.main(args, pool)

            # Step 10: Produce graph file based on the filtered reads from step 9
            print("Make summary graph with filtered reads...\n")
            run_make_graph_file_by_chrom.main(args, pool, True)
            # Step 11: Produce Normalized WIG file
            print(
                "\nNormalizing graphs by total island-filtered reads per million and generating summary WIG file \n"
            )
            output_WIG_name = (treatment_file_name.replace('.bed', '') + "-W" +
                               str(args.window_size) + "-FDR" +
                               str(args.false_discovery_rate) +
                               "-islandfiltered-normalized.wig")
            make_normalized_wig.main(args, output_WIG_name, pool)

        pool.close()
        pool.join()
        # Final Step
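        # When invoked by the run_RECOGNICER_df driver, hand back the temp
        # directory and read count; the caller reuses them and is responsible
        # for removing the directory (compare the finally block in Example #1).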
        if df_run:
            return temp_dir, total_treatment_read_count
        else:
            print("End of SICER")
    finally:
        if not df_run:
            print("Removing temporary directory and all files in it.")
            shutil.rmtree(temp_dir)
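
# Usage sketch (illustrative only): a hypothetical single-library call.
# Attribute names mirror what main() reads above; the values are placeholders.
if __name__ == "__main__":
    from argparse import Namespace

    args = Namespace(
        treatment_file="treatment.bed",
        control_file="control.bed",       # or None to skip the control-library steps
        species_chroms=["chr1", "chr2"],
        cpu=4,
        redundancy_threshold=1,
        window_size=200,
        false_discovery_rate=0.01,
        significant_reads=False,
    )
    main(args)  # df_run defaults to False, so temp_dir is removed in the finally block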