Esempio n. 1
0
def make_window_regions(configuration):
    """Create a Region, with WGA and non-WGA windows, for every configured interval.

    If ``configuration['processing']['type']`` equals ``'multi'`` the call is
    forwarded to ``parallel.par_make_window_regions`` and its result is
    returned as-is. Otherwise the (start, end) pairs obtained by zipping
    ``configuration["regions"]["start"]`` with ``configuration["regions"]["end"]``
    are processed one after the other.

    Keys read from ``configuration``: ``processing``, ``window_size``,
    ``chromosome``, ``regions``, ``sam_read_config``, ``reference_file``,
    ``wga_file``, ``no_wga_file`` and, when present, ``debug``.

    Returns:
        The list of Region objects, in the order the intervals were listed.

    Raises:
        Error: if a region reports zero WGA windows or zero non-WGA windows
            after the corresponding ``make_*_windows`` call.
    """
    print("{0} Creating window regions...".format(INFO))
    processing_type = configuration['processing']['type']
    print("{0} Processing type is: {1}".format(INFO, processing_type))

    if processing_type == 'multi':
        # Imported lazily so the serial path does not need the parallel module.
        from parallel import par_make_window_regions
        return par_make_window_regions(configuration=configuration)

    window_size = configuration["window_size"]
    chrom = configuration["chromosome"]

    print("{0} Window size: {1}".format(INFO, window_size))
    print("{0} Chromosome:  {1}".format(INFO, chrom))

    region_spec = configuration["regions"]
    print("{0} Regions used {1}".format(INFO, region_spec))

    # Materialise the pairs up front, before any region is built.
    bounds = list(zip(region_spec["start"], region_spec["end"]))

    built = []
    for idx, (lo, hi) in enumerate(bounds):

        print("{0} Start index: {1}".format(INFO, lo))
        sys.stdout.flush()
        print("{0} End index:   {1}".format(INFO, hi))
        sys.stdout.flush()

        region = Region(idx=idx, start=lo, end=hi, window_size=window_size)

        # A fresh keyword dict per region; "debug" is forwarded only if given.
        extra = {"sam_read_config": configuration["sam_read_config"]}
        if "debug" in configuration:
            extra["debug"] = configuration["debug"]

        print("{0} Creating WGA Windows...".format(INFO))
        sys.stdout.flush()
        region.make_wga_windows(
            chromosome=chrom,
            ref_filename=configuration["reference_file"]["filename"],
            bam_filename=configuration["wga_file"]["filename"],
            **extra)

        if region.get_n_windows(type_=WindowType.WGA) == 0:
            raise Error("WGA windows have not been created")

        print("{0} Number of WGA windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.WGA)))
        sys.stdout.flush()

        print("{0} Creating No WGA Windows...".format(INFO))
        sys.stdout.flush()
        region.make_no_wga_windows(
            chromosome=chrom,
            ref_filename=configuration["reference_file"]["filename"],
            bam_filename=configuration["no_wga_file"]["filename"],
            **extra)

        if region.get_n_windows(type_=WindowType.NO_WGA) == 0:
            raise Error("Non-WGA windows have not  been created")

        print("{0} Number of Non WGA windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.NO_WGA)))
        sys.stdout.flush()

        built.append(region)

    return built
def make_window_regions(configuration):
    """Create, post-process and return a Region for every configured interval.

    The (start, end) pairs come from zipping ``configuration["regions"]["start"]``
    with ``configuration["regions"]["end"]``. For each pair a Region is built
    and then, in this order:

    1. WGA windows are created from ``reference_file`` and ``test_file``.
    2. Non-WGA windows are created from ``reference_file`` and ``no_wga_file``.
    3. ``check_windows_sanity`` runs if ``check_windowing_sanity`` is truthy.
    4. ``get_mixed_windows`` is called.
    5. N windows are removed (``remove_windows_with_N``), else marked with
       ``mark_for_N_windows`` (``mark_N_windows``), else left alone.
    6. ``remove_outliers`` runs if ``outlier_remove`` is truthy.
    7. ``save_mixed_windows_statistic(statistic="mean")`` is called.

    The optional keys ``quality_theshold`` (spelled as the configuration
    spells it) and ``debug`` are forwarded to both window-building calls
    when present.

    Returns:
        The list of Region objects, in the order the intervals were listed.

    Raises:
        Error: if a region reports zero WGA windows or zero non-WGA windows
            after the corresponding ``make_*_windows`` call.
    """
    print("{0} Creating window regions...".format(INFO))

    window_size = configuration["window_size"]
    chrom = configuration["chromosome"]

    print("{0} Window size: {1}".format(INFO, window_size))
    print("{0} Chromosome:  {1}".format(INFO, chrom))

    region_spec = configuration["regions"]
    print("{0} Regions used {1}".format(INFO, region_spec))

    # Materialise the pairs up front, before any region is built.
    bounds = list(zip(region_spec["start"], region_spec["end"]))

    built = []
    for idx, (lo, hi) in enumerate(bounds):

        print("{0} Start index: {1}".format(INFO, lo))
        print("{0} End index:   {1}".format(INFO, hi))

        region = Region(idx=idx, start=lo, end=hi, window_size=window_size)

        # Forward only the optional settings the configuration provides.
        extra = {}
        for optional_key in ("quality_theshold", "debug"):
            if optional_key in configuration:
                extra[optional_key] = configuration[optional_key]

        print("{0} Creating WGA Windows...".format(INFO))
        region.make_wga_windows(
            chromosome=chrom,
            ref_filename=configuration["reference_file"]["filename"],
            test_filename=configuration["test_file"]["filename"],
            **extra)

        if region.get_n_windows(type_=WindowType.WGA) == 0:
            raise Error("WGA windows have not been created")

        print("{0} Number of WGA windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.WGA)))

        print("{0} Creating No WGA Windows...".format(INFO))
        region.make_no_wga_windows(
            chromosome=chrom,
            ref_filename=configuration["reference_file"]["filename"],
            test_filename=configuration["no_wga_file"]["filename"],
            **extra)

        if region.get_n_windows(type_=WindowType.NO_WGA) == 0:
            raise Error("Non-WGA windows have not  been created")

        print("{0} Number of non-wga windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.NO_WGA)))

        if configuration.get("check_windowing_sanity"):
            region.check_windows_sanity()

        # Mixed windows are computed before any N handling takes place.
        region.get_mixed_windows()

        # N handling: removal takes precedence over marking.
        if configuration.get("remove_windows_with_N"):
            print("{0} Filtering windows for Ns...".format(INFO))

            region.remove_windows_with_ns()

            print("{0} Number of wga windows after filtering: {1}".format(
                INFO, region.get_n_windows(type_=WindowType.WGA)))
            print("{0} Number of non-wga windows after filtering: {1}".format(
                INFO, region.get_n_windows(type_=WindowType.NO_WGA)))
            print("{0} Done...".format(INFO))
        elif configuration.get("mark_N_windows"):
            n_mark = configuration["mark_for_N_windows"]
            print("{0} Marking N  windows with: {1}".format(INFO, n_mark))

            n_marked = region.mark_windows_with_ns(n_mark=n_mark)
            print("{0} Marked as N {1} Windows".format(INFO, n_marked))
        else:
            print("{0} No filtering windows for Ns requested...".format(INFO))

        print("{0} Number of mixed windows: {1}".format(
            INFO, region.get_n_mixed_windows()))
        print("{0} Number of N windows: {1}".format(
            INFO, region.count_n_windows()))

        if configuration.get("outlier_remove"):
            region.remove_outliers(configuration=configuration)

            print("{0} Number of windows after outlier removal: {1}".format(
                INFO, region.get_n_mixed_windows()))
            print("{0} Number of N windows after outlier removal {1}".format(
                INFO, region.count_n_windows()))
        else:
            print("{0} No outlier removal performed".format(INFO))

        # Store the per-region statistic before handing the region back.
        region.save_mixed_windows_statistic(statistic="mean")
        built.append(region)

    return built