Beispiel #1
0
def clear_product_blocks(blocks):
    """Drop every block that is found inside a multi-factor product.

    ``blocks`` is a two-level mapping: samples number -> patterns number
    -> list of blocks.  Sample counts and pattern counts are both walked
    in descending order.  For each block a ``ProductFinder`` is built for
    the block's own (samples, patterns) size with ``equal=True``; the
    block is discarded when any product whose ``product_list`` does not
    have length 1 yields an element equal to the block.

    The lists inside ``blocks`` are replaced in place, and the result of
    ``clear_empty_block_types(blocks)`` is returned.
    """
    sample_counts = sorted(blocks.keys(), reverse=True)
    checked = 0
    total = make_block_stats(blocks)["total"]
    for samples_num in sample_counts:
        pattern_counts = sorted(blocks[samples_num].keys(), reverse=True)
        for patterns_num in pattern_counts:
            survivors = []
            for block in blocks[samples_num][patterns_num]:
                checked += 1
                if checked % LOG_ITERATION == 0:
                    print("Product blocks checked {}/{}".format(
                        checked, total))
                # The finder is rebuilt for every block, so it always sees
                # the lists as already pruned for earlier sizes.
                finder = ProductFinder(blocks,
                                       samples_num,
                                       patterns_num,
                                       equal=True)
                is_product = False
                for product in finder.find_products():
                    # Products with a single entry in product_list are
                    # not considered.
                    if len(product.product_list) == 1:
                        continue
                    if any(member == block for member in product):
                        is_product = True
                        break
                if not is_product:
                    survivors.append(block)
            blocks[samples_num][patterns_num] = survivors
    return clear_empty_block_types(blocks)
Beispiel #2
0
def main():
    """Generate product schemes from a block file and report them.

    Reads the command-line arguments, resolves the NCS and loads the
    blocks; exits early if either step fails.  Every scheme of every
    product found by ``ProductFinder`` is simplified, counted into a
    per-size tally and rendered with ``full_str()``.  The rendered text
    goes to ``args.output`` when given, otherwise to stdout, followed by
    the statistics of the generated blocks.
    """
    args = read_args()
    ncs, msg = find_ncs(args.ncs, script_path)
    if not ncs:
        print(msg)
        exit()
    result, blocks, ncs_name, deuterated = read_blocks(args.elb_file)
    if result:
        # A truthy first element from read_blocks aborts the run.
        exit()

    finder = ProductFinder(blocks, args.samples, args.min_patterns)
    products = finder.find_products()
    schemes_total = sum(len(product) for product in products)
    print("{} products found".format(schemes_total))

    blocks_total = {}
    pieces = [
        "[NCS = {}]\n".format(ncs_name),
        "[Deuterated = {}]\n".format(deuterated),
    ]
    for product in products:
        for scheme in product:
            scheme.simplify()
            blocks_total = add_block(blocks_total, scheme.samples,
                                     len(scheme.patterns))
            pieces.append(scheme.full_str())
            pieces.append("\n")
    output = "".join(pieces)
    stats_total = make_block_stats(blocks_total)

    if not args.output:
        print(output)
    else:
        with open(args.output, mode="w") as out_file:
            out_file.write(output)
            print("{} blocks were written to the file '{}'".format(
                schemes_total, args.output))
    print("Statistics of generated blocks:")
    print(stats_total["str"])
Beispiel #3
0
def clear_redundant_blocks(blocks):
    """Remove blocks that are subsets of an already accepted block.

    For each samples number the pattern counts are visited in descending
    order.  A block is kept only if ``block.is_subset_of(...)`` is false
    for the ``simplified`` attribute of every block accepted so far for
    the same samples number.  Pattern counts left without blocks are
    removed from the inner mapping.

    ``blocks`` is modified in place; the result of
    ``clear_empty_block_types(blocks)`` is returned.
    """
    checked = 0
    total = make_block_stats(blocks)["total"]
    for samples_num in blocks:
        by_patterns = blocks[samples_num]
        pattern_counts = sorted(by_patterns.keys(), reverse=True)
        # Accepted blocks accumulate across all pattern counts of this
        # samples number.
        accepted = []
        for pattern_num in pattern_counts:
            survivors = []
            for block in by_patterns[pattern_num]:
                checked += 1
                if checked % LOG_ITERATION == 0:
                    print("Redundant blocks checked {}/{}".format(
                        checked, total))
                if any(block.is_subset_of(kept.simplified)
                       for kept in accepted):
                    continue
                accepted.append(block)
                survivors.append(block)
            if survivors:
                by_patterns[pattern_num] = survivors
            else:
                by_patterns.pop(pattern_num)
    return clear_empty_block_types(blocks)
Beispiel #4
0
def main():
    """Print block statistics for each given file and for all together.

    Every line of every file is matched against ``Constants.elb_re``;
    groups 1 and 2 of a match are converted to integers and passed to
    ``add_block`` for both the per-file and the overall tally.  With
    ``--total`` only the total count is printed per entry, otherwise the
    full statistics string.  Processing stops at the first path that is
    not an existing file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("files",
                        nargs='+',
                        type=str,
                        help='Specify file(s) for which to calculate stats')
    parser.add_argument("--total",
                        "-t",
                        action="store_true",
                        help='Print only total number of blocks')
    args = parser.parse_args()

    def render(label, stats):
        # One report entry: right-aligned label, then either the bare
        # total or the full statistics text on the following lines.
        header = "{:>25}:".format(label)
        if args.total:
            return header + " {:>7}".format(int(stats["total"]))
        return header + "\n" + stats["str"]

    blocks_total = {}
    for path in args.files:
        if not os.path.isfile(path):
            print("Error! File '{}' not found!".format(path))
            return
        blocks = {}
        with open(path, "r") as handle:
            for line in handle:
                match = Constants.elb_re.match(line)
                if not match:
                    continue
                samples = int(match.group(1))
                patterns = int(match.group(2))
                blocks = add_block(blocks, samples, patterns)
                blocks_total = add_block(blocks_total, samples, patterns)
        print(render(path, make_block_stats(blocks)))

    stats_total = make_block_stats(blocks_total)
    file_count = len(args.files)
    if file_count > 1:
        print(render("TOTAL FOR {} FILES".format(file_count), stats_total))
Beispiel #5
0
def clear_blocks(blocks, flags):
    """Run the selected clean-up stages and print statistics after each.

    ``flags`` unpacks into three values that switch, in this order, the
    identical, redundant and product clean-up stages.  The statistics
    string of ``blocks`` is printed once up front and again after every
    stage that runs.  Returns the blocks as returned by the last stage
    that ran (or the input unchanged if none ran).
    """
    identical_flag, redundant_flag, product_flag = flags

    def report(current):
        print(make_block_stats(current)["str"] + "\n")

    def run_stage(start_msg, stage, done_msg, current):
        # Announce, clean, confirm, then show the updated statistics.
        print(start_msg)
        cleaned = stage(current)
        print(done_msg)
        report(cleaned)
        return cleaned

    report(blocks)

    if identical_flag:
        blocks = run_stage("Clearing identical blocks...",
                           clear_identical_blocks,
                           "Identical blocks cleared...",
                           blocks)
    if redundant_flag:
        blocks = run_stage("Clearing redundant blocks...",
                           clear_redundant_blocks,
                           "Redundant blocks cleared...",
                           blocks)
    if product_flag:
        blocks = run_stage("Clearing product blocks...",
                           clear_product_blocks,
                           "Product blocks cleared...",
                           blocks)
    return blocks
Beispiel #6
0
def clear_identical_blocks(blocks):
    """Collapse equal blocks within every (samples, patterns) list.

    Each list is passed through a dict keyed by the block itself, so
    blocks that hash and compare equal occupy a single slot; the slot
    keeps the position of the first occurrence and the value of the last
    one.  Pattern counts left without blocks are removed from the inner
    mapping.

    ``blocks`` is modified in place; the result of
    ``clear_empty_block_types(blocks)`` is returned.
    """
    checked = 0
    total = make_block_stats(blocks)["total"]
    for samples_num in blocks:
        by_patterns = blocks[samples_num]
        for pattern_num in sorted(by_patterns.keys(), reverse=True):
            unique = {}
            for candidate in by_patterns[pattern_num]:
                checked += 1
                if checked % LOG_ITERATION == 0:
                    print("Identical blocks checked {}/{}".format(
                        checked, total))
                unique[candidate] = candidate
            if unique:
                by_patterns[pattern_num] = list(unique.values())
            else:
                by_patterns.pop(pattern_num)
    return clear_empty_block_types(blocks)
Beispiel #7
0
def filterT_blocks(blocks, min_t_free, verbose_output):
    """Keep only blocks with at least ``min_t_free`` "no T" patterns.

    ``count_typeT(block)`` returns a pair ``(have_t, have_no_t)``; a
    block is kept when ``have_no_t >= min_t_free``.  With
    ``verbose_output`` the block and the decision taken for it are
    printed.  Pattern counts left without blocks are removed from the
    inner mapping.  The closing summary reports the number of blocks
    that were kept against the number examined.

    ``blocks`` is modified in place; the result of
    ``clear_empty_block_types(blocks)`` is returned.
    """
    checked = 0
    total = make_block_stats(blocks)["total"]
    kept_count = 0
    banner = 'Filter blocks with less then {} patterns that have no "T" labeling'
    print(banner.format(min_t_free))
    for samples_num in blocks:
        by_patterns = blocks[samples_num]
        for pattern_num in sorted(by_patterns.keys(), reverse=True):
            survivors = []
            for block in by_patterns[pattern_num]:
                checked += 1
                if checked % LOG_ITERATION == 0:
                    print("filterT blocks checked {}/{}".format(
                        checked, total))
                have_t, have_no_t = count_typeT(block)
                counts = "have_t = {:2} have_no_t = {:2}, ".format(
                    have_t, have_no_t)
                keep = have_no_t >= min_t_free
                if keep:
                    survivors.append(block)
                    kept_count += 1
                if verbose_output:
                    verdict = "APPEND block" if keep else "SKIP block"
                    print(block.full_str())
                    print(counts + verdict + "\n")
            if survivors:
                by_patterns[pattern_num] = survivors
            else:
                by_patterns.pop(pattern_num)
    print("Filtered {}/{} blocks by T labeling, min_t_free = {}".format(
        kept_count, total, min_t_free))
    return clear_empty_block_types(blocks)
Beispiel #8
0
def main():
    """Print block statistics, and optionally SV min/max, for given files.

    Every line of every file is matched against ``Constants.elb_re``; the
    named groups ``samples`` and ``patterns`` of a match are converted to
    integers and passed to ``add_block`` for both the per-file and the
    overall tally.  With ``--total`` only the total count is printed per
    entry, otherwise the full statistics string.  Processing stops at the
    first path that is not an existing file.

    With ``--sv``, lines matching ``Constants.sv_re`` are parsed as
    whitespace-separated integers and folded element-wise into running
    minimum and maximum vectors, keyed by the samples number of the most
    recent block header in the same file.  SV lines that appear before
    any block header of their file have no samples number to belong to
    and are ignored.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("files",
                        help='Specify file(s) for which to calculate stats',
                        type=str,
                        nargs='+')
    parser.add_argument("--total",
                        "-t",
                        help='Print only total number of blocks',
                        action="store_true")
    parser.add_argument("--sv",
                        "-s",
                        help='Print SV min andd max values',
                        action="store_true")
    args = parser.parse_args()
    blocks_total = {}
    sv_min = {}
    sv_max = {}
    for file in args.files:
        blocks = {}
        if not os.path.isfile(file):
            print("Error! File '{}' not found!".format(file))
            return
        output = "{:>25}:".format(file)
        # Reset for every file: without this an SV line preceding the
        # first block header either hit an unbound variable (first file)
        # or was booked under the last header of the previous file.
        samples = None
        with open(file, "r") as f:
            for line in f:
                result = Constants.elb_re.match(line)
                if result:
                    samples = int(result.group('samples'))
                    patterns = int(result.group('patterns'))
                    blocks = add_block(blocks, samples, patterns)
                    blocks_total = add_block(blocks_total, samples, patterns)
                    continue
                if not args.sv or samples is None:
                    continue
                sv_re_match = Constants.sv_re.match(line)
                if not sv_re_match:
                    continue
                sv = [int(i) for i in sv_re_match.group('sv').split()]
                if samples in sv_min:
                    # zip() stops at the shorter vector, as before.
                    sv_min[samples] = [
                        min(pair) for pair in zip(sv_min[samples], sv)]
                    sv_max[samples] = [
                        max(pair) for pair in zip(sv_max[samples], sv)]
                else:
                    # Separate copies so the two tables never share a
                    # list object.
                    sv_min[samples] = list(sv)
                    sv_max[samples] = list(sv)
        stats = make_block_stats(blocks)
        if args.total:
            output += " {:>7}".format(int(stats["total"]))
        else:
            output += "\n" + stats["str"]
        print(output)
    stats_total = make_block_stats(blocks_total)
    if len(args.files) > 1:
        output = "{:>25}:".format("TOTAL FOR {} FILES".format(len(args.files)))
        if args.total:
            output += " {:>7}".format(int(stats_total["total"]))
        else:
            output += "\n" + stats_total["str"]
        print(output)
    if args.sv:
        print("Min and max values for SV vectors:")
        for s in sorted(sv_min):
            min_out = "SV_MIN ({} samples): ".format(s) + "".join(
                "{:>2} ".format(value) for value in sv_min[s])
            max_out = "SV_MAX ({} samples): ".format(s) + "".join(
                "{:>2} ".format(value) for value in sv_max[s])
            print(min_out)
            print(max_out)