def main():
    """Command-line entry point: disentangle an organelle genome from an assembly graph.

    Parses the options with ``get_options``, runs the time-limited
    ``disentangle_circular_assembly`` worker on the fastg/tab input and logs the
    total run time. ``IOError`` propagates to the caller; ``KeyError`` and any
    other ``Exception`` are logged (with a hint for the user) and swallowed.

    NOTE(review): this file defines ``main`` more than once; the last definition
    is the one bound at import time -- confirm which version is intended.
    """
    time0 = time.time()
    print_title = "GetOrganelle v" + str(get_versions()) + \
                  "\n\nThis is a script for extracting organelle genomes" \
                  " from slim_fastg.py-produced files (csv & fastg). " + \
                  "\nBy [email protected]\n\n"
    options, log_handler = get_options(print_title)

    @set_time_limit(options.time_limit)
    def disentangle_circular_assembly(fastg_file,
                                      tab_file,
                                      prefix,
                                      weight_factor,
                                      type_factor,
                                      mode="embplant_pt",
                                      log_hard_cov_threshold=10.,
                                      expected_max_size=inf,
                                      expected_min_size=0,
                                      contamination_depth=3.,
                                      contamination_similarity=5.,
                                      degenerate=True,
                                      degenerate_depth=1.5,
                                      degenerate_similarity=1.5,
                                      min_sigma_factor=0.1,
                                      max_copy_in=10,
                                      only_max_cov=True,
                                      keep_temp=False,
                                      acyclic_allowed=False,
                                      verbose=False,
                                      inner_logging=None,
                                      debug=False):
        """Find the target graph(s) in ``fastg_file`` and export their paths.

        For every graph returned by ``Assembly.find_target_graph`` this writes
        ``<prefix>.graph<N><tag>.<M>.path_sequence.fasta`` for each exported path
        and ``<prefix>.graph<N>.selected_graph.gfa`` for the graph itself.

        :param fastg_file: assembly graph passed to ``Assembly``.
        :param tab_file: annotation table passed to ``find_target_graph``.
        :param prefix: path prefix for every output file.
        :param mode: passed to ``find_target_graph`` as both ``database_name``
            and ``mode``; the repeat statistics are only reported for
            ``"embplant_pt"``.
        :param max_copy_in: forwarded as ``max_contig_multiplicity``.
        :param only_max_cov: forwarded as ``only_keep_max_cov``.
        :param keep_temp: when true, ``<prefix>.temp.fastg`` is passed as
            ``temp_graph``; otherwise ``None``.
        :param acyclic_allowed: when true, export paths via ``get_all_paths``
            (each path may hold several contigs); otherwise export circular
            paths via ``get_all_circular_paths``. Also forwarded as
            ``broken_graph_allowed``.
        :param inner_logging: logger used for progress messages; when falsy the
            messages go to ``sys.stdout`` instead.

        The remaining keyword arguments are forwarded to ``find_target_graph``
        under the same names.
        """
        # Resume mode: the first graph's GFA is taken as the marker of a finished run.
        if options.resume and os.path.exists(prefix + ".graph1.selected_graph.gfa"):
            if inner_logging:
                inner_logging.info(">>> Result graph existed!")
            else:
                sys.stdout.write(">>> Result graph existed!\n")
            return

        def limit_paths(candidate_paths):
            """Truncate ``candidate_paths`` to ``options.max_paths_num``, warning if any are dropped."""
            if len(candidate_paths) > options.max_paths_num:
                this_warn_str = "Only exporting " + str(options.max_paths_num) + " out of all " + \
                                str(len(candidate_paths)) + " possible paths. (see '--max-paths-num' to change it.)"
                if inner_logging:
                    inner_logging.warning(this_warn_str)
                else:
                    sys.stdout.write("Warning: " + this_warn_str + "\n")
                return candidate_paths[:options.max_paths_num]
            return candidate_paths

        def report_plastome_architecture(detect_seq):
            """Report the large-repeat statistics of ``detect_seq`` (first path only)."""
            ir_stats = detect_plastome_architecture(detect_seq, 1000)
            print_str = "Detecting large repeats (>1000 bp) in PATH1 with " + ir_stats[-1] + \
                        ", Total:LSC:SSC:Repeat(bp) = " + str(len(detect_seq)) + ":" + \
                        ":".join([str(len_val) for len_val in ir_stats[:3]])
            if inner_logging:
                inner_logging.info(print_str)
            else:
                sys.stdout.write(print_str + "\n")

        def path_fasta_name(graph_id, tag, path_id):
            """Build the output fasta name for one exported path."""
            return prefix + ".graph" + str(graph_id) + tag + "." + str(path_id) + ".path_sequence.fasta"

        time_a = time.time()
        if inner_logging:
            inner_logging.info(">>> Parsing " + fastg_file + " ..")
        else:
            sys.stdout.write("Parsing " + fastg_file + " ..\n")
        input_graph = Assembly(fastg_file,
                               min_cov=options.min_cov,
                               max_cov=options.max_cov)
        time_b = time.time()
        if inner_logging:
            inner_logging.info(">>> Parsing input fastg file finished: " +
                               str(round(time_b - time_a, 4)) + "s")
        else:
            sys.stdout.write("\n>>> Parsing input fastg file finished: " +
                             str(round(time_b - time_a, 4)) + "s\n")
        temp_graph = prefix + ".temp.fastg" if keep_temp else None

        copy_results = input_graph.find_target_graph(
            tab_file,
            database_name=mode,
            mode=mode,
            type_factor=type_factor,
            weight_factor=weight_factor,
            log_hard_cov_threshold=log_hard_cov_threshold,
            contamination_depth=contamination_depth,
            contamination_similarity=contamination_similarity,
            degenerate=degenerate,
            degenerate_depth=degenerate_depth,
            degenerate_similarity=degenerate_similarity,
            expected_max_size=expected_max_size,
            expected_min_size=expected_min_size,
            max_contig_multiplicity=max_copy_in,
            only_keep_max_cov=only_max_cov,
            min_sigma_factor=min_sigma_factor,
            temp_graph=temp_graph,
            broken_graph_allowed=acyclic_allowed,
            verbose=verbose,
            log_handler=inner_logging,
            debug=debug)
        time_c = time.time()
        if inner_logging:
            inner_logging.info(">>> Detecting target graph finished: " +
                               str(round(time_c - time_b, 4)) + "s")
            if len(copy_results) > 1:
                inner_logging.info(
                    str(len(copy_results)) + " set(s) of graph detected.")
        else:
            sys.stdout.write("\n\n>>> Detecting target graph finished: " +
                             str(round(time_c - time_b, 4)) + "s\n")
            if len(copy_results) > 1:
                sys.stdout.write(
                    str(len(copy_results)) + " set(s) of graph detected.\n")

        degenerate_base_used = False
        if acyclic_allowed:
            # Graph and path numbering in the output file names is 1-based.
            for go_res, copy_res in enumerate(copy_results, 1):
                broken_graph = copy_res["graph"]
                these_paths = limit_paths(
                    broken_graph.get_all_paths(mode=mode, log_handler=inner_logging))

                # exporting paths, reporting results
                for count_path, (this_paths, other_tag) in enumerate(these_paths, 1):
                    all_contig_str = []
                    contigs_are_circular = []
                    for go_contig, this_p_part in enumerate(this_paths):
                        this_contig = broken_graph.export_path(this_p_part)
                        if DEGENERATE_BASES & set(this_contig.seq):
                            degenerate_base_used = True
                        is_circular = this_contig.label.endswith("(circular)")
                        contigs_are_circular.append(is_circular)
                        if len(this_paths) == 1 and is_circular:
                            # A single circular contig keeps its own fasta header.
                            all_contig_str.append(this_contig.fasta_str())
                        else:
                            all_contig_str.append(">contig_" + str(go_contig + 1) + "--" +
                                                  this_contig.label + "\n" + this_contig.seq + "\n")
                    if len(all_contig_str) == 1 and set(contigs_are_circular) == {True}:
                        # print ir stat
                        if count_path == 1 and mode == "embplant_pt":
                            report_plastome_architecture(
                                broken_graph.export_path(this_paths[0]).seq)
                    # Context manager so the file is flushed and closed deterministically.
                    with open(path_fasta_name(go_res, other_tag, count_path), "w") as out_handle:
                        out_handle.write("\n".join(all_contig_str))
                broken_graph.write_to_gfa(prefix + ".graph" + str(go_res) + ".selected_graph.gfa")
        else:
            for go_res, copy_res in enumerate(copy_results, 1):
                idealized_graph = copy_res["graph"]
                # should add making one-step-inversion pairs for paths,
                # which would be used to identify existence of a certain isomer using mapping information
                these_paths = limit_paths(
                    idealized_graph.get_all_circular_paths(
                        mode=mode,
                        log_handler=inner_logging,
                        reverse_start_direction_for_pt=options.reverse_lsc))

                # exporting paths, reporting results
                for count_path, (this_path, other_tag) in enumerate(these_paths, 1):
                    this_seq_obj = idealized_graph.export_path(this_path)
                    if DEGENERATE_BASES & set(this_seq_obj.seq):
                        degenerate_base_used = True
                    with open(path_fasta_name(go_res, other_tag, count_path), "w") as out_handle:
                        out_handle.write(this_seq_obj.fasta_str())
                    # print ir stat
                    if count_path == 1 and mode == "embplant_pt":
                        report_plastome_architecture(this_seq_obj.seq)
                idealized_graph.write_to_gfa(prefix + ".graph" + str(go_res) + ".selected_graph.gfa")

        if degenerate_base_used:
            # inner_logging may be None; fall back to stdout like every other message here.
            if inner_logging:
                inner_logging.warning("Degenerate base(s) used!")
            else:
                sys.stdout.write("Warning: Degenerate base(s) used!\n")
        time_d = time.time()
        if inner_logging:
            inner_logging.info(
                ">>> Solving and unfolding graph finished: " +
                str(round(time_d - time_c, 4)) + "s")
        else:
            sys.stdout.write(
                "\n\n>>> Solving and unfolding graph finished: " +
                str(round(time_d - time_c, 4)) + "s\n")

    def report_failure(error):
        """Log ``error`` with its traceback, the retry hint and the elapsed time.

        Must be called from inside an ``except`` block so that
        ``log_handler.exception`` can pick up the active traceback.
        """
        log_handler.exception(str(error))
        log_handler.error("Disentangling failed!")
        if not options.acyclic_allowed:
            log_handler.info(
                "You might try again with '--linear' to export contig(s) "
                "instead of circular genome.")
        final_log = simple_log(log_handler, options.output_directory,
                               options.prefix + ".disentangle.")
        final_log.info("\nTotal cost " + str(time.time() - time0))
        final_log.info(
            "Please email [email protected] if you find bugs!\n")

    try:
        disentangle_circular_assembly(
            options.fastg_file,
            options.tab_file,
            os.path.join(options.output_directory, options.prefix),
            type_factor=options.type_factor,
            mode=options.mode,
            weight_factor=options.weight_factor,
            log_hard_cov_threshold=options.depth_factor,
            contamination_depth=options.contamination_depth,
            contamination_similarity=options.contamination_similarity,
            degenerate=options.degenerate,
            degenerate_depth=options.degenerate_depth,
            degenerate_similarity=options.degenerate_similarity,
            expected_max_size=options.expected_max_size,
            expected_min_size=options.expected_min_size,
            min_sigma_factor=options.min_sigma_factor,
            max_copy_in=options.max_multiplicity,
            only_max_cov=options.only_keep_max_cov,
            acyclic_allowed=options.acyclic_allowed,
            keep_temp=options.keep_temp_graph,
            inner_logging=log_handler,
            verbose=options.verbose,
            debug=options.debug)
        log_handler = simple_log(logging.getLogger(), options.output_directory,
                                 options.prefix + ".disentangle.")

        log_handler.info('\nTotal cost: ' +
                         str(round(time.time() - time0, 4)) + 's\n')
    except IOError:
        # I/O problems are left to the caller; bare raise keeps the original traceback.
        raise
    except KeyError as e:
        # A KeyError carrying the mode name means the tab file has no hit for that mode.
        if str(e).strip("'") == options.mode:
            log_handler.error(options.mode + " not found in " +
                              str(options.tab_file) + "!")
            log_handler.error("Disentangling failed!")
        else:
            report_failure(e)
    except Exception as e:
        report_failure(e)
    logging.shutdown()
def main():
    """Command-line entry point: disentangle an organelle genome from an assembly graph.

    Parses the options with ``get_options``, runs the time-limited
    ``disentangle_circular_assembly`` worker on the fastg/tab input and logs the
    total run time. Any ``Exception`` raised on the way is logged with its
    traceback (plus a hint for the user) and is not re-raised.

    NOTE(review): this file defines ``main`` more than once; the last definition
    is the one bound at import time -- confirm which version is intended.
    """
    time0 = time.time()
    print_title = "GetOrganelle v" + str(get_versions()) + \
                  "\n\nThis is a script for extracting organelle genomes" \
                  " from slim_fastg.py-produced files (csv & fastg). " + \
                  "\nBy [email protected]\n\n"
    options, log_handler = get_options(print_title)

    @set_time_limit(options.time_limit)
    def disentangle_circular_assembly(fastg_file, tab_file, prefix, weight_factor, type_factor, mode="embplant_pt",
                                      log_hard_cov_threshold=10., expected_max_size=inf, expected_min_size=0,
                                      contamination_depth=3., contamination_similarity=5.,
                                      degenerate=True, degenerate_depth=1.5, degenerate_similarity=1.5,
                                      min_sigma_factor=0.1, only_max_c=True, keep_temp=False, acyclic_allowed=False,
                                      verbose=False, log_handler=None, debug=False):
        """Find the target graph(s) in ``fastg_file`` and export their paths.

        For every graph returned by ``Assembly.find_target_graph`` this writes
        ``<prefix>.graph<N><tag>.<M>.path_sequence.fasta`` for each exported path
        and ``<prefix>.graph<N>.selected_graph.gfa`` for the graph itself.

        :param fastg_file: assembly graph passed to ``Assembly``.
        :param tab_file: annotation table passed to ``find_target_graph``.
        :param prefix: path prefix for every output file.
        :param only_max_c: forwarded as ``only_keep_max_cov``.
        :param keep_temp: when true, ``<prefix>.temp.fastg`` is passed as
            ``temp_graph``; otherwise ``None``.
        :param acyclic_allowed: when true, export paths via ``get_all_paths``
            (each path may hold several contigs); otherwise export circular
            paths via ``get_all_circular_paths``. Also forwarded as
            ``broken_graph_allowed``.
        :param log_handler: logger used for progress messages (shadows the
            outer ``log_handler``); when falsy the messages go to
            ``sys.stdout`` instead.

        The remaining keyword arguments are forwarded to ``find_target_graph``
        under the same names.
        """
        # Resume mode: the first graph's GFA is taken as the marker of a finished run.
        if options.resume and os.path.exists(prefix + ".graph1.selected_graph.gfa"):
            if log_handler:
                log_handler.info(">>> Result graph existed!")
            else:
                sys.stdout.write(">>> Result graph existed!\n")
            return

        def path_fasta_name(graph_id, tag, path_id):
            """Build the output fasta name for one exported path."""
            return prefix + ".graph" + str(graph_id) + tag + "." + str(path_id) + ".path_sequence.fasta"

        time_a = time.time()
        if log_handler:
            log_handler.info(">>> Parsing " + fastg_file + " ..")
        else:
            sys.stdout.write("Parsing " + fastg_file + " ..\n")
        input_graph = Assembly(fastg_file, min_cov=options.min_cov, max_cov=options.max_cov)
        time_b = time.time()
        if log_handler:
            log_handler.info(">>> Parsing input fastg file finished: " + str(round(time_b - time_a, 4)) + "s")
        else:
            sys.stdout.write("\n>>> Parsing input fastg file finished: " + str(round(time_b - time_a, 4)) + "s\n")
        temp_graph = prefix + ".temp.fastg" if keep_temp else None

        copy_results = input_graph.find_target_graph(tab_file, mode=mode, type_factor=type_factor,
                                                     weight_factor=weight_factor,
                                                     log_hard_cov_threshold=log_hard_cov_threshold,
                                                     contamination_depth=contamination_depth,
                                                     contamination_similarity=contamination_similarity,
                                                     degenerate=degenerate, degenerate_depth=degenerate_depth,
                                                     degenerate_similarity=degenerate_similarity,
                                                     expected_max_size=expected_max_size,
                                                     expected_min_size=expected_min_size,
                                                     only_keep_max_cov=only_max_c,
                                                     min_sigma_factor=min_sigma_factor,
                                                     temp_graph=temp_graph,
                                                     broken_graph_allowed=acyclic_allowed,
                                                     verbose=verbose, log_handler=log_handler,
                                                     debug=debug)
        time_c = time.time()
        if log_handler:
            log_handler.info(">>> Detecting target graph finished: " + str(round(time_c - time_b, 4)) + "s")
            if len(copy_results) > 1:
                log_handler.info(str(len(copy_results)) + " set(s) of graph detected.")
        else:
            sys.stdout.write("\n\n>>> Detecting target graph finished: " + str(round(time_c - time_b, 4)) + "s\n")
            if len(copy_results) > 1:
                sys.stdout.write(str(len(copy_results)) + " set(s) of graph detected.\n")

        degenerate_base_used = False
        if acyclic_allowed:
            # Graph and path numbering in the output file names is 1-based.
            for go_res, copy_res in enumerate(copy_results, 1):
                broken_graph = copy_res["graph"]
                all_paths = broken_graph.get_all_paths(mode=mode, log_handler=log_handler)
                for count_path, (this_paths, other_tag) in enumerate(all_paths, 1):
                    all_contig_str = []
                    for go_contig, this_p_part in enumerate(this_paths):
                        this_contig = broken_graph.export_path(this_p_part)
                        if DEGENERATE_BASES & set(this_contig.seq):
                            degenerate_base_used = True
                        if len(this_paths) == 1 and this_contig.label.endswith("(circular)"):
                            # A single circular contig keeps its own fasta header.
                            all_contig_str.append(this_contig.fasta_str())
                        else:
                            all_contig_str.append(">contig_" + str(go_contig + 1) + "--" + this_contig.label +
                                                  "\n" + this_contig.seq + "\n")
                    # Context manager so the file is flushed and closed deterministically.
                    with open(path_fasta_name(go_res, other_tag, count_path), "w") as out_handle:
                        out_handle.write("\n".join(all_contig_str))
                broken_graph.write_to_gfa(prefix + ".graph" + str(go_res) + ".selected_graph.gfa")
        else:
            for go_res, copy_res in enumerate(copy_results, 1):
                idealized_graph = copy_res["graph"]
                # should add making one-step-inversion pairs for paths,
                # which would be used to identify existence of a certain isomer using mapping information
                circular_paths = idealized_graph.get_all_circular_paths(mode=mode, log_handler=log_handler)
                for count_path, (this_path, other_tag) in enumerate(circular_paths, 1):
                    this_seq_obj = idealized_graph.export_path(this_path)
                    if DEGENERATE_BASES & set(this_seq_obj.seq):
                        degenerate_base_used = True
                    with open(path_fasta_name(go_res, other_tag, count_path), "w") as out_handle:
                        out_handle.write(this_seq_obj.fasta_str())
                idealized_graph.write_to_gfa(prefix + ".graph" + str(go_res) + ".selected_graph.gfa")

        if degenerate_base_used:
            # log_handler may be None; fall back to stdout like every other message here.
            if log_handler:
                log_handler.warning("Degenerate base(s) used!")
            else:
                sys.stdout.write("Warning: Degenerate base(s) used!\n")
        time_d = time.time()
        if log_handler:
            log_handler.info(">>> Solving and unfolding graph finished: " + str(round(time_d - time_c, 4)) + "s")
        else:
            sys.stdout.write("\n\n>>> Solving and unfolding graph finished: " + str(round(time_d - time_c, 4)) + "s\n")

    try:
        disentangle_circular_assembly(options.fastg_file, options.tab_file,
                                      os.path.join(options.output_directory, options.prefix),
                                      type_factor=options.type_factor,
                                      mode=options.mode,
                                      weight_factor=options.weight_factor,
                                      log_hard_cov_threshold=options.depth_factor,
                                      contamination_depth=options.contamination_depth,
                                      contamination_similarity=options.contamination_similarity,
                                      degenerate=options.degenerate, degenerate_depth=options.degenerate_depth,
                                      degenerate_similarity=options.degenerate_similarity,
                                      expected_max_size=options.expected_max_size,
                                      expected_min_size=options.expected_min_size,
                                      min_sigma_factor=options.min_sigma_factor,
                                      only_max_c=options.only_keep_max_cov, acyclic_allowed=options.acyclic_allowed,
                                      keep_temp=options.keep_temp_graph,
                                      log_handler=log_handler, verbose=options.verbose, debug=options.debug)
        log_handler = simple_log(logging.getLogger(), options.output_directory, options.prefix + ".disentangle.")

        log_handler.info('\nTotal cost: ' + str(round(time.time() - time0, 4)) + 's\n')
    except Exception as e:
        # Top-level boundary: report the failure (traceback included) instead of crashing.
        if options.debug:
            log_handler.exception("")
        else:
            log_handler.exception(str(e))
        # The traceback was already emitted above, so a plain error record suffices here.
        log_handler.error("Disentangling failed!")
        if not options.acyclic_allowed:
            log_handler.info("You might try again with '--linear' to export contig(s) instead of circular genome.")
        log_handler = simple_log(log_handler, options.output_directory, options.prefix + ".disentangle.")
        log_handler.info("\nTotal cost " + str(time.time() - time0))
        log_handler.info("Please email [email protected] if you find bugs!\n")
    logging.shutdown()