def make(args):
    """Build a graph and write it to ``args.out_file_name``.

    If ``args.vcf`` is set, the graph is constructed from the sequence of
    ``args.chromosome`` in ``args.reference_fasta_file`` together with the
    variants read from the vcf. Otherwise the graph is loaded from
    ``args.vg_json_files``.
    """
    if args.vcf is None:
        logging.info("Will create from files %s" % args.vg_json_files)
        graph = Graph.from_vg_json_files(args.vg_json_files)
    else:
        logging.info("Will create from vcf file")
        fasta = Fasta(args.reference_fasta_file)

        chrom_name = args.chromosome
        # Variants are looked up under "23"/"24" for X/Y; every other
        # chromosome name is passed through unchanged.
        vcf_chrom_name = {"X": "23", "Y": "24"}.get(chrom_name, chrom_name)

        vcf_variants = VcfVariants.from_vcf(
            args.vcf, limit_to_chromosome=vcf_chrom_name)

        # The fasta is indexed with the original (non-numeric) name.
        sequence = str(fasta[args.chromosome])
        logging.info("Extracted sequence for chromosome %s. Length is: %d" %
                     (chrom_name, len(sequence)))
        logging.info("There are %d variants in chromosome" % len(vcf_variants))

        graph = GraphConstructor(
            sequence, vcf_variants).get_graph_with_dummy_nodes()

    graph.to_file(args.out_file_name)
    def make_unique_variant_kmers(args):
        """Compute flat kmers for all variants in parallel and write them out.

        Loads the kmer index (``args.kmer_index``), the variant-to-nodes
        mapping (``args.variant_to_nodes``) and the graph (``args.graph``),
        and registers each with ``to_shared_memory`` under a fixed name.
        Variants are then streamed from ``args.vcf`` in chunks of
        ``args.chunk_size``; each chunk is handed, together with ``args``, to
        ``make_unique_variant_kmers_single_thread`` on a pool of
        ``args.n_threads`` workers. The per-chunk results are merged and
        written to ``args.out_file_name``.
        """
        # NOTE(review): presumably the workers fetch these objects by the
        # names used here -- confirm against
        # make_unique_variant_kmers_single_thread.
        logging.info("Reading kmer index")
        kmer_index = CollisionFreeKmerIndex.from_file(args.kmer_index)
        to_shared_memory(kmer_index, "kmer_index_shared")
        logging.info("Reading variant to nodes")
        variant_to_nodes = VariantToNodes.from_file(args.variant_to_nodes)
        to_shared_memory(variant_to_nodes, "variant_to_nodes_shared")
        logging.info("REading graph")
        graph = Graph.from_file(args.graph)
        to_shared_memory(graph, "graph_shared")
        logging.info("Reading all variants")
        variants = VcfVariants.from_vcf(args.vcf,
                                        skip_index=True,
                                        make_generator=True)
        variants = variants.get_chunks(chunk_size=args.chunk_size)

        # Use the pool as a context manager so the worker processes are always
        # released, also when a worker raises. starmap blocks until every
        # chunk is done and already returns a list, in input order.
        with Pool(args.n_threads) as pool:
            all_flat_kmers = pool.starmap(
                make_unique_variant_kmers_single_thread,
                zip(variants, repeat(args)))

        logging.info("Merge all flat kmers")
        merged_flat = FlatKmers.from_multiple_flat_kmers(all_flat_kmers)
        merged_flat.to_file(args.out_file_name)
        logging.info("Wrote to file %s" % args.out_file_name)
 def make_variant_to_nodes(args):
     """Build a variant-to-nodes mapping and write it to ``args.out_file_name``.

     The mapping is created from the graph stored in ``args.graph`` and the
     variants read from ``args.vcf``.
     """
     from .variant_to_nodes import VariantToNodes

     loaded_graph = Graph.from_file(args.graph)
     vcf_variants = VcfVariants.from_vcf(args.vcf)

     mapping = VariantToNodes.from_graph_and_variants(loaded_graph,
                                                      vcf_variants)
     mapping.to_file(args.out_file_name)

     logging.info("Wrote to file %s" % args.out_file_name)
def make_haplotype_to_nodes(args):
    """Build a haplotype-to-nodes mapping and write it to ``args.out_file_name``.

    Uses the graph in ``args.graph_file_name``, the variants in
    ``args.vcf_file_name`` and ``args.n_haplotypes``.
    """
    loaded_graph = Graph.from_file(args.graph_file_name)
    vcf_variants = VcfVariants.from_vcf(args.vcf_file_name)

    mapping = HaplotypeToNodes.from_graph_and_variants(loaded_graph,
                                                       vcf_variants,
                                                       args.n_haplotypes)

    logging.info("Saving to file")
    mapping.to_file(args.out_file_name)
    logging.info("Wrote to file %s" % args.out_file_name)
    def validate_graph(args):
        """Look up the graph nodes of every variant in ``args.vcf``.

        Progress is logged every 1000 variants. Nothing is returned; the
        function only exercises ``graph.get_variant_nodes`` for each variant.
        """
        vcf_variants = VcfVariants.from_vcf(args.vcf)
        loaded_graph = Graph.from_file(args.graph)

        for count, variant in enumerate(vcf_variants):
            at_checkpoint = count % 1000 == 0
            if at_checkpoint:
                logging.info("%d variants processed" % count)

            # The nodes themselves are not used. NOTE(review): presumably the
            # lookup (and the two-value unpacking) is expected to raise for a
            # variant the graph does not represent -- confirm.
            _ref_node, _var_node = loaded_graph.get_variant_nodes(variant)
def add_allele_frequencies(args):
    """Set allele frequencies on a graph and save it back to the same file.

    The graph is read from ``args.graph_file_name``, updated from the
    variants of ``args.chromosome`` in ``args.vcf_file_name``, and written
    back to ``args.graph_file_name``.
    """
    logging.info("Reading graph")
    loaded_graph = Graph.from_file(args.graph_file_name)

    chromosome_variants = VcfVariants.from_vcf(
        args.vcf_file_name,
        limit_to_chromosome=args.chromosome,
        skip_index=True,
    )

    # Use chromosome 1 because we always assume this is a single-chromosome
    # graph
    loaded_graph.set_allele_frequencies_from_variants(chromosome_variants,
                                                      use_chromosome=1)

    loaded_graph.to_file(args.graph_file_name)
    logging.info("Wrote modified graph to the same file %s" %
                 args.graph_file_name)
 def make_random_haplotypes(args):
     """Create haplotype nodes from random haplotypes and write them out.

     First builds a ``HaplotypeToNodes`` from ``args.n_haplotypes`` random
     haplotypes over the graph in ``args.graph`` and the variants in
     ``args.vcf_file_name`` (weighted by allele frequency unless
     ``args.no_frequency_weighting`` is set), then derives a new one by
     traversing the graph, and writes that to ``args.out_file_name``.
     """
     loaded_graph = Graph.from_file(args.graph)
     vcf_variants = VcfVariants.from_vcf(args.vcf_file_name, skip_index=True)

     random_haplotype_nodes = HaplotypeToNodes.make_from_n_random_haplotypes(
         loaded_graph,
         vcf_variants,
         n_haplotypes=args.n_haplotypes,
         weight_by_allele_frequency=not args.no_frequency_weighting,
     )

     logging.info(
         "Making new haplotypenodes by traversing full graph for each haplotype"
     )
     traversed = random_haplotype_nodes.get_new_by_traversing_graph(
         loaded_graph, args.n_haplotypes)

     traversed.to_file(args.out_file_name)
     logging.info("Wrote haplotypenodes to %s" % args.out_file_name)
    def make_genotype_matrix(args):
        """Build a genotype matrix and write it to ``args.out_file_name``.

        Variants are read from ``args.vcf_file_name`` as a generator. If
        ``args.node_to_haplotypes`` is given, the matrix is built from that
        file together with the graph in ``args.graph``; otherwise it is built
        directly from the variants.
        """
        from .genotype_matrix import GenotypeMatrix

        vcf_variants = VcfVariants.from_vcf(
            args.vcf_file_name,
            skip_index=True,
            limit_to_n_lines=None,
            make_generator=True,
        )

        if args.node_to_haplotypes is None:
            logging.info("Making genotype matrix directly from vcf")
            genotype_matrix = GenotypeMatrix.from_variants(
                vcf_variants,
                args.n_individuals,
                args.n_variants,
                n_threads=args.n_threads,
                chunk_size=args.chunk_size,
            )
        else:
            loaded_graph = Graph.from_file(args.graph)
            node_haplotypes = NodeToHaplotypes.from_file(
                args.node_to_haplotypes)
            genotype_matrix = (
                GenotypeMatrix.from_nodes_to_haplotypes_and_variants(
                    node_haplotypes, vcf_variants, loaded_graph,
                    args.n_individuals))

        genotype_matrix.to_file(args.out_file_name)
def add_indel_nodes2(args):
    """Create a copy of a graph with dummy nodes and write it out.

    Reads variants from ``args.vcf_file_name`` and the graph from
    ``args.graph_file_name``, lets ``DummyNodeAdder`` create a new graph with
    dummy nodes, and writes the result to ``args.out_file_name``.
    """
    vcf_variants = VcfVariants.from_vcf(args.vcf_file_name)
    original_graph = Graph.from_file(args.graph_file_name)

    graph_with_dummy_nodes = DummyNodeAdder(
        original_graph, vcf_variants).create_new_graph_with_dummy_nodes()

    graph_with_dummy_nodes.to_file(args.out_file_name)