예제 #1
0
def map_unmapped_features(unmapped_features, target_chroms, lifted_feature_list, feature_db, feature_hierarchy,
                          ref_parent_order, args):
    """Retry still-unmapped features against the whole target, if applicable.

    The retry happens only when ``unmapped_features`` is non-empty and the
    first entry of ``target_chroms`` differs from ``args.target``. In that
    case the work is delegated to
    ``liftover_types.map_unmapped_genes_agaisnt_all`` with
    ``[args.reference]`` and ``[args.target]`` as the chromosome lists, and
    its result is returned. Otherwise ``unmapped_features`` is returned
    untouched.
    """
    needs_whole_genome_pass = len(unmapped_features) > 0 and target_chroms[0] != args.target
    if not needs_whole_genome_pass:
        return unmapped_features

    print("mapping unaligned features to whole genome")
    whole_reference = [args.reference]
    whole_target = [args.target]
    return liftover_types.map_unmapped_genes_agaisnt_all(
        unmapped_features,
        whole_reference,
        whole_target,
        lifted_feature_list,
        feature_db,
        feature_hierarchy,
        ref_parent_order,
        args,
    )
예제 #2
0
def main():
    """Run the annotation lift-over pipeline from command-line arguments.

    Steps, in order:

    1. Parse the arguments and read the chromosome pairing file, if given;
       otherwise the whole reference and target FASTA files are used as the
       single "chromosome" pair.
    2. Lift the original annotation via
       ``liftover_types.lift_original_annotation``.
    3. If features remain unmapped and the first pass was not already run
       against the whole target, retry them against the whole genome.
    4. If both an unplaced-sequence file and a chromosome file were given,
       map the unplaced genes.
    5. Write the IDs of the features that are still unmapped, one per line,
       to the path in ``args.u``.
    6. If requested, map extra gene copies.
    7. Write the lifted annotation to the output path.
    """
    # parse args
    args = parse_args()
    gff = args.g
    target_fasta = args.t
    reference_fasta = args.r
    processes = int(args.p)
    output = args.o
    chroms_file = args.chroms
    db = args.db
    infer_transcripts = args.infer_transcripts
    infer_genes = args.infer_genes
    unplaced_seq = args.unplaced
    copies = args.copies
    cov_threshold = float(args.a)
    seq_threshold = float(args.s)
    seq_threshold_copies = float(args.sc)
    minimap2_path = args.m
    inter_files = args.dir
    max_alns = int(args.n)
    feature_types_file = args.f
    unmapped_path = args.u

    # read chroms
    if chroms_file is not None:
        ref_chroms, target_chroms = parse_chrm_files(chroms_file)
    else:
        ref_chroms = [reference_fasta]
        target_chroms = [target_fasta]

    parent_features_to_lift = get_parent_features_to_lift(feature_types_file)

    # lift genes; both containers are handed to the lift-over step, and
    # unmapped_features is read back afterwards
    lifted_feature_list = {}
    unmapped_features = []

    (feature_db, parent_features, intermediate_features, children_features,
     parent_order) = liftover_types.lift_original_annotation(
        gff, target_fasta, reference_fasta, ref_chroms, target_chroms,
        processes, db, lifted_feature_list, unmapped_features,
        infer_transcripts, infer_genes, cov_threshold, seq_threshold,
        minimap2_path, inter_files, max_alns, parent_features_to_lift)

    # Retry against the whole genome only when the first pass was restricted
    # to something other than the full target FASTA.
    if unmapped_features and target_chroms[0] != target_fasta:
        print("mapping unaligned features to whole genome")
        ref_chroms = [reference_fasta]
        target_chroms = [target_fasta]
        unmapped_features = liftover_types.map_unmapped_genes_agaisnt_all(
            unmapped_features, target_fasta, reference_fasta, ref_chroms,
            target_chroms, processes, lifted_feature_list, feature_db,
            parent_features, intermediate_features, children_features,
            parent_order, minimap2_path, inter_files, max_alns)

    if unplaced_seq is not None and chroms_file is not None:
        print("mapping unplaced genes")
        # Only the reference side of the unplaced file is used; the target
        # side is always the whole target FASTA.
        ref_chroms, _ = parse_chrm_files(unplaced_seq)
        target_chroms = [target_fasta]
        liftover_types.map_unplaced_genes(
            unmapped_features, target_fasta, reference_fasta, ref_chroms,
            target_chroms, processes, lifted_feature_list, feature_db,
            parent_features, intermediate_features, children_features,
            parent_order, minimap2_path, inter_files, max_alns)

    # Open the report only at the point of writing, inside a context manager,
    # so the handle is closed even if a write fails and no handle is held
    # open across the mapping steps above.
    with open(unmapped_path, 'w') as unmapped_out:
        unmapped_out.writelines(gene.id + "\n" for gene in unmapped_features)

    if copies:
        print("mapping gene copies")
        ref_chroms = [reference_fasta]
        target_chroms = [target_fasta]
        remap = chroms_file is not None
        liftover_types.map_extra_copies(
            target_fasta, reference_fasta, ref_chroms, target_chroms,
            processes, lifted_feature_list, parent_features, children_features,
            feature_db, intermediate_features, parent_order,
            seq_threshold_copies, minimap2_path, inter_files, remap, max_alns)

    write_new_gff.write_new_gff(lifted_feature_list, output, parent_features,
                                cov_threshold, seq_threshold)