Ejemplo n.º 1
0
def align_pairwise(genomes, new_align, r_root_dir, g_root_dir,dirs, run, max_size, chop_mode, mauve_exec, mtype, segtype, min_size):
    """Make a pairwise alignment."""
    # set up directories
    aln_dir = r_root_dir+run+dirs['aln_segs']
    map_dir = r_root_dir+run+dirs['maps']
    # load inputs and process genomes
    seq_dir = g_root_dir
    ref = [Noodle(genome, seq_dir) for genome in genomes
           if genome['order'] == 1][0]
    query = [Noodle(genome, seq_dir) for genome in genomes
             if genome['order'] == 2][0]
    # align if needed
    print "Aligning", ref.name, "and", query.name, "...",
    if new_align:
        mauve_pw_align(ref, query, r_root_dir, g_root_dir, dirs, run, max_size, chop_mode, mauve_exec, mtype)
    # process data segments
    print "Processing segments ..",
    seg_file = aln_dir+ref.name+"_"+query.name+"_segs.txt"
    pair_data = process_segdata(seg_file, ref, query, segtype)
    print "OK"
    # map of query aligned to reference
    print "Mapping ...",
    map_file = map_dir+run+"_"+ref.name+"_vs_"+query.name+".pdf"
    pairwise_draw(ref, query, pair_data, map_file, 'dual', 'dual', 'm',
                  'fct', 'fct', idpt, fct_flags, fct_colors, min_size)
    print "OK\n"
Ejemplo n.º 2
0
def map_cst_aln(run_ref, ref_gbk, genome, scaff_gbk, segs_root, maps_root,
                segtype, min_size, fct_flags, fct_colors, idpt):
    """Generate map of construct aligned to reference."""
    # set inputs and outputs
    g_name = genome['name']
    ref_ctg_n = run_ref.name
    seg_file = segs_root + g_name + "/" + g_name + "_" + ref_ctg_n + "_segs.txt"
    map_file = maps_root + g_name + "_vs_" + ref_ctg_n + ".pdf"
    # start mapping
    try:
        open(scaff_gbk)
    except IOError:
        print "WARNING: No scaffold construct to map"
    else:
        try:
            # load segments TODO: add idp-based clumping
            segdata = np.loadtxt(seg_file, skiprows=1, dtype=segtype)
        except IOError:
            msg = "\nERROR: could not load segments data"
            run_ref.log(msg)
            print msg
        except StopIteration:
            msg = "\nERROR: could not make map"
            run_ref.log(msg)
            print msg
        else:
            # offset coordinates where desired
            try:
                g_offset = genome['offset']
                if g_offset[0] != 0 or g_offset[1] != 0:
                    q_len = len(load_genbank(scaff_gbk).seq)
                    segdata = offset_q2r_coords(segdata, q_len, g_offset,
                                                segtype)
                # determine whether to flip the query sequence (negative offset)
                if g_offset[1] < 0:
                    q_invert = True
                else:
                    q_invert = False
            except KeyError:
                g_offset = (0, 0)
                q_invert = False
            # generate graphical map
            pairwise_draw(ref_ctg_n, g_name, ref_gbk, scaff_gbk, segdata,
                          map_file, q_invert, g_offset, 'dual', 'dual', 'm',
                          'fct', 'fct', min_size, fct_flags, fct_colors, idpt)
Ejemplo n.º 3
0
def map_cst_aln(run_ref, ref_gbk, genome, scaff_gbk, segs_root, maps_root,
                segtype, min_size, fct_flags, fct_colors, idpt):
    """Generate map of construct aligned to reference."""
    # set inputs and outputs
    g_name = genome['name']
    ref_ctg_n = run_ref.name
    seg_file = segs_root+g_name+"/"+g_name+"_"+ref_ctg_n+"_segs.txt"
    map_file = maps_root+g_name+"_vs_"+ref_ctg_n+".pdf"
    # start mapping
    try: open(scaff_gbk)
    except IOError:
        print "WARNING: No scaffold construct to map"
    else:
        try:
            # load segments TODO: add idp-based clumping
            segdata = np.loadtxt(seg_file, skiprows=1, dtype=segtype)
        except IOError:
                msg = "\nERROR: could not load segments data"
                run_ref.log(msg)
                print msg
        except StopIteration:
                msg = "\nERROR: could not make map"
                run_ref.log(msg)
                print msg
        else:
            # offset coordinates where desired
            try:
                g_offset = genome['offset']
                if g_offset[0] != 0 or g_offset[1] != 0:
                    q_len = len(load_genbank(scaff_gbk).seq)
                    segdata = offset_q2r_coords(segdata, q_len, g_offset,
                                                segtype)
                # determine whether to flip the query sequence (negative offset)
                if g_offset[1] < 0:
                    q_invert = True
                else:
                    q_invert = False
            except KeyError:
            	g_offset = (0,0)
                q_invert = False
            # generate graphical map
            pairwise_draw(ref_ctg_n, g_name, ref_gbk, scaff_gbk, segdata,
                         map_file, q_invert, g_offset, 'dual', 'dual', 'm',
                         'fct', 'fct', min_size, fct_flags, fct_colors, idpt)
Ejemplo n.º 4
0
def align_multi(genomes, new_align, r_root_dir, g_root_dir, dirs, run, max_size, chop_mode, mauve_exec, mtype, segtype, idpt, fct_flags, fct_colors, min_size):
    """Make a multiple alignment."""
    # set up directories
    aln_dir = r_root_dir+run+dirs['aln_segs']
    map_dir = r_root_dir+run+dirs['maps']
    # load inputs, process and pair up genomes
    seq_dir = g_root_dir
    g_pairs = []
    counter = 1
    while counter < len(genomes):
        g_pairs.append(([Noodle(genome, seq_dir) for genome in genomes if
                            genome['order'] == counter][0],
                        [Noodle(genome, seq_dir) for genome in genomes if
                            genome['order'] == counter+1][0]))
        counter +=1
    # process pairs
    for (ref, query) in g_pairs:
        # align if needed
        print "Aligning", ref.name, "and", query.name, "...",
        if new_align:
            mauve_pw_align(ref, query, r_root_dir, g_root_dir, dirs, run, max_size, chop_mode,
                           mauve_exec, mtype)
    # traverse genome pairs
    segdata_list = []
    counter = 0
    for (ref, query) in g_pairs:
        counter +=1
        # process data segments
        print "Processing pair", counter, "segments ...",
        p_seg_file = aln_dir+ref.name+"_"+query.name+"_segs.txt"
        pair_data = process_segdata(p_seg_file, ref, query, segtype)
        segdata_list.append(pair_data)
        print "OK"
        # make a pairwise map while we're at it
        print "Mapping pair", counter, "...",
        p_map_file = map_dir+run+"_"+ref.name+"_vs_"+query.name+".pdf"
        pairwise_draw(ref, query, pair_data, p_map_file, 'dual', 'dual', 'm',
                      'fct', 'fct', idpt, fct_flags, fct_colors, min_size)
        print "OK"
    # map of query aligned to reference
    print "Mapping multiple alignment...",
    map_file = map_dir+run+".pdf"
    multi_draw(g_pairs, segdata_list, map_file, idpt, fct_flags, fct_colors, min_size)
    print "OK\n"
Ejemplo n.º 5
0
def align_multi(genomes, new_align, dirs, run):
    """Make a multiple alignment."""

    # load inputs, process and pair up genomes
    seq_dir = dirs['seqfiles']
    g_pairs = []
    counter = 1
    while counter < len(genomes):
        g_pairs.append(([Noodle(genome, seq_dir) for genome in genomes if
                            genome['order'] == counter][0],
                        [Noodle(genome, seq_dir) for genome in genomes if
                            genome['order'] == counter+1][0]))
        counter +=1
    # process pairs
    for (ref, query) in g_pairs:
        # align if needed
        print "Aligning", ref.name, "and", query.name, "...",
        if new_align:
            mauve_pw_align(ref, query, dirs)
    # traverse genome pairs
    segdata_list = []
    counter = 0
    for (ref, query) in g_pairs:
        counter +=1
        # process data segments
        print "Processing pair", counter, "segments ...",
        p_seg_file = dirs['aln_segs']+"/"+ref.name+"_"+query.name+"_segs.txt"
        pair_data = process_segdata(p_seg_file, ref, query)
        segdata_list.append(pair_data)
        print "OK"
        # make a pairwise map while we're at it
        print "Mapping pair", counter, "...",
        p_map_file = dirs['maps']+run+"_"+ref.name+"_vs_"+query.name+".pdf"
        pairwise_draw(ref, query, pair_data, p_map_file, 'dual', 'dual', 'm',
                      'fct', 'fct')
        print "OK"
    # map of query aligned to reference
    print "Mapping multiple alignment...",
    map_file = dirs['maps']+run+".pdf"
    multi_draw(g_pairs, segdata_list, map_file)
    print "OK\n"
Ejemplo n.º 6
0
def map_ctg_alns(run_ref, ref_gbk, genome, ctg_segs_root, maps_root,
                 fixed_dirs, segtype, min_size, fct_flags, fct_colors, idpt):
    """Generate maps of contigs aligned to reference."""
    # set inputs and outputs
    g_name = genome['name']
    ref_ctg_n = run_ref.name
    segs_root = ctg_segs_root+g_name+"/"
    ctgs_dir = fixed_dirs['gbk_contigs_dir']+g_name+"/"
    # list genbank files in matches directory
    try:
        dir_contents = listdir(segs_root)
    except OSError:
        msg = "\nWARNING: no matching segments"
        run_ref.log(msg)
        print msg
    else:
        for ctg_num in dir_contents:
            ctg_gbk = ctgs_dir+g_name+"_"+ctg_num+".gbk"
            seg_file = segs_root+ctg_num+"/"+ctg_num+"_"+ref_ctg_n+"_segs.txt"
            map_file = maps_root+g_name+"_"+ctg_num+"_vs_"+ref_ctg_n+".pdf"
            # start mapping
            try:
                # load segments TODO: add idp-based clumping
                segdata = np.loadtxt(seg_file, skiprows=1, dtype=segtype)
                # deactivate offsetting
                g_offset = (0,0)
                q_invert = False
                # generate graphical map
                pairwise_draw(ref_ctg_n, g_name+"_"+ctg_num, ref_gbk, ctg_gbk,
                             segdata, map_file, q_invert, g_offset, 'dual',
                             'dual', 'm', 'fct', 'fct', min_size,
                             fct_flags, fct_colors, idpt)
            except IOError:
                msg = "\nERROR: could not load segments data"
                run_ref.log(msg)
                print msg
            except StopIteration:
                msg = "\nERROR: could not make map"
                run_ref.log(msg)
                print msg
Ejemplo n.º 7
0
def map_ctg_alns(run_ref, ref_gbk, genome, ctg_segs_root, maps_root,
                 fixed_dirs, segtype, min_size, fct_flags, fct_colors, idpt):
    """Generate maps of contigs aligned to reference."""
    # set inputs and outputs
    g_name = genome['name']
    ref_ctg_n = run_ref.name
    segs_root = ctg_segs_root + g_name + "/"
    ctgs_dir = fixed_dirs['gbk_contigs_dir'] + g_name + "/"
    # list genbank files in matches directory
    try:
        dir_contents = listdir(segs_root)
    except OSError:
        msg = "\nWARNING: no matching segments"
        run_ref.log(msg)
        print msg
    else:
        for ctg_num in dir_contents:
            ctg_gbk = ctgs_dir + g_name + "_" + ctg_num + ".gbk"
            seg_file = segs_root + ctg_num + "/" + ctg_num + "_" + ref_ctg_n + "_segs.txt"
            map_file = maps_root + g_name + "_" + ctg_num + "_vs_" + ref_ctg_n + ".pdf"
            # start mapping
            try:
                # load segments TODO: add idp-based clumping
                segdata = np.loadtxt(seg_file, skiprows=1, dtype=segtype)
                # deactivate offsetting
                g_offset = (0, 0)
                q_invert = False
                # generate graphical map
                pairwise_draw(ref_ctg_n, g_name + "_" + ctg_num, ref_gbk,
                              ctg_gbk, segdata, map_file, q_invert, g_offset,
                              'dual', 'dual', 'm', 'fct', 'fct', min_size,
                              fct_flags, fct_colors, idpt)
            except IOError:
                msg = "\nERROR: could not load segments data"
                run_ref.log(msg)
                print msg
            except StopIteration:
                msg = "\nERROR: could not make map"
                run_ref.log(msg)
                print msg
Ejemplo n.º 8
0
def map_ref_segs(run_ref, run_id, r_root_dir, run_dirs, min_size,
                 fct_flags, fct_colors, idpt): 
    """Generate map of reference contig with segment details.

    This provides a comparison of the original reference and the
    re-annotated version.

    """
    # set inputs and outputs
    ref_n = run_ref.name
    run_root = r_root_dir+run_id+"/"
    ori_file = run_ref.file
    ref_maps_root = run_root+run_dirs['ref_map_dir']
    ensure_dir([ref_maps_root])
    gbk_file = run_root+run_dirs['ref_gbk_dir']+ref_n+"_re-annot.gbk"
    map_file = ref_maps_root+ref_n+"_ref.pdf"
    # start mapping
    try:
        # make mock segment, full-length with 100% id
        record = load_genbank(gbk_file)
        length = len(record.seq)
        segdata = [[1, length, 1, length, 100]]
        # deactivate offsetting
        g_offset = (0,0)
        q_invert = False
        # generate graphical map
        pairwise_draw(ref_n+"_ra", ref_n+"_ori", gbk_file, ori_file,
                     segdata, map_file, q_invert, g_offset, 'dual', 'dual',
                     'm', 'fct', 'product', min_size, fct_flags,
                     fct_colors, idpt)
    except IOError:
        msg = "\nERROR: could not load segments data"
        run_ref.log(msg)
        print msg
    except StopIteration:
        msg = "\nERROR: could not make map"
        run_ref.log(msg)
        print msg
Ejemplo n.º 9
0
def map_ref_segs(run_ref, run_id, r_root_dir, run_dirs, min_size, fct_flags,
                 fct_colors, idpt):
    """Generate map of reference contig with segment details.

    This provides a comparison of the original reference and the
    re-annotated version.

    """
    # set inputs and outputs
    ref_n = run_ref.name
    run_root = r_root_dir + run_id + "/"
    ori_file = run_ref.file
    ref_maps_root = run_root + run_dirs['ref_map_dir']
    ensure_dir([ref_maps_root])
    gbk_file = run_root + run_dirs['ref_gbk_dir'] + ref_n + "_re-annot.gbk"
    map_file = ref_maps_root + ref_n + "_ref.pdf"
    # start mapping
    try:
        # make mock segment, full-length with 100% id
        record = load_genbank(gbk_file)
        length = len(record.seq)
        segdata = [[1, length, 1, length, 100]]
        # deactivate offsetting
        g_offset = (0, 0)
        q_invert = False
        # generate graphical map
        pairwise_draw(ref_n + "_ra", ref_n + "_ori", gbk_file, ori_file,
                      segdata, map_file, q_invert, g_offset, 'dual', 'dual',
                      'm', 'fct', 'product', min_size, fct_flags, fct_colors,
                      idpt)
    except IOError:
        msg = "\nERROR: could not load segments data"
        run_ref.log(msg)
        print msg
    except StopIteration:
        msg = "\nERROR: could not make map"
        run_ref.log(msg)
        print msg
Ejemplo n.º 10
0
def align_pairwise(genomes, new_align, dirs, run):
    """Make a pairwise alignment."""
    # load inputs and process genomes
    seq_dir = dirs['seqfiles']
    ref = [Noodle(genome, seq_dir) for genome in genomes
           if genome['order'] == 1][0]
    query = [Noodle(genome, seq_dir) for genome in genomes
             if genome['order'] == 2][0]
    # align if needed
    print "Aligning", ref.name, "and", query.name, "...",
    if new_align:
        mauve_pw_align(ref, query, dirs)
    # process data segments
    print "Processing segments ..",
    seg_file = dirs['aln_segs']+"/"+ref.name+"_"+query.name+"_segs.txt"
    pair_data = process_segdata(seg_file, ref, query)
    print "OK"
    # map of query aligned to reference
    print "Mapping ...",
    map_file = dirs['maps']+run+"_"+ref.name+"_vs_"+query.name+".pdf"
    pairwise_draw(ref, query, pair_data, map_file, 'dual', 'dual', 'm',
                  'fct', 'fct')
    print "OK\n"