Ejemplo n.º 1
0
def publocus(args):
    """
    %prog publocus idsfile > idsfiles.publocus

    Given a list of model identifiers, convert each into a GenBank approved
    pub_locus.

    Example output:
    Medtr1g007020.1		MTR_1g007020
    Medtr1g007030.1		MTR_1g007030
    Medtr1g007060.1		MTR_1g007060A
    Medtr1g007060.2		MTR_1g007060B
    """
    # NOTE: the docstring above is handed to OptionParser as the usage/help
    # text, so implementation notes are kept in comments rather than added
    # to it.
    #
    # args: list of command-line tokens, `[--locus_tag TAG] idsfile`.
    # Output: one "<locus>.<isoform>\t<pub_locus>" line per isoform on stdout.
    # A locus with a single isoform gets the bare pub_locus; a locus with
    # several isoforms gets a letter suffix derived from the isoform number
    # (1 -> A, 2 -> B, ... 26 -> Z).
    # Exits through sys.exit() after printing the help when the number of
    # positional arguments is not exactly one.
    from jcvi.utils.cbook import AutoVivification

    p = OptionParser(publocus.__doc__)
    p.add_option("--locus_tag", default="MTR_",
                 help="GenBank locus tag [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    locus_tag = opts.locus_tag

    # index[locus] -> {'pub_locus': <name>, 'isos': set of int isoform numbers}
    index = AutoVivification()
    idsfile, = args
    fp = must_open(idsfile)
    for row in fp:
        locus, chrom, sep, rank, iso = atg_name(row, retval="locus,chr,sep,rank,iso")
        if None in (locus, chrom, sep, rank, iso):
            # Strip only for the message so the log line is not broken by the
            # row's trailing newline.
            logging.warning("{0} is not a valid gene model identifier".format(row.strip()))
            continue
        # Plain membership test: does not auto-create the entry and avoids
        # building a key list on every row.
        if locus not in index:
            index[locus]['pub_locus'] = gene_name(chrom, rank, prefix=locus_tag, sep=sep)
            index[locus]['isos'] = set()

        index[locus]['isos'].add(int(iso))

    for locus in index:
        pub_locus = index[locus]['pub_locus']
        isos = sorted(index[locus]['isos'])
        if len(isos) > 1:
            # Only isoform numbers below 27 map onto a letter (chr(n + 64));
            # higher-numbered isoforms are not printed. The list is sorted,
            # so the lettered isoforms are always a leading prefix.
            lettered = [n for n in isos if n < 27]
            if len(lettered) < len(isos):
                logging.warning("{0}: isoforms numbered above 26 have no letter "
                                "suffix and are skipped".format(locus))
            for n in lettered:
                print("\t".join(("{0}.{1}".format(locus, n),
                                 "{0}{1}".format(pub_locus, chr(n + 64)))))
        else:
            print("\t".join(("{0}.{1}".format(locus, isos[0]), pub_locus)))
Ejemplo n.º 2
0
def group(args):
    """
    %prog group tabfile > tabfile.grouped

    Given a tab-delimited file, either group all elements within the file or
    group the elements in the value column(s) based on the key (groupby) column

    For example, convert this | into this
    ---------------------------------------
    a	2    3    4           | a,2,3,4,5,6
    a	5    6                | b,7,8
    b	7    8                | c,9,10,11
    c	9                     |
    c 	10   11               |

    If grouping by a particular column,
    convert this              | into this:
    ---------------------------------------------
    a	2    3    4           | a	2,5   3,6   4
    a	5    6                | b	7     8
    b	7    8                | c	9,10  11
    c	9                     |
    c 	10   11               |

    By default, it uniqifies all the grouped elements
    """
    # NOTE: the docstring above is handed to OptionParser as the usage/help
    # text, so implementation notes are kept in comments rather than added
    # to it.
    #
    # args: list of command-line tokens, `[options] tabfile`.
    # Without --groupby every row is fed to a Grouper and each resulting
    # group is printed joined by --groupsep. With --groupby N, the values of
    # every other column are collected per key (column N) and printed one
    # line per key, columns joined by the field separator.
    # Exits through sys.exit() after printing the help when the number of
    # positional arguments is not exactly one, and with status 1 when a row
    # does not contain the --groupby column.
    from jcvi.utils.cbook import AutoVivification
    from jcvi.utils.grouper import Grouper

    p = OptionParser(group.__doc__)
    p.set_sep()
    p.add_option("--groupby", default=None, type='int',
                 help="Default column to groupby [default: %default]")
    p.add_option("--groupsep", default=',',
                 help="Separator to join the grouped elements [default: `%default`]")
    p.add_option("--nouniq", default=False, action="store_true",
                 help="Do not uniqify the grouped elements [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    tabfile, = args
    sep = opts.sep  # presumably registered by p.set_sep(); defined outside this block
    groupby = opts.groupby
    groupsep = opts.groupsep
    nouniq = opts.nouniq

    # cols tracks the column indices of the widest row seen so far.
    cols = []
    grouper = AutoVivification() if groupby is not None else Grouper()
    fp = must_open(tabfile)
    for row in fp:
        atoms = row.rstrip().split(sep)
        if groupby is None:
            grouper.join(*atoms)
            continue

        if len(cols) < len(atoms):
            cols = list(range(len(atoms)))
        # Validate against the CURRENT row: checking against `cols` would let
        # a short row that follows a wider one through and fail with an
        # IndexError on atoms[groupby] below.
        if not 0 <= groupby < len(atoms):
            logging.error("groupby col index `{0}` is out of range".format(groupby))
            sys.exit(1)  # non-zero status: this is an error exit

        key = atoms[groupby]
        for col in cols:
            if col == groupby:
                continue
            # Reading grouper[key][col] auto-creates an empty (falsy) entry,
            # which is then replaced by the real container.
            if not grouper[key][col]:
                grouper[key][col] = [] if nouniq else set()
            if col >= len(atoms):
                # This row is narrower than the widest one seen; nothing to add.
                continue
            bucket = grouper[key][col]
            add = bucket.append if nouniq else bucket.add
            # split() returns the whole string when groupsep is absent, so a
            # single loop covers both pre-grouped and plain values.
            for atom in atoms[col].split(groupsep):
                add(atom)

    for key in grouper:
        if groupby is None:
            print(groupsep.join(key))
            continue

        line = []
        for col in cols:
            if col == groupby:
                line.append(key)
            elif col in grouper[key]:
                line.append(groupsep.join(grouper[key][col]))
            else:
                # Column appeared only after this key's last row was read.
                line.append("na")
        print(sep.join(line))