Beispiel #1
2
                   nargs='?',
                   default=None,
                   type=str,
                   help="the output summary file\n"
                   "[stdout if not present]")

    return vars(p.parse_args())


if __name__ == "__main__":
    # Script entry point: load the taxonomy tree, the coreness scores and the
    # marker table named on the command line.
    args = read_params(sys.argv)

    # gene_seed (int) -> set of (clade, n, n_tot, coreness) tuples
    cscores = collections.defaultdict(set)
    fwmarkers = {}
    maps = collections.defaultdict(set)
    tree = ppa.PpaTree(args['taxonomy'])
    clades2terms = ppa.clades2terms(tree.tree)

    # clade full name -> set of negated integer ids, parsed from each taxon
    # name after dropping its first four characters.
    # NOTE(review): presumably the names carry a 4-character prefix before a
    # numeric id -- confirm against the taxonomy file.
    clades2taxa = dict([(clade.full_name,
                         set([-int(taxon.name[4:]) for taxon in taxa]))
                        for clade, taxa in clades2terms.items()])

    # Each line of the cscores file holds five tab-separated fields.
    # NOTE(review): the file handle is never closed explicitly.
    for l in open(args['cscores']):
        gene_seed, clade, n, n_tot, coreness = l.strip().split('\t')
        # Convert the numeric columns; clade stays a string.
        gene_seed, clade, n, n_tot, coreness = int(gene_seed), clade, int(
            n), int(n_tot), float(coreness)
        cscores[gene_seed].add((clade, n, n_tot, coreness))

    # Each line of the fwmarkers file holds ten tab-separated fields.
    for i, l in enumerate(open(args['fwmarkers'])):
        taxa_id, gene_seed, clade, n, n_tot, coreness, n_ext_seeds, n_ext_taxa, uniqueness, ext_taxa = l.strip(
        ).split('\t')
                   default=None,
                   type=str,
                   help="the input tree")
    p.add_argument('out_file',
                   nargs='?',
                   default=None,
                   type=str,
                   help="the output file (b2zipped if ending with '.bz2')\n"
                   "[stdout if not present]")
    p.add_argument(
        '-n',
        action='store_true',
        help="Distances normalized with respect to the total branch length")

    return vars(p.parse_args())


if __name__ == "__main__":
    # Script entry point: load the input tree and dump its pairwise distances
    # as tab-separated "name1<TAB>name2<TAB>distance" lines.
    args = read_params(sys.argv)
    ppatree = ppa.PpaTree(args['intree'])
    dists = ppa.dist_matrix(ppatree.tree)

    # With -n every distance is divided by the tree's total branch length;
    # without it the divisor is simply 1.0.
    # (An earlier variant used total_branch_length() - 1.0 as the divisor.)
    if args['n']:
        divisor = ppatree.tree.total_branch_length()
    else:
        divisor = 1.0

    with utils.openw(args['out_file']) as sink:
        for first, row in dists.items():
            for second, dist in row.items():
                # Write a pair only when first < second, so each unordered
                # pair is emitted once and self-distances are skipped.
                if not (first < second):
                    continue
                fields = [first, second, str(dist / divisor)]
                sink.write("\t".join(fields) + "\n")

def read_params(args):
    """Parse the command line of the marker-selection script.

    Args:
        args: Not used. Kept so that existing ``read_params(sys.argv)`` calls
            keep working; ``parse_args()`` is called without arguments and
            therefore reads ``sys.argv`` on its own.

    Returns:
        A dict with the keys ``infile`` and ``outfile`` (str or None),
        ``n`` (int, default 100) and ``th`` (str or None).
    """
    p = ap.ArgumentParser(description='Select top markers')

    p.add_argument('infile', nargs='?', default=None, type=str,
                   help="the input marker file [stdin if not present]")
    p.add_argument('outfile', nargs='?', default=None, type=str,
                   help="the output core file [stdout if not present]")
    # The option descriptions belong in ``help``: passing them as ``metavar``
    # put whole sentences into the usage line and left the options without
    # any help text. The trailing space keeps the two literals from fusing
    # into "selectedfor".
    p.add_argument('-n', default=100, type=int,
                   help="Maximum number of markers to be selected "
                        "for each clade")
    p.add_argument('--th', default=None, type=str,
                   help="Threshold on markerness")

    return vars(p.parse_args())


if __name__ == "__main__":
    # Script entry point: select the top markers and write them out, one
    # element of each marker per line.
    args = read_params(sys.argv)
    tree = ppa.PpaTree(None)
    markers = tree.select_markers(args['infile'],
                                  markerness_th=args['th'],
                                  max_markers=args['n'])

    # 'outfile' is optional and defaults to None, which open() rejects with a
    # TypeError; fall back to stdout as the option's help text promises.
    to_stdout = args['outfile'] is None
    outf = sys.stdout if to_stdout else open(args['outfile'], "w")
    try:
        for clade in markers:
            for m in clade:
                outf.write("\n".join(m) + "\n")
    finally:
        # Only close a handle this block opened itself; never close stdout.
        if not to_stdout:
            outf.close()


    p = ap.ArgumentParser(description='Profile ChocoPhlAn genes\n')

    p.add_argument( '--markers', required = True, default=None, type=str )
    p.add_argument( '--euk_taxonomy', required = True, default=None, type=str )
    #p.add_argument( '--mic_taxonomy', required = True, default=None, type=str )
    p.add_argument('out', nargs='?', default=None, type=str,
            help=   "the output summary file\n"
                    "[stdout if not present]")

    return vars( p.parse_args() )

if __name__ == "__main__":
    args = read_params( sys.argv )

    cen2data = {}
    etree = ppa.PpaTree( args['euk_taxonomy'] )
    #vtree = ppa.PpaTree( args['vir_taxonomy'] )
    #mtree = ppa.PpaTree( args['mic_taxonomy'] )
    #vall = set([a.name for a in vtree.tree.get_terminals()])
    #mall = set([int(a.name[3:]) for a in mtree.tree.get_terminals()])
    eall = set([a.name for a in etree.tree.get_terminals()])

    lin = (l.split('\t') for l in open(args['markers']))
    for d1,cen,taxa,tax,d2,d3,coreness,d4,d5,d6,d7,tin,tout,d8,tsin,tsout in lin:
        tsin,tsout = set(["t__"+a for a in tsin.strip().split(":") if a]),set(["t__"+b for b in tsout.strip().split(":") if b])
        #tsin,tsout = set([int(a) for a in tsin.strip().split(":") if a]),set([int(b) for b in tsout.strip().split(":") if b])
        cen2data[int(cen)] = {'taxa':int(taxa),'tax':tax,'coreness':float(coreness),'tsin':tsin,'tsout':tsout}


    tax2cen = collections.defaultdict( set )
    for k,v in cen2data.items():
    p.add_argument('--mic_taxonomy', required=True, default=None, type=str)
    p.add_argument('out',
                   nargs='?',
                   default=None,
                   type=str,
                   help="the output summary file\n"
                   "[stdout if not present]")

    return vars(p.parse_args())


if __name__ == "__main__":
    args = read_params(sys.argv)

    cen2data = {}
    vtree = ppa.PpaTree(args['vir_taxonomy'])
    mtree = ppa.PpaTree(args['mic_taxonomy'])
    vall = set([a.name for a in vtree.tree.get_terminals()])
    #mall = set([int(a.name[3:]) for a in mtree.tree.get_terminals()])
    mall = set([a.name for a in mtree.tree.get_terminals()])

    lin = (l.split('\t') for l in open(args['markers']))
    for d1, cen, taxa, tax, d2, d3, coreness, d4, d5, d6, d7, tin, tout, d8, tsin, tsout in lin:
        tsin, tsout = set(
            ["t__" + a for a in tsin.strip().split(":")
             if a]), set(["t__" + b for b in tsout.strip().split(":") if b])
        #tsin,tsout = set([int(a) for a in tsin.strip().split(":") if a]),set([int(b) for b in tsout.strip().split(":") if b])
        cen2data[int(cen)] = {
            'taxa': int(taxa),
            'tax': tax,
            'coreness': float(coreness),