Esempio n. 1
0
def main(args):
    """Build a k-mer nodegraph from the input files and save it to disk.

    Loads the sequences named in ``args.input_filenames`` into a nodegraph
    created by ``khmer_args.create_nodegraph``, then writes:

    * the nodegraph itself to ``args.output_filename``;
    * the tagset to ``<output_filename>.tagset`` (skipped when
      ``args.no_build_tagset`` is set);
    * a short summary (unique k-mer count and estimated false positive
      rate) to ``<output_filename>.info``.

    Progress messages are printed to stderr.

    Args:
        args: parsed command-line namespace.  Attributes read directly
            here: ``output_filename``, ``input_filenames``, ``force``,
            ``n_tables``, ``threads`` and ``no_build_tagset``.  The whole
            namespace is also handed to ``report_on_config``,
            ``calculate_graphsize`` and ``khmer_args.create_nodegraph``.

    Raises:
        SystemExit: the function finishes by calling ``sys.exit(0)``.
    """
    graph_type = 'nodegraph'
    report_on_config(args, graphtype=graph_type)
    base = args.output_filename
    filenames = args.input_filenames

    for fname in filenames:
        check_input_files(fname, args.force)

    # Estimate the on-disk size of the graph and check for free space
    # before doing any expensive work.
    graphsize = calculate_graphsize(args, graph_type)
    space_needed = (args.n_tables * graphsize /
                    khmer._buckets_per_byte[graph_type])
    check_space_for_graph(args.output_filename, space_needed, args.force)

    print('Saving k-mer nodegraph to %s' % base, file=sys.stderr)
    print('Loading kmers from sequences in %s' % repr(filenames),
          file=sys.stderr)
    if args.no_build_tagset:
        print('We WILL NOT build the tagset.', file=sys.stderr)
    else:
        print('We WILL build the tagset (for partitioning/traversal).',
              file=sys.stderr)

    print('making nodegraph', file=sys.stderr)
    nodegraph = khmer_args.create_nodegraph(args)

    oxfuncs.build_graph(filenames, nodegraph, args.threads,
                        not args.no_build_tagset)

    print('Total number of unique k-mers: {0}'.format(
        nodegraph.n_unique_kmers()),
          file=sys.stderr)

    print('saving k-mer nodegraph in', base, file=sys.stderr)
    nodegraph.save(base)

    if not args.no_build_tagset:
        print('saving tagset in', base + '.tagset', file=sys.stderr)
        nodegraph.save_tagset(base + '.tagset')

    # The context manager closes (and flushes) the summary file even if a
    # statement inside the block raises or exits.
    with open(base + '.info', 'w') as info_fp:
        info_fp.write('%d unique k-mers' % nodegraph.n_unique_kmers())

        fp_rate = khmer.calc_expected_collisions(
            nodegraph, args.force, max_false_pos=.15)
        # 0.18 is ACTUAL MAX. Do not change.

        print('false positive rate estimated to be %1.3f' % fp_rate,
              file=sys.stderr)
        print('\nfalse positive rate estimated to be %1.3f' % fp_rate,
              file=info_fp)

    print('wrote to ' + base + '.info and ' + base, file=sys.stderr)
    if not args.no_build_tagset:
        print('and ' + base + '.tagset', file=sys.stderr)

    sys.exit(0)
Esempio n. 2
0
def main(args):
    """Build a k-mer presence table (nodegraph) and save it to disk.

    Loads the sequences named in ``args.input_filenames`` into a nodegraph
    created by ``khmer_args.create_nodegraph``, then writes:

    * the presence table to ``<output_filename>.pt``;
    * the tagset to ``<output_filename>.tagset`` (skipped when
      ``args.no_build_tagset`` is set);
    * a short summary (unique k-mer count and estimated false positive
      rate) to ``<output_filename>.info``.

    Progress messages are printed to stderr.

    Args:
        args: parsed command-line namespace.  Attributes read directly
            here: ``output_filename``, ``input_filenames``, ``force``,
            ``threads`` and ``no_build_tagset``.  The namespace is
            replaced by the return value of
            ``functions.do_sanity_checking`` before the space checks and
            graph creation.

    Raises:
        SystemExit: the function finishes by calling ``sys.exit(0)``.
    """
    info('build-graph.py', ['graph', 'SeqAn'])

    report_on_config(args, hashtype='nodegraph')
    base = args.output_filename
    filenames = args.input_filenames

    for fname in filenames:
        check_input_files(fname, args.force)

    # if optimization args are given, do optimization
    args = functions.do_sanity_checking(args, 0.01)

    check_space(args.input_filenames, args.force)
    check_space_for_hashtable(args, 'nodegraph', args.force)

    print('Saving k-mer presence table to %s' % base, file=sys.stderr)
    print('Loading kmers from sequences in %s' %
          repr(filenames), file=sys.stderr)
    if args.no_build_tagset:
        print('We WILL NOT build the tagset.', file=sys.stderr)
    else:
        print('We WILL build the tagset (for partitioning/traversal).',
              file=sys.stderr)

    print('making nodegraph', file=sys.stderr)
    htable = khmer_args.create_nodegraph(args)

    functions.build_graph(filenames, htable, args.threads,
                          not args.no_build_tagset)

    print('Total number of unique k-mers: {0}'.format(htable.n_unique_kmers()),
          file=sys.stderr)

    print('saving k-mer presence table in', base + '.pt', file=sys.stderr)
    htable.save(base + '.pt')

    if not args.no_build_tagset:
        print('saving tagset in', base + '.tagset', file=sys.stderr)
        htable.save_tagset(base + '.tagset')

    # The context manager closes (and flushes) the summary file even if a
    # statement inside the block raises or exits.
    with open(base + '.info', 'w') as info_fp:
        info_fp.write('%d unique k-mers' % htable.n_unique_kmers())

        fp_rate = khmer.calc_expected_collisions(
            htable, args.force, max_false_pos=.15)
        # 0.18 is ACTUAL MAX. Do not change.

        print('false positive rate estimated to be %1.3f' % fp_rate,
              file=sys.stderr)
        print('\nfalse positive rate estimated to be %1.3f' % fp_rate,
              file=info_fp)

    print('wrote to', base + '.info and', base + '.pt', file=sys.stderr)
    if not args.no_build_tagset:
        print('and ' + base + '.tagset', file=sys.stderr)

    sys.exit(0)
Esempio n. 3
0
def main(args):
    """Populate a nodegraph from sequence files and write it out.

    Steps, in order: report the configuration, validate each input file,
    check that enough disk space is available for the graph, build the
    nodegraph (and, unless ``args.no_build_tagset`` is set, its tagset),
    save the results, and record summary statistics.

    Files written:

    * ``args.output_filename`` -- the nodegraph;
    * ``<output_filename>.tagset`` -- the tagset, when it is built;
    * ``<output_filename>.info`` -- unique k-mer count and estimated
      false positive rate.

    All progress messages go to stderr.

    Args:
        args: parsed command-line namespace.  Attributes read directly
            here: ``output_filename``, ``input_filenames``, ``force``,
            ``n_tables``, ``threads`` and ``no_build_tagset``.  The whole
            namespace is also passed to ``report_on_config``,
            ``calculate_graphsize`` and ``khmer_args.create_nodegraph``.

    Raises:
        SystemExit: the function finishes by calling ``sys.exit(0)``.
    """
    graph_type = 'nodegraph'
    report_on_config(args, graphtype=graph_type)
    base = args.output_filename
    filenames = args.input_filenames

    for fname in filenames:
        check_input_files(fname, args.force)

    # Space estimate: total buckets across all tables, converted with the
    # per-graph-type buckets-per-byte factor.
    graphsize = calculate_graphsize(args, graph_type)
    space_needed = (args.n_tables * graphsize /
                    khmer._buckets_per_byte[graph_type])
    check_space_for_graph(args.output_filename, space_needed, args.force)

    print('Saving k-mer nodegraph to %s' % base, file=sys.stderr)
    print('Loading kmers from sequences in %s' %
          repr(filenames), file=sys.stderr)
    if args.no_build_tagset:
        print('We WILL NOT build the tagset.', file=sys.stderr)
    else:
        print('We WILL build the tagset (for partitioning/traversal).',
              file=sys.stderr)

    print('making nodegraph', file=sys.stderr)
    nodegraph = khmer_args.create_nodegraph(args)

    oxfuncs.build_graph(filenames, nodegraph, args.threads,
                        not args.no_build_tagset)

    print('Total number of unique k-mers: {0}'.format(
        nodegraph.n_unique_kmers()), file=sys.stderr)

    print('saving k-mer nodegraph in', base, file=sys.stderr)
    nodegraph.save(base)

    if not args.no_build_tagset:
        print('saving tagset in', base + '.tagset', file=sys.stderr)
        nodegraph.save_tagset(base + '.tagset')

    # Use a context manager so the summary file is always closed, including
    # when a statement inside the block raises or exits.
    with open(base + '.info', 'w') as info_fp:
        info_fp.write('%d unique k-mers' % nodegraph.n_unique_kmers())

        fp_rate = khmer.calc_expected_collisions(
            nodegraph, args.force, max_false_pos=.15)
        # 0.18 is ACTUAL MAX. Do not change.

        print('false positive rate estimated to be %1.3f' % fp_rate,
              file=sys.stderr)
        print('\nfalse positive rate estimated to be %1.3f' % fp_rate,
              file=info_fp)

    print('wrote to ' + base + '.info and ' + base, file=sys.stderr)
    if not args.no_build_tagset:
        print('and ' + base + '.tagset', file=sys.stderr)

    sys.exit(0)
Esempio n. 4
0
def main(args):
    """Build a hashbits k-mer presence table and save it to disk.

    NOTE: this function uses Python 2 ``print >>stream`` statements.

    Loads the sequences named in ``args.input_filenames`` into a table
    created by ``khmer.new_hashbits``, then writes:

    * the presence table to ``<output_filename>.pt``;
    * the tagset to ``<output_filename>.tagset`` (skipped when
      ``args.no_build_tagset`` is set);
    * a short summary (unique k-mer count and estimated false positive
      rate) to ``<output_filename>.info``.

    Progress messages are printed to stderr.

    Args:
        args: parsed command-line namespace.  Attributes read directly
            here: ``output_filename``, ``input_filenames``, ``force``,
            ``ksize``, ``min_tablesize``, ``n_tables``, ``threads`` and
            ``no_build_tagset``.

    Raises:
        SystemExit: the function finishes by calling ``sys.exit(0)``.
    """
    info('build-graph.py', ['graph', 'SeqAn'])

    report_on_config(args, hashtype='hashbits')
    base = args.output_filename
    filenames = args.input_filenames

    for fname in filenames:
        check_input_files(fname, args.force)

    check_space(args.input_filenames, args.force)
    # n_tables * min_tablesize divided by 8 -- presumably a bits-to-bytes
    # conversion; confirm against check_space_for_hashtable.
    check_space_for_hashtable(
        (float(args.n_tables * args.min_tablesize) / 8.), args.force)

    print >>sys.stderr, 'Saving k-mer presence table to %s' % base
    print >>sys.stderr, 'Loading kmers from sequences in %s' % repr(filenames)
    if args.no_build_tagset:
        print >>sys.stderr, 'We WILL NOT build the tagset.'
    else:
        # A single string: passing two items made print insert an extra
        # space before the parenthesis.
        print >>sys.stderr, \
            'We WILL build the tagset (for partitioning/traversal).'

    print >>sys.stderr, 'making k-mer presence table'
    htable = khmer.new_hashbits(args.ksize, args.min_tablesize, args.n_tables)

    functions.build_graph(filenames, htable, args.threads,
                          not args.no_build_tagset)

    print >> sys.stderr, 'Total number of unique k-mers: {0}'.format(
        htable.n_unique_kmers())

    print >>sys.stderr, 'saving k-mer presence table in', base + '.pt'
    htable.save(base + '.pt')

    if not args.no_build_tagset:
        print >>sys.stderr, 'saving tagset in', base + '.tagset'
        htable.save_tagset(base + '.tagset')

    # The context manager closes (and flushes) the summary file even if a
    # statement inside the block raises or exits.
    with open(base + '.info', 'w') as info_fp:
        info_fp.write('%d unique k-mers' % htable.n_unique_kmers())

        fp_rate = khmer.calc_expected_collisions(
            htable, args.force, max_false_pos=.15)
        # 0.18 is ACTUAL MAX. Do not change.

        print >>sys.stderr, \
            'false positive rate estimated to be %1.3f' % fp_rate
        print >>info_fp, \
            '\nfalse positive rate estimated to be %1.3f' % fp_rate

    print >> sys.stderr, 'wrote to', base + '.info and', base + '.pt'
    if not args.no_build_tagset:
        print >> sys.stderr, 'and ' + base + '.tagset'

    sys.exit(0)
Esempio n. 5
0
def main(args):
    """Build a k-mer nodegraph from the input files and save it to disk.

    Loads the sequences named in ``args.input_filenames`` into a nodegraph
    created by ``khmer_args.create_nodegraph``, then writes:

    * the nodegraph to ``args.output_filename``;
    * the tagset to ``<output_filename>.tagset`` (skipped when
      ``args.no_build_tagset`` is set);
    * a short summary (unique k-mer count and estimated false positive
      rate) to ``<output_filename>.info``.

    Progress messages are printed to stderr.

    Args:
        args: parsed command-line namespace.  Attributes read directly
            here: ``output_filename``, ``input_filenames``, ``force``,
            ``threads`` and ``no_build_tagset``.  The whole namespace is
            also passed to ``report_on_config``, ``calculate_graphsize``
            and ``khmer_args.create_nodegraph``.

    Raises:
        SystemExit: the function finishes by calling ``sys.exit(0)``.
    """
    info("build-graph.py", ["graph", "SeqAn"])

    report_on_config(args, graphtype="nodegraph")
    base = args.output_filename
    filenames = args.input_filenames

    for fname in filenames:
        check_input_files(fname, args.force)

    graphsize = calculate_graphsize(args, "nodegraph")
    check_space_for_graph(args.output_filename, graphsize, args.force)

    print("Saving k-mer nodegraph to %s" % base, file=sys.stderr)
    print("Loading kmers from sequences in %s" % repr(filenames),
          file=sys.stderr)
    if args.no_build_tagset:
        print("We WILL NOT build the tagset.", file=sys.stderr)
    else:
        print("We WILL build the tagset (for partitioning/traversal).",
              file=sys.stderr)

    print("making nodegraph", file=sys.stderr)
    nodegraph = khmer_args.create_nodegraph(args)

    oxfuncs.build_graph(filenames, nodegraph, args.threads,
                        not args.no_build_tagset)

    print("Total number of unique k-mers: {0}".format(
        nodegraph.n_unique_kmers()), file=sys.stderr)

    print("saving k-mer nodegraph in", base, file=sys.stderr)
    nodegraph.save(base)

    if not args.no_build_tagset:
        print("saving tagset in", base + ".tagset", file=sys.stderr)
        nodegraph.save_tagset(base + ".tagset")

    # The context manager closes (and flushes) the summary file even if a
    # statement inside the block raises or exits.
    with open(base + ".info", "w") as info_fp:
        info_fp.write("%d unique k-mers" % nodegraph.n_unique_kmers())

        fp_rate = khmer.calc_expected_collisions(
            nodegraph, args.force, max_false_pos=0.15)
        # 0.18 is ACTUAL MAX. Do not change.

        print("false positive rate estimated to be %1.3f" % fp_rate,
              file=sys.stderr)
        print("\nfalse positive rate estimated to be %1.3f" % fp_rate,
              file=info_fp)

    print("wrote to " + base + ".info and " + base, file=sys.stderr)
    if not args.no_build_tagset:
        print("and " + base + ".tagset", file=sys.stderr)

    sys.exit(0)
Esempio n. 6
0
def main(args):
    """Build a hashbits k-mer presence table and save it to disk.

    Loads the sequences named in ``args.input_filenames`` into a table
    created by ``khmer.new_hashbits``, then writes:

    * the presence table to ``<output_filename>.pt``;
    * the tagset to ``<output_filename>.tagset`` (skipped when
      ``args.no_build_tagset`` is set);
    * a short summary (unique k-mer count and estimated false positive
      rate) to ``<output_filename>.info``.

    Progress messages are printed to stderr.

    Args:
        args: parsed command-line namespace.  Attributes read directly
            here: ``output_filename``, ``input_filenames``, ``force``,
            ``ksize``, ``min_tablesize``, ``n_tables``, ``threads`` and
            ``no_build_tagset``.

    Raises:
        SystemExit: the function finishes by calling ``sys.exit(0)``.
    """
    info("build-graph.py", ["graph", "SeqAn"])

    report_on_config(args, hashtype="hashbits")
    base = args.output_filename
    filenames = args.input_filenames

    for fname in filenames:
        check_input_files(fname, args.force)

    check_space(args.input_filenames, args.force)
    # n_tables * min_tablesize divided by 8 -- presumably a bits-to-bytes
    # conversion; confirm against check_space_for_hashtable.
    check_space_for_hashtable(
        (float(args.n_tables * args.min_tablesize) / 8.0), args.force)

    print("Saving k-mer presence table to %s" % base, file=sys.stderr)
    print("Loading kmers from sequences in %s" % repr(filenames),
          file=sys.stderr)
    if args.no_build_tagset:
        print("We WILL NOT build the tagset.", file=sys.stderr)
    else:
        print("We WILL build the tagset (for partitioning/traversal).",
              file=sys.stderr)

    print("making k-mer presence table", file=sys.stderr)
    htable = khmer.new_hashbits(args.ksize, args.min_tablesize, args.n_tables)

    functions.build_graph(filenames, htable, args.threads,
                          not args.no_build_tagset)

    print("Total number of unique k-mers: {0}".format(
        htable.n_unique_kmers()), file=sys.stderr)

    print("saving k-mer presence table in", base + ".pt", file=sys.stderr)
    htable.save(base + ".pt")

    if not args.no_build_tagset:
        print("saving tagset in", base + ".tagset", file=sys.stderr)
        htable.save_tagset(base + ".tagset")

    # The context manager closes (and flushes) the summary file even if a
    # statement inside the block raises or exits.
    with open(base + ".info", "w") as info_fp:
        info_fp.write("%d unique k-mers" % htable.n_unique_kmers())

        fp_rate = khmer.calc_expected_collisions(
            htable, args.force, max_false_pos=0.15)
        # 0.18 is ACTUAL MAX. Do not change.

        print("false positive rate estimated to be %1.3f" % fp_rate,
              file=sys.stderr)
        print("\nfalse positive rate estimated to be %1.3f" % fp_rate,
              file=info_fp)

    print("wrote to", base + ".info and", base + ".pt", file=sys.stderr)
    if not args.no_build_tagset:
        print("and " + base + ".tagset", file=sys.stderr)

    sys.exit(0)