Exemple #1
0
def test_build_hashCounter():
    """Check build_hashCounter's result for two abundance-tracking signatures.

    Two signatures are built from the hash sets (1, 2, 3, 4) and (1, 2, 5);
    the returned counter must equal the expected per-hash counts.
    """
    signatures = []
    for hashvals in ((1, 2, 3, 4), (1, 2, 5)):
        sketch = MinHash(0, 21, scaled=1, track_abundance=True)
        sketch.add_many(hashvals)
        signatures.append(SourmashSignature(sketch))

    # hashes 1 and 2 occur in both inputs; 3, 4 and 5 occur in only one
    expected = Counter({1: 2, 2: 2, 3: 1, 4: 1, 5: 1})

    hc = build_hashCounter(signatures, Counter())
    print("Hash Counter: ", hc)
    assert hc == expected
Exemple #2
0
def test_drop_below_mincount_threshold():
    """Check that drop_below_mincount with threshold 3 keeps only hash 1.

    The counter is produced by build_hashCounter from two abundance-tracking
    signatures; the second input adds hash 1 twice.
    """
    first_sketch = MinHash(0, 21, scaled=1, track_abundance=True)
    first_sketch.add_many((1, 2, 3, 4))
    first_sig = SourmashSignature(first_sketch)

    second_sketch = MinHash(0, 21, scaled=1, track_abundance=True)
    second_sketch.add_many((1, 1, 2, 5))
    second_sig = SourmashSignature(second_sketch)

    hash_counts = build_hashCounter([first_sig, second_sig], Counter())
    kept_hashes = drop_below_mincount(hash_counts, 3)

    expected_kept = Counter({1: 3})
    print("kept hashes: ", kept_hashes)
    assert kept_hashes == expected_kept
                print(f"{hashval}:{ct}")
                if ct < min_count:
                    counts.pop(hashval)
            # Collect the hash values still present in `counts`, report how
            # many there are, and write them to `outhashes`, one per line.

            # Then try building a sig from them; per the original note, this
            # sig is used later to intersect with sample-specific sigs.
            new_mins = set(counts.keys())
            print(len(new_mins))
            with open(outhashes, "w") as out:
                for hsh in new_mins:
                    out.write(str(hsh) + '\n')
            # Only build a signature when at least one hash remains.
            if len(new_mins) > 0:
                minhash = MinHash(
                    n=0, ksize=ksize, scaled=scaled
                )  # NOTE(review): an earlier comment said "scaled=1 so we keep all", but the `scaled` variable is passed here -- confirm which is intended
                minhash.add_many(set(counts.keys()))
                # wrap the MinHash in a named signature and collect it in `sigobjs`
                sigobj = sourmash.SourmashSignature(
                    minhash,
                    name=f"aggregated_hashvals_above_{min_count}",
                    filename=f"generated with drop_unique_hashes.py")
                sigobjs += [sigobj]

## NOTE: this part only handles a single output file -- it does not take care
## of the case with many ksizes/moltypes.
# Save every signature collected in `sigobjs` to the file named by `outsig`.
with open(outsig, 'wt') as sigout:
    sourmash.save_signatures(sigobjs, sigout)
    #notify('wrote signature to {}', args.output)

# write out hashes to a text file

# this part is from
Exemple #4
0
def main():
    """Build a signature from a text file of hash values and save it.

    Command-line arguments (parsed with argparse):
        hashfile    text file with one integer hash value per line
        -o/--output file to write the signature to (required)
        -k/--ksize  k-mer size passed to MinHash (required)
        --scaled    scaled value for the MinHash (exclusive with --num)
        --num       num value for the MinHash (exclusive with --scaled)
        --name      signature name
        --filename  filename recorded in the signature

    When neither --scaled nor --num is given, num is set to the number of
    distinct hashes loaded.

    Returns:
        -1 if argument validation fails or no hashes are loaded; otherwise
        None after the signature has been written.

    Raises:
        ValueError: if a non-blank line of the hash file is not an integer.
    """
    p = argparse.ArgumentParser()
    p.add_argument('hashfile')  # file that contains hashes
    p.add_argument('-o', '--output', default=None,
                   help='file to output signature to')
    p.add_argument('-k', '--ksize', default=None, type=int)
    p.add_argument('--scaled', default=None, type=int)
    p.add_argument('--num', default=None, type=int)
    p.add_argument('--name', default='', help='signature name')
    p.add_argument('--filename', default='',
                   help='filename to add to signature')
    args = p.parse_args()

    # check arguments.
    if args.scaled and args.num:
        error('cannot specify both --num and --scaled! exiting.')
        return -1

    if not args.ksize:
        error('must specify --ksize')
        return -1

    if not args.output:
        error('must specify --output')
        return -1

    # first, load in all the hashes. The context manager guarantees the file
    # is closed even if a line fails to parse.
    hashes = set()
    with open(args.hashfile, 'rt') as hashfp:
        for line in hashfp:
            line = line.strip()
            # tolerate blank lines (e.g. a trailing empty line) instead of
            # crashing in int('')
            if not line:
                continue
            hashes.add(int(line))

    if not hashes:
        error("ERROR, no hashes loaded from {}!", args.hashfile)
        return -1

    notify('loaded {} distinct hashes from {}', len(hashes), args.hashfile)

    # now, create the MinHash object that we'll use.
    scaled = 0
    num = 0
    if args.scaled:
        scaled = args.scaled
    elif args.num:
        num = args.num
    else:
        notify('setting --num automatically from the number of hashes.')
        num = len(hashes)

    # construct empty MinHash object according to args
    minhash = MinHash(n=num, ksize=args.ksize, scaled=scaled)

    # add the loaded hashes into it
    minhash.add_many(hashes)

    # warn when the MinHash ended up holding fewer hashes than were loaded
    if len(minhash) < len(hashes):
        notify("WARNING: loaded {} hashes, but only {} made it into MinHash.",
               len(hashes), len(minhash))
        if scaled:
            notify("This is probably because of the scaled argument.")
        elif args.num:
            notify("This is probably because your --num is set to {}",
                   args.num)

    # warn when num asks for more hashes than the signature holds
    if num > len(minhash):
        notify("WARNING: --num set to {}, but only {} hashes in signature.",
               num, len(minhash))

    sigobj = sourmash.SourmashSignature(minhash, name=args.name,
                                        filename=args.filename)

    with open(args.output, 'wt') as fp:
        sourmash.save_signatures([sigobj], fp)
    notify('wrote signature to {}', args.output)