예제 #1
0
# Pull the command-line options into module-level names.
args = parser.parse_args()
inputFile, outputRaw, outputThreshold = args.i, args.rawout, args.tout
threshold, kmerSize = args.threshold, args.k

# Load the JSON-Bead file (one JSON document per line) and build the
# k-mer pool of every bead.
with open(inputFile, 'r') as f:
    kmerPool = [
        kmer.kmerCount(bead.beadSequence(json.loads(record)), kmerSize)
        for record in f
    ]
beadCount = len(kmerPool)
print('Found {0} beads.'.format(beadCount))

# Score every unordered pair of beads.
#   edge          -- (barcode, barcode, distance) for every pair
#   edgeThreshold -- the subset whose distance lies inside the inclusive
#                    [threshold[0], threshold[1]] window
#   n1 / n2       -- number of pairs seen / number of pairs kept
edge = []
edgeThreshold = []
n1 = n2 = 0
for first, second in combinations(kmerPool, 2):
    distance = kmer.kmerDistance((first.set, second.set)).mashDistance()
    link = (first.barcode, second.barcode, distance)
    edge.append(link)
    n1 += 1
    if threshold[0] <= distance <= threshold[1]:
        edgeThreshold.append(link)
        n2 += 1
예제 #2
0
                    default='kmerReport.tsv',
                    help='Report file on Kmers.')
args = parser.parse_args()

inputFile = args.i
outputFile = args.o
kmerSize = args.k
reportFile = args.report

# kmerPool -- one {barcode: kmers} mapping per bead, in input order
# report   -- histogram: len(kmers.kmers) -> number of beads with that value
# kmerFrag -- (len(kmers.kmers), len(b.fragments)) per bead
kmerPool = []
report = {}
kmerFrag = []
with open(inputFile, 'r') as f:
    # Each input line is parsed as one JSON document describing a bead.
    for line in f:
        b = bead.beadSequence(json.loads(line))
        kmers = kmer.kmerCount(b, kmerSize)
        barcode = b.barcode
        kmerPool.append({barcode: kmers.kmers})
        kmerNumber = len(kmers.kmers)
        fragNumber = len(b.fragments)
        kmerFrag.append((kmerNumber, fragNumber))
        # Default must be 0: with a default of 1 the first bead in each
        # bucket was counted twice, inflating every Count by one.
        report[kmerNumber] = report.get(kmerNumber, 0) + 1

# Dump the per-bead k-mer pools, one JSON object per line.
with open(outputFile, 'w') as f:
    for entry in kmerPool:
        f.write('{0}\n'.format(json.dumps(entry)))

# Turn the histogram into a list of (kmerNumber, count) pairs ordered by
# kmerNumber, ready to be written to the report file.
report = sorted(report.items(), key=lambda item: item[0])

with open(reportFile, 'w') as f:
    f.write('KmerNumber\tCount\n')
    for line in report:
def main():
    """Compute pairwise k-mer mash distances for all beads in parallel.

    Reads the JSON-Bead file named by the module-level ``inputFile`` (one
    JSON document per line), builds a k-mer pool per bead using
    ``kmerSize``, splits all unordered bead pairs into ``job`` chunks, runs
    ``kmerDistanceWorker`` on each chunk in its own process, and finally
    writes the collected edges to ``outputRaw`` as a tab-separated table
    with the header ``Source  Target  Distance``.

    ``inputFile``, ``kmerSize``, ``job`` and ``outputRaw`` are read from
    the enclosing module; they are not defined in this function.
    """
    # Read in JSON-Bead file and calculate the kmer pool of every bead.
    kmerPool = []
    beadCount = 0
    with open(inputFile, 'r') as f:
        for line in f:
            b = bead.beadSequence(json.loads(line))
            kmerPool.append(kmer.kmerCount(b, kmerSize))
            beadCount += 1
    print('Found {0} beads.'.format(beadCount))

    # Set up the parallel environment: manager-backed shared lists with one
    # slot per job, for the edge lists and the progress counters.
    # NOTE(review): kmerDistanceWorker is not visible here; presumably it
    # stores its results in edge[i] and its progress in count[i] -- confirm.
    manager = Manager()
    edge = manager.list([[]] * job)  # n list for edge list
    count = manager.list([0] * job)  # n list for count

    print('Starting mash distance ...')

    # Materialise every unordered pair so it can be sliced into chunks.
    pairPool = list(combinations(kmerPool, 2))
    size = len(pairPool)
    print('Total is {0} pairs.'.format(size))
    step = size // job
    print('Step is {0}'.format(step))
    start = 0

    workers = []
    print(len(pairPool))
    for i in range(job):
        is_last = i + 1 >= job
        # Every job gets `step` pairs; the last one also takes the
        # remainder left over by the integer division.
        if is_last:
            chunk = pairPool[start:]
        else:
            chunk = pairPool[start:start + step]
        workers.append(
            Process(target=kmerDistanceWorker,
                    args=(chunk, edge, i, count)))
        if not is_last:
            start += step
            print('Start change to {0}'.format(start))

    print('Starting %i jobs ...' % job)
    for count_worker, j in enumerate(workers, 1):
        j.start()
        print('Starting thread No. %i ...' % count_worker)

    # Poll the workers and report progress on stderr until all have exited.
    job_alive = True
    while job_alive:
        time.sleep(0.01)
        job_alive = any(j.is_alive() for j in workers)
        # With fewer than two beads there are no pairs at all; report the
        # (empty) job as complete instead of dividing by zero.
        percent = sum(count) / size * 100 if size else 100.0
        sys.stderr.write(str(percent) + "\r")

    for j in workers:
        j.join()
    print('Finished dereplicating.')

    # Each slot of `edge` is iterated as a sequence of
    # (source, target, distance) records.
    with open(outputRaw, 'w') as f:
        f.write('Source\tTarget\tDistance\n')
        for item in edge:
            for line in item:
                f.write('{0}\t{1}\t{2}\n'.format(line[0], line[1], line[2]))
예제 #4
0
                    default='kmerReport.tsv',
                    help='A report on kmer distribuion on Kmers.')
args = parser.parse_args()

inputFile = args.i
outputFile = args.o
k = args.k
reportFile = args.report

# kmerPool -- one {barcode: kmers} mapping per bead, in input order
# report   -- histogram: len(kmers.kmers) -> number of beads with that value
# kmerFrag -- (len(kmers.kmers), len(b.fragments)) per bead
kmerPool = []
report = {}
kmerFrag = []
with open(inputFile, 'r') as f:
    # Each input line is parsed as one JSON document describing a bead.
    for line in f:
        b = bead.beadSequence(json.loads(line))
        kmers = kmer.kmerCount(b, k)
        barcode = b.barcode
        kmerPool.append({barcode: kmers.kmers})
        kmerNumber = len(kmers.kmers)
        fragNumber = len(b.fragments)
        kmerFrag.append((kmerNumber, fragNumber))
        # Default must be 0: with a default of 1 the first bead in each
        # bucket was counted twice, inflating every Count by one.
        report[kmerNumber] = report.get(kmerNumber, 0) + 1

# Dump the per-bead k-mer pools, one JSON object per line.
with open(outputFile, 'w') as f:
    for entry in kmerPool:
        f.write('{0}\n'.format(json.dumps(entry)))

# Turn the histogram into a list of (kmerNumber, count) pairs ordered by
# kmerNumber, ready to be written to the report file.
report = sorted(report.items(), key=lambda item: item[0])

with open(reportFile, 'w') as f:
    f.write('KmerNumber\tCount\n')
    for line in report: