Esempio n. 1
0
def assign_read_kmers(params):
    """
    Try to assign one read to a cell barcode through k-mer lookup.

    Starting at max_kmer_size and stepping down by one, the cyclic k-mers
    of the barcode read are computed and passed, together with kmer_map,
    to get_most_common_bc. The first k-mer size that produces a result
    flagged as both assigned and unique decides the barcode.

    Args:
        params (tuple): packed as
            (kmer_map,
             min_kmer_size,
             max_kmer_size,
             (reads_data, reads_offset),
             (barcodes_data, barcodes_offset))

            kmer_map: handed unchanged to get_most_common_bc
                (presumably kmer -> candidate barcodes; confirm
                against the caller).
            min_kmer_size: exclusive lower bound of the sizes tried.
            max_kmer_size: first (largest) size tried.
            reads_data: unpacked but not used by this function.
            reads_offset: echoed back in the result.
            barcodes_data: barcode read given to
                IO_utils.get_cyclic_kmers.
            barcodes_offset: echoed back in the result.

    Returns:
        tuple: (barcode, reads_offset, barcodes_offset). barcode is the
        first element of the candidates returned by get_most_common_bc,
        or the string 'unassigned' when no k-mer size gave an assigned
        and unique hit.
    """
    kmer_map, smallest_k, largest_k, read_entry, barcode_entry = params
    _reads_data, reads_offset = read_entry
    barcodes_data, barcodes_offset = barcode_entry

    # NOTE(review): range() stops before smallest_k, so min_kmer_size
    # itself is never attempted -- confirm this is intended.
    for k in range(largest_k, smallest_k, -1):
        candidate_kmers = IO_utils.get_cyclic_kmers(
            barcodes_data,
            k,
            args['barcode_start'],
            args['barcode_end'],
            indel=True)
        candidates, assigned, unique = get_most_common_bc(
            kmer_map, candidate_kmers)
        if not (assigned and unique):
            # No unambiguous hit at this size; retry with a shorter k-mer.
            continue
        return (candidates[0], reads_offset, barcodes_offset)

    return ('unassigned', reads_offset, barcodes_offset)
Esempio n. 2
0
def build_subgraph(reads_in_subgraph, barcodes_unzipped):
    """
    Build a k-mer graph from a selected subset of barcode reads.

    The barcode file is opened in binary mode and the reads at the given
    offsets are pulled through IO_utils.read_fastq_random. Every cyclic
    k-mer of every read (k-mer size and barcode window are taken from the
    module-level ``args``) is tallied, and each distinct k-mer becomes
    one edge of the resulting graph.

    Args:
        reads_in_subgraph: offsets of the reads to include; forwarded as
            ``offsets`` to IO_utils.read_fastq_random.
        barcodes_unzipped: path of the barcode file to read
            (presumably an uncompressed FASTQ; confirm with the caller).

    Returns:
        Graph: built from one Edge per distinct k-mer, running from the
        k-mer without its last character to the k-mer without its first
        character, with the k-mer's occurrence count as third argument.
    """
    subgraph_kmer_counts = Counter()

    # The context manager closes the file even when reading or k-mer
    # extraction raises; the manual close() used before leaked the handle
    # on any exception.
    with open(barcodes_unzipped, 'rb') as bc_file:
        barcodes_iter = IO_utils.read_fastq_random(
            bc_file, offsets=reads_in_subgraph)
        for barcode_data, _offset in barcodes_iter:
            read_kmers = IO_utils.get_cyclic_kmers(
                barcode_data,
                int(args['kmer_size']),
                int(args['barcode_start']),
                int(args['barcode_end']))
            for kmer, _ in read_kmers:
                subgraph_kmer_counts[kmer] += 1

    # Each k-mer links its (k-1)-length prefix to its (k-1)-length suffix.
    edges = [
        Edge(kmer[0:-1], kmer[1:], count)
        for kmer, count in subgraph_kmer_counts.items()
    ]
    return Graph(edges)
Esempio n. 3
0
def map_kmers_to_bcs_fixed_k(consensus_bcs, kmer_size):
    """
    Index consensus barcodes by the cyclic k-mers they contain.

    Args:
        consensus_bcs: iterable of cell barcode sequences.
        kmer_size: k-mer length forwarded to IO_utils.get_cyclic_kmers.

    Returns:
        dict: kmer -> list of barcodes that yielded that k-mer. A barcode
        is appended once for every time the k-mer is yielded for it, so a
        list may contain repeats.
    """
    barcodes_by_kmer = {}
    for barcode in consensus_bcs:
        # get_cyclic_kmers is fed a four-element record; the barcode is
        # placed at positions 1 and 3 with 'na' fillers elsewhere
        # (presumably mimicking FASTQ header/sequence/separator/quality
        # lines -- confirm against IO_utils).
        pseudo_record = ['na', barcode, 'na', barcode]
        cyclic_kmers = IO_utils.get_cyclic_kmers(
            pseudo_record, kmer_size, 0, len(barcode), indel=True)
        for kmer, _ in cyclic_kmers:
            barcodes_by_kmer.setdefault(kmer, []).append(barcode)
    return barcodes_by_kmer
Esempio n. 4
0
def index_read(params):
    """
    Build a k-mer index for a single barcode read.

    The k-mer size and barcode window come from the module-level ``args``
    ('kmer_size', 'barcode_start', 'barcode_end').

    Args:
        params (tuple): (barcodes_data, barcodes_offset)
            barcodes_data: barcode read (read_1) handed to
                IO_utils.get_cyclic_kmers (exact type not visible here;
                verify against IO_utils).
            barcodes_offset (int): line offset for this read.

    Returns:
        dict: kmer -> list containing barcodes_offset once for every
        time that k-mer is yielded for this read.
    """
    barcodes_data, barcodes_offset = params

    cyclic_kmers = IO_utils.get_cyclic_kmers(
        barcodes_data,
        args['kmer_size'],
        args['barcode_start'],
        args['barcode_end'])

    offsets_by_kmer = {}
    for kmer, _ in cyclic_kmers:
        offsets_by_kmer.setdefault(kmer, []).append(barcodes_offset)
    return offsets_by_kmer