def assign_read_kmers(params):
    """
    Assign a single read to a cell barcode by kmer compatibility.

    Kmer sizes are tried from max_kmer_size downwards; the first size for
    which get_most_common_bc reports a barcode that is both assigned and
    unique wins. Barcode bounds are read from the module-level `args`
    ('barcode_start', 'barcode_end').

    Args
        params (tuple):
            kmer_map: lookup handed unchanged to get_most_common_bc
                (presumably kmer -> list of barcodes containing it;
                verify against the caller)
            min_kmer_size (int): lower bound of the search, exclusive
            max_kmer_size (int): first kmer size that is tried
            (reads_data, reads_offset): reads_data is not used here,
                reads_offset is echoed back in the result
            (barcodes_data, barcodes_offset): barcodes_data is passed to
                IO_utils.get_cyclic_kmers, barcodes_offset is echoed back

    Returns
        tuple: (barcode, reads_offset, barcodes_offset) where barcode is
            the first entry returned by get_most_common_bc, or the string
            'unassigned' when no kmer size gives a unique assignment
    """
    kmer_map, min_kmer_size, max_kmer_size, read_entry, barcode_entry = params
    _reads_data, reads_offset = read_entry
    barcodes_data, barcodes_offset = barcode_entry

    # NOTE(review): range() stops before min_kmer_size, so min_kmer_size
    # itself is never tried -- confirm that this is intended.
    for kmer_size in range(max_kmer_size, min_kmer_size, -1):
        candidate_kmers = IO_utils.get_cyclic_kmers(
            barcodes_data,
            kmer_size,
            args['barcode_start'],
            args['barcode_end'],
            indel=True)
        candidates, assigned, unique = get_most_common_bc(
            kmer_map, candidate_kmers)
        if not (assigned and unique):
            # no unambiguous hit at this size: retry with a smaller kmer
            continue
        return (candidates[0], reads_offset, barcodes_offset)

    return ('unassigned', reads_offset, barcodes_offset)
def build_subgraph(reads_in_subgraph, barcodes_unzipped):
    """
    Build a Graph from the kmers of a selected set of barcode reads.

    Each distinct kmer yielded by IO_utils.get_cyclic_kmers becomes one
    Edge from its prefix (kmer without its last item) to its suffix (kmer
    without its first item), carrying the number of times the kmer was
    seen. Kmer size and barcode bounds come from the module-level `args`
    ('kmer_size', 'barcode_start', 'barcode_end'), converted with int().

    Args
        reads_in_subgraph: offsets forwarded to
            IO_utils.read_fastq_random as `offsets`
        barcodes_unzipped (str): path of the barcodes file, opened in
            binary mode

    Returns
        Graph: graph constructed from the list of weighted edges
    """
    # Loop-invariant settings: convert once instead of once per read.
    kmer_size = int(args['kmer_size'])
    barcode_start = int(args['barcode_start'])
    barcode_end = int(args['barcode_end'])

    subgraph_kmer_counts = Counter()
    # The context manager closes the file even when reading or kmer
    # extraction raises; the reader is fully consumed inside the block.
    with open(barcodes_unzipped, 'rb') as bc_file:
        barcodes_iter = IO_utils.read_fastq_random(
            bc_file, offsets=reads_in_subgraph)
        for barcode_data, _ in barcodes_iter:
            read_kmers = IO_utils.get_cyclic_kmers(
                barcode_data, kmer_size, barcode_start, barcode_end)
            subgraph_kmer_counts.update(kmer for (kmer, _) in read_kmers)

    edges = [
        Edge(kmer[0:-1], kmer[1:], count)
        for (kmer, count) in subgraph_kmer_counts.items()
    ]
    return Graph(edges)
def map_kmers_to_bcs_fixed_k(consensus_bcs, kmer_size):
    """
    Map every kmer of the consensus barcodes to the barcodes it came from.

    Args
        consensus_bcs (iterable): cell barcode sequences
        kmer_size (int): kmer length passed to IO_utils.get_cyclic_kmers

    Returns
        dict: kmer -> list of cell barcodes; a barcode is appended once
            for every time get_cyclic_kmers yields that kmer for it
    """
    barcodes_by_kmer = {}
    for barcode in consensus_bcs:
        # Four-item record with the barcode at indices 1 and 3; presumably
        # this mimics a fastq entry (sequence / quality) -- verify against
        # IO_utils.get_cyclic_kmers.
        pseudo_entry = ['na', barcode, 'na', barcode]
        barcode_kmers = IO_utils.get_cyclic_kmers(
            pseudo_entry, kmer_size, 0, len(barcode), indel=True)
        for kmer, _ in barcode_kmers:
            barcodes_by_kmer.setdefault(kmer, []).append(barcode)
    return barcodes_by_kmer
def index_read(params):
    """
    Index one barcode read by the kmers it contains.

    Kmer size and barcode bounds are read from the module-level `args`
    ('kmer_size', 'barcode_start', 'barcode_end').

    Args
        params (tuple):
            barcodes_data: barcode read passed to
                IO_utils.get_cyclic_kmers (NOTE(review): exact type
                expected by that helper is not visible here -- confirm)
            barcodes_offset: offset identifying this read; stored as the
                value for each kmer

    Returns
        kmer_index (dict): kmer -> list holding barcodes_offset once for
            every time the kmer is yielded for this read
    """
    barcodes_data, barcodes_offset = params
    kmers = IO_utils.get_cyclic_kmers(
        barcodes_data,
        args['kmer_size'],
        args['barcode_start'],
        args['barcode_end'])

    kmer_index = {}
    for kmer, _ in kmers:
        kmer_index.setdefault(kmer, []).append(barcodes_offset)
    return kmer_index