def get_num_unassigned(simulation_output_dir):
    """Count the reads in a simulation's unassigned-barcodes FASTQ file.

    Args:
        simulation_output_dir: Directory that contains
            ``reads_split/cell_unassigned_barcodes.fastq.gz``.

    Returns:
        The number of records yielded by
        ``IO_utils.read_fastq_sequential`` for that file.

    Raises:
        IOError / OSError: Propagated from ``gzip.open`` when the file
            cannot be opened (e.g. it does not exist).
    """
    fq_fname = '%s/reads_split/cell_unassigned_barcodes.fastq.gz' % (
        simulation_output_dir)
    # The context manager guarantees the gzip handle is closed, even if the
    # FASTQ reader raises part-way through the file.
    with gzip.open(fq_fname, 'rb') as fq_file:
        return sum(1 for _ in IO_utils.read_fastq_sequential(fq_file))
def get_fraction_consistent(pred_bc, simulation_output_dir):
    """Return the share of a cell's reads that carry its most common label.

    Reads ``reads_split/cell_<pred_bc>_barcodes.fastq.gz`` under
    ``simulation_output_dir``. For every record, the label is taken from the
    read name (first line of the record): the second-to-last ``:``-separated
    field, truncated at its first ``_``.
    NOTE(review): the sibling ``get_fraction_correct_reads`` treats this same
    field as the *true* barcode -- confirm against the simulator's read-name
    format.

    Args:
        pred_bc: Barcode identifier used to build the FASTQ file name.
        simulation_output_dir: Root directory of the simulation output.

    Returns:
        ``count_of_most_common_label / total_reads`` as a float, or ``0``
        when the file does not exist or contains no reads.
    """
    fq_fname = '%s/reads_split/cell_%s_barcodes.fastq.gz' % (
        simulation_output_dir, pred_bc)
    if not os.path.exists(fq_fname):
        return 0

    label_counts = Counter()
    # Close the gzip handle deterministically instead of leaking it.
    with gzip.open(fq_fname, 'rb') as fq_file:
        for (lines, _) in IO_utils.read_fastq_sequential(fq_file):
            read_name = lines[0]
            label = read_name.split(':')[-2].split('_')[0]
            label_counts[label] += 1

    # Explicit empty check replaces catching IndexError from most_common().
    if not label_counts:
        return 0

    total_reads = sum(label_counts.values())
    _, top_count = label_counts.most_common(1)[0]
    # float() keeps this a true division even under Python 2 int semantics.
    return float(top_count) / total_reads
def get_fraction_correct_reads(pred_bc, simulation_output_dir):
    """Return the fractions of a cell's reads whose barcodes match / differ.

    Reads ``reads_split/cell_<pred_bc>_barcodes.fastq.gz`` under
    ``simulation_output_dir``. For every record the read name (first line of
    the record) is split on ``:``; the last field is the assigned barcode and
    the second-to-last field, truncated at its first ``_``, is the true
    barcode.

    Args:
        pred_bc: Barcode identifier used to build the FASTQ file name.
        simulation_output_dir: Root directory of the simulation output.

    Returns:
        A tuple ``(fraction_matching, fraction_mismatching)`` of floats that
        sum to 1, or ``(0, 0)`` when the file does not exist or contains no
        reads.
    """
    fq_fname = '%s/reads_split/cell_%s_barcodes.fastq.gz' % (
        simulation_output_dir, pred_bc)
    if not os.path.exists(fq_fname):
        return (0, 0)

    num_matching = 0
    num_mismatching = 0
    # Close the gzip handle deterministically instead of leaking it.
    with gzip.open(fq_fname, 'rb') as fq_file:
        for (lines, _) in IO_utils.read_fastq_sequential(fq_file):
            name_fields = lines[0].split(':')
            assigned_bc = name_fields[-1]
            true_bc = name_fields[-2].split('_')[0]
            if assigned_bc == true_bc:
                num_matching += 1
            else:
                num_mismatching += 1

    total_reads = num_matching + num_mismatching
    # An existing-but-empty file used to raise ZeroDivisionError; report it
    # the same way as a missing file.
    if total_reads == 0:
        return (0, 0)

    total_reads = float(total_reads)
    return (num_matching / total_reads, num_mismatching / total_reads)