def get_num_unassigned(simulation_output_dir):
    """Count the records in the unassigned-barcodes FASTQ of a simulation run.

    Reads ``<simulation_output_dir>/reads_split/cell_unassigned_barcodes.fastq.gz``
    and counts the items yielded by ``IO_utils.read_fastq_sequential``
    (presumably one item per FASTQ record -- confirm against IO_utils).

    Args:
        simulation_output_dir: Directory containing the ``reads_split``
            sub-directory.

    Returns:
        The number of records yielded for the unassigned file.

    Raises:
        IOError/OSError: if the FASTQ file cannot be opened (no existence
            check is done here, unlike the per-cell helpers in this file).
    """
    fq_fname = '%s/reads_split/cell_unassigned_barcodes.fastq.gz' % \
     (simulation_output_dir)

    num_unassigned = 0
    # The context manager guarantees the gzip handle is closed even if the
    # reader raises part-way through the file.
    with gzip.open(fq_fname, 'rb') as fq_file:
        for _ in IO_utils.read_fastq_sequential(fq_file):
            num_unassigned += 1
    return num_unassigned
def get_fraction_consistent(pred_bc, simulation_output_dir):
    """Return the share of a predicted cell's reads that carry its most common label.

    Reads ``<simulation_output_dir>/reads_split/cell_<pred_bc>_barcodes.fastq.gz``.
    For every record, a barcode label is taken from the read name (first
    element of the record's lines): the second-to-last ``:``-separated field,
    truncated at its first ``_``.  The result is the count of the most
    frequent label divided by the total number of reads.

    NOTE(review): get_fraction_correct_reads in this file treats the same
    read-name field as the *true* (simulated) barcode -- presumably that is
    what the label represents here as well; confirm against the simulator.

    Args:
        pred_bc: Predicted barcode used to build the per-cell file name.
        simulation_output_dir: Directory containing the ``reads_split``
            sub-directory.

    Returns:
        A float in (0, 1] when the file contains reads; ``0`` when the file
        does not exist or yields no reads.
    """
    fq_fname = '%s/reads_split/cell_%s_barcodes.fastq.gz' % \
     (simulation_output_dir, pred_bc)
    if not os.path.exists(fq_fname):
        return 0

    label_counts = Counter()
    # Close the gzip handle deterministically instead of leaking it.
    with gzip.open(fq_fname, 'rb') as fq_file:
        for (lines, _) in IO_utils.read_fastq_sequential(fq_file):
            read_name = lines[0]
            label = read_name.split(':')[-2].split('_')[0]
            label_counts[label] += 1

    if not label_counts:
        # File exists but holds no reads.
        return 0

    total_reads = sum(label_counts.values())
    top_count = max(label_counts.values())
    # Force true division: with two ints Python 2 would floor the ratio
    # to 0 or 1.
    return float(top_count) / total_reads
def get_fraction_correct_reads(pred_bc, simulation_output_dir):
    """Return the fractions of a predicted cell's reads whose barcodes match / differ.

    Reads ``<simulation_output_dir>/reads_split/cell_<pred_bc>_barcodes.fastq.gz``.
    For every record the read name (first element of the record's lines) is
    split on ``:``; the last field is taken as the assigned barcode and the
    second-to-last field, truncated at its first ``_``, as the true barcode.

    Args:
        pred_bc: Predicted barcode used to build the per-cell file name.
        simulation_output_dir: Directory containing the ``reads_split``
            sub-directory.

    Returns:
        A tuple ``(matched_fraction, mismatched_fraction)`` of floats that
        sum to 1, or ``(0, 0)`` when the file does not exist or yields no
        reads.
    """
    fq_fname = '%s/reads_split/cell_%s_barcodes.fastq.gz' % \
     (simulation_output_dir, pred_bc)
    if not os.path.exists(fq_fname):
        return (0, 0)

    num_match = 0
    num_mismatch = 0
    # Close the gzip handle deterministically instead of leaking it.
    with gzip.open(fq_fname, 'rb') as fq_file:
        for (lines, _) in IO_utils.read_fastq_sequential(fq_file):
            fields = lines[0].split(':')
            assigned_bc = fields[-1]
            true_bc = fields[-2].split('_')[0]
            if assigned_bc == true_bc:
                num_match += 1
            else:
                num_mismatch += 1

    total_reads = num_match + num_mismatch
    if total_reads == 0:
        # An existing but empty file used to raise ZeroDivisionError;
        # report it the same way as a missing file.
        return (0, 0)

    return (float(num_match) / total_reads,
            float(num_mismatch) / total_reads)