Ejemplos de jointplot en Python, ejemplos de basenji.plots.jointplot en Python

Ejemplo n.º 1

0

Mostrar archivo

def quantile_accuracy(gene_targets, gene_preds, gene_stat, out_pdf, numq=4):
  ''' Plot accuracy (PearsonR) in quantile bins across targets.

  Rows of gene_targets/gene_preds are partitioned into numq bins using
  stats.quantile_indexes(gene_stat, numq); within each bin, the PearsonR
  between targets and predictions is computed for every column (target).

  Args:
    gene_targets: 2D array; rows are selected by quantile, columns are targets.
    gene_preds: 2D array of predictions, indexed like gene_targets.
    gene_stat: per-row statistic passed to stats.quantile_indexes.
    out_pdf: path of the summary violin plot; its last four characters
      (assumed to be the '.pdf' extension) are stripped to name the
      companion outputs.
    numq: number of quantile bins.

  Writes:
    <base>.csv            PearsonR for every (quantile, target) pair
    <base>.txt            mean and median PearsonR per quantile
    out_pdf               violin plot with one faint line per target
    <base>_qt.txt         targets sampled for the per-quantile scatter plots
    <base>_pq<i>_q<j>.pdf scatter plot of sampled target i in quantile j
  '''
  # companion files share the PDF's name minus its 4-character extension
  out_base = out_pdf[:-4]
  num_targets = gene_targets.shape[1]

  # plot PearsonR in variance statistic bins
  quant_indexes = stats.quantile_indexes(gene_stat, numq)

  quantiles_series = []
  targets_series = []
  pcor_series = []

  for qi in range(numq):
    # slice quantile
    gene_targets_quant = gene_targets[quant_indexes[qi]].astype('float32')
    gene_preds_quant = gene_preds[quant_indexes[qi]].astype('float32')

    # compute target PearsonR
    for ti in range(gene_targets_quant.shape[1]):
      pcor, _ = pearsonr(gene_targets_quant[:,ti],
                          gene_preds_quant[:,ti])

      quantiles_series.append(qi)
      targets_series.append(ti)
      pcor_series.append(pcor)

  # construct DataFrame
  df_quant = pd.DataFrame({'Quantile':quantiles_series,
                            'Target':targets_series,
                            'PearsonR':pcor_series})
  df_quant.to_csv('%s.csv' % out_base)

  # print summary table; the context manager closes the file even on error
  with open('%s.txt' % out_base, 'w') as table_out:
    for qi in range(numq):
      quantile_cors = df_quant[df_quant.Quantile == qi].PearsonR
      print('%2d  %.4f  %.4f' % \
            (qi, np.mean(quantile_cors),np.median(quantile_cors)),
            file=table_out)

  # construct figure
  plt.figure()

  # plot individual targets as light lines
  for ti in range(num_targets):
    df_quant_target = df_quant[df_quant.Target == ti]
    plt.plot(df_quant_target.Quantile, df_quant_target.PearsonR, alpha=0.1)

  # plot PearsonR distributions in quantiles
  sns.violinplot(x='Quantile', y='PearsonR', data=df_quant, color='tomato')

  plt.savefig(out_pdf)
  plt.close()

  # sort targets by their decrease (last-quantile R over first-quantile R)
  target_ratios = []
  for ti in range(num_targets):
    df_quant_target = df_quant[df_quant.Target == ti]
    assert(df_quant_target.Quantile.iloc[0] == 0)
    assert(df_quant_target.Quantile.iloc[-1] == numq-1)
    cor_ratio = df_quant_target.PearsonR.iloc[-1] / df_quant_target.PearsonR.iloc[0]
    target_ratios.append((cor_ratio,ti))
  target_ratios = sorted(target_ratios)

  # take 10 samples across (11 evenly spaced positions in the sorted list)
  pct_indexes = np.linspace(0, len(target_ratios)-1, 10+1).astype('int')

  sns.set(font_scale=1.2, style='ticks')

  # write quantile targets
  with open('%s_qt.txt' % out_base, 'w') as table_out:
    # scatter plot each quantile
    for qi in range(numq):
      # slice quantile
      gene_targets_quant = gene_targets[quant_indexes[qi]].astype('float32')
      gene_preds_quant = gene_preds[quant_indexes[qi]].astype('float32')

      for pqi, pct_i in enumerate(pct_indexes):
        ti = target_ratios[pct_i][1]

        # target_ratios is sorted by ratio, so it must be indexed by the
        # sampled position pct_i; indexing by the target id ti would report
        # some other target's (ratio, target) pair.
        print(qi, pqi, ti, target_ratios[pct_i], file=table_out)

        qout_pdf = '%s_pq%d_q%d.pdf' % (out_base, pqi, qi)
        plots.jointplot(gene_targets_quant[:,ti], gene_preds_quant[:,ti],
                                qout_pdf, alpha=0.8, point_size=8, kind='reg',
                                figsize=5, x_label='log2 Experiment',
                                y_label='log2 Prediction')

Ejemplo n.º 2

0

Mostrar archivo

def variance_accuracy(gene_targets, gene_preds, out_prefix, log_pseudo=None):
  """ Relate per-gene prediction error (MSE) to the gene's mean, standard
    deviation, and coefficient of variation across targets.

    Assumes the targets and predictions have been normalized.

    Args:
      gene_targets: 2D array; statistics are taken along axis 1.
      gene_preds: 2D array of predictions, same layout as gene_targets.
      out_prefix: prefix for every output file written here.
      log_pseudo: accepted but not used by this function.

    Writes <out_prefix>_mean-std.pdf, _mean.pdf, _std.pdf, _cv.pdf,
    _quant.txt and _quant.pdf, then runs quantile_accuracy on the CV and
    std-dev statistics (outputs under <out_prefix>_qcv / _qstd).
    """

  print('gene_targets', gene_targets.shape)

  # per-gene summary statistics across targets
  squared_error = np.power(gene_targets - gene_preds, 2)
  gene_mse = squared_error.mean(axis=1, dtype='float64')
  gene_mean = np.mean(gene_targets, axis=1, dtype='float64')
  gene_std = np.std(gene_targets, axis=1, dtype='float64')
  gene_max = np.max(gene_targets, axis=1)

  # keep only genes with sufficient expression
  expr_indexes = (gene_mean > 0.5) & (gene_max > 3)
  num_expressed = expr_indexes.sum()
  gene_targets = gene_targets[expr_indexes,:]
  gene_preds = gene_preds[expr_indexes,:]
  gene_mean = gene_mean[expr_indexes]
  gene_std = gene_std[expr_indexes]
  gene_mse = gene_mse[expr_indexes]
  print('%d "expressed genes" considered in variance plots' % num_expressed)

  sns.set(style='ticks', font_scale=1.3)

  # scatter plots show at most 2000 randomly chosen genes
  num_genes = len(gene_mse)
  if num_genes >= 2000:
    ri = np.random.choice(np.arange(num_genes), 2000, replace=False)
  else:
    ri = np.arange(num_genes)

  gene_cv = np.divide(gene_std, gene_mean)

  mse_label = 'Mean squared prediction error'
  mean_label = 'Mean across experiments'
  std_label = 'Std Dev across experiments'

  # (file suffix, x values, y values, x label, y label), in plotting order
  scatter_specs = [
    ('mean-std', gene_mean, gene_std, mean_label, std_label),
    ('mean', gene_mean, gene_mse, mean_label, mse_label),
    ('std', gene_std, gene_mse, std_label, mse_label),
    ('cv', gene_cv, gene_mse, 'Coef Var across experiments', mse_label),
  ]
  for suffix, x_vals, y_vals, x_text, y_text in scatter_specs:
    out_pdf = '%s_%s.pdf' % (out_prefix, suffix)
    plots.jointplot(x_vals[ri], y_vals[ri], out_pdf, point_size=10,
      cor='spearmanr', x_label=x_text, y_label=y_text)

  # MSE distributions in CV bins
  numq = 5
  quant_indexes = stats.quantile_indexes(gene_cv, numq)
  quant_rows = [[qi, gene_mse[gi]]
                for qi in range(numq)
                for gi in quant_indexes[qi]]
  quant_mse = pd.DataFrame(quant_rows, columns=['Quantile','MSE'])

  quant_mse.to_csv('%s_quant.txt' % out_prefix, sep='\t')

  plt.figure()
  sns.boxplot(x='Quantile', y='MSE', data=quant_mse,
              palette=sns.cubehelix_palette(numq), showfliers=False)
  ax = plt.gca()
  ax.grid(True, linestyle=':')
  ax.set_ylabel(mse_label)
  plt.savefig('%s_quant.pdf' % out_prefix)
  plt.close()

  # accuracy within CV quantiles, then within std-dev quantiles
  quantile_accuracy(gene_targets, gene_preds, gene_cv, '%s_qcv.pdf'%out_prefix, 4)
  quantile_accuracy(gene_targets, gene_preds, gene_std, '%s_qstd.pdf'%out_prefix, 4)

Ejemplo n.º 3

0

Mostrar archivo

def replicate_correlations(replicate_lists,
                           gene_targets,
                           gene_preds,
                           target_indexes,
                           out_prefix,
                           scatter_plots=False):
  """ Study replicate correlations.

  For every label in replicate_lists that has more than one target index and
  shares at least one index with target_indexes, compares the first two
  listed targets: the PearsonR between the two replicates' log2(x+1)
  targets, and the average PearsonR between each replicate and its
  prediction.

  Args:
    replicate_lists: dict mapping a label to a sequence of target indexes.
    gene_targets: 2D array; columns are selected by target index.
    gene_preds: 2D array of predictions, same layout as gene_targets.
    target_indexes: iterable of target indexes to consider.
    out_prefix: prefix for every output file.
    scatter_plots: if True, also write per-pair regression scatter plots
      on a random sample of at most 1000 rows.

  Writes <out_prefix>.txt (one row per replicate pair) and
  <out_prefix>_scatter.pdf (replicate R versus prediction R); with
  scatter_plots, also <out_prefix>_s<i>.pdf, _s<i>_rep1.pdf, _s<i>_rep2.pdf.
  """

  # for intersections
  target_set = set(target_indexes)

  rep_cors = []
  pred_cors = []

  sns.set(style='ticks', font_scale=1.3)
  num_genes = gene_targets.shape[0]

  # sampling without replacement cannot draw more rows than exist
  num_scatter = min(1000, num_genes)

  li = 0

  # the context manager closes the table even if a correlation or plot fails
  with open('%s.txt' % out_prefix, 'w') as table_out:
    for label in sorted(replicate_lists.keys()):
      rep_targets = replicate_lists[label]
      if len(rep_targets) < 2 or not (target_set & set(rep_targets)):
        continue

      ti1 = rep_targets[0]
      ti2 = rep_targets[1]

      # retrieve targets
      gene_targets_rep1 = np.log2(gene_targets[:, ti1].astype('float32') + 1)
      gene_targets_rep2 = np.log2(gene_targets[:, ti2].astype('float32') + 1)

      # retrieve predictions
      gene_preds_rep1 = np.log2(gene_preds[:, ti1].astype('float32') + 1)
      gene_preds_rep2 = np.log2(gene_preds[:, ti2].astype('float32') + 1)

      #####################################
      # replicate

      # compute replicate correlation
      rcor, _ = pearsonr(gene_targets_rep1, gene_targets_rep2)
      rep_cors.append(rcor)

      #####################################
      # prediction

      # compute prediction correlation, averaged over the two replicates
      pcor1, _ = pearsonr(gene_targets_rep1, gene_preds_rep1)
      pcor2, _ = pearsonr(gene_targets_rep2, gene_preds_rep2)
      pcor = 0.5 * pcor1 + 0.5 * pcor2
      pred_cors.append(pcor)

      if scatter_plots:
        # one random row sample is shared by all three plots of this pair
        gene_indexes = np.random.choice(
            range(num_genes), num_scatter, replace=False)

        # scatter plot rep vs rep
        out_pdf = '%s_s%d.pdf' % (out_prefix, li)
        plots.regplot(
            gene_targets_rep1[gene_indexes],
            gene_targets_rep2[gene_indexes],
            out_pdf,
            poly_order=3,
            alpha=0.3,
            x_label='log2 Replicate 1',
            y_label='log2 Replicate 2')

        # scatter plot rep1 vs pred
        out_pdf = '%s_s%d_rep1.pdf' % (out_prefix, li)
        plots.regplot(
            gene_targets_rep1[gene_indexes],
            gene_preds_rep1[gene_indexes],
            out_pdf,
            poly_order=3,
            alpha=0.3,
            x_label='log2 Experiment',
            y_label='log2 Prediction')

        # scatter plot rep2 vs pred
        out_pdf = '%s_s%d_rep2.pdf' % (out_prefix, li)
        plots.regplot(
            gene_targets_rep2[gene_indexes],
            gene_preds_rep2[gene_indexes],
            out_pdf,
            poly_order=3,
            alpha=0.3,
            x_label='log2 Experiment',
            y_label='log2 Prediction')

      #####################################
      # table

      print(
          '%4d  %4d  %4d  %7.4f  %7.4f  %s' % (li, ti1, ti2, rcor, pcor, label),
          file=table_out)

      # update counter
      li += 1

  #######################################################
  # scatter plot replicate versus prediction correlation

  rep_cors = np.array(rep_cors)
  pred_cors = np.array(pred_cors)

  out_pdf = '%s_scatter.pdf' % out_prefix
  plots.jointplot(
      rep_cors,
      pred_cors,
      out_pdf,
      square=True,
      x_label='Replicate R',
      y_label='Prediction R')

Ejemplo n.º 4

0

Mostrar archivo

def cor_table(gene_targets,
              gene_preds,
              target_ids,
              target_labels,
              target_indexes,
              out_file,
              draw_plots=False):
  """ Print a table and plot the distribution of target correlations.

  For each target index, correlates log2(x+1) targets with log2(x+1)
  predictions over all rows, and again over only the rows whose log target
  is positive.

  Args:
    gene_targets: 2D array; columns are selected by target index.
    gene_preds: 2D array of predictions, same layout as gene_targets.
    target_ids: sequence indexed by target index, printed in the table.
    target_labels: sequence indexed by target index, printed in the table.
    target_indexes: target indexes (columns) to evaluate.
    out_file: path of the text table to write.
    draw_plots: if True, also write <out_base>_sig.pdf and
      <out_base>_nz_sig.pdf, where out_base is out_file minus its extension.

  Returns:
    Array of all-row PearsonR values, one per entry of target_indexes.
  """

  cors = []
  cors_nz = []

  # the context manager closes the table even if a correlation call raises
  with open(out_file, 'w') as table_out:
    for ti in target_indexes:
      # convert targets and predictions to float32, then log transform
      gti = np.log2(np.array(gene_targets[:, ti], dtype='float32') + 1)
      gpi = np.log2(np.array(gene_preds[:, ti], dtype='float32') + 1)

      # compute correlations
      scor, _ = spearmanr(gti, gpi)
      pcor, _ = pearsonr(gti, gpi)
      cors.append(pcor)

      # compute non-zero correlations (rows with a positive log target)
      nzi = (gti > 0)
      scor_nz, _ = spearmanr(gti[nzi], gpi[nzi])
      pcor_nz, _ = pearsonr(gti[nzi], gpi[nzi])
      cors_nz.append(pcor_nz)

      # print
      cols = (ti, scor, pcor, scor_nz, pcor_nz, target_ids[ti], target_labels[ti])
      print('%-4d  %7.3f  %7.3f  %7.3f  %7.3f  %s %s' % cols, file=table_out)

  cors = np.array(cors)
  cors_nz = np.array(cors_nz)

  if draw_plots:
    # plot correlation distribution
    out_base = os.path.splitext(out_file)[0]
    sns.set(style='ticks', font_scale=1.3)

    # per-target signal: column sums of the log-transformed targets
    gene_targets_log = np.log2(gene_targets[:, target_indexes] + 1)
    target_signal = gene_targets_log.sum(axis=0)

    # plot correlations versus target signal
    plots.jointplot(
        target_signal,
        cors,
        '%s_sig.pdf' % out_base,
        x_label='Aligned TSS reads',
        y_label='Pearson R',
        cor=None,
        table=True)

    # plot nonzero correlations versus target signal
    plots.jointplot(
        target_signal,
        cors_nz,
        '%s_nz_sig.pdf' % out_base,
        x_label='Aligned TSS reads',
        y_label='Pearson R',
        cor=None,
        table=True)

  return cors

Ejemplo n.º 5

0

Mostrar archivo

Archivo: basenji_sad_norm.py Proyecto: AndyPJiang/basenji

def main():
    """Fit a Cauchy distribution to sampled SAD scores for each target.

    Takes one positional argument, a directory searched for `*/sad.h5`
    files. Existing fit/normalization datasets are removed from each file,
    SNPs are sampled across the files, a Cauchy location and scale are fit
    per target, and the fit parameters are written back into every file.

    QC output under the `-o` directory:
      fits.txt            fitted location and scale per target
      quantiles/t<i>.pdf  empirical versus fitted-Cauchy quantiles
      norm/t<i>.pdf       raw versus normalized SAD for the first file
    """
    usage = 'usage: %prog [options] arg'
    parser = OptionParser(usage)
    parser.add_option('-o', dest='out_dir', default='sad_norm')
    parser.add_option(
        '-s',
        dest='sample',
        default=100000,
        type='int',
        help='Number of SNPs to sample for fit [Default: %default]')
    (options, args) = parser.parse_args()

    # parser.error() prints the usage message and exits
    if len(args) != 1:
        parser.error('Must provide SAD HDF5 path')
    sad_h5_path = args[0]

    # retrieve chromosome SAD HDF5 files
    chr_sad_h5_files = sorted(glob.glob('%s/*/sad.h5' % sad_h5_path))
    if not chr_sad_h5_files:
        # a usage error rather than an assert, which is stripped under -O
        parser.error('No */sad.h5 files found under %s' % sad_h5_path)

    # datasets left behind by a previous run; each is checked individually
    # so a partially written file is still cleaned completely
    stale_datasets = ('target_cauchy_fit_loc', 'target_cauchy_fit_scale',
                      'target_cauchy_norm_loc', 'target_cauchy_norm_scale')

    # clean out any existing fits
    # count SNPs across chromosomes
    num_snps = 0
    for chr_sad_h5_file in chr_sad_h5_files:
        with h5py.File(chr_sad_h5_file, 'r+') as chr_sad_h5:
            for dataset_name in stale_datasets:
                if dataset_name in chr_sad_h5:
                    del chr_sad_h5[dataset_name]

            # count SNPs
            num_snps += chr_sad_h5['SAD'].shape[0]
            num_targets = chr_sad_h5['SAD'].shape[-1]

    # sample SNPs across chromosomes
    sad = sample_sad(chr_sad_h5_files, options.sample, num_snps, num_targets)

    # initialize fit parameters
    target_cauchy_fit_loc = np.zeros(num_targets)
    target_cauchy_fit_scale = np.zeros(num_targets)

    # fit parameters
    for ti in range(num_targets):
        print('Fitting t%d' % ti, flush=True)
        cp = cauchy.fit(sad[:, ti])
        target_cauchy_fit_loc[ti] = cp[0]
        target_cauchy_fit_scale[ti] = cp[1]
    del sad

    # write across chromosomes
    for chr_sad_h5_file in chr_sad_h5_files:
        with h5py.File(chr_sad_h5_file, 'r+') as chr_sad_h5:
            chr_sad_h5.create_dataset('target_cauchy_fit_loc',
                                      data=target_cauchy_fit_loc)
            chr_sad_h5.create_dataset('target_cauchy_fit_scale',
                                      data=target_cauchy_fit_scale)

    # compute normalization parameters
    # NOTE(review): the SAD5 object is discarded, so this presumably relies
    # on a side effect of constructing it - confirm against the SAD5 class.
    for chr_sad_h5_file in chr_sad_h5_files:
        SAD5(chr_sad_h5_file)

    # QC fit table; makedirs also handles a nested or already existing out_dir
    os.makedirs(options.out_dir, exist_ok=True)
    with open('%s/fits.txt' % options.out_dir, 'w') as fit_out:
        for ti in range(num_targets):
            print('%-4d  %7.1e  %7.1e' %
                  (ti, target_cauchy_fit_loc[ti], target_cauchy_fit_scale[ti]),
                  file=fit_out)

    # QC quantiles
    quantile_dir = '%s/quantiles' % options.out_dir
    os.makedirs(quantile_dir, exist_ok=True)
    sad_qc = sample_sad(chr_sad_h5_files, 2048, num_snps, num_targets)
    for ti in np.linspace(0, num_targets - 1, 64, dtype='int'):
        # compute cauchy and argsort quantiles
        cauchy_q = cauchy.cdf(sad_qc[:, ti],
                              loc=target_cauchy_fit_loc[ti],
                              scale=target_cauchy_fit_scale[ti])
        sort_i = np.argsort(sad_qc[:, ti])

        quantile_pdf = '%s/t%d.pdf' % (quantile_dir, ti)

        jointplot(np.linspace(0, 1, len(sort_i)),
                  cauchy_q[sort_i],
                  quantile_pdf,
                  square=True,
                  cor=None,
                  x_label='Empirical',
                  y_label='Cauchy')

    # QC plots
    norm_dir = '%s/norm' % options.out_dir
    os.makedirs(norm_dir, exist_ok=True)
    chr_sad5 = SAD5(chr_sad_h5_files[0])
    qc_sample = 2048
    if qc_sample < chr_sad5.num_snps:
        ri = sorted(
            np.random.choice(np.arange(chr_sad5.num_snps),
                             size=qc_sample,
                             replace=False))
    else:
        ri = np.arange(chr_sad5.num_snps)
    qc_sad_raw = chr_sad5.sad_matrix[ri]
    qc_sad_norm = chr_sad5[ri]
    for ti in np.linspace(0, num_targets - 1, 32, dtype='int'):
        # sns.jointplot creates its own figure, so no plt.figure() is opened
        # first (it would be left open); x/y go by keyword because newer
        # seaborn releases no longer accept them positionally.
        sns.jointplot(x=qc_sad_raw[:, ti],
                      y=qc_sad_norm[:, ti],
                      joint_kws={
                          'alpha': 0.5,
                          's': 10
                      })
        plt.savefig('%s/t%d.pdf' % (norm_dir, ti))
        plt.close()

Ejemplo n.º 6

0

Mostrar archivo

Archivo: basenji_test_genes.py Proyecto: lisabang/basenji

def quantile_accuracy(gene_targets, gene_preds, gene_stat, out_pdf, numq=4):
    """Plot accuracy (PearsonR) in quantile bins across targets.

    Rows of gene_targets/gene_preds are partitioned into numq bins using
    quantile_indexes(gene_stat, numq); within each bin, the PearsonR between
    targets and predictions is computed for every column (target).

    Args:
        gene_targets: 2D array; rows are selected by quantile, columns are
            targets.
        gene_preds: 2D array of predictions, indexed like gene_targets.
        gene_stat: per-row statistic passed to quantile_indexes.
        out_pdf: path of the summary violin plot; its last four characters
            (assumed to be the ".pdf" extension) are stripped to name the
            companion outputs.
        numq: number of quantile bins.

    Writes:
        <base>.csv             PearsonR for every (quantile, target) pair
        <base>.txt             mean and median PearsonR per quantile
        out_pdf                violin plot with one faint line per target
        <base>_qt.txt          targets sampled for the scatter plots
        <base>_pq<i>_q<j>.pdf  scatter plot of sampled target i in quantile j
    """
    # companion files share the PDF's name minus its 4-character extension
    out_base = out_pdf[:-4]
    num_targets = gene_targets.shape[1]

    # plot PearsonR in variance statistic bins
    quant_indexes = quantile_indexes(gene_stat, numq)

    quantiles_series = []
    targets_series = []
    pcor_series = []

    for qi in range(numq):
        # slice quantile
        gene_targets_quant = gene_targets[quant_indexes[qi]].astype("float32")
        gene_preds_quant = gene_preds[quant_indexes[qi]].astype("float32")

        # compute target PearsonR
        for ti in range(gene_targets_quant.shape[1]):
            pcor, _ = pearsonr(gene_targets_quant[:, ti], gene_preds_quant[:, ti])

            quantiles_series.append(qi)
            targets_series.append(ti)
            pcor_series.append(pcor)

    # construct DataFrame
    df_quant = pd.DataFrame(
        {
            "Quantile": quantiles_series,
            "Target": targets_series,
            "PearsonR": pcor_series,
        }
    )
    df_quant.to_csv("%s.csv" % out_base)

    # print summary table; the context manager closes the file even on error
    with open("%s.txt" % out_base, "w") as table_out:
        for qi in range(numq):
            quantile_cors = df_quant[df_quant.Quantile == qi].PearsonR
            print(
                "%2d  %.4f  %.4f"
                % (qi, np.mean(quantile_cors), np.median(quantile_cors)),
                file=table_out,
            )

    # construct figure
    plt.figure()

    # plot individual targets as light lines
    for ti in range(num_targets):
        df_quant_target = df_quant[df_quant.Target == ti]
        plt.plot(df_quant_target.Quantile, df_quant_target.PearsonR, alpha=0.1)

    # plot PearsonR distributions in quantiles
    sns.violinplot(x="Quantile", y="PearsonR", data=df_quant, color="tomato")

    plt.savefig(out_pdf)
    plt.close()

    # sort targets by their decrease (last-quantile R over first-quantile R)
    target_ratios = []
    for ti in range(num_targets):
        df_quant_target = df_quant[df_quant.Target == ti]
        assert df_quant_target.Quantile.iloc[0] == 0
        assert df_quant_target.Quantile.iloc[-1] == numq - 1
        cor_ratio = df_quant_target.PearsonR.iloc[-1] / df_quant_target.PearsonR.iloc[0]
        target_ratios.append((cor_ratio, ti))
    target_ratios = sorted(target_ratios)

    # take 10 samples across (11 evenly spaced positions in the sorted list)
    pct_indexes = np.linspace(0, len(target_ratios) - 1, 10 + 1).astype("int")

    sns.set(font_scale=1.2, style="ticks")

    # write quantile targets
    with open("%s_qt.txt" % out_base, "w") as table_out:
        # scatter plot each quantile
        for qi in range(numq):
            # slice quantile
            gene_targets_quant = gene_targets[quant_indexes[qi]].astype("float32")
            gene_preds_quant = gene_preds[quant_indexes[qi]].astype("float32")

            for pqi, pct_i in enumerate(pct_indexes):
                ti = target_ratios[pct_i][1]

                # target_ratios is sorted by ratio, so it must be indexed by
                # the sampled position pct_i; indexing by the target id ti
                # would report some other target's (ratio, target) pair.
                print(qi, pqi, ti, target_ratios[pct_i], file=table_out)

                qout_pdf = "%s_pq%d_q%d.pdf" % (out_base, pqi, qi)
                plots.jointplot(
                    gene_targets_quant[:, ti],
                    gene_preds_quant[:, ti],
                    qout_pdf,
                    alpha=0.8,
                    point_size=8,
                    kind="reg",
                    figsize=5,
                    x_label="log2 Experiment",
                    y_label="log2 Prediction",
                )

Ejemplo n.º 7

0

Mostrar archivo

Archivo: basenji_sad_norm.py Proyecto: lisabang/basenji

def main():
    """Fit a Cauchy distribution to sampled SAD scores for each target.

    Takes one positional argument, a directory searched for `*/sad.h5`
    files. Existing fit/normalization datasets are removed from each file,
    SNPs are sampled across the files, a Cauchy location and scale are fit
    per target, and the fit parameters are written back into every file.

    QC output under the `-o` directory:
      fits.txt            fitted location and scale per target
      quantiles/t<i>.pdf  empirical versus fitted-Cauchy quantiles
      norm/t<i>.pdf       raw versus normalized SAD for the first file
    """
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("-o", dest="out_dir", default="sad_norm")
    parser.add_option(
        "-s",
        dest="sample",
        default=100000,
        type="int",
        help="Number of SNPs to sample for fit [Default: %default]",
    )
    (options, args) = parser.parse_args()

    # parser.error() prints the usage message and exits
    if len(args) != 1:
        parser.error("Must provide SAD HDF5 path")
    sad_h5_path = args[0]

    # retrieve chromosome SAD HDF5 files
    chr_sad_h5_files = sorted(glob.glob("%s/*/sad.h5" % sad_h5_path))
    if not chr_sad_h5_files:
        # a usage error rather than an assert, which is stripped under -O
        parser.error("No */sad.h5 files found under %s" % sad_h5_path)

    # datasets left behind by a previous run; each is checked individually
    # so a partially written file is still cleaned completely
    stale_datasets = (
        "target_cauchy_fit_loc",
        "target_cauchy_fit_scale",
        "target_cauchy_norm_loc",
        "target_cauchy_norm_scale",
    )

    # clean out any existing fits
    # count SNPs across chromosomes
    num_snps = 0
    for chr_sad_h5_file in chr_sad_h5_files:
        with h5py.File(chr_sad_h5_file, "r+") as chr_sad_h5:
            for dataset_name in stale_datasets:
                if dataset_name in chr_sad_h5:
                    del chr_sad_h5[dataset_name]

            # count SNPs
            num_snps += chr_sad_h5["SAD"].shape[0]
            num_targets = chr_sad_h5["SAD"].shape[-1]

    # sample SNPs across chromosomes
    sad = sample_sad(chr_sad_h5_files, options.sample, num_snps, num_targets)

    # initialize fit parameters
    target_cauchy_fit_loc = np.zeros(num_targets)
    target_cauchy_fit_scale = np.zeros(num_targets)

    # fit parameters
    for ti in range(num_targets):
        print("Fitting t%d" % ti, flush=True)
        cp = cauchy.fit(sad[:, ti])
        target_cauchy_fit_loc[ti] = cp[0]
        target_cauchy_fit_scale[ti] = cp[1]
    del sad

    # write across chromosomes
    for chr_sad_h5_file in chr_sad_h5_files:
        with h5py.File(chr_sad_h5_file, "r+") as chr_sad_h5:
            chr_sad_h5.create_dataset(
                "target_cauchy_fit_loc", data=target_cauchy_fit_loc
            )
            chr_sad_h5.create_dataset(
                "target_cauchy_fit_scale", data=target_cauchy_fit_scale
            )

    # compute normalization parameters
    # NOTE(review): the SAD5 object is discarded, so this presumably relies
    # on a side effect of constructing it - confirm against the SAD5 class.
    for chr_sad_h5_file in chr_sad_h5_files:
        SAD5(chr_sad_h5_file)

    # QC fit table; makedirs also handles a nested or already existing out_dir
    os.makedirs(options.out_dir, exist_ok=True)
    with open("%s/fits.txt" % options.out_dir, "w") as fit_out:
        for ti in range(num_targets):
            print(
                "%-4d  %7.1e  %7.1e"
                % (ti, target_cauchy_fit_loc[ti], target_cauchy_fit_scale[ti]),
                file=fit_out,
            )

    # QC quantiles
    quantile_dir = "%s/quantiles" % options.out_dir
    os.makedirs(quantile_dir, exist_ok=True)
    sad_qc = sample_sad(chr_sad_h5_files, 2048, num_snps, num_targets)
    for ti in np.linspace(0, num_targets - 1, 64, dtype="int"):
        # compute cauchy and argsort quantiles
        cauchy_q = cauchy.cdf(
            sad_qc[:, ti],
            loc=target_cauchy_fit_loc[ti],
            scale=target_cauchy_fit_scale[ti],
        )
        sort_i = np.argsort(sad_qc[:, ti])

        quantile_pdf = "%s/t%d.pdf" % (quantile_dir, ti)

        jointplot(
            np.linspace(0, 1, len(sort_i)),
            cauchy_q[sort_i],
            quantile_pdf,
            square=True,
            cor=None,
            x_label="Empirical",
            y_label="Cauchy",
        )

    # QC plots
    norm_dir = "%s/norm" % options.out_dir
    os.makedirs(norm_dir, exist_ok=True)
    chr_sad5 = SAD5(chr_sad_h5_files[0])
    qc_sample = 2048
    if qc_sample < chr_sad5.num_snps:
        ri = sorted(
            np.random.choice(
                np.arange(chr_sad5.num_snps), size=qc_sample, replace=False
            )
        )
    else:
        ri = np.arange(chr_sad5.num_snps)
    qc_sad_raw = chr_sad5.sad_matrix[ri]
    qc_sad_norm = chr_sad5[ri]
    for ti in np.linspace(0, num_targets - 1, 32, dtype="int"):
        # sns.jointplot creates its own figure, so no plt.figure() is opened
        # first (it would be left open); x/y go by keyword because newer
        # seaborn releases no longer accept them positionally.
        sns.jointplot(
            x=qc_sad_raw[:, ti],
            y=qc_sad_norm[:, ti],
            joint_kws={"alpha": 0.5, "s": 10},
        )
        plt.savefig("%s/t%d.pdf" % (norm_dir, ti))
        plt.close()