def save_differential_expression_csv(clustering_key, de, matrix, base_dir,
                                     cluster_names=None,
                                     file_name='differential_expression'):
    """Write a differential expression table to `<out_dir>/<file_name>.csv`.

    The columns of `de.data` are consumed in consecutive groups of three
    (mean counts, log2 fold change, adjusted p value); one header triple is
    generated per group.

    Args:
        clustering_key: Sub-directory of `base_dir` to write into. When None,
            the file is written directly into `base_dir`.
        de: Object whose `data` attribute is the table to save; `data.shape[1]`
            determines the number of clusters.
        matrix: Object exposing `feature_ref.feature_defs`. The `(id, name)`
            pair of each feature is handed to `save_matrix_csv` as the row
            prefix (presumably filling the 'Feature ID' / 'Feature Name'
            columns -- confirm against `analysis_io.save_matrix_csv`).
        base_dir: Directory under which the output is created.
        cluster_names: Optional sequence indexed by cluster position. When
            given, headers read 'Perturbation <name>, ...' instead of
            'Cluster <n> ...'.
        file_name: Base name of the CSV file, without the '.csv' extension.
    """
    out_dir = base_dir
    if clustering_key is not None:
        out_dir = os.path.join(base_dir, clustering_key)
    cr_io.makedirs(out_dir, allow_existing=True)

    diff_expression_fn = os.path.join(out_dir, file_name + '.csv')
    diff_expression_header = ['Feature ID', 'Feature Name']

    # Floor division keeps the cluster count an integer even when true
    # division is in effect (Python 3 or `from __future__ import division`).
    n_clusters = de.data.shape[1] // 3
    for i in range(n_clusters):
        if cluster_names is None:
            diff_expression_header += [
                'Cluster %d Mean Counts' % (i + 1),
                'Cluster %d Log2 fold change' % (i + 1),
                'Cluster %d Adjusted p value' % (i + 1),
            ]
        else:
            diff_expression_header += [
                'Perturbation %s, Mean Counts' % cluster_names[i],
                'Perturbation %s, Log2 fold change' % cluster_names[i],
                'Perturbation %s, Adjusted p value' % cluster_names[i],
            ]

    diff_expression_prefixes = [(f.id, f.name) for f in matrix.feature_ref.feature_defs]
    analysis_io.save_matrix_csv(diff_expression_fn,
                                de.data,
                                diff_expression_header,
                                diff_expression_prefixes)
Beispiel #2
0
def save_lsa_csv(lsa_map, matrix, base_dir):
    """Export every LSA result in `lsa_map` as a directory of CSV files.

    For each `(n_components, lsa)` entry a directory
    `<base_dir>/<n_components>_components` is created and the following files
    are written into it: projection.csv, components.csv, variance.csv,
    dispersion.csv and features_selected.csv.

    Args:
        lsa_map: Dict mapping a component count to an LSA result object.
        matrix: Object providing `bcs` (row labels for the projection) and
            `feature_ref.feature_defs` (feature ids).
        base_dir: Directory under which the per-component-count directories
            are created.
    """
    for n_components, lsa in lsa_map.iteritems():
        out_dir = os.path.join(base_dir, '%d_components' % n_components)
        cr_io.makedirs(out_dir, allow_existing=True)

        # Projection: one header column per column actually present.
        n_columns = lsa.transformed_lsa_matrix.shape[1]
        assert n_columns <= n_components
        projection_header = ['Barcode']
        projection_header.extend('PC-%d' % pc for pc in xrange(1, n_columns + 1))
        analysis_io.save_matrix_csv(os.path.join(out_dir, 'projection.csv'),
                                    lsa.transformed_lsa_matrix,
                                    projection_header,
                                    matrix.bcs)

        # Components: rows are labelled 1..n_components.
        components_header = ['PC']
        components_header.extend(f.id for f in matrix.feature_ref.feature_defs)
        analysis_io.save_matrix_csv(os.path.join(out_dir, 'components.csv'),
                                    lsa.components,
                                    components_header,
                                    range(1, n_components + 1))

        # Variance explained, labelled 1..n_components.
        analysis_io.save_matrix_csv(os.path.join(out_dir, 'variance.csv'),
                                    lsa.variance_explained,
                                    ['PC', 'Proportion.Variance.Explained'],
                                    range(1, n_components + 1))

        # Dispersion, labelled by feature id.
        dispersion_labels = [f.id for f in matrix.feature_ref.feature_defs]
        analysis_io.save_matrix_csv(os.path.join(out_dir, 'dispersion.csv'),
                                    lsa.dispersion,
                                    ['Feature', 'Normalized.Dispersion'],
                                    dispersion_labels)

        # Selected features, labelled 1..len(features_selected).
        # TODO: there are two columns here, but only 1 entry in the header...BAD
        selected_labels = range(1, len(lsa.features_selected) + 1)
        analysis_io.save_matrix_csv(os.path.join(out_dir, 'features_selected.csv'),
                                    lsa.features_selected,
                                    ['Feature'],
                                    selected_labels)
Beispiel #3
0
def save_pca_csv(pca_map, matrix, base_dir):
    """Export every PCA result in `pca_map` as a directory of CSV files.

    For each `(n_components, pca)` entry a directory
    `<base_dir>/<n_components>_components` is created and the following files
    are written into it: projection.csv, components.csv, variance.csv,
    dispersion.csv and genes_selected.csv.

    Args:
        pca_map: Dict mapping a component count to a PCA result object.
        matrix: Object providing `bcs` (row labels for the projection) and
            `genes` (objects with an `id` attribute).
        base_dir: Directory under which the per-component-count directories
            are created.
    """
    for n_components, pca in pca_map.iteritems():
        out_dir = os.path.join(base_dir, '%d_components' % n_components)
        cr_utils.makedirs(out_dir, allow_existing=True)

        # Projection: the header always carries n_components PC columns.
        projection_header = ['Barcode']
        projection_header.extend('PC-%d' % pc for pc in xrange(1, n_components + 1))
        cr_io.save_matrix_csv(os.path.join(out_dir, 'projection.csv'),
                              pca.transformed_pca_matrix,
                              projection_header,
                              matrix.bcs)

        # Components: rows are labelled 1..n_components.
        components_header = ['PC']
        components_header.extend(gene.id for gene in matrix.genes)
        cr_io.save_matrix_csv(os.path.join(out_dir, 'components.csv'),
                              pca.components,
                              components_header,
                              range(1, n_components + 1))

        # Variance explained, labelled 1..n_components.
        cr_io.save_matrix_csv(os.path.join(out_dir, 'variance.csv'),
                              pca.variance_explained,
                              ['PC', 'Proportion.Variance.Explained'],
                              range(1, n_components + 1))

        # Dispersion, labelled by gene id.
        dispersion_labels = [gene.id for gene in matrix.genes]
        cr_io.save_matrix_csv(os.path.join(out_dir, 'dispersion.csv'),
                              pca.dispersion,
                              ['Gene', 'Normalized.Dispersion'],
                              dispersion_labels)

        # Selected genes, labelled 1..len(genes_selected).
        selected_labels = range(1, len(pca.genes_selected) + 1)
        cr_io.save_matrix_csv(os.path.join(out_dir, 'genes_selected.csv'),
                              pca.genes_selected,
                              ['Gene'],
                              selected_labels)
Beispiel #4
0
def save_clustering_csv(base_dir, clustering_key, labels, barcodes):
    """Write cluster assignments to `<base_dir>/<clustering_key>/clusters.csv`.

    Args:
        base_dir: Directory under which the clustering directory is created.
        clustering_key: Name of the sub-directory for this clustering.
        labels: Cluster labels, passed to `save_matrix_csv` as the data.
        barcodes: Barcodes, passed to `save_matrix_csv` as the row prefixes.
    """
    cluster_dir = os.path.join(base_dir, clustering_key)
    cr_io.makedirs(cluster_dir, allow_existing=True)

    analysis_io.save_matrix_csv(os.path.join(cluster_dir, 'clusters.csv'),
                                labels,
                                ['Barcode', 'Cluster'],
                                barcodes)
Beispiel #5
0
def save_tsne_csv(tsne_map, matrix, base_dir):
    """Export every t-SNE projection in `tsne_map` to its own CSV file.

    For each `(n_tsne_components, tsne)` entry, writes
    `<base_dir>/<n_tsne_components>_components/projection.csv`.

    Args:
        tsne_map: Dict mapping a component count to a t-SNE result object.
        matrix: Object providing `bcs`, used as the row prefixes.
        base_dir: Directory under which the output directories are created.
    """
    for n_dims, tsne in tsne_map.iteritems():
        out_dir = os.path.join(base_dir, '%d_components' % n_dims)
        cr_utils.makedirs(out_dir, allow_existing=True)

        # One 'TSNE-<k>' column per requested component, numbered from 1.
        header = ['Barcode']
        for axis in xrange(1, n_dims + 1):
            header.append('TSNE-%d' % axis)

        projection_fn = os.path.join(out_dir, 'projection.csv')
        cr_io.save_matrix_csv(projection_fn, tsne.transformed_tsne_matrix,
                              header, matrix.bcs)
Beispiel #6
0
def save_tsne_csv(tsne, matrix, base_dir):
    """Save a single TSNE object to `<base_dir>/<tsne.key>_components/projection.csv`.

    Args:
        tsne: Object providing `key` and `transformed_tsne_matrix`.
        matrix: Object providing `bcs`, used as the row prefixes.
        base_dir: Directory under which the output directory is created.
    """
    # Preserve backward compatibility with pre-3.0 CSV files
    #   where the CSV directory was named "2_components" and the HDF5 dataset was named "_2"
    out_dir = os.path.join(base_dir, tsne.key + '_components')
    cr_io.makedirs(out_dir, allow_existing=True)

    projection = tsne.transformed_tsne_matrix
    # One 'TSNE-<k>' column per column of the projection, numbered from 1.
    header = ['Barcode']
    for axis in xrange(1, projection.shape[1] + 1):
        header.append('TSNE-%d' % axis)

    analysis_io.save_matrix_csv(os.path.join(out_dir, 'projection.csv'),
                                tsne.transformed_tsne_matrix,
                                header,
                                matrix.bcs)
def save_differential_expression_csv_from_features(clustering_key, de, diff_expression_prefixes, base_dir):
    """Write `<base_dir>/<clustering_key>/differential_expression.csv`.

    The columns of `de.data` are consumed in consecutive groups of three
    (mean counts, log2 fold change, adjusted p value); one header triple is
    generated per group.

    Args:
        clustering_key: Name of the sub-directory of `base_dir` to write into.
        de: Object whose `data` attribute is the table to save; `data.shape[1]`
            determines the number of clusters.
        diff_expression_prefixes: Per-row prefixes handed unchanged to
            `save_matrix_csv` (presumably `(feature id, feature name)` pairs
            matching the first two header entries -- confirm with callers).
        base_dir: Directory under which the output is created.
    """
    out_dir = os.path.join(base_dir, clustering_key)
    cr_io.makedirs(out_dir, allow_existing=True)

    diff_expression_fn = os.path.join(out_dir, 'differential_expression.csv')
    diff_expression_header = ['Feature ID', 'Feature Name']

    # Floor division keeps the cluster count an integer even when true
    # division is in effect (Python 3 or `from __future__ import division`).
    n_clusters = de.data.shape[1] // 3
    for i in range(n_clusters):
        diff_expression_header += [
            'Cluster %d Mean Counts' % (i + 1),
            'Cluster %d Log2 fold change' % (i + 1),
            'Cluster %d Adjusted p value' % (i + 1),
        ]

    analysis_io.save_matrix_csv(diff_expression_fn,
                                de.data,
                                diff_expression_header,
                                diff_expression_prefixes)
def save_pca_csv_with_bc_feature(pca_map, barcodes, features, base_dir):
    """Export every PCA result in `pca_map`, using explicit barcodes/features.

    For each `(n_components, pca)` entry a directory
    `<base_dir>/<n_components>_components` is created. projection.csv is
    always written; components.csv, variance.csv, dispersion.csv and
    features_selected.csv are written only when the corresponding PCA member
    is non-empty.

    Args:
        pca_map: Dict mapping a component count to a PCA result object.
        barcodes: Row prefixes for the projection.
        features: Iterable of objects with an `id` attribute.
        base_dir: Directory under which the per-component-count directories
            are created.
    """
    for n_components, pca in pca_map.iteritems():
        out_dir = os.path.join(base_dir, '%d_components' % n_components)
        cr_io.makedirs(out_dir, allow_existing=True)

        # Projection: one header column per column actually present.
        n_columns = pca.transformed_pca_matrix.shape[1]
        assert n_columns <= n_components
        projection_header = ['Barcode']
        projection_header.extend('PC-%d' % pc for pc in xrange(1, n_columns + 1))
        analysis_io.save_matrix_csv(os.path.join(out_dir, 'projection.csv'),
                                    pca.transformed_pca_matrix,
                                    projection_header,
                                    barcodes)

        # FBPCA presently provides 0-sized entries for the following PCA() member variables.
        #   This allows us to distinguish FBPCA from IRLBA, and also avoids weird empty files.
        if pca.components.size > 0:
            components_header = ['PC']
            components_header.extend(f.id for f in features)
            analysis_io.save_matrix_csv(os.path.join(out_dir, 'components.csv'),
                                        pca.components,
                                        components_header,
                                        range(1, n_components + 1))

        if pca.variance_explained.size > 0:
            analysis_io.save_matrix_csv(os.path.join(out_dir, 'variance.csv'),
                                        pca.variance_explained,
                                        ['PC', 'Proportion.Variance.Explained'],
                                        range(1, n_components + 1))

        if pca.dispersion.size > 0:
            dispersion_labels = [f.id for f in features]
            analysis_io.save_matrix_csv(os.path.join(out_dir, 'dispersion.csv'),
                                        pca.dispersion,
                                        ['Feature', 'Normalized.Dispersion'],
                                        dispersion_labels)

        if pca.features_selected.size > 0:
            # TODO: there are two columns here, but only 1 entry in the header...BAD
            selected_labels = range(1, len(pca.features_selected) + 1)
            analysis_io.save_matrix_csv(os.path.join(out_dir, 'features_selected.csv'),
                                        pca.features_selected,
                                        ['Feature'],
                                        selected_labels)
Beispiel #9
0
def save_differential_expression_csv(clustering_key, de, matrix, base_dir):
    """Write `<base_dir>/<clustering_key>/differential_expression.csv`.

    The columns of `de.data` are consumed in consecutive groups of three
    (mean UMI counts, log2 fold change, adjusted p value); one header triple
    is generated per group.

    Args:
        clustering_key: Name of the sub-directory of `base_dir` to write into.
        de: Object whose `data` attribute is the table to save; `data.shape[1]`
            determines the number of clusters.
        matrix: Object exposing `genes`. The `(id, name)` pair of each gene is
            handed to `save_matrix_csv` as the row prefix (presumably filling
            the 'Gene ID' / 'Gene Name' columns -- confirm against
            `cr_io.save_matrix_csv`).
        base_dir: Directory under which the output is created.
    """
    out_dir = os.path.join(base_dir, clustering_key)
    cr_utils.makedirs(out_dir, allow_existing=True)

    diff_expression_fn = os.path.join(out_dir, 'differential_expression.csv')
    diff_expression_header = ['Gene ID', 'Gene Name']

    # Floor division keeps the cluster count an integer even when true
    # division is in effect (Python 3 or `from __future__ import division`).
    n_clusters = de.data.shape[1] // 3
    for i in range(n_clusters):
        diff_expression_header += [
            'Cluster %d Mean UMI Counts' % (i + 1),
            'Cluster %d Log2 fold change' % (i + 1),
            'Cluster %d Adjusted p value' % (i + 1),
        ]

    diff_expression_prefixes = [(gene.id, gene.name) for gene in matrix.genes]
    cr_io.save_matrix_csv(diff_expression_fn, de.data, diff_expression_header,
                          diff_expression_prefixes)