def save_differential_expression_csv(clustering_key, de, matrix, base_dir, cluster_names=None, file_name='differential_expression'):
    """Save a differential expression table as ``<file_name>.csv``.

    Args:
        clustering_key: Sub-directory of ``base_dir`` to write into, or
            ``None`` to write directly into ``base_dir``.
        de: Object whose ``data`` attribute is the 2-D table to save. The
            header built here assumes three columns per cluster (mean counts,
            log2 fold change, adjusted p value).
        matrix: Object whose ``feature_ref.feature_defs`` entries provide the
            ``id`` and ``name`` used as the row prefixes.
        base_dir: Root output directory.
        cluster_names: Optional sequence indexed by cluster number. When
            given, columns are titled ``Perturbation <name>, ...`` instead of
            ``Cluster <n> ...``.
        file_name: Output file name without the ``.csv`` extension.
    """
    if clustering_key is None:
        out_dir = base_dir
    else:
        out_dir = os.path.join(base_dir, clustering_key)
    cr_io.makedirs(out_dir, allow_existing=True)
    csv_path = os.path.join(out_dir, file_name + '.csv')

    # Explicit floor division: plain `/` only yields an int under Python 2
    # classic division; with true division the float would break xrange().
    n_clusters = de.data.shape[1] // 3

    header = ['Feature ID', 'Feature Name']
    for i in xrange(n_clusters):
        if cluster_names is None:
            header += [
                'Cluster %d Mean Counts' % (i + 1),
                'Cluster %d Log2 fold change' % (i + 1),
                'Cluster %d Adjusted p value' % (i + 1),
            ]
        else:
            header += [
                'Perturbation %s, Mean Counts' % cluster_names[i],
                'Perturbation %s, Log2 fold change' % cluster_names[i],
                'Perturbation %s, Adjusted p value' % cluster_names[i],
            ]

    row_prefixes = [(f.id, f.name) for f in matrix.feature_ref.feature_defs]
    analysis_io.save_matrix_csv(csv_path, de.data, header, row_prefixes)
def save_lsa_csv(lsa_map, matrix, base_dir):
    """Save every LSA result in ``lsa_map`` as a set of CSV files.

    ``lsa_map`` maps a component count to an LSA result. Each result is
    written under ``<base_dir>/<n>_components`` as projection.csv,
    components.csv, variance.csv, dispersion.csv and features_selected.csv.
    Barcodes come from ``matrix.bcs`` and feature ids from
    ``matrix.feature_ref.feature_defs``.
    """
    for n_components, lsa in lsa_map.iteritems():
        out_dir = os.path.join(base_dir, '%d_components' % n_components)
        cr_io.makedirs(out_dir, allow_existing=True)

        # Projection, labelled by barcode. The matrix may carry fewer columns
        # than the requested component count, never more.
        projection = lsa.transformed_lsa_matrix
        n_columns = projection.shape[1]
        assert n_columns <= n_components
        pc_columns = ['PC-%d' % k for k in xrange(1, n_columns + 1)]
        analysis_io.save_matrix_csv(
            os.path.join(out_dir, 'projection.csv'),
            projection,
            ['Barcode'] + pc_columns,
            matrix.bcs)

        # Component loadings, labelled 1..n_components.
        analysis_io.save_matrix_csv(
            os.path.join(out_dir, 'components.csv'),
            lsa.components,
            ['PC'] + [feature.id for feature in matrix.feature_ref.feature_defs],
            range(1, n_components + 1))

        # Variance explained, labelled 1..n_components.
        analysis_io.save_matrix_csv(
            os.path.join(out_dir, 'variance.csv'),
            lsa.variance_explained,
            ['PC', 'Proportion.Variance.Explained'],
            range(1, n_components + 1))

        # Dispersion, labelled by feature id.
        analysis_io.save_matrix_csv(
            os.path.join(out_dir, 'dispersion.csv'),
            lsa.dispersion,
            ['Feature', 'Normalized.Dispersion'],
            [feature.id for feature in matrix.feature_ref.feature_defs])

        # TODO: there are two columns here, but only 1 entry in the header...BAD
        analysis_io.save_matrix_csv(
            os.path.join(out_dir, 'features_selected.csv'),
            lsa.features_selected,
            ['Feature'],
            range(1, len(lsa.features_selected) + 1))
def save_pca_csv(pca_map, matrix, base_dir):
    """Save every PCA result in ``pca_map`` as a set of CSV files.

    ``pca_map`` maps a component count to a PCA result. Each result is
    written under ``<base_dir>/<n>_components`` as projection.csv,
    components.csv, variance.csv, dispersion.csv and genes_selected.csv.
    Barcodes come from ``matrix.bcs`` and gene ids from ``matrix.genes``.
    """
    for n_components, pca in pca_map.iteritems():
        out_dir = os.path.join(base_dir, '%d_components' % n_components)
        cr_utils.makedirs(out_dir, allow_existing=True)

        # Projection, labelled by barcode, one 'PC-k' column per component.
        pc_columns = ['PC-%d' % k for k in xrange(1, n_components + 1)]
        cr_io.save_matrix_csv(
            os.path.join(out_dir, 'projection.csv'),
            pca.transformed_pca_matrix,
            ['Barcode'] + pc_columns,
            matrix.bcs)

        # Component loadings, labelled 1..n_components.
        cr_io.save_matrix_csv(
            os.path.join(out_dir, 'components.csv'),
            pca.components,
            ['PC'] + [g.id for g in matrix.genes],
            range(1, n_components + 1))

        # Variance explained, labelled 1..n_components.
        cr_io.save_matrix_csv(
            os.path.join(out_dir, 'variance.csv'),
            pca.variance_explained,
            ['PC', 'Proportion.Variance.Explained'],
            range(1, n_components + 1))

        # Dispersion, labelled by gene id.
        cr_io.save_matrix_csv(
            os.path.join(out_dir, 'dispersion.csv'),
            pca.dispersion,
            ['Gene', 'Normalized.Dispersion'],
            [g.id for g in matrix.genes])

        # Selected genes, labelled with a 1-based running index.
        cr_io.save_matrix_csv(
            os.path.join(out_dir, 'genes_selected.csv'),
            pca.genes_selected,
            ['Gene'],
            range(1, len(pca.genes_selected) + 1))
def save_clustering_csv(base_dir, clustering_key, labels, barcodes):
    """Save cluster labels to ``<base_dir>/<clustering_key>/clusters.csv``.

    The output directory is created if needed. ``labels`` is saved under the
    header ``Barcode, Cluster`` with ``barcodes`` as the row prefixes.
    """
    cluster_dir = os.path.join(base_dir, clustering_key)
    cr_io.makedirs(cluster_dir, allow_existing=True)
    analysis_io.save_matrix_csv(
        os.path.join(cluster_dir, 'clusters.csv'),
        labels,
        ['Barcode', 'Cluster'],
        barcodes)
def save_tsne_csv(tsne_map, matrix, base_dir):
    """Save every TSNE result in ``tsne_map`` as a projection CSV.

    ``tsne_map`` maps a component count to a TSNE result. Each result's
    ``transformed_tsne_matrix`` is written to
    ``<base_dir>/<n>_components/projection.csv`` with ``matrix.bcs`` as the
    row prefixes.
    """
    for n_tsne_components, tsne in tsne_map.iteritems():
        out_dir = os.path.join(base_dir, '%d_components' % n_tsne_components)
        cr_utils.makedirs(out_dir, allow_existing=True)

        axis_columns = [
            'TSNE-%d' % k for k in xrange(1, n_tsne_components + 1)
        ]
        cr_io.save_matrix_csv(
            os.path.join(out_dir, 'projection.csv'),
            tsne.transformed_tsne_matrix,
            ['Barcode'] + axis_columns,
            matrix.bcs)
def save_tsne_csv(tsne, matrix, base_dir):
    """Save a single TSNE object as a projection CSV.

    The file goes to ``<base_dir>/<tsne.key>_components/projection.csv``, with
    one ``TSNE-k`` column per column of ``tsne.transformed_tsne_matrix`` and
    ``matrix.bcs`` as the row prefixes.
    """
    # The "<key>_components" directory name keeps pre-3.0 CSV layouts working:
    # those used a "2_components" directory (and an HDF5 dataset named "_2").
    out_dir = os.path.join(base_dir, tsne.key + '_components')
    cr_io.makedirs(out_dir, allow_existing=True)

    projection = tsne.transformed_tsne_matrix
    axis_columns = ['TSNE-%d' % k for k in xrange(1, projection.shape[1] + 1)]
    analysis_io.save_matrix_csv(
        os.path.join(out_dir, 'projection.csv'),
        projection,
        ['Barcode'] + axis_columns,
        matrix.bcs)
def save_differential_expression_csv_from_features(clustering_key, de, diff_expression_prefixes, base_dir):
    """Save a differential expression table using caller-supplied row prefixes.

    Args:
        clustering_key: Sub-directory of ``base_dir`` to write into.
        de: Object whose ``data`` attribute is the 2-D table to save. The
            header built here assumes three columns per cluster (mean counts,
            log2 fold change, adjusted p value).
        diff_expression_prefixes: Row prefixes passed straight through to
            ``analysis_io.save_matrix_csv``; they sit under the
            ``Feature ID`` / ``Feature Name`` header columns.
        base_dir: Root output directory.
    """
    out_dir = os.path.join(base_dir, clustering_key)
    cr_io.makedirs(out_dir, allow_existing=True)
    csv_path = os.path.join(out_dir, 'differential_expression.csv')

    # Explicit floor division: plain `/` only yields an int under Python 2
    # classic division; with true division the float would break xrange().
    n_clusters = de.data.shape[1] // 3

    header = ['Feature ID', 'Feature Name']
    for i in xrange(n_clusters):
        header += [
            'Cluster %d Mean Counts' % (i + 1),
            'Cluster %d Log2 fold change' % (i + 1),
            'Cluster %d Adjusted p value' % (i + 1),
        ]

    analysis_io.save_matrix_csv(csv_path, de.data, header, diff_expression_prefixes)
def save_pca_csv_with_bc_feature(pca_map, barcodes, features, base_dir):
    """Save every PCA result in ``pca_map`` as CSV files.

    Barcodes and features are passed in explicitly rather than read from a
    matrix. ``pca_map`` maps a component count to a PCA result. Each result
    is written under ``<base_dir>/<n>_components``. projection.csv is always
    written; components.csv, variance.csv, dispersion.csv and
    features_selected.csv are written only when the corresponding PCA member
    is non-empty.
    """
    for n_components, pca in pca_map.iteritems():
        out_dir = os.path.join(base_dir, '%d_components' % n_components)
        cr_io.makedirs(out_dir, allow_existing=True)

        # Projection, labelled by barcode. The matrix may carry fewer columns
        # than the requested component count, never more.
        projection = pca.transformed_pca_matrix
        n_columns = projection.shape[1]
        assert n_columns <= n_components
        pc_columns = ['PC-%d' % k for k in xrange(1, n_columns + 1)]
        analysis_io.save_matrix_csv(
            os.path.join(out_dir, 'projection.csv'),
            projection,
            ['Barcode'] + pc_columns,
            barcodes)

        # FBPCA presently provides 0-sized entries for the following PCA() member variables.
        # This allows us to distinguish FBPCA from IRLBA, and also avoids weird empty files.
        if pca.components.size > 0:
            analysis_io.save_matrix_csv(
                os.path.join(out_dir, 'components.csv'),
                pca.components,
                ['PC'] + [feature.id for feature in features],
                range(1, n_components + 1))

        if pca.variance_explained.size > 0:
            analysis_io.save_matrix_csv(
                os.path.join(out_dir, 'variance.csv'),
                pca.variance_explained,
                ['PC', 'Proportion.Variance.Explained'],
                range(1, n_components + 1))

        if pca.dispersion.size > 0:
            analysis_io.save_matrix_csv(
                os.path.join(out_dir, 'dispersion.csv'),
                pca.dispersion,
                ['Feature', 'Normalized.Dispersion'],
                [feature.id for feature in features])

        if pca.features_selected.size > 0:
            # TODO: there are two columns here, but only 1 entry in the header...BAD
            analysis_io.save_matrix_csv(
                os.path.join(out_dir, 'features_selected.csv'),
                pca.features_selected,
                ['Feature'],
                range(1, len(pca.features_selected) + 1))
def save_differential_expression_csv(clustering_key, de, matrix, base_dir):
    """Save a per-gene differential expression table to CSV.

    The file is ``<base_dir>/<clustering_key>/differential_expression.csv``.

    Args:
        clustering_key: Sub-directory of ``base_dir`` to write into.
        de: Object whose ``data`` attribute is the 2-D table to save. The
            header built here assumes three columns per cluster (mean UMI
            counts, log2 fold change, adjusted p value).
        matrix: Object whose ``genes`` entries provide the ``id`` and
            ``name`` used as the row prefixes.
        base_dir: Root output directory.
    """
    out_dir = os.path.join(base_dir, clustering_key)
    cr_utils.makedirs(out_dir, allow_existing=True)
    csv_path = os.path.join(out_dir, 'differential_expression.csv')

    # Explicit floor division: plain `/` only yields an int under Python 2
    # classic division; with true division the float would break xrange().
    n_clusters = de.data.shape[1] // 3

    header = ['Gene ID', 'Gene Name']
    for i in xrange(n_clusters):
        header += [
            'Cluster %d Mean UMI Counts' % (i + 1),
            'Cluster %d Log2 fold change' % (i + 1),
            'Cluster %d Adjusted p value' % (i + 1),
        ]

    row_prefixes = [(gene.id, gene.name) for gene in matrix.genes]
    cr_io.save_matrix_csv(csv_path, de.data, header, row_prefixes)