예제 #1
0
def split(args):
    """Request a single chunk sized for the matrix in the analysis h5.

    When ``args.analysis`` is set, the analysis h5 is opened read-only and
    the memory figure is taken from its matrix group; otherwise the chunk
    falls back to ``cr_constants.MIN_MEM_GB``.

    Returns a dict whose ``'chunks'`` entry is a one-element list carrying
    the ``'__mem_gb'`` request.
    """
    if not args.analysis:
        mem_gb = cr_constants.MIN_MEM_GB
    else:
        # Estimate memory usage from the matrix stored in the analysis h5
        analysis_h5 = cr_io.h5_path(args.analysis)
        with tables.open_file(analysis_h5, 'r') as h5_file:
            group = getattr(h5_file.root, cr_constants.ANALYSIS_H5_MATRIX_GROUP)
            mem_gb = cr_matrix.GeneBCMatrix.get_mem_gb_from_group(group)

    return {'chunks': [{'__mem_gb': mem_gb}]}
예제 #2
0
def split(args):
    """Request one chunk sized for the analysis matrix, plus a minimal join.

    The chunk's ``'__mem_gb'`` starts at ``h5_constants.MIN_MEM_GB`` and is
    replaced by the figure computed from the ``'matrix'`` group of the
    analysis h5 when ``args.analysis`` is set. The join step always asks for
    ``h5_constants.MIN_MEM_GB``.
    """
    mem_gb = h5_constants.MIN_MEM_GB
    if args.analysis:
        # Estimate memory usage from the matrix stored in the analysis h5
        with h5.File(analysis_io.h5_path(args.analysis), 'r') as h5_file:
            mem_gb = cr_matrix.CountMatrix.get_mem_gb_from_group(h5_file['matrix'])

    return {
        'chunks': [{'__mem_gb': mem_gb}],
        'join': {'__mem_gb': h5_constants.MIN_MEM_GB},
    }
예제 #3
0
def main(args, outs):
    """Assemble the combined analysis h5 and the analysis CSV directories.

    Does nothing when ``args.skip`` is set. For multi-genome runs the
    multi-genome JSON and CSV trees are copied into ``outs.analysis`` and
    ``outs.analysis_csv`` first. The first group of ``args.matrix_h5`` is
    then copied into a fresh analysis h5 under the name ``'matrix'``, the
    children of the pca, clustering, diffexp and tsne h5 files are appended
    to it, and each of their CSV directories is copied to a same-named
    subdirectory of ``outs.analysis_csv``.
    """
    if args.skip:
        return

    if args.is_multi_genome:
        cr_io.copytree(args.multi_genome_json, outs.analysis)
        cr_io.copytree(args.multi_genome_csv, outs.analysis_csv)

    analysis_h5 = analysis_io.h5_path(outs.analysis)
    cr_io.makedirs(os.path.dirname(analysis_h5), allow_existing=True)

    # Pytables doesn't support variable len strings, so use h5py first
    with h5.File(args.matrix_h5, 'r') as matrix,\
         h5.File(analysis_h5, 'w') as out:
        # TODO: copy the first group; fixme when we have a key
        # keys() is a non-indexable view under Python 3, so materialize it
        # before taking the first entry (still IndexError on an empty file).
        name = list(matrix.keys())[0]
        matrix.copy(matrix[name], out, name='matrix')

    # Reopen the output in append mode so the matrix copied above is kept.
    with tables.open_file(args.pca_h5, 'r') as pca,\
         tables.open_file(args.clustering_h5, 'r') as clustering,\
         tables.open_file(args.diffexp_h5, 'r') as diffexp,\
         tables.open_file(args.tsne_h5, 'r') as tsne,\
         tables.open_file(analysis_h5, 'a') as out:

        pca.copy_children(pca.root, out.root, recursive=True)
        clustering.copy_children(clustering.root, out.root, recursive=True)
        diffexp.copy_children(diffexp.root, out.root, recursive=True)
        tsne.copy_children(tsne.root, out.root, recursive=True)

    pca_dir = os.path.join(outs.analysis_csv, 'pca')
    cr_io.copytree(args.pca_csv, pca_dir)

    clustering_dir = os.path.join(outs.analysis_csv, 'clustering')
    cr_io.copytree(args.clustering_csv, clustering_dir)

    diffexp_dir = os.path.join(outs.analysis_csv, 'diffexp')
    cr_io.copytree(args.diffexp_csv, diffexp_dir)

    tsne_dir = os.path.join(outs.analysis_csv, 'tsne')
    cr_io.copytree(args.tsne_csv, tsne_dir)
예제 #4
0
def main(args, outs):
    """Merge the per-analysis h5 files and CSV directories into the outputs.

    Returns immediately when ``args.skip`` is set. For multi-genome runs the
    multi-genome JSON and CSV trees are copied to ``outs.analysis`` and
    ``outs.analysis_csv`` and nothing else is done. Otherwise a new analysis
    h5 is written containing the first group of ``args.matrix_h5`` (renamed
    to ``'matrix'``) followed by the children of the pca, clustering, diffexp
    and tsne h5 files, and each CSV directory is copied to a same-named
    subdirectory of ``outs.analysis_csv``.
    """
    if args.skip:
        return

    if args.is_multi_genome:
        cr_utils.copytree(args.multi_genome_json, outs.analysis)
        cr_utils.copytree(args.multi_genome_csv, outs.analysis_csv)
        return

    merged_h5_path = cr_io.h5_path(outs.analysis)
    merged_h5_dir = os.path.dirname(merged_h5_path)
    cr_utils.makedirs(merged_h5_dir, allow_existing=True)

    with tables.open_file(args.matrix_h5, 'r') as matrix_h5,\
         tables.open_file(args.pca_h5, 'r') as pca_h5,\
         tables.open_file(args.clustering_h5, 'r') as clustering_h5,\
         tables.open_file(args.diffexp_h5, 'r') as diffexp_h5,\
         tables.open_file(args.tsne_h5, 'r') as tsne_h5,\
         tables.open_file(merged_h5_path, 'w') as merged:

        # NOTE - genome name is replaced with 'matrix'
        first_group = [node for node in matrix_h5.root][0]
        matrix_h5.copy_node(first_group, merged.root, recursive=True, newname='matrix')

        # Append every top-level child of each analysis file, in this order
        for source in (pca_h5, clustering_h5, diffexp_h5, tsne_h5):
            source.copy_children(source.root, merged.root, recursive=True)

    # Each '<section>_csv' argument lands in '<analysis_csv>/<section>'
    for section in ('pca', 'clustering', 'diffexp', 'tsne'):
        section_dir = os.path.join(outs.analysis_csv, section)
        cr_utils.copytree(getattr(args, section + '_csv'), section_dir)
예제 #5
0
 def load_default_format(base_dir, method):
     """Load the analysis found under ``base_dir``, or None if its h5 is absent.

     The h5 location is resolved with ``analysis_io.h5_path``; when that file
     exists it is passed to ``SingleGenomeAnalysis.load_h5`` together with
     ``method`` and the result is returned.
     """
     analysis_h5 = analysis_io.h5_path(base_dir)
     if not os.path.exists(analysis_h5):
         return None
     return SingleGenomeAnalysis.load_h5(analysis_h5, method)