def cluster_count_matrix(config_file, lane_id, strain_fmt_string, cond_fmt_string):
    """Cluster the log2-transformed count matrix of one lane and save it.

    The dumped count matrix for `lane_id` is loaded, every value below the
    sample detection limit is raised to that limit, the matrix is
    log2-transformed, and the result is clustered with `clus.cluster` and
    saved under the clustered-count-matrix filename for the lane.

    Args:
        config_file: Config file handed to `cfp.parse`.
        lane_id: Identifier of the lane whose dumped count matrix is used.
        strain_fmt_string: Format string forwarded to `customize_strains`.
        cond_fmt_string: Format string forwarded to `customize_conditions`.

    Returns:
        The filename the clustering was saved under (same convention as
        `cluster_zscore_matrix`), or None when the dumped count matrix
        could not be read.
    """
    config_params = cfp.parse(config_file)

    # Only the sample detection limit is used here; the control limit is ignored.
    sample_detection_limit, _ = get_detection_limits(config_params)

    # If the file does not exist, then do not attempt to cluster it!
    try:
        genes, conditions, matrix = load_dumped_count_matrix(config_params, lane_id)
    except IOError:
        print("could not find '{}' count matrix".format(lane_id))
        return None

    # Floor the counts into a NEW array rather than assigning through an
    # alias of `matrix`: the loaded matrix stays untouched, and an
    # integer-typed matrix is not forced to truncate a fractional limit
    # (which could floor values to 0 and yield -inf after the log).
    thresholded_matrix = np.maximum(matrix, sample_detection_limit)
    logged_matrix = np.log2(thresholded_matrix)

    # Customize the strain and condition names for interpretable visualization!
    # The customize_* helpers are given the barcode/sample tables, exactly as
    # the other clustering functions in this file call them.
    strain_table = get_barcode_table(config_params)
    sample_table = get_sample_table(config_params)
    custom_genes = customize_strains(genes, strain_table, strain_fmt_string)
    custom_conditions = customize_conditions(conditions, sample_table, cond_fmt_string)

    dataset = [custom_genes, custom_conditions, logged_matrix]

    record, rows_tree, cols_tree = clus.cluster(dataset)

    f = get_clustered_count_matrix_filename(config_params, lane_id)
    record.save(f, rows_tree, cols_tree)

    # return the filename so callers can tell success (a filename) from
    # failure (None) and locate the saved clustering output
    return f
def cluster_zscore_matrix(config_file, lane_id, strain_fmt_string, cond_fmt_string):
    """Cluster the dumped z-score matrix of one lane and save the result.

    Args:
        config_file: Config file handed to `cfp.parse`.
        lane_id: Identifier of the lane whose dumped z-score matrix is used.
        strain_fmt_string: Format string forwarded to `customize_strains`.
        cond_fmt_string: Format string forwarded to `customize_conditions`.

    Returns:
        The filename the clustering was saved under, or None when the
        dumped z-score matrix could not be read.
    """
    params = cfp.parse(config_file)

    # A missing dump means there is nothing to cluster: report it and bail out.
    try:
        dumped = load_dumped_zscore_matrix(params, lane_id)
    except IOError:
        print("could not find '{}' zscore matrix".format(lane_id))
        return None
    strains, conds, zscores = dumped

    # Swap the raw strain/condition identifiers for formatted labels so the
    # resulting clustergram is easier to read.
    barcode_table = get_barcode_table(params)
    samples = get_sample_table(params)
    strain_labels = customize_strains(strains, barcode_table, strain_fmt_string)
    cond_labels = customize_conditions(conds, samples, cond_fmt_string)

    clustered = clus.cluster([strain_labels, cond_labels, zscores])
    record, rows_tree, cols_tree = clustered

    out_name = get_clustered_zscore_matrix_filename(params, lane_id)
    record.save(out_name, rows_tree, cols_tree)

    # Hand the filename back so the cdt/atr/gtr files can be gathered with the
    # other clustergrams and eventually tarred/gzipped for distribution.
    return out_name
def cluster_one_stacked_matrix(dataset, matrix_id, strain_table, sample_table, strain_fmt_string, cond_fmt_string, output_folder, new_matrix=None, verbosity=1):
    """Cluster one already-loaded matrix and save it under `output_folder`.

    Args:
        dataset: Three-item sequence unpacked as (genes, conditions, matrix).
        matrix_id: Name joined onto `output_folder` to form the output base.
        strain_table: Table forwarded to `customize_strains`.
        sample_table: Table forwarded to `customize_conditions`.
        strain_fmt_string: Format string forwarded to `customize_strains`.
        cond_fmt_string: Format string forwarded to `customize_conditions`.
        output_folder: Folder the clustering output is written into.
        new_matrix: Forwarded unchanged to `clus.cluster` (default None).
        verbosity: Forwarded to both customize_* helpers (default 1).

    Returns:
        The output base path, i.e. `output_folder` joined with `matrix_id`.
    """
    out_base = os.path.join(output_folder, matrix_id)

    strains, conds, values = dataset

    # Build display labels for the rows (strains) and columns (conditions).
    strain_labels = customize_strains(
        strains, strain_table, strain_fmt_string, verbosity=verbosity
    )
    cond_labels = customize_conditions(
        conds, sample_table, cond_fmt_string, verbosity=verbosity
    )
    labeled = [strain_labels, cond_labels, values]

    record, rows_tree, cols_tree = clus.cluster(
        labeled, file_base=out_base, new_matrix=new_matrix
    )
    record.save(out_base, rows_tree, cols_tree)

    # Hand the path back so the cdt/atr/gtr files can be gathered with the
    # other clustergrams and eventually tarred/gzipped for distribution.
    return out_base