Example 1

# Imports needed by this excerpt; the rest of the script is assumed to provide
# args, dataset, new_dataset, output_folder, VERSION, the table-reading helpers,
# and the clus_wrap module used below.
import os
import numpy as np

if not os.path.isdir(output_folder):
    os.makedirs(output_folder)

# Read in the sample table
sample_table = read_sample_table(args.sample_table)

# Read in the barcode table
strain_table = read_barcode_table(args.strain_table)
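# (Both tables are assumed to be returned as DataFrame-like objects whose
#  columns include those named in args.strain_columns and args.condition_columns,
#  which the clustering wrapper uses to label rows and columns.)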

# Cluster the matrix, with the option of clustering a new dataset using the original
# matrix to define the clustering
if new_dataset is not None:
    new_matrix = new_dataset[2]
else:
    new_matrix = None
clus_wrap.cluster_one_stacked_matrix(dataset, args.clustergram_name, strain_table, sample_table, args.strain_columns, args.condition_columns, output_folder, new_matrix, args.verbosity)

# Record the pipeline version used for this run in the output folder
update_version_file(output_folder, VERSION)

# Extract one matrix from the stack of matrices at a time, cluster, and
# export to files! Retain the filenames so they can be tarred/gzipped
# to be ready for distribution.
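# Note on layout (inferred from the indexing below, not stated in this excerpt):
# dataset[0] holds the numbers of components removed, dataset[1] and dataset[2]
# presumably hold the row (strain) and column (condition) information, and
# dataset[3] holds the corresponding stack of matrices; new_dataset, when
# provided, supplies its matrices at index 3 as well.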
filenames_noext = []
num_components = dataset[0]
for i in num_components:
    matrix_id = '{}_{}-components-removed'.format(args.clustergram_name, i)
    print 'clustering {} matrix'.format(matrix_id)
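    # Bundle dataset[1], dataset[2], and the single matrix dataset[3][i] into a
    # 3-element array so the clustering wrapper is handed one matrix at a time.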
    single_matrix_dataset = np.array([dataset[1], dataset[2], dataset[3][i]])
    if new_dataset is not None:
        new_matrix = new_dataset[3][i]
    else:
        new_matrix = None
    filename_noext = clus_wrap.cluster_one_stacked_matrix(single_matrix_dataset, matrix_id, strain_table, sample_table, args.strain_columns, args.condition_columns, output_folder, new_matrix, args.verbosity)
    filenames_noext.append(filename_noext)

###### Ultimate goal: combine all CDTs into one big tarred/gzipped folder, for distribution!
# First, get all of the filenames (cdt, atr, gtr)
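# (.cdt = clustered data table, .atr = array/column tree, .gtr = gene/row tree,
#  the standard Java TreeView-style clustering output files)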
filenames = []
for f_noext in filenames_noext:
    for ext in ['cdt', 'atr', 'gtr']:
        filenames.append('{}.{}'.format(f_noext, ext))
if args.verbosity >= 2:
    print filenames
# Now, create a temporary folder to hold all of the clustergrams for tarring/gzipping
# This temp folder name is also the name of the tarred archive. Cool, right?!
tmp_dir = get_temp_clustergram_name(output_folder, args.clustergram_name)
if args.verbosity >= 2:
    print tmp_dir
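
# -----------------------------------------------------------------------------
# Hypothetical continuation (a minimal sketch, not the original implementation):
# one way the clustergram files could be copied into tmp_dir and tarred/gzipped
# for distribution, as the comments above describe. It assumes the entries in
# filenames resolve from the current working directory (join them with
# output_folder first if they are relative) and that tmp_dir lives inside
# output_folder. Imports are placed here only to keep the sketch self-contained.
import shutil
import tarfile

if not os.path.isdir(tmp_dir):
    os.makedirs(tmp_dir)
for fname in filenames:
    # Copy each cdt/atr/gtr file into the temporary clustergram folder
    shutil.copy(fname, tmp_dir)
# Archive the whole temp folder; its basename doubles as the archive name
with tarfile.open('{}.tar.gz'.format(tmp_dir), 'w:gz') as tar:
    tar.add(tmp_dir, arcname=os.path.basename(tmp_dir))
# Remove the temporary folder once its contents have been archived
shutil.rmtree(tmp_dir)
# -----------------------------------------------------------------------------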