# Prepare the output folder, load the sample and barcode tables, cluster the
# stacked matrix in one call, and then call update_version_file.

# Make sure the output folder exists before anything is written into it.
if not os.path.isdir(output_folder):
    os.makedirs(output_folder)

# Read in the sample table
sample_table = read_sample_table(args.sample_table)

# Read in the barcode table
strain_table = read_barcode_table(args.strain_table)

# Cluster the matrix, with the option of clustering a new dataset using the
# original matrix to define the clustering.
# NOTE(review): element 2 of new_dataset is presumably its matrix -- confirm
# against whatever builds new_dataset.
if new_dataset is not None:
    new_matrix = new_dataset[2]
else:
    new_matrix = None

clus_wrap.cluster_one_stacked_matrix(
    dataset, args.clustergram_name, strain_table, sample_table,
    args.strain_columns, args.condition_columns, output_folder,
    new_matrix, args.verbosity)

update_version_file(output_folder, VERSION)

## Extract one matrix from the stack of matrices at a time, cluster, and
## export to files! Retain the filenames so they can be tarred/gzipped
## to be ready for distribution.
#filenames_noext = []
#num_components = dataset[0]
#for i in num_components:
#    matrix_id = '{}_{}-components-removed'.format(args.clustergram_name, i)
#    print 'clustering {} matrix'.format(matrix_id)
#    single_matrix_dataset = np.array([dataset[1], dataset[2], dataset[3][i]])
#    if new_dataset is not None:
#        new_matrix = new_dataset[3][i]
#    else:
# Cluster every matrix in the stack separately, collect the extension-less
# output filenames, expand them into the cdt/atr/gtr file list, and pick the
# temporary folder name used for archiving.

# NOTE(review): the strain table is read with read_sample_table here, while
# another section of this file reads the same argument with
# read_barcode_table -- confirm which reader is intended.
strain_table = read_sample_table(args.strain_table)

# Extract one matrix from the stack of matrices at a time, cluster, and
# export to files! Retain the filenames so they can be tarred/gzipped
# to be ready for distribution.
filenames_noext = []
num_components = dataset[0]
# NOTE(review): each value taken from dataset[0] is used directly as an index
# into dataset[3] (and new_dataset[3]) -- confirm they are valid indices.
for i in num_components:
    matrix_id = '{}_{}-components-removed'.format(args.clustergram_name, i)
    # Single-argument parenthesised print: same output on Python 2, and
    # still valid syntax on Python 3.
    print('clustering {} matrix'.format(matrix_id))
    single_matrix_dataset = np.array([dataset[1], dataset[2], dataset[3][i]])
    if new_dataset is not None:
        new_matrix = new_dataset[3][i]
    else:
        new_matrix = None
    filename_noext = clus_wrap.cluster_one_stacked_matrix(
        single_matrix_dataset, matrix_id, strain_table, sample_table,
        args.strain_columns, args.condition_columns, output_folder,
        new_matrix, args.verbosity)
    filenames_noext.append(filename_noext)

###### Ultimate goal: combine all CDTs into one big tarred/gzipped folder, for distribution!
# First, get all of the filenames (cdt, atr, gtr)
filenames = []
for f_noext in filenames_noext:
    for ext in ['cdt', 'atr', 'gtr']:
        filenames.append('{}.{}'.format(f_noext, ext))

if args.verbosity >= 2:
    print(filenames)

# Now, create a temporary folder to hold all of the clustergrams for tarring/gzipping
# This temp folder name is also the name of the tarred archive. Cool, right?!
tmp_dir = get_temp_clustergram_name(output_folder, args.clustergram_name)
if args.verbosity >= 2:
    print(tmp_dir)