def create_project(self, pid, sampleFilter, sampleFilterRole, sampleFilterVals):
    """
    Applies the sample metadata filter to the project's raw OTU table and stores the
    resulting number of samples and OTUs in the project's map file.

    If anything fails while processing, the project directory
    (``<DATA_DIRECTORY>/<user_id>/<pid>``) is deleted.

    :param pid: ID of the project; also the name of its directory under the user's data directory
    :param sampleFilter: forwarded unchanged to UserRequest
    :param sampleFilterRole: forwarded unchanged to UserRequest
    :param sampleFilterVals: forwarded unchanged to UserRequest
    :return: ``(pid, "")`` on success, ``(GENERAL_ERROR, "")`` on any failure
    """
    map_file = Map(self.user_id, pid)
    project_dir = os.path.join(ProjectManager.DATA_DIRECTORY, self.user_id, pid)

    try:
        user_request = UserRequest(self.user_id, pid, "", "", "", "", [],
                                   sampleFilter, sampleFilterRole, sampleFilterVals, 0, "")
        table = OTUTable(self.user_id, pid, use_raw=True, use_np=False)
        orig_base = table.get_table()
        orig_headers = table.get_headers()
        orig_sample_labels = table.get_sample_labels()

        base, headers, sample_labels = table.filter_otu_table_by_metadata(
            orig_base, orig_headers, orig_sample_labels, user_request)

        # Coerce every cell to the matrix's numeric type; cells that cannot be
        # parsed (ValueError) are treated as 0.
        # NOTE(review): new_base is not used after this loop. The pass is kept
        # because any other conversion error (e.g. TypeError) still propagates
        # to the handler below and invalidates the project - confirm whether
        # that validation is intentional before removing it.
        new_base = []
        for row in base:
            new_row = []
            for value in row:
                try:
                    if map_file.matrix_type == "float":
                        new_row.append(float(value))
                    else:
                        new_row.append(int(value))
                except ValueError:
                    new_row.append(0)
            new_base.append(new_row)

        logger.info("Subsampled file")

        # Updates map.txt file
        map_file.num_samples = len(sample_labels)
        map_file.num_otus = len(headers)
        map_file.save()

        return pid, ""
    except Exception:
        # logger.exception already records the exception message and traceback,
        # so no separate print of the exception is needed.
        logger.exception("Error while processing the file format")
        # Removes the project directory since the files in it are invalid
        shutil.rmtree(project_dir, ignore_errors=True)
        return GENERAL_ERROR, ""
def run(self, user_request):
    """
    Loads the project's OTU table and hands it to ``self.analyse`` together with the
    per-sample values of the metadata column chosen for colouring.

    :param user_request: request providing ``user_id``, ``pid`` and the "colorvar" custom attribute
    :return: whatever ``self.analyse`` returns
    """
    # Rarefaction curves are only useful on the original raw data set
    otu_table = OTUTable(user_request.user_id, user_request.pid, True)
    counts = otu_table.get_table()
    otu_headers = otu_table.get_headers()
    labels = otu_table.get_sample_labels()

    # The literal string "None" means that no colouring variable was selected
    colour_values = []
    if user_request.get_custom_attr("colorvar") != "None":
        sample_metadata = otu_table.get_sample_metadata()
        colour_values = sample_metadata.get_metadata_column_table_order(
            labels, user_request.get_custom_attr("colorvar"))

    return self.analyse(counts, otu_headers, labels, colour_values)
def get_filtering_info(self, pid, sampleFilter, sampleFilterRole, sampleFilterVals):
    """
    Returns information that will tell the user which samples would be removed by the
    sample filter, along with the row sum of every sample in the raw table.

    :param pid: ID of the project whose raw OTU table is inspected
    :param sampleFilter: forwarded unchanged to UserRequest
    :param sampleFilterRole: forwarded unchanged to UserRequest
    :param sampleFilterVals: forwarded unchanged to UserRequest
    :return: dict with two keys: "samples" maps every original sample label to
             ``{"row_sum": <sum of that sample's row>, "removed": <bool>}`` and
             "has_float" is True when the map file's matrix type is "float"
    """
    user_request = UserRequest(self.user_id, pid, "", "", "", "", [],
                               sampleFilter, sampleFilterRole, sampleFilterVals, 0, "")
    # Named map_file (not "map") so the builtin map() is not shadowed
    map_file = Map(self.user_id, pid)
    table = OTUTable(self.user_id, pid, use_raw=True)
    orig_base = table.get_table()
    orig_headers = table.get_headers()
    orig_sample_labels = table.get_sample_labels()

    # Only the surviving sample labels are needed from the filtered result
    _, _, sample_labels = table.filter_otu_table_by_metadata(
        orig_base, orig_headers, orig_sample_labels, user_request)
    initial_samples_removed = set(orig_sample_labels) - set(sample_labels)

    has_float = map_file.matrix_type == "float"

    # One sum per row of the unfiltered table
    orig_row_sums = np.array(orig_base).sum(axis=1)

    samples = {}
    for i, row_sum in enumerate(orig_row_sums):
        label = orig_sample_labels[i]
        samples[label] = {
            "row_sum": row_sum,
            "removed": label in initial_samples_removed
        }

    return {"samples": samples, "has_float": has_float}
def test_load_tsv(self):
    """
    Prints timings for loading the "large_biom" project of the "unit_tests" user and
    for two aggregate-then-filter passes over it: one using
    ``aggregate_otu_table_at_taxonomic_level`` and one using its ``_np`` variant.
    After each pass the two dimensions of the filtered table are printed as well.

    Nothing is asserted; the output is meant to be compared by eye.
    """
    user_request = AnalysisTestUtils.create_default_user_request()
    user_request.level = 2

    # Time the construction of the table
    load_started = datetime.datetime.now()
    otu_table = OTUTable("unit_tests", "large_biom")
    print(datetime.datetime.now() - load_started)

    # First pass: aggregate_otu_table_at_taxonomic_level, then the low count filter
    pass_started = datetime.datetime.now()
    agg_table, agg_headers, agg_labels = otu_table.aggregate_otu_table_at_taxonomic_level(
        otu_table.get_table(), otu_table.headers, otu_table.sample_labels, user_request)
    result_table, result_headers, result_labels = otu_table.filter_out_low_count_np(
        agg_table, agg_headers, agg_labels, user_request)
    print(result_table.shape[0])
    print(result_table.shape[1])
    print(datetime.datetime.now() - pass_started)

    print("")

    # Second pass: aggregate_otu_table_at_taxonomic_level_np, then the same filter
    pass_started = datetime.datetime.now()
    agg_table, agg_headers, agg_labels = otu_table.aggregate_otu_table_at_taxonomic_level_np(
        otu_table.get_table(), otu_table.headers, otu_table.sample_labels, user_request)
    result_table, result_headers, result_labels = otu_table.filter_out_low_count_np(
        agg_table, agg_headers, agg_labels, user_request)
    print(result_table.shape[0])
    print(result_table.shape[1])
    print(datetime.datetime.now() - pass_started)