Esempio n. 1
0
    def create_project(self, pid, sampleFilter, sampleFilterRole,
                       sampleFilterVals):
        """
        Applies the sample metadata filter to the project's raw OTU table and
        records the filtered table's dimensions in the project's map file.

        :param pid: project ID; also the name of the project's sub-directory
        :param sampleFilter: forwarded unchanged to UserRequest
        :param sampleFilterRole: forwarded unchanged to UserRequest
        :param sampleFilterVals: forwarded unchanged to UserRequest
        :return: (pid, "") on success, or (GENERAL_ERROR, "") if any step
                 raised, in which case the project directory has been removed
        """
        map_file = Map(self.user_id, pid)

        project_dir = os.path.join(ProjectManager.DATA_DIRECTORY,
                                   self.user_id, pid)

        try:
            user_request = UserRequest(self.user_id, pid, "", "", "", "", [],
                                       sampleFilter, sampleFilterRole,
                                       sampleFilterVals, 0, "")

            table = OTUTable(self.user_id, pid, use_raw=True, use_np=False)
            base, headers, sample_labels = table.filter_otu_table_by_metadata(
                table.get_table(), table.get_headers(),
                table.get_sample_labels(), user_request)

            # The matrix type does not change per cell, so pick the
            # conversion once instead of re-testing it inside the loop.
            cast = float if map_file.matrix_type == "float" else int

            # NOTE(review): the coerced values are not stored or written
            # anywhere in this method. The pass is kept only because a
            # failure other than ValueError (e.g. TypeError on a None cell)
            # aborts project creation through the handler below. Confirm
            # whether the coerced table should be persisted or this pass
            # dropped entirely.
            for row in base:
                for cell in row:
                    try:
                        cast(cell)
                    except ValueError:
                        # Unparseable cells are tolerated, not fatal
                        pass

            logger.info("Subsampled file")

            # Updates map.txt file
            map_file.num_samples = len(sample_labels)
            map_file.num_otus = len(headers)
            map_file.save()

            return pid, ""
        except Exception:
            # logger.exception records both the message and the traceback
            logger.exception("Error while processing the file format")
            # Removes the project directory since the files in it are invalid
            shutil.rmtree(project_dir, ignore_errors=True)
            return GENERAL_ERROR, ""
Esempio n. 2
0
    def run(self, user_request):
        """
        Loads the project's OTU table and hands it to ``self.analyse``
        together with the optional colouring metadata.

        :param user_request: request object supplying ``user_id``, ``pid``
                             and the "colorvar" custom attribute
        :return: whatever ``self.analyse`` returns
        """
        # Rarefaction curves are only useful on the original raw data set
        # (the third positional argument is presumably use_raw -- confirm
        # against the OTUTable constructor)
        otu_table = OTUTable(user_request.user_id, user_request.pid, True)
        counts = otu_table.get_table()
        otu_headers = otu_table.get_headers()
        labels = otu_table.get_sample_labels()

        color_var = user_request.get_custom_attr("colorvar")
        if color_var == "None":
            # The literal string "None" means no colouring variable was chosen
            color_values = []
        else:
            sample_metadata = otu_table.get_sample_metadata()
            color_values = sample_metadata.get_metadata_column_table_order(
                labels, color_var)

        return self.analyse(counts, otu_headers, labels, color_values)
Esempio n. 3
0
    def get_filtering_info(self, pid, sampleFilter, sampleFilterRole,
                           sampleFilterVals):
        """
        Returns information that tells the user which samples the metadata
        filter would remove, along with each sample's row sum in the raw table

        :param pid: project ID
        :param sampleFilter: forwarded unchanged to UserRequest
        :param sampleFilterRole: forwarded unchanged to UserRequest
        :param sampleFilterVals: forwarded unchanged to UserRequest
        :return: dict with two keys: "samples", mapping every original sample
                 label to {"row_sum": ..., "removed": bool}, and "has_float",
                 which is True when the map file's matrix type is "float"
        """
        user_request = UserRequest(self.user_id, pid, "", "", "", "", [],
                                   sampleFilter, sampleFilterRole,
                                   sampleFilterVals, 0, "")
        # Named map_file rather than map so the builtin is not shadowed
        map_file = Map(self.user_id, pid)

        table = OTUTable(self.user_id, pid, use_raw=True)
        orig_base = table.get_table()
        orig_headers = table.get_headers()
        orig_sample_labels = table.get_sample_labels()
        # Only the surviving sample labels are needed from the filter result
        _, _, sample_labels = table.filter_otu_table_by_metadata(
            orig_base, orig_headers, orig_sample_labels, user_request)
        initial_samples_removed = set(orig_sample_labels) - set(sample_labels)

        has_float = map_file.matrix_type == "float"

        counts = np.array(orig_base)
        if len(counts) == 0:
            # An empty table becomes a 1-D array, on which sum(axis=1) raises;
            # with no rows there is nothing to report per sample anyway.
            return {"samples": {}, "has_float": has_float}

        samples = {}
        for i, row_sum in enumerate(counts.sum(axis=1)):
            label = orig_sample_labels[i]
            samples[label] = {
                "row_sum": row_sum,
                "removed": label in initial_samples_removed
            }

        return {"samples": samples, "has_float": has_float}
    def test_load_tsv(self):
        """
        Prints timings for loading the "large_biom" table and for two
        aggregate-then-filter passes over it at taxonomic level 2.

        The two passes differ only in the aggregation method called
        (``aggregate_otu_table_at_taxonomic_level`` versus its ``_np``
        variant). Nothing is asserted; the output is the elapsed times and
        the shape of each filtered table.
        """
        request = AnalysisTestUtils.create_default_user_request()
        request.level = 2

        # Timing 1: constructing the table
        started_at = datetime.datetime.now()
        otu_table = OTUTable("unit_tests", "large_biom")
        print(datetime.datetime.now() - started_at)

        # Timing 2: plain aggregation followed by the low-count filter
        started_at = datetime.datetime.now()
        agg_table, agg_headers, agg_labels = \
            otu_table.aggregate_otu_table_at_taxonomic_level(
                otu_table.get_table(), otu_table.headers,
                otu_table.sample_labels, request)
        filtered_table, headers, sample_labels = \
            otu_table.filter_out_low_count_np(
                agg_table, agg_headers, agg_labels, request)
        print(filtered_table.shape[0])
        print(filtered_table.shape[1])
        print(datetime.datetime.now() - started_at)
        print("")

        # Timing 3: the _np aggregation followed by the same filter
        started_at = datetime.datetime.now()
        agg_table, agg_headers, agg_labels = \
            otu_table.aggregate_otu_table_at_taxonomic_level_np(
                otu_table.get_table(), otu_table.headers,
                otu_table.sample_labels, request)
        filtered_table, headers, sample_labels = \
            otu_table.filter_out_low_count_np(
                agg_table, agg_headers, agg_labels, request)
        print(filtered_table.shape[0])
        print(filtered_table.shape[1])

        print(datetime.datetime.now() - started_at)