コード例 #1
0
 def __load_metadata_samples(self):
     """
     Read the sample metadata file for this user/project and keep the
     resulting table on the instance as ``self.metadata``.
     :return: None
     """
     metadata_table = DataIO.tsv_to_table(
         self.user_id, self.pid, SAMPLE_METADATA_FILENAME)
     self.metadata = metadata_table
コード例 #2
0
    def get_otu_table_headers_at_taxonomic_level(user_id, pid, level, use_raw=False):
        """
        Return the OTU table headers collapsed to a taxonomic level.

        :param user_id: user id forwarded to ``DataIO`` and ``Taxonomy``
        :param pid: project id forwarded to ``DataIO`` and ``Taxonomy``
        :param level: index into each OTU's taxonomy entry (any value that
            ``int()`` accepts); ``-1`` requests the OTU headers themselves
        :param use_raw: not consulted; the raw labels file is always read
        :return: the first row of the labels file when ``level`` is -1,
            otherwise the distinct non-empty taxonomy names found at
            ``level`` for the headers present in the taxonomy map, in order
            of first appearance
        """
        logger.info("Using raw data")
        labels = DataIO.tsv_to_table(user_id, pid, RAW_GENE_TABLE_LABELS_FILENAME)
        headers = labels[0]

        # Convert once here instead of on every loop iteration.
        level_index = int(level)
        if level_index == -1:
            # OTUs requested
            return headers

        taxonomy_map = Taxonomy(user_id, pid).get_taxonomy_map()
        taxonomies = []
        # Names already emitted; keeps the result free of duplicates while
        # preserving first-seen order.
        seen = set()
        for otu in headers:
            if otu not in taxonomy_map:
                continue
            name = taxonomy_map[otu][level_index]
            if name != "" and name not in seen:
                seen.add(name)
                taxonomies.append(name)
        return taxonomies
コード例 #3
0
    def load_otu_table(self, user_id, pid, use_raw, use_np):
        """
        Populate this instance with a project's sample metadata, taxonomy,
        raw table and table labels.

        :param user_id: stored on the instance and used to locate the files
        :param pid: project id, stored on the instance
        :param use_raw: not consulted; the raw table files are always read
        :param use_np: when truthy the table is read with
            ``DataIO.tsv_to_np_table``, otherwise with ``DataIO.tsv_to_table``
        :return: None
        """
        self.user_id, self.pid = user_id, pid

        logger.info("Before load")
        self.sample_metadata = Metadata(user_id, pid)
        logger.info("Finished metadata loading")
        self.otu_metadata = Taxonomy(user_id, pid)
        logger.info("Finished taxonomy loading")

        logger.info("Using raw data")
        # Pick the reader up front; both are called with the same arguments.
        read_table = DataIO.tsv_to_np_table if use_np else DataIO.tsv_to_table
        self.table = read_table(self.user_id, self.pid, RAW_GENE_TABLE_FILENAME)

        # First row of the labels file holds the headers, second the sample labels.
        label_rows = DataIO.tsv_to_table(
            self.user_id, self.pid, RAW_GENE_TABLE_LABELS_FILENAME)
        self.headers = label_rows[0]
        self.sample_labels = label_rows[1]
コード例 #4
0
 def get_file_for_download(self, project_name, type):
     """
     Build the table offered for download for one of the user's projects.

     :param project_name: forwarded to ``DataIO.tsv_to_table`` to locate
         the project's files
     :param type: ``"sample_metadata"`` returns the sample metadata table
         as loaded; ``"otu"`` returns the raw table with a header row and a
         leading sample-label column added
     :return: a list of rows, or ``[]`` for any other ``type``
     """
     if type == "sample_metadata":
         return DataIO.tsv_to_table(
             self.user_id, project_name, SAMPLE_METADATA_FILENAME)

     if type == "otu":
         rows = DataIO.tsv_to_table(
             self.user_id, project_name, RAW_GENE_TABLE_FILENAME)
         labels = DataIO.tsv_to_table(
             self.user_id, project_name, RAW_GENE_TABLE_LABELS_FILENAME)

         # Header row: fixed first column name followed by the table headers.
         full_table = [["Sample Labels"] + list(labels[0])]
         for position, row in enumerate(rows):
             # Rows beyond the available sample labels get an empty label.
             label = labels[1][position] if position < len(labels[1]) else ""
             full_table.append([label] + list(row))
         return full_table

     return []
コード例 #5
0
    def test__create_project_from_tsv(self):
        """
        Stage and create a project from the ``small_biom`` TSV fixtures and
        check the subsampled table and its labels.

        The fixture files are copied into the ``unit_tests`` staging
        directory, the project is staged and created, and the generated
        project directory is removed again at the end, even when one of
        the checks on the generated files fails.
        """
        project_manager = ProjectManager("unit_tests")

        unit_tests_dir = os.path.join(TestProjectManager.UNIT_TESTS_DIRECTORY,
                                      "small_biom")
        test_staging_dir = os.path.join(TestProjectManager.STAGING_DIRECTORY,
                                        "unit_tests")
        if not os.path.exists(test_staging_dir):
            os.makedirs(test_staging_dir)

        # Put the fixture files where the project manager looks for uploads.
        for fixture in ("table.raw.tsv", "taxonomy.tsv", "sample_metadata.tsv"):
            shutil.copyfile(os.path.join(unit_tests_dir, fixture),
                            os.path.join(test_staging_dir, fixture))

        _status, pid = project_manager.stage_project_from_tsv(
            "tmp_project", "table.raw.tsv", "taxonomy.tsv",
            "sample_metadata.tsv", "")
        project_manager.create_project(pid, "", "", [])

        test_project_dir = os.path.join(
            TestProjectManager.UNIT_TESTS_DIRECTORY, pid)
        self.assertTrue(os.path.exists(test_project_dir))

        # From here on the project directory exists; always remove it so a
        # failing assertion does not leave a stale directory behind.
        try:
            subsampled_table = DataIO.tsv_to_np_table("unit_tests", pid,
                                                      "table.subsampled.tsv")
            self.assertEqual(6, len(subsampled_table))

            # Every row of the subsampled table is expected to sum to 30.
            for row in subsampled_table:
                self.assertEqual(30, np.sum(row))

            subsampled_table_labels = DataIO.tsv_to_table(
                "unit_tests", pid, "table.subsampled.labels.tsv")
            self.assertEqual(2, len(subsampled_table_labels))
            self.assertEqual(5, len(subsampled_table_labels[0]))
            self.assertEqual(6, len(subsampled_table_labels[1]))
        finally:
            shutil.rmtree(test_project_dir)
コード例 #6
0
    def stage_project_from_tsv(self, project_name, otu_filename,
                               sample_metadata_filename):
        """
        Stage a new project from an uploaded OTU table and sample metadata.

        A fresh project directory is created under the user's data
        directory, the sample metadata file is moved into it from the
        user's staging directory, the OTU table is loaded and processed,
        and a map file describing the project is saved.

        :param project_name: name recorded in the map file
        :param otu_filename: name of the uploaded OTU file inside the user's
            staging directory
        :param sample_metadata_filename: name of the uploaded sample
            metadata file inside the user's staging directory
        :return: ``(OK, pid)`` on success; ``(OTU_DATATYPE_ERROR, "")`` when
            processing the OTU table raises ``ValueError``;
            ``(OTU_ERROR, "")`` when it raises any other ``Exception``
        :raises Exception: if the project directory already exists
        """
        # Creates a directory for this project
        pid = str(uuid.uuid4())
        project_dir = os.path.join(ProjectManager.DATA_DIRECTORY,
                                   self.user_id, pid)
        if os.path.exists(project_dir):
            # No exception is active here, so log a plain error rather than
            # logger.exception (which would attach an empty traceback).
            logger.error("Cannot create project folder")
            raise Exception(
                "Cannot create project folder as it already exists")
        os.makedirs(project_dir)

        # Renames the uploaded files to a standard file schema and moves to the project directory
        user_staging_dir = os.path.join(ProjectManager.STAGING_DIRECTORY,
                                        self.user_id)
        os.rename(os.path.join(user_staging_dir, sample_metadata_filename),
                  os.path.join(project_dir, SAMPLE_METADATA_FILENAME))

        # The loaded table is not used further in this method; the call is
        # kept so that any error it raises for the file still surfaces.
        # NOTE(review): presumably this validates the headers - confirm.
        DataIO.tsv_to_table(self.user_id,
                            pid,
                            SAMPLE_METADATA_FILENAME,
                            accept_empty_headers=False)

        logger.info("Beginning to load the OTU table")
        base_arr = DataIO.tsv_to_table_from_path(
            os.path.join(user_staging_dir, otu_filename),
            accept_empty_headers=False)

        # Processes the uploaded OTU file by removing unnecessary columns and extracting the headers and sample labels
        try:
            logger.info("Beginning to process the OTU table")
            base, headers, sample_labels, matrix_type = self.__process_base(
                self.user_id, pid, base_arr)
        except ValueError:
            logger.exception("OTU file contains non-integers")
            # Removes the project directory since the files in it are invalid
            shutil.rmtree(project_dir, ignore_errors=True)
            return OTU_DATATYPE_ERROR, ""
        except Exception:
            # Deliberately broad, but no longer swallows KeyboardInterrupt
            # or SystemExit the way a bare ``except:`` would.
            logger.exception("Invalid OTU file format")
            # Removes the project directory since the files in it are invalid
            shutil.rmtree(project_dir, ignore_errors=True)
            return OTU_ERROR, ""

        # Creates map.txt file
        logger.info("Creating the map.txt file")
        map_file = Map(self.user_id, pid)
        map_file.project_name = project_name
        map_file.orig_otu_table_name = otu_filename
        map_file.orig_sample_metadata_name = sample_metadata_filename
        map_file.matrix_type = matrix_type
        map_file.num_samples = len(sample_labels)
        map_file.num_otus = len(headers)
        map_file.save()

        return OK, pid
コード例 #7
0
 def __load_taxonomy(self):
     """
     Read the raw table labels file for this user/project and build
     ``self.taxonomy_map`` from its first row.
     :return: None
     """
     # Only the first row (the headers) feeds the taxonomy mapping.
     first_row = DataIO.tsv_to_table(
         self.user_id, self.pid, RAW_GENE_TABLE_LABELS_FILENAME)[0]
     self.taxonomy_map = self.__get_taxonomy_mapping_from_dict(first_row)