Exemple #1
0
    def load(project_id):
        """
        Fetch a project document from OSDF by ID and build a Project from it.

        Args:
            project_id (str): The OSDF ID for the document to load.

        Returns:
            The object produced by Project.load_project() for the document
            that OSDF returned.
        """
        module_logger.debug("In load. Specified ID: %s", project_id)

        # The node is retrieved through the OSDF client exposed by the
        # iHMP session.
        ihmp_session = iHMPSession.get_session()
        module_logger.info("Got iHMP session.")

        osdf = ihmp_session.get_osdf()
        node_doc = osdf.get_node(project_id)

        module_logger.info("Creating a template %s.", __name__)
        loaded = Project.load_project(node_doc)

        module_logger.debug("Returning loaded %s.", __name__)
        return loaded
Exemple #2
0
    def visit_attributes(self):
        """
        Generate the VisitAttribute objects linked to this visit.

        OSDF is queried page by page for documents whose 'associated_with'
        linkage refers to this visit's ID. Each returned document is
        converted with VisitAttribute.load_visit_attr().

        Args:
            None

        Yields:
            One VisitAttribute per document returned by the query.
        """
        # Imported at call time rather than at module level (presumably to
        # avoid a circular import - TODO confirm).
        from VisitAttribute import VisitAttribute

        oql = '"{}"[linkage.associated_with]'.format(self.id)
        run_query = iHMPSession.get_session().get_osdf().oql_query

        page_no = 1
        while True:
            page = run_query(Visit.namespace, oql, page=page_no)
            reported = page['result_count']

            for attr_doc in page['results']:
                yield VisitAttribute.load_visit_attr(attr_doc)

            # Stop once this page accounts for every result it reports.
            # NOTE(review): the count is re-read on each page rather than
            # carried over between pages - confirm this matches the meaning
            # of 'result_count' in the OSDF response.
            if reported - len(page['results']) < 1:
                break

            page_no += 1
Exemple #3
0
    def is_valid(self):
        """
        Report whether the object's current data passes validation.

        The raw document is submitted to the OSDF instance for validation,
        and in addition the object must carry a 'derived_from' link. Unlike
        validate(), no error details are returned.

        Args:
            None

        Returns:
            The validity flag reported by OSDF, forced to False when no
            'derived_from' link is present.
        """
        self.logger.debug("In is_valid.")

        document = self._get_raw_doc()

        session = iHMPSession.get_session()
        self.logger.info("Got iHMP session.")

        # The error message is deliberately ignored; validate() reports it.
        (valid, _error_message) = session.get_osdf().validate_node(document)

        # Test membership on the mapping itself instead of building a
        # key collection first.
        if 'derived_from' not in self._links:
            valid = False

        # %s already stringifies the argument, and only if the record is
        # actually emitted.
        self.logger.debug("Valid? %s", valid)

        return valid
Exemple #4
0
    def validate(self):
        """
        Validate the object's current data and describe any problems.

        The raw document is submitted to the OSDF instance for validation,
        and the presence of a 'derived_from' link is checked locally.

        Args:
            None

        Returns:
            A list of problems: the error message returned by OSDF if
            validation failed, plus a message if the 'derived_from' link is
            missing. The list is empty when no problem was found.
        """
        self.logger.debug("In validate.")

        document = self._get_raw_doc()

        session = iHMPSession.get_session()
        self.logger.info("Got iHMP session.")

        (valid, error_message) = session.get_osdf().validate_node(document)

        problems = []
        if not valid:
            self.logger.info("Validation did not succeed.")
            problems.append(error_message)

        # Test membership on the mapping itself instead of building a
        # key collection first.
        if 'derived_from' not in self._links:
            problems.append("Must have a 'derived_from' link to a "
                            "microb_assay_prep or a host_assay_prep.")

        self.logger.debug("Number of validation problems: %s.", len(problems))
        return problems
Exemple #5
0
    def validate(self):
        """
        Validate the object's current data and describe any problems.

        The raw document is submitted to the OSDF instance for validation,
        and the presence of an 'associated_with' link is checked locally.

        Args:
            None

        Returns:
            A list of problems: the error message returned by OSDF if
            validation failed, plus a message if the 'associated_with' link
            is missing. The list is empty when no problem was found.
        """
        self.logger.debug("In validate.")

        document = self._get_raw_doc()

        session = iHMPSession.get_session()
        self.logger.info("Got iHMP session.")

        (valid, error_message) = session.get_osdf().validate_node(document)

        problems = []

        if not valid:
            self.logger.info("Validation did not succeed for %s.", __name__)
            problems.append(error_message)

        # Test membership on the mapping itself instead of building a
        # key collection first.
        if 'associated_with' not in self._links:
            problems.append("Must add an 'associated_with' link to a visit.")

        self.logger.debug("Number of validation problems: %s.", len(problems))

        return problems
Exemple #6
0
    def annotations(self):
        """
        Generate the Annotation nodes that were computed from this object.

        OSDF is queried page by page for 'annotation' documents whose
        'computed_from' linkage refers to this object's ID.

        Yields:
            One object built by Annotation.load_annotation() per document
            returned by the query.
        """
        self.logger.debug("In annotations().")

        oql = '"annotation"[node_type] && "{}"[linkage.computed_from]'.format(
            self.id)

        run_query = iHMPSession.get_session().get_osdf().oql_query

        from cutlass.Annotation import Annotation

        page_no = 1
        while True:
            page = run_query(WgsAssembledSeqSet.namespace, oql, page=page_no)
            reported = page['result_count']

            for node_doc in page['results']:
                yield Annotation.load_annotation(node_doc)

            # Stop once this page accounts for every result it reports.
            # NOTE(review): the count is re-read on each page rather than
            # carried over between pages - confirm this matches the meaning
            # of 'result_count' in the OSDF response.
            if reported - len(page['results']) < 1:
                break

            page_no += 1
Exemple #7
0
    def _upload_data(self):
        """
        Upload the local file to the Aspera server and record its remote URL.

        The remote path is assembled from the study's directory, the
        "metabolome" area, the subtype and a sanitized version of the local
        file's base name. On success self._urls is replaced with a single
        fasp:// URL pointing at the uploaded file.

        Raises:
            ValueError: If self._study has no directory mapping.
            Exception: If aspera.upload_file() reports a failure.
        """
        self.logger.debug("In _upload_data.")

        session = iHMPSession.get_session()

        study = self._study

        study2dir = {"ibd": "ibd", "preg_preterm": "ptb", "prediabetes": "t2d"}

        if study not in study2dir:
            raise ValueError("Invalid study. No directory mapping for %s" %
                             study)

        study_dir = study2dir[study]

        # Keep only whitelisted characters in the remote file name. Spaces
        # are not whitelisted, so they are dropped here; a later
        # space-to-underscore replacement could never match anything.
        valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
        remote_base = ''.join(
            c for c in os.path.basename(self._local_file) if c in valid_chars)

        remote_path = "/".join(
            ["/" + study_dir, "metabolome", self._subtype, remote_base])
        self.logger.debug("Remote path for this file will be %s.", remote_path)

        upload_result = aspera.upload_file(Metabolome.aspera_server,
                                           session.username, session.password,
                                           self._local_file, remote_path)

        if not upload_result:
            self.logger.error("Experienced an error uploading the data. "
                              "Aborting save.")
            raise Exception("Unable to upload " + __name__)

        self._urls = ["fasp://" + Metabolome.aspera_server + remote_path]
    def abundance_matrices(self):
        """
        Generate the AbundanceMatrix nodes that were computed from this
        object.

        OSDF is queried page by page for documents whose 'computed_from'
        linkage refers to this object's ID.

        Yields:
            One object built by AbundanceMatrix.load_abundance_matrix() per
            document returned by the query.
        """
        self.logger.debug("In abundance_matrices().")

        oql = '"{}"[linkage.computed_from]'.format(self.id)

        run_query = iHMPSession.get_session().get_osdf().oql_query

        from cutlass.AbundanceMatrix import AbundanceMatrix

        for page_number in count(1):
            page = run_query(SixteenSTrimmedSeqSet.namespace, oql,
                             page=page_number)
            reported = page['result_count']

            for matrix_doc in page['results']:
                yield AbundanceMatrix.load_abundance_matrix(matrix_doc)

            # Stop once this page accounts for every result it reports.
            # NOTE(review): the count is re-read on each page rather than
            # carried over between pages - confirm this matches the meaning
            # of 'result_count' in the OSDF response.
            unaccounted = reported - len(page['results'])
            if unaccounted < 1:
                break
    def cytokines(self):
        """
        Generate the Cytokine nodes derived from this MicrobiomeAssayPrep.

        OSDF is queried page by page for 'cytokine' documents whose
        'derived_from' linkage refers to this object's ID.

        Yields:
            One object built by Cytokine.load_cytokine() per document
            returned by the query.
        """
        self.logger.debug("In cytokines().")

        oql = '"{}"[linkage.derived_from] && "cytokine"[node_type]'.format(
            self.id)

        run_query = iHMPSession.get_session().get_osdf().oql_query

        from cutlass.Cytokine import Cytokine

        page_no = 1
        while True:
            page = run_query(MicrobiomeAssayPrep.namespace, oql, page=page_no)
            reported = page['result_count']

            for cytokine_doc in page['results']:
                yield Cytokine.load_cytokine(cytokine_doc)

            # Stop once this page accounts for every result it reports.
            # NOTE(review): the count is re-read on each page rather than
            # carried over between pages - confirm this matches the meaning
            # of 'result_count' in the OSDF response.
            if reported - len(page['results']) < 1:
                break

            page_no += 1
Exemple #10
0
    def save(self):
        """
        Persist this object to OSDF.

        Nothing is sent if is_valid() reports the data as invalid. When the
        object has no ID yet, a new node is inserted, the ID returned by OSDF
        is stored through _set_id() and the version is set to 1. Otherwise
        the existing node is edited. Exceptions raised by the OSDF calls are
        logged and reported through the return value instead of propagating.

        Args:
            None

        Returns:
            True if successful, False otherwise.
        """
        self.logger.debug("In save.")

        if not self.is_valid():
            self.logger.error("Cannot save, data is invalid")
            return False

        ihmp_session = iHMPSession.get_session()
        self.logger.info("Got iHMP session.")

        saved = False

        if self._id is not None:
            # The node already has an ID, so update it.
            payload = self._get_raw_doc()
            try:
                self.logger.info("Attempting to update %s with ID: %s.", __name__, self.id)
                ihmp_session.get_osdf().edit_node(payload)
                self.logger.info("Update for %s %s successful.", __name__, self.id)
                saved = True
            except Exception as edit_exception:
                self.logger.error(
                    "An error occurred while updating {} {}. Reason: {}".format(
                        __name__, self.id, edit_exception
                    )
                )
        else:
            # No ID yet, so the document has never been saved: insert it.
            payload = self._get_raw_doc()
            self.logger.info("Got the raw JSON document.")

            try:
                self.logger.info("Attempting to save a new node.")
                new_id = ihmp_session.get_osdf().insert_node(payload)
                self.logger.info("Save for %s %s successful.", __name__, new_id)
                self.logger.info("Setting ID for %s %s.", __name__, new_id)
                self._set_id(new_id)
                self.version = 1
                saved = True
            except Exception as save_exception:
                self.logger.error("An error occurred while saving %s. "
                                  "Reason: %s", __name__, save_exception)

        return saved
Exemple #11
0
    def save(self):
        """
        Persist this object to OSDF.

        Nothing is sent if is_valid() reports the data as invalid. When the
        object has no ID yet, a new node is inserted, the ID returned by OSDF
        is stored through _set_id() and the version is set to 1. Otherwise
        the existing node is edited. Exceptions raised by the OSDF calls are
        logged and reported through the return value instead of propagating.

        Args:
            None

        Returns:
            True if successful, False otherwise.
        """
        self.logger.debug("In save.")

        if not self.is_valid():
            self.logger.error("Cannot save, data is invalid")
            return False

        session = iHMPSession.get_session()
        self.logger.info("Got iHMP session.")

        success = False

        if self._id is None:
            # The document has not been saved before
            visit_data = self._get_raw_doc()
            self.logger.info("Got the raw JSON document.")

            try:
                self.logger.info("Attempting to save a new node.")
                node_id = session.get_osdf().insert_node(visit_data)
                self.logger.info("Save for %s %s successful.", __name__,
                                 node_id)
                self.logger.debug("Setting ID for %s %s.", __name__, node_id)
                self._set_id(node_id)
                self._version = 1
                success = True
            except Exception as save_exception:
                self.logger.error("An error occurred while inserting %s. "
                                  "Reason: %s", __name__, save_exception)
        else:
            visit_data = self._get_raw_doc()
            try:
                self.logger.info("Attempting to update %s with ID: %s.",
                                 __name__, self._id)
                session.get_osdf().edit_node(visit_data)
                self.logger.info("Update for %s %s successful.", __name__,
                                 self._id)
                success = True
            except Exception as edit_exception:
                # A space separates the two sentences, as in the insert
                # branch; without it the ID and "Reason:" run together.
                self.logger.error("An error occurred while updating visit %s. "
                                  "Reason: %s", self._id, edit_exception)

        return success
Exemple #12
0
    def _upload_files(self, file_map):
        """
        Upload every file in file_map to the Aspera server.

        Each remote path is assembled from the study's directory, the
        "proteome_nonpride" area, the subtype, the file type and a sanitized
        version of the local file's base name.

        Args:
            file_map (dict): Maps a file type to the path of the local file
                to upload for that type.

        Returns:
            A dict mapping each file type to the fasp:// URL of its uploaded
            file.

        Raises:
            ValueError: If self.study has no directory mapping.
            Exception: If aspera.upload_file() reports a failure for a file.
                Files after the failing one are not uploaded.
        """
        self.logger.debug("In _upload_files.")

        study2dir = {"ibd": "ibd", "preg_preterm": "ptb", "prediabetes": "t2d"}

        study = self.study
        subtype = self.subtype

        if study not in study2dir:
            raise ValueError("Invalid study. No directory mapping for %s" %
                             study)

        study_dir = study2dir[study]
        remote_paths = {}

        # Get the session so we can get the username and password
        session = iHMPSession.get_session()
        username = session.username
        password = session.password

        # The whitelist does not depend on the file, so build it once.
        valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)

        # items() exists on both Python 2 and Python 3 dictionaries, whereas
        # iteritems() is Python 2 only.
        for file_type, local_file in file_map.items():
            self.logger.debug("Uploading %s of %s type %s", __name__,
                              local_file, file_type)

            # Keep only whitelisted characters in the remote file name.
            # Spaces are not whitelisted, so they are dropped here; a later
            # space-to-underscore replacement could never match anything.
            remote_base = ''.join(
                c for c in os.path.basename(local_file) if c in valid_chars)

            remote_path = "/".join([
                "/" + study_dir, "proteome_nonpride", subtype, file_type,
                remote_base
            ])
            self.logger.debug("Remote path for this file will be %s.",
                              remote_path)

            # Upload the file to the iHMP aspera server
            upload_success = aspera.upload_file(ProteomeNonPride.aspera_server,
                                                username, password, local_file,
                                                remote_path)
            if not upload_success:
                self.logger.error("Experienced an error uploading file %s.",
                                  local_file)
                raise Exception("Unable to upload " + local_file)

            remote_paths[file_type] = (
                "fasp://" + ProteomeNonPride.aspera_server + remote_path)

        return remote_paths
Exemple #13
0
    def load(prep_id):
        """
        Fetch a prep document from OSDF by ID and build a SixteenSDnaPrep
        from it.

        Args:
            prep_id (str): The OSDF ID for the document to load.

        Returns:
            A SixteenSDnaPrep populated from the retrieved document. The
            'frag_size' and 'srs_id' fields are only set when the document's
            metadata contains them.
        """
        module_logger.debug("In load. Specified ID: %s.", prep_id)

        ihmp_session = iHMPSession.get_session()
        module_logger.info("Got iHMP session.")

        node = ihmp_session.get_osdf().get_node(prep_id)

        module_logger.info("Creating a template %s.", __name__)
        prep = SixteenSDnaPrep()

        module_logger.debug("Filling in %s details.", __name__)

        # Attributes shared by all iHMP nodes.
        prep._set_id(node['id'])
        prep.version = node['ver']
        prep.links = node['linkage']

        # Metadata fields that are always read from a SixteenSDnaPrep
        # document, copied in this order onto same-named attributes.
        always_present = (
            'comment',
            'lib_layout',
            'lib_selection',
            'mimarks',
            'ncbi_taxon_id',
            'prep_id',
            'sequencing_center',
            'sequencing_contact',
            'storage_duration',
            'tags',
        )
        for field in always_present:
            setattr(prep, field, node['meta'][field])

        # Fields that are copied only when the document provides them.
        if 'frag_size' in node['meta']:
            module_logger.info("%s data has 'frag_size' present.", __name__)
            prep.frag_size = node['meta']['frag_size']

        if 'srs_id' in node['meta']:
            module_logger.info("%s data has 'srs_id' present.", __name__)
            prep.srs_id = node['meta']['srs_id']

        module_logger.debug("Returning loaded %s.", __name__)

        return prep
Exemple #14
0
    def _sample_attr_docs(self):
        """
        Generate the raw OSDF documents associated with this object.

        OSDF is queried page by page for documents whose 'associated_with'
        linkage refers to this object's ID.

        Yields:
            Each document from the query results, unmodified.
        """
        oql = '"{}"[linkage.associated_with]'.format(self.id)
        run_query = iHMPSession.get_session().get_osdf().oql_query

        page_no = 1
        while True:
            page = run_query(Sample.namespace, oql, page=page_no)
            reported = page['result_count']

            for raw_doc in page['results']:
                yield raw_doc

            # Stop once this page accounts for every result it reports.
            # NOTE(review): the count is re-read on each page rather than
            # carried over between pages - confirm this matches the meaning
            # of 'result_count' in the OSDF response.
            if reported - len(page['results']) < 1:
                break

            page_no += 1
Exemple #15
0
    def search(self, query):
        """
        Placeholder for searching the OSDF instance.

        At present this only obtains the iHMP session and logs progress; the
        query is not submitted and nothing is returned.

        Args:
            query: The search criteria. Currently unused.

        Returns:
            None
        """
        self.logger.debug("In search.")

        # The session is requested but its result is not used yet.
        iHMPSession.get_session()

        self.logger.info("Got iHMP session.")
Exemple #16
0
    def load(visit_node_id):
        """
        Fetch a visit document from OSDF by ID and build a Visit from it.

        Args:
            visit_node_id (str): The OSDF ID for the document to load.

        Returns:
            A Visit populated from the retrieved document. The 'date',
            'clinic_id' and 'tags' fields are only set when the document's
            metadata contains them.
        """
        module_logger.debug("In load. Specified ID: %s", visit_node_id)

        ihmp_session = iHMPSession.get_session()
        module_logger.info("Got iHMP session.")

        node = ihmp_session.get_osdf().get_node(visit_node_id)

        module_logger.info("Creating a template %s.", __name__)
        visit = Visit()

        module_logger.debug("Filling in %s details.", __name__)

        # Attributes shared by all iHMP nodes.
        visit._set_id(node['id'])
        visit.version = node['ver']
        visit.links = node['linkage']

        # Metadata fields that are always read from a Visit document,
        # copied in this order onto same-named attributes.
        for field in ('visit_id', 'visit_number', 'interval'):
            setattr(visit, field, node['meta'][field])

        # Fields that are copied only when the document provides them.
        if 'date' in node['meta']:
            module_logger.info("%s data has 'date' present.", __name__)
            visit.date = node['meta']['date']

        if 'clinic_id' in node['meta']:
            module_logger.info("%s data has 'clinic_id' present.", __name__)
            visit.clinic_id = node['meta']['clinic_id']

        if 'tags' in node['meta']:
            module_logger.info("%s data has 'tags' present.", __name__)
            visit.tags = node['meta']['tags']

        module_logger.debug("Returning loaded Visit.")

        return visit
Exemple #17
0
    def _upload_data(self):
        """
        Upload the local file to the Aspera server and record its remote URL.

        The remote path is assembled from the study's directory, the fixed
        "genome/microbiome/16s/raw" location and a sanitized version of the
        local file's base name. On success self._urls is replaced with a
        single fasp:// URL pointing at the uploaded file.

        Raises:
            ValueError: If self._study has no directory mapping.
            Exception: If aspera.upload_file() reports a failure.
        """
        self.logger.debug("In _upload_data.")

        session = iHMPSession.get_session()

        study = self._study

        study2dir = {
            "ibd": "ibd",
            "preg_preterm": "ptb",
            "prediabetes": "t2d",
            "risk": "risk",
            "protect": "protect",
            "mucosal_ibd": "mucosal_ibd",
            "herfarth_ccfa": "herfarth_ccfa",
            "jl_crohns": "jl_crohns"
        }

        if study not in study2dir:
            raise ValueError("Invalid study. No directory mapping for %s" %
                             study)

        study_dir = study2dir[study]

        # Keep only whitelisted characters in the remote file name. Spaces
        # are not whitelisted, so they are dropped here; a later
        # space-to-underscore replacement could never match anything.
        valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
        remote_base = ''.join(
            c for c in os.path.basename(self._local_file) if c in valid_chars)

        remote_path = "/".join([
            "/" + study_dir, "genome", "microbiome", "16s", "raw", remote_base
        ])
        self.logger.debug("Remote path for this file will be %s.", remote_path)

        # Upload the file to the iHMP aspera server
        upload_result = aspera.upload_file(SixteenSRawSeqSet.aspera_server,
                                           session.username, session.password,
                                           self._local_file, remote_path)

        if not upload_result:
            self.logger.error("Experienced an error uploading the data. "
                              "Aborting save.")
            raise Exception("Unable to load 16S raw sequence set.")

        self._urls = [
            "fasp://" + SixteenSRawSeqSet.aspera_server + remote_path
        ]
Exemple #18
0
    def _derived_docs(self):
        """
        Generate the raw OSDF documents derived from this object.

        OSDF is queried page by page for documents whose 'derived_from'
        linkage refers to this object's ID.

        Yields:
            Each document from the query results, unmodified.
        """
        self.logger.debug("In _derived_docs().")

        oql = '"{}"[linkage.derived_from]'.format(self.id)
        run_query = iHMPSession.get_session().get_osdf().oql_query

        page_no = 1
        while True:
            page = run_query(HostAssayPrep.namespace, oql, page=page_no)
            reported = page['result_count']

            for raw_doc in page['results']:
                yield raw_doc

            # Stop once this page accounts for every result it reports.
            # NOTE(review): the count is re-read on each page rather than
            # carried over between pages - confirm this matches the meaning
            # of 'result_count' in the OSDF response.
            if reported - len(page['results']) < 1:
                break

            page_no += 1
Exemple #19
0
    def search(query="\"16s_trimmed_seq_set\"[node_type]"):
        """
        Search OSDF for SixteenSTrimmedSeqSet nodes.

        Criteria are written in the OSDF query language. A general format is
        (including the quotes and brackets):

        "search criteria"[field to search]

        Any query other than the default is wrapped in parentheses and
        combined with the "16s_trimmed_seq_set" node-type clause, so only
        nodes of that type are matched. Only the results returned by a single
        oql_query() call are used.

        Args:
            query (str): The query for the OSDF framework. Defaults to the
                         SixteenSTrimmedSeqSet node type.

        Returns:
            A list of the objects built by
            SixteenSTrimmedSeqSet.load_sixteenSTrimmedSeqSet(), one per
            result. The list is empty if there are no results.
        """
        module_logger.debug("In search.")

        ihmp_session = iHMPSession.get_session()
        module_logger.info("Got iHMP session.")

        # Restrict caller-supplied criteria to this node type.
        if query != '"16s_trimmed_seq_set"[node_type]':
            query = '({}) && "16s_trimmed_seq_set"[node_type]'.format(query)

        module_logger.debug("Submitting OQL query: %s", query)

        response = ihmp_session.get_osdf().oql_query(
            SixteenSTrimmedSeqSet.namespace,
            query
        )

        found = response['results']

        if len(found) == 0:
            return []

        return [
            SixteenSTrimmedSeqSet.load_sixteenSTrimmedSeqSet(node_doc)
            for node_doc in found
        ]
Exemple #20
0
    def search(query="\"clustered_seq_set\"[node_type]"):
        """
        Search OSDF for ClusteredSeqSet nodes.

        Criteria are written in the OSDF query language. A general format is
        (including the quotes and brackets):

        "search criteria"[field to search]

        Any query other than the default is wrapped in parentheses and
        combined with the "clustered_seq_set" node-type clause, so only nodes
        of that type are matched. Only the results returned by a single
        oql_query() call are used.

        Args:
            query (str): The query for the OSDF framework. Defaults to the
                         ClusteredSeqSet node type.

        Returns:
            A list of the objects built by
            ClusteredSeqSet.load_clustered_seq_set(), one per result. The
            list is empty if there are no results.
        """
        module_logger.debug("In search.")

        ihmp_session = iHMPSession.get_session()
        module_logger.info("Got iHMP session.")

        # Restrict caller-supplied criteria to this node type.
        if query != '"clustered_seq_set"[node_type]':
            query = '({}) && "clustered_seq_set"[node_type]'.format(query)

        module_logger.debug("Submitting OQL query: %s", query)

        response = ihmp_session.get_osdf().oql_query(ClusteredSeqSet.namespace,
                                                     query)

        found = response['results']

        if len(found) == 0:
            return []

        return [ClusteredSeqSet.load_clustered_seq_set(node_doc)
                for node_doc in found]
Exemple #21
0
    def validate(self):
        """
        Validate the object's current data and describe any problems.

        The raw document is submitted to the OSDF instance for validation.
        Unless the files are flagged as private, the local file must be set
        and must exist on disk. A 'sequenced_from' link is always required.

        Args:
            None

        Returns:
            A list of problems: the error message returned by OSDF if
            validation failed, plus one message per failed local check. The
            list is empty when no problem was found.
        """
        self.logger.debug("In validate.")

        document = self._get_raw_doc()

        session = iHMPSession.get_session()
        self.logger.info("Got iHMP session.")

        (valid, error_message) = session.get_osdf().validate_node(document)

        problems = []

        if not valid:
            # Let the logging framework format the message lazily.
            self.logger.info("Validation did not succeed for %s.", __name__)
            problems.append(error_message)

        if self._private_files:
            self.logger.info("User specified the files are private.")
        else:
            self.logger.info(
                "Data is NOT private, so check that local_file is set.")
            if self._local_file is None:
                problems.append("Local file is not yet set.")
            elif not os.path.isfile(self._local_file):
                problems.append("Local file does not point to an actual file.")

        # Test membership on the mapping itself instead of building a
        # key collection first.
        if 'sequenced_from' not in self._links:
            problems.append("Must add a 'sequenced_from' link.")

        self.logger.debug("Number of validation problems: %s.", len(problems))

        return problems
Exemple #22
0
    def raw_seq_sets(self):
        """
        Generate the raw sequence sets that were sequenced from this prep.

        OSDF is queried page by page for documents whose 'sequenced_from'
        linkage refers to this prep's ID.

        Yields:
            One object built by SixteenSRawSeqSet.load_16s_raw_seq_set() per
            document returned by the query.
        """
        oql = '"{}"[linkage.sequenced_from]'.format(self.id)
        run_query = iHMPSession.get_session().get_osdf().oql_query

        for page_number in count(1):
            page = run_query(SixteenSDnaPrep.namespace, oql, page=page_number)
            reported = page['result_count']

            for seq_set_doc in page['results']:
                yield SixteenSRawSeqSet.load_16s_raw_seq_set(seq_set_doc)

            # Stop once this page accounts for every result it reports.
            # NOTE(review): the count is re-read on each page rather than
            # carried over between pages - confirm this matches the meaning
            # of 'result_count' in the OSDF response.
            unaccounted = reported - len(page['results'])
            if unaccounted < 1:
                break
Exemple #23
0
    def studies(self):
        """
        Generate the studies that are part of this project.

        OSDF is queried page by page for documents whose 'part_of' linkage
        refers to this project's ID.

        Yields:
            One object built by Study.load_study() per document returned by
            the query.
        """
        oql = '"{}"[linkage.part_of]'.format(self.id)
        run_query = iHMPSession.get_session().get_osdf().oql_query

        page_no = 1
        while True:
            page = run_query(Project.namespace, oql, page=page_no)
            reported = page['result_count']

            for study_doc in page['results']:
                yield Study.load_study(study_doc)

            # Stop once this page accounts for every result it reports.
            # NOTE(review): the count is re-read on each page rather than
            # carried over between pages - confirm this matches the meaning
            # of 'result_count' in the OSDF response.
            if reported - len(page['results']) < 1:
                break

            page_no += 1
Exemple #24
0
    def search(query="\"microb_assay_prep\"[node_type]"):
        """
        Search OSDF for MicrobiomeAssayPrep nodes.

        Criteria are written in the OSDF query language. A general format is
        (including the quotes and brackets):

        "search criteria"[field to search]

        Any query other than the default is wrapped in parentheses and
        combined with the "microb_assay_prep" node-type clause, so only nodes
        of that type are matched. Only the results returned by a single
        oql_query() call are used.

        Args:
            query (str): The query for the OSDF framework. Defaults to the
                         MicrobiomeAssayPrep node type.

        Returns:
            A list of the objects built by
            MicrobiomeAssayPrep.load_microassayprep(), one per result. The
            list is empty if there are no results.
        """
        module_logger.debug("In search.")

        ihmp_session = iHMPSession.get_session()
        module_logger.info("Got iHMP session.")

        # Restrict caller-supplied criteria to this node type.
        if query != '"microb_assay_prep"[node_type]':
            query = '({}) && "microb_assay_prep"[node_type]'.format(query)

        module_logger.debug("Submitting OQL query: %s", query)

        response = ihmp_session.get_osdf().oql_query(
            MicrobiomeAssayPrep.namespace, query
        )

        found = response['results']

        if len(found) == 0:
            return []

        return [MicrobiomeAssayPrep.load_microassayprep(node_doc)
                for node_doc in found]
Exemple #25
0
    def samples(self):
        """
        Generate the samples that were collected during this visit.

        OSDF is queried page by page for documents whose 'collected_during'
        linkage refers to this visit's ID.

        Yields:
            One object built by Sample.load_sample() per document returned by
            the query.
        """
        oql = '"{}"[linkage.collected_during]'.format(self.id)

        run_query = iHMPSession.get_session().get_osdf().oql_query

        page_no = 1
        while True:
            page = run_query(Visit.namespace, oql, page=page_no)
            reported = page['result_count']

            for sample_doc in page['results']:
                yield Sample.load_sample(sample_doc)

            # Stop once this page accounts for every result it reports.
            # NOTE(review): the count is re-read on each page rather than
            # carried over between pages - confirm this matches the meaning
            # of 'result_count' in the OSDF response.
            if reported - len(page['results']) < 1:
                break

            page_no += 1
Exemple #26
0
    def search(query="\"abundance_matrix\"[node_type]"):
        """
        Search OSDF for AbundanceMatrix nodes.

        Criteria are written in the OSDF query language. A general format is
        (including the quotes and brackets):

        "search criteria"[field to search]

        Any query other than the default is wrapped in parentheses and
        combined with the "abundance_matrix" node-type clause, so only nodes
        of that type are matched. Only the results returned by a single
        oql_query() call are used.

        Args:
            query (str): The query for the OSDF framework. Defaults to the
                         AbundanceMatrix node type.

        Returns:
            A list with one AbundanceMatrix.load() result per search hit. The
            list is empty if there are no results.
        """
        module_logger.debug("In search.")

        ihmp_session = iHMPSession.get_session()
        module_logger.info("Got iHMP session.")

        # Restrict caller-supplied criteria to this node type.
        if query != '"abundance_matrix"[node_type]':
            query = '({}) && "abundance_matrix"[node_type]'.format(query)

        module_logger.debug("Submitting OQL query: %s", query)

        response = ihmp_session.get_osdf().oql_query(AbundanceMatrix.namespace,
                                                     query)

        found = response['results']

        if len(found) == 0:
            return []

        # NOTE(review): each hit is loaded again by ID instead of being
        # built from the returned document; presumably that costs one more
        # OSDF request per result - TODO confirm.
        return [AbundanceMatrix.load(hit['id']) for hit in found]
Exemple #27
0
    def studies(self):
        """
        Lazily yield the studies whose subset_of linkage points at this study.

        An OQL query matching this study's ID against the linkage.subset_of
        field is submitted one page at a time, starting at page 1. Each
        document of a page is converted with Study.load_study() and yielded.
        Paging ends when the page's 'result_count' minus the number of
        documents on that page is below 1.

        Args:
            None

        Returns:
            A generator producing one Study.load_study() result per document
            returned by the query.
        """
        self.logger.debug("In studies.")

        subset_query = '"{}"[linkage.subset_of]'.format(self.id)
        osdf_query = iHMPSession.get_session().get_osdf().oql_query

        for page_number in count(1):
            page = osdf_query(Study.namespace, subset_query, page=page_number)

            # Grab both fields up front; the count is adjusted after the
            # page's documents have been yielded.
            remaining = page['result_count']
            documents = page['results']

            for document in documents:
                yield Study.load_study(document)

            remaining = remaining - len(documents)
            if not remaining >= 1:
                break
Exemple #28
0
    def search(query="\"visit_attr\"[node_type]"):
        """
        Search OSDF for visit_attr nodes using an OQL query.

        Unless the supplied query is exactly the default node type query, it
        is wrapped in parentheses and AND-ed with the visit_attr node type
        criterion before being submitted. A general format for a criterion
        is (including the quotes and brackets):

        "search criteria"[field to search]

        A single oql_query call is issued (no page argument is passed), and
        every entry of its 'results' is converted with
        VisitAttribute.load_visit_attr().

        Args:
            query (str): The query for the OSDF framework. Defaults to the
                         visit_attr node type query.

        Returns:
            A list with one VisitAttribute.load_visit_attr() result per
            search hit. The list is empty if there are no results.
        """
        module_logger.debug("In search.")

        session = iHMPSession.get_session()
        module_logger.info("Got iHMP session.")

        # Restrict arbitrary user criteria to this node type.
        restricted = (
            query if query == '"visit_attr"[node_type]'
            else '({}) && "visit_attr"[node_type]'.format(query)
        )

        module_logger.debug("Submitting OQL query: %s", restricted)

        response = session.get_osdf().oql_query(VisitAttribute.namespace,
                                                restricted)

        return [VisitAttribute.load_visit_attr(hit)
                for hit in response['results']]
Exemple #29
0
    def search(query="\"host_variant_call\"[node_type]"):
        """
        Search OSDF for host_variant_call nodes using an OQL query.

        Unless the supplied query is exactly the default node type query, it
        is wrapped in parentheses and AND-ed with the host_variant_call node
        type criterion before being submitted. A general format for a
        criterion is (including the quotes and brackets):

        "search criteria"[field to search]

        A single oql_query call is issued (no page argument is passed), and
        every entry of its 'results' is converted with
        HostVariantCall.load_host_variant_call().

        Args:
            query (str): The query for the OSDF framework. Defaults to the
                         host_variant_call node type query.

        Returns:
            A list with one HostVariantCall.load_host_variant_call() result
            per search hit. The list is empty if there are no results.
        """
        module_logger.debug("In search.")

        session = iHMPSession.get_session()
        module_logger.info("Got iHMP session.")

        # Restrict arbitrary user criteria to this node type.
        restricted = (
            query if query == '"host_variant_call"[node_type]'
            else '({}) && "host_variant_call"[node_type]'.format(query)
        )

        module_logger.debug("Submitting OQL query: %s", restricted)

        # NOTE(review): the namespace is passed as a literal here rather than
        # read from a class attribute -- confirm this is intentional.
        response = session.get_osdf().oql_query("ihmp", restricted)

        return [HostVariantCall.load_host_variant_call(hit)
                for hit in response['results']]
Exemple #30
0
    def load(study_id):
        """
        Fetch the node with the given OSDF ID and build a Study from it.

        The node is retrieved with get_node() and its ID, version ('ver'),
        linkage and metadata (name, description, center, contact, tags and,
        when present, srp_id) are copied onto a newly created Study. No
        error handling is performed here, so any exception raised while
        fetching or reading the node propagates to the caller.

        Args:
            study_id (str): The OSDF ID for the document to load.

        Returns:
            A Study object populated from the retrieved node.
        """
        module_logger.debug("In load. Specified ID: %s", study_id)

        session = iHMPSession.get_session()
        module_logger.info("Got iHMP session.")

        node = session.get_osdf().get_node(study_id)

        module_logger.info("Creating a template Study.")

        loaded = Study()

        module_logger.debug("Filling in Study details.")

        loaded._set_id(node['id'])
        # The node stores its version under the short key 'ver'.
        loaded.version = node['ver']
        loaded.links = node['linkage']

        # Study-specific attributes all live under the node's 'meta' section.
        meta = node['meta']
        for field in ('name', 'description', 'center', 'contact', 'tags'):
            setattr(loaded, field, meta[field])

        # srp_id is only copied when the node carries it.
        if 'srp_id' in meta:
            loaded.srp_id = meta['srp_id']

        module_logger.debug("Returning loaded %s.", __name__)
        return loaded