Esempio n. 1
0
def search_read_group_sets(request):
    """
    Mock function.

    Ignores ``request`` and returns a two-element tuple: a list of ten
    ``protocol.ReadGroupSet`` objects whose ids are "0" through "9" and
    whose name is always "Hi there", followed by the fixed page token
    "somepagetoken".
    """
    read_group_set_list = []
    # range() instead of xrange(): xrange only exists on Python 2, while
    # range() iterates over the same ten values on Python 2 and 3 alike.
    for i in range(10):
        read_group_set = protocol.ReadGroupSet()
        read_group_set.id = str(i)
        read_group_set.name = "Hi there"
        read_group_set_list.append(read_group_set)
    return (read_group_set_list, "somepagetoken")
Esempio n. 2
0
def search_read_group_sets(request):
    """
    Looks up the SRA records linked to an NCBI BioProject and returns one
    read group message per record.

    The BioProject id is read from ``request.dataset_id``. The linked SRA
    ids are resolved with one elink call and then retrieved from efetch in
    batches of ``request.page_size`` ids (100 when the request does not
    carry a positive page size).

    Returns a flat list of the read group messages that were populated.
    Despite the function name, neither ReadGroupSet objects nor a page
    token are returned.
    """
    ncbi_bioproject_id = request.dataset_id
    # Only a positive size is accepted as the batch size. With a negative
    # value the ids[:n] / ids[n:] slicing below never empties the list and
    # the loop would not terminate.
    page_size = 100  # Default page size
    if request.page_size > 0:
        page_size = request.page_size

    elink_params = {
        'db': 'sra',
        'dbfrom': 'bioproject',
        'id': ncbi_bioproject_id,
        'term': 'all[filter]',
    }
    elink_response = requests.get(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi",
        elink_params)

    # parse xml response: get SRA IDs
    ids = []
    root = ET.fromstring(elink_response.text)
    for link_set_db in root.findall("./LinkSet/LinkSetDb"):
        # findtext() yields None (instead of raising) when the LinkName
        # child is absent, so such entries are simply skipped.
        if link_set_db.findtext("LinkName") == "bioproject_sra_all":
            for sra in link_set_db.findall("./Link/Id"):
                ids.append(sra.text)

    # === get all data for these SRAs ===
    readgroups = []
    while ids:
        # The set is only used as a factory for read group messages via
        # read_groups.add(); it is not itself returned.
        readgroupset = protocol.ReadGroupSet()
        # e.g., https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=sra&id=3543186,3543185,3543183
        sra_ids = ids[:page_size]
        ids = ids[page_size:]
        efetch_params = {'db': 'sra', 'id': ','.join(sra_ids)}
        efetch_response = requests.get(
            "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
            efetch_params)

        # parse xml response: get relevant data for these SRAs
        # One read group is created per top-level child of the response.
        # For each field, the last matching element wins when several
        # elements match the path.
        for child in ET.fromstring(efetch_response.text):
            readgroup = readgroupset.read_groups.add()
            for pid in child.findall("./SUBMISSION/IDENTIFIERS/PRIMARY_ID"):
                readgroup.dataset_id = pid.text
            for pid in child.findall("./RUN_SET/RUN/IDENTIFIERS/PRIMARY_ID"):
                readgroup.id = pid.text
            for eid in child.findall(
                    "./RUN_SET/RUN/Pool/Member/IDENTIFIERS/EXTERNAL_ID"):
                readgroup.biosample_id = eid.text
            for node in child.findall("./RUN_SET/RUN"):
                # "in" replaces dict.has_key(), which is Python-2-only.
                if 'assembly' in node.attrib:
                    readgroup.reference_set_id = node.attrib['assembly']
            readgroups.append(readgroup)
    return readgroups
Esempio n. 3
0
 def toProtocolElement(self):
     """
     Builds the GA4GH protocol message describing this ReadGroupSet and
     returns it.
     """
     message = protocol.ReadGroupSet()
     message.id = self.getId()
     # Convert every contained read group before attaching them in one go.
     groupElements = []
     for group in self.getReadGroups():
         groupElements.append(group.toProtocolElement())
     message.read_groups.extend(groupElements)
     message.name = self.getLocalId()
     parentContainer = self.getParentContainer()
     message.dataset_id = parentContainer.getId()
     setStats = self.getStats()
     message.stats.CopyFrom(setStats)
     self.serializeAttributes(message)
     return message