def work():
    """Worker loop: scrape one genome page per queued URL and print it as JSON.

    Runs forever. For each URL taken from ``in_queue`` the page is downloaded,
    the organism name is extracted, a taxonomy id is resolved through
    ``pyphy`` (or, when that yields -1, taken from the NCBI taxonomy link in
    the page), the ancestors whose rank is in ``desired_ranks`` are collected,
    and every match of the module-level pattern ``rx`` is stored as
    ``container["cazy"][match[0]] = int(match[1])``.

    A record is printed (one JSON object per line, under ``writeLock``) only
    when at least one ``rx`` match was found.

    Pages that cannot be fetched or parsed are skipped on a best-effort basis;
    the failure is logged at DEBUG level instead of being discarded silently.
    ``in_queue.task_done()`` is always called, so ``in_queue.join()`` cannot
    hang on a failed item.
    """
    # Local import: the file's import block is not visible from this block.
    import logging

    log = logging.getLogger(__name__)

    while True:
        url = in_queue.get()

        try:
            content = requests.get(url).content.decode('iso8859-1')

            # Key order is kept as "cazy", "taxonomy", "name", "taxid",
            # "column" so the emitted JSON layout does not change.
            container = {"cazy": {}, "taxonomy": {}}

            container["name"] = re.findall(r'id="font_org">(.+)</font>',
                                           content)[0]

            cazy_page_taxon_name = re.findall(
                r'<font class="titre_cazome" id="font_org">(.+)<\/font>',
                content)[0]

            taxonomy_id = -1
            if cazy_page_taxon_name:
                taxonomy_id = int(
                    pyphy.getTaxidByName(cazy_page_taxon_name.strip())[0])

            # Fall back to the taxonomy id embedded in the page's NCBI link.
            if taxonomy_id == -1:
                taxonomy_id = re.findall(
                    r'http://www\.ncbi\.nlm\.nih\.gov/Taxonomy/Browser/wwwtax\.cgi\?id=(\d+)',
                    content)[0]

            container["taxid"] = int(taxonomy_id)

            # Walk upwards parent by parent until taxid 1 or -1 is reached.
            # The starting taxid itself is not recorded, only its ancestors.
            current_id = int(taxonomy_id)
            while current_id != 1 and current_id != -1:
                current_id = int(pyphy.getParentByTaxid(current_id))
                rank = pyphy.getRankByTaxid(current_id)
                if rank in desired_ranks:
                    container["taxonomy"][rank] = [
                        pyphy.getNameByTaxid(current_id), current_id
                    ]

            for cazy in re.findall(rx, content):
                container["cazy"][cazy[0]] = int(cazy[1])

            if container["cazy"]:
                container["column"] = "genome"
                # The context manager releases the lock even if serialising
                # or printing raises, so other workers cannot deadlock.
                with writeLock:
                    print(json.dumps(container))

        except Exception:
            # Deliberate best-effort: one bad page must not stop the worker.
            log.debug("skipping %s", url, exc_info=True)

        finally:
            in_queue.task_done()
def work():
    """Worker loop: print the taxonomy columns for the first resolvable line.

    Runs forever. Each item taken from ``in_queue`` is a multi-line string.
    Its lines are scanned in order; for every line matched by ``rx_taxon`` the
    captured taxon name is normalised (underscores become spaces, and the part
    after ``"sp."`` is re-joined with underscores), resolved through ``pyphy``
    and turned into seven columns, one slot per index of the matching rank in
    ``desired_ranks`` (``"-1"`` where no rank was found). Resolved columns are
    memoised in the shared ``taxon_content`` dict.

    As soon as a line yields at least one resolved column, the line's first
    tab-separated field plus the seven columns are printed under ``writeLock``
    and the rest of the item is ignored.

    A failure while processing an item is logged with its traceback and the
    worker moves on to the next item; ``in_queue.task_done()`` is always
    called so ``in_queue.join()`` cannot hang on a failed item.
    """
    # Local import: the file's import block is not visible from this block.
    import logging

    log = logging.getLogger(__name__)
    column_count = 7

    while True:
        content = in_queue.get()

        try:
            for line in content.split("\n"):
                temp_content = ["-1"] * column_count
                search_taxon = rx_taxon.search(line)

                if search_taxon:
                    taxon = search_taxon.group(1).replace("_", " ")

                    if "sp." in taxon:
                        parts = taxon.split("sp.")
                        taxon = parts[0] + "sp. " + parts[1].strip().replace(
                            " ", "_")

                    if taxon not in taxon_content:
                        taxid = pyphy.getTaxidByName(taxon)[0]

                        for item in pyphy.getPathByTaxid(taxid):
                            rank = pyphy.getRankByTaxid(item)
                            if rank in desired_ranks:
                                temp_content[desired_ranks.index(
                                    rank)] = pyphy.getNameByTaxid(item)

                        # Released automatically even if the store raises.
                        with dictLock:
                            taxon_content[taxon] = temp_content
                    else:
                        temp_content = taxon_content[taxon]

                if temp_content.count("-1") < column_count:
                    fields = line.split("\t")
                    with writeLock:
                        print(fields[0] + "\t" + "\t".join(temp_content))
                    # Only the first resolvable line of an item is reported.
                    break

        except Exception:
            # Keep the worker alive; a crash here would otherwise end the
            # thread and leave the queue item unacknowledged.
            log.exception("failed to process queue item")

        finally:
            in_queue.task_done()
Beispiel #3
0
def worker(one_query):
    """Print one tab-separated row for the first hit that has a phylum.

    ``one_query`` is iterated completely. Each entry is unpacked into
    ``(query_name, taxid, score, pident, qcovs, evalue)``. Until a usable hit
    is found, the rank -> name mapping for the hit's taxid is read from
    ``cache`` (presumably a Redis-like hash store -- confirm against the
    caller) or, on a cache miss, built through ``pyphy`` and written back.
    The first mapping that contains ``"phylum"`` wins; later entries are
    consumed but ignored.

    When a hit was found, its six fields followed by one column per entry of
    ``desired_ranks`` (empty when the rank is missing) are printed, with
    surrounding whitespace stripped. Nothing is printed otherwise.
    """
    resolved = {}
    query_name = ""

    for query in one_query:
        # A hit was already accepted: keep draining the iterable, do nothing.
        if resolved:
            continue

        query_name, taxid, score, pident, qcovs, evalue = query

        if cache.exists(taxid):
            candidate = cache.hgetall(taxid)
        else:
            path = pyphy.getDictPathByTaxid(taxid)
            candidate = {
                rank: pyphy.getNameByTaxid(path[rank])
                for rank in path
            }
            cache.hmset(taxid, candidate)

        if "phylum" in candidate:
            resolved = candidate

    if not resolved:
        return

    hit_fields = [query_name, taxid, score, pident, qcovs, evalue]
    rank_names = [
        resolved[rank] if rank in resolved else ""
        for rank in desired_ranks
    ]

    # Every column, including the last, is followed by a tab before the
    # final strip, exactly as in a column-by-column concatenation.
    row = "\t".join(hit_fields) + "\t" + "".join(
        name + "\t" for name in rank_names)

    print(row.strip())
Beispiel #4
0
                for t in search_taxid:
                    taxid = int(t)


        #print (file)

        if taxid != -1:
            dict_path = pyphy.getDictPathByTaxid(taxid)

            dict_path["genome"] = taxid
            quartett = [""] * 2

            #print (dict_path)

            if filter_rank in dict_path and pyphy.getNameByTaxid(dict_path[filter_rank]) == filter_taxon:

                for rank in desired_rank:
                    if rank in dict_path:
                        name = definition.split(",")[0]
                        if rank != "genome":
                            name = pyphy.getNameByTaxid(dict_path[rank])

                        if rank == "superkingdom":


                                quartett[0] = dict_path[rank]

                                if dict_path[rank] not in taxid_taxon:
                                    taxid_taxon[dict_path[rank]] = [name, rank]
                        else:
Beispiel #5
0
 def test_TaxidToName(self):
     """Check that pyphy.getNameByTaxid(2) returns "Bacteria"."""
     resolved_name = pyphy.getNameByTaxid(2)
     self.assertEqual(resolved_name, "Bacteria")