def getTheTrees():
    """Build the NCBI taxonomy as a dict of ete3 ``Tree`` nodes keyed by taxid.

    Input files, all relative to the current working directory (the taxdump
    must be downloaded into ``taxo/`` beforehand and TAXREF fetched by hand):

    * ``taxo/TAXREFv11.txt`` -- tab-separated; field 14 is used as the
      scientific name and field 19 as a French common name (0-based).
    * ``ranks.txt`` -- tab-separated: English rank name, French rank name.
    * ``taxo/names.dmp`` / ``taxo/nodes.dmp`` -- "|"-separated dump files.

    Returns:
        dict: taxid (str) -> node. Every node carries ``name``, ``taxid``,
        ``sci_name``, ``common_name`` (list), ``synonym`` and ``authority``
        (", "-joined strings) and ``common_name_FR`` (list). ``rank`` and
        ``rank_FR`` are set from the node's own line in nodes.dmp, so a node
        whose own line is never read (the first line is skipped) has neither.

    Raises:
        IndexError: a line has fewer fields than the ones indexed.
        KeyError: a taxid in nodes.dmp is missing from names.dmp, or a rank
            is missing from ranks.txt.
    """
    # Imported first so a missing dependency fails before the long file reads.
    from ete3 import Tree

    print("Getting french translations...")
    TRANS = {}  ## scientific name -> list of French common names
    with open("taxo/TAXREFv11.txt") as f:
        for line in f:
            fields = line.split("\t")
            sciname = fields[14]
            comnameFR = fields[19]
            if comnameFR != '':
                TRANS.setdefault(sciname, []).append(comnameFR)

    # get translation of ranks
    print("\nGetting rank names in french...")
    RANKS = {}
    with open("ranks.txt") as f:
        for line in f:
            fields = line.split("\t")
            RANKS[fields[0]] = fields[1].rstrip()  ## rstrip removes the \n

    class Taxid:
        """Names gathered from names.dmp for one taxid."""

        def __init__(self):
            self.sci_name = ""
            self.authority = ""
            self.synonym = ""
            self.common_name = []
            self.common_name_FR = []

    ATTR = {}  ## attributes of each taxon, keyed by taxid
    print("Reading NCBI taxonomy...")
    with open("taxo/names.dmp") as f:
        for line in f:
            fields = line.split("|")
            taxid = fields[0].replace("\t", "")
            tid_val = fields[1].replace("\t", "")
            tid_type = fields[3].replace("\t", "")
            if taxid not in ATTR:
                ATTR[taxid] = Taxid()
            entry = ATTR[taxid]
            if tid_type == "scientific name":
                entry.sci_name = tid_val
                # French translation, if any. The list object is shared by
                # every taxid that has this scientific name.
                if tid_val in TRANS:
                    entry.common_name_FR = TRANS[tid_val]
            elif tid_type == "authority":
                if entry.authority != "":
                    entry.authority = entry.authority + ", " + tid_val
                else:
                    entry.authority = tid_val
            elif tid_type == "synonym":
                if entry.synonym != "":
                    entry.synonym = entry.synonym + ", " + tid_val
                else:
                    entry.synonym = tid_val
            elif tid_type == "common name":
                entry.common_name.append(tid_val)

    def new_node(taxid):
        # Fresh node carrying the names collected for `taxid` (no rank yet).
        info = ATTR[taxid]
        node = Tree()
        node.name = taxid
        node.taxid = taxid
        node.sci_name = info.sci_name
        node.common_name = info.common_name
        node.synonym = info.synonym
        node.authority = info.authority
        node.common_name_FR = info.common_name_FR
        return node

    T = {}
    print("Building the NCBI taxonomy tree...")
    with open('taxo/nodes.dmp') as fp:
        fp.readline()  ## remove the 1 | 1 edge
        for line in fp:
            fields = line.split("|")
            son = fields[0].replace("\t", "")
            dad = fields[1].replace("\t", "")
            rank = fields[2].replace("\t", "")  # rank of the son, not the dad
            if dad not in T:
                # The dad's rank is only known from its own line, so it is
                # filled in later, when that line is reached.
                T[dad] = new_node(dad)
            if son not in T:
                node = new_node(son)
                node.rank = rank
                node.rank_FR = RANKS[rank]
                T[son] = node
            elif not hasattr(T[son], 'rank'):
                # Node was created earlier as somebody's dad: add its rank.
                T[son].rank = rank
                T[son].rank_FR = RANKS[rank]
            T[dad].add_child(T[son])
    return T
def getTheTrees():
    """Build the NCBI taxonomy as a dict of ete3 ``Tree`` nodes keyed by taxid.

    Downloads the French vernacular-name table with ``wget`` (run through
    ``sudo`` via ``os.system``) into ``taxo/TAXONOMIC-VERNACULAR-FR.txt``,
    then reads, relative to the current working directory:

    * ``taxo/TAXONOMIC-VERNACULAR-FR.txt`` -- tab-separated: scientific
      name, French common name.
    * ``taxo/ranks.txt`` -- tab-separated: English rank name, French rank name.
    * ``taxo/names.dmp`` / ``taxo/nodes.dmp`` -- "|"-separated dump files.

    Returns:
        dict: taxid (str) -> node. Every node carries ``name``, ``taxid``,
        ``sci_name``, ``common_name`` (list), ``synonym`` and ``authority``
        (", "-joined strings) and ``common_name_FR`` (list). ``rank`` and
        ``rank_FR`` are set from the node's own line in nodes.dmp, so a node
        whose own line is never read (the first line is skipped) has neither.

    Raises:
        IndexError: a line has fewer fields than the ones indexed.
        KeyError: a taxid in nodes.dmp is missing from names.dmp, or a rank
            is missing from taxo/ranks.txt.
    """
    # Imported first so a missing dependency fails before the download and
    # the long file reads.
    from ete3 import Tree

    print("Getting french translations...")
    # NOTE(review): shell command run with sudo; the command line is a fixed
    # string (no external input). Kept unchanged.
    status = os.system("sudo wget -O taxo/TAXONOMIC-VERNACULAR-FR.txt https://github.com/damiendevienne/taxonomy-fr/blob/master/TAXONOMIC-VERNACULAR-FR.txt?raw=true")
    if status != 0:
        # Previously ignored: a failed download went unnoticed and the run
        # simply continued with whatever the file then contained.
        print("WARNING: wget returned status %s; French translations may be missing" % status)

    TRANS = {}  ## scientific name -> list of French common names
    with open("taxo/TAXONOMIC-VERNACULAR-FR.txt") as f:
        for line in f:
            fields = line.split("\t")
            sciname = fields[0]
            comnameFR = fields[1].rstrip()
            TRANS.setdefault(sciname, []).append(comnameFR)

    # get translation of ranks
    print("\nGetting rank names in french...")
    RANKS = {}
    with open("taxo/ranks.txt") as f:
        for line in f:
            fields = line.split("\t")
            RANKS[fields[0]] = fields[1].rstrip()  ## rstrip removes the \n

    class Taxid:
        """Names gathered from names.dmp for one taxid."""

        def __init__(self):
            self.sci_name = ""
            self.authority = ""
            self.synonym = ""
            self.common_name = []
            self.common_name_FR = []

    ATTR = {}  ## attributes of each taxon, keyed by taxid
    print("Reading NCBI taxonomy...")
    with open("taxo/names.dmp") as f:
        for line in f:
            fields = line.split("|")
            taxid = fields[0].replace("\t", "")
            tid_val = fields[1].replace("\t", "")
            tid_type = fields[3].replace("\t", "")
            if taxid not in ATTR:
                ATTR[taxid] = Taxid()
            entry = ATTR[taxid]
            if tid_type == "scientific name":
                entry.sci_name = tid_val
                # French translation, if any. The list object is shared by
                # every taxid that has this scientific name.
                if tid_val in TRANS:
                    entry.common_name_FR = TRANS[tid_val]
            elif tid_type == "authority":
                if entry.authority != "":
                    entry.authority = entry.authority + ", " + tid_val
                else:
                    entry.authority = tid_val
            elif tid_type == "synonym":
                if entry.synonym != "":
                    entry.synonym = entry.synonym + ", " + tid_val
                else:
                    entry.synonym = tid_val
            elif tid_type in ("common name", "genbank common name"):
                entry.common_name.append(tid_val)

    def new_node(taxid):
        # Fresh node carrying the names collected for `taxid` (no rank yet).
        info = ATTR[taxid]
        node = Tree()
        node.name = taxid
        node.taxid = taxid
        node.sci_name = info.sci_name
        node.common_name = info.common_name
        node.synonym = info.synonym
        node.authority = info.authority
        node.common_name_FR = info.common_name_FR
        return node

    T = {}
    print("Building the NCBI taxonomy tree...")
    with open('taxo/nodes.dmp') as fp:
        fp.readline()  ## remove the 1 | 1 edge
        for line in fp:
            fields = line.split("|")
            son = fields[0].replace("\t", "")
            dad = fields[1].replace("\t", "")
            rank = fields[2].replace("\t", "")  ## rank OF THE SON!
            if dad not in T:
                # The dad's rank is only known from its own line, so it is
                # filled in later, when that line is reached.
                T[dad] = new_node(dad)
            if son not in T:
                node = new_node(son)
                node.rank = rank
                node.rank_FR = RANKS[rank]
                T[son] = node
            elif not hasattr(T[son], 'rank'):
                # Node was created earlier as somebody's dad: add its rank.
                T[son].rank = rank
                T[son].rank_FR = RANKS[rank]
            T[dad].add_child(T[son])

    # Disambiguation: those are birds, not "sabline" flowers.
    # Guarded so a dump without this taxid does not abort the whole build.
    # NOTE(review): the remark is about a French common name, yet the field
    # blanked is rank_FR rather than common_name_FR -- confirm which one is
    # intended. Behaviour kept as it was.
    if '54972' in T:
        T['54972'].rank_FR = ""
    return T
# Build one Tree node per taxid found in the file at `filepath` and attach
# every son to its dad. Relies on names bound before this point: filepath,
# T (taxid -> node), ATTR (taxid -> collected names), RANKS and Tree.
with open(filepath) as fp:
    first_line = fp.readline()  ## remove the 1 | 1 edge
    for line in fp:
        fields = line.split("|")
        son = fields[0].replace("\t", "")
        dad = fields[1].replace("\t", "")
        rank = fields[2].replace("\t", "")  # rank of the son, not of the dad
        if dad not in T:
            T[dad] = Tree()
            T[dad].name = dad
            # No rank is set here: `rank` on this line belongs to the son.
            # The dad gets its own rank when its own line is reached.
            T[dad].taxid = dad
            T[dad].sci_name = ATTR[dad].sci_name
            T[dad].common_name = ATTR[dad].common_name
            T[dad].synonym = ATTR[dad].synonym
            T[dad].authority = ATTR[dad].authority
            T[dad].common_name_FR = ATTR[dad].common_name_FR
        if son not in T:
            T[son] = Tree()
            T[son].name = son
            T[son].rank = rank
            T[son].rank_FR = RANKS[rank]
            T[son].taxid = son
            T[son].sci_name = ATTR[son].sci_name
            T[son].common_name = ATTR[son].common_name
            T[son].synonym = ATTR[son].synonym
            T[son].authority = ATTR[son].authority
            T[son].common_name_FR = ATTR[son].common_name_FR
        elif not hasattr(T[son], 'rank'):
            # Node was created earlier as somebody's dad: add its rank now.
            # A node whose own line is the skipped first line never gets one.
            T[son].rank = rank
            T[son].rank_FR = RANKS[rank]
        T[dad].add_child(T[son])
def getTheTrees():
    """Build the NCBI taxonomy as a dict of ``Tree`` nodes keyed by taxid.

    Input files, all relative to the current working directory (the taxdump
    must be downloaded into ``taxo/`` beforehand and TAXREF fetched by hand):

    * ``taxo/TAXREFv11.txt`` -- tab-separated; field 14 is used as the
      scientific name and field 19 as a French common name (0-based).
    * ``taxo/ranks_FR.txt`` -- tab-separated: English rank, French rank.
    * ``taxo/names.dmp`` / ``taxo/nodes.dmp`` -- "|"-separated dump files.

    ``Tree`` is taken from the enclosing module scope.

    Returns:
        dict: taxid (str) -> node. Every node carries ``name``, ``taxid``,
        ``sci_name``, ``common_name`` (list), ``synonym`` and ``authority``
        (", "-joined strings) and ``common_name_FR`` (list). ``rank`` and
        ``rank_FR`` are set from the node's own line in nodes.dmp, so a node
        whose own line is never read (the first line is skipped) has neither.
    """
    print("Getting french translations...")
    french_names = {}  # scientific name -> list of French common names
    with open("taxo/TAXREFv11.txt") as handle:
        for row in handle:
            cols = row.split("\t")
            sci, vernacular = cols[14], cols[19]
            if vernacular == '':
                continue
            if sci not in french_names:
                french_names[sci] = []
            french_names[sci].append(vernacular)

    # French translation of the rank names
    print("Getting rank names in french...")
    rank_to_fr = {}
    with open("taxo/ranks_FR.txt") as handle:
        for row in handle:
            cols = row.split("\t")
            english = cols[0]
            rank_to_fr[english] = cols[1].rstrip()  # drop the trailing \n

    class Taxid:
        """Names gathered from names.dmp for one taxid."""

        def __init__(self):
            self.sci_name = ""
            self.authority = ""
            self.synonym = ""
            self.common_name = []
            self.common_name_FR = []

    def joined(current, extra):
        # Append `extra` to a ", "-separated string (or start the string).
        return extra if current == "" else current + ", " + extra

    names_by_taxid = {}
    print("Reading NCBI taxonomy...")
    with open("taxo/names.dmp") as handle:
        for row in handle:
            cols = row.split("|")
            taxid = cols[0].replace("\t", "")
            value = cols[1].replace("\t", "")
            kind = cols[3].replace("\t", "")
            # TODO: maybe add small per-field filters at some point (keep
            # only the first common name, double the single quotes, put the
            # common name in parentheses) -- still undecided.
            record = names_by_taxid.get(taxid)
            if record is None:
                record = names_by_taxid[taxid] = Taxid()
            if kind == "scientific name":
                record.sci_name = value
                # French translation, if any (list shared between homonyms)
                if value in french_names:
                    record.common_name_FR = french_names[value]
            elif kind == "authority":
                record.authority = joined(record.authority, value)
            elif kind == "synonym":
                record.synonym = joined(record.synonym, value)
            elif kind == "common name":
                record.common_name.append(value)

    def make_node(taxid):
        # Fresh node carrying the names collected for `taxid` (no rank yet).
        record = names_by_taxid[taxid]
        node = Tree()
        node.name = taxid
        node.taxid = taxid
        node.sci_name = record.sci_name
        node.common_name = record.common_name
        node.synonym = record.synonym
        node.authority = record.authority
        node.common_name_FR = record.common_name_FR
        return node

    def set_rank(node, rank):
        node.rank = rank
        node.rank_FR = rank_to_fr[rank]

    nodes = {}
    print("Building the NCBI taxonomy tree...")
    with open('taxo/nodes.dmp') as handle:
        handle.readline()  # skip the "1 | 1" edge
        for row in handle:
            cols = row.split("|")
            child = cols[0].replace("\t", "")
            parent = cols[1].replace("\t", "")
            child_rank = cols[2].replace("\t", "")
            if parent not in nodes:
                # rank is filled in when the parent's own line is reached
                nodes[parent] = make_node(parent)
            if child not in nodes:
                nodes[child] = make_node(child)
                set_rank(nodes[child], child_rank)
            elif not hasattr(nodes[child], 'rank'):
                set_rank(nodes[child], child_rank)
            nodes[parent].add_child(nodes[child])
    return nodes
def getTheTrees():
    """Build the NCBI taxonomy as a dict of ete3 ``Tree`` nodes keyed by taxid.

    Input files, all relative to the current working directory (the taxdump
    must be downloaded into ``taxo/`` beforehand and TAXREF fetched by hand):

    * ``taxo/TAXREFv11.txt`` -- tab-separated; field 14 is used as the
      scientific name and field 19 as a French common name (0-based).
    * ``taxo/ranks.txt`` -- tab-separated: English rank name, French rank name.
    * ``taxo/names.dmp`` / ``taxo/nodes.dmp`` -- "|"-separated dump files.

    Returns:
        dict: taxid (str) -> node. Every node carries ``name``, ``taxid``,
        ``sci_name``, ``common_name`` (list), ``synonym`` and ``authority``
        (", "-joined strings) and ``common_name_FR`` (list). A node first
        met as a son also gets ``rank`` and ``rank_FR``; a node first met as
        a dad gets ``rank`` only, once its own line is reached.

    Raises:
        IndexError: a line has fewer fields than the ones indexed.
        KeyError: a taxid in nodes.dmp is missing from names.dmp, or a rank
            is missing from taxo/ranks.txt.
    """
    # Imported first so a missing dependency fails before the long file reads.
    from ete3 import Tree

    print("Getting french translations...")
    TRANS = {}  ## scientific name -> list of French common names
    with open("taxo/TAXREFv11.txt") as f:
        for line in f:
            fields = line.split("\t")
            sciname = fields[14]
            comnameFR = fields[19]
            if comnameFR != '':
                TRANS.setdefault(sciname, []).append(comnameFR)

    # get translation of ranks
    print("\nGetting rank names in french...")
    RANKS = {}
    with open("taxo/ranks.txt") as f:
        for line in f:
            fields = line.split("\t")
            RANKS[fields[0]] = fields[1].rstrip()  ## rstrip removes the \n

    class Taxid:
        """Names gathered from names.dmp for one taxid."""

        def __init__(self):
            self.sci_name = ""
            self.authority = ""
            self.synonym = ""
            self.common_name = []
            self.common_name_FR = []

    ATTR = {}  ## attributes of each taxon, keyed by taxid
    print("Reading NCBI taxonomy...")
    with open("taxo/names.dmp") as f:
        for line in f:
            fields = line.split("|")
            taxid = fields[0].replace("\t", "")
            tid_val = fields[1].replace("\t", "")
            tid_type = fields[3].replace("\t", "")
            if taxid not in ATTR:
                ATTR[taxid] = Taxid()
            entry = ATTR[taxid]
            if tid_type == "scientific name":
                entry.sci_name = tid_val
                # French translation, if any. The list object is shared by
                # every taxid that has this scientific name.
                if tid_val in TRANS:
                    entry.common_name_FR = TRANS[tid_val]
            elif tid_type == "authority":
                if entry.authority != "":
                    entry.authority = entry.authority + ", " + tid_val
                else:
                    entry.authority = tid_val
            elif tid_type == "synonym":
                if entry.synonym != "":
                    entry.synonym = entry.synonym + ", " + tid_val
                else:
                    entry.synonym = tid_val
            elif tid_type in ("common name", "genbank common name"):
                entry.common_name.append(tid_val)

    def new_node(taxid):
        # Fresh node carrying the names collected for `taxid` (no rank yet).
        info = ATTR[taxid]
        node = Tree()
        node.name = taxid
        node.taxid = taxid
        node.sci_name = info.sci_name
        node.common_name = info.common_name
        node.synonym = info.synonym
        node.authority = info.authority
        node.common_name_FR = info.common_name_FR
        return node

    T = {}
    print("Building the NCBI taxonomy tree...")
    with open('taxo/nodes.dmp') as fp:
        fp.readline()  ## remove the 1 | 1 edge
        for line in fp:
            fields = line.split("|")
            son = fields[0].replace("\t", "")
            dad = fields[1].replace("\t", "")
            rank = fields[2].replace("\t", "")  # rank of the son, not the dad
            if dad not in T:
                T[dad] = new_node(dad)
            if son not in T:
                node = new_node(son)
                node.rank = rank
                node.rank_FR = RANKS[rank]
                T[son] = node
            elif not hasattr(T[son], 'rank'):
                # Node was created earlier as somebody's dad: add its rank.
                T[son].rank = rank
                # NOTE(review): rank_FR is not assigned on this path (the
                # assignment was commented out), so these nodes end up with
                # rank but without rank_FR -- confirm this is intended.
            T[dad].add_child(T[son])
    return T
# Build one Tree node per taxid found in the file at `filepath` and attach
# every son to its dad. Relies on names bound before this point: filepath,
# T (taxid -> node), ATTR (taxid -> collected names), RANKS, Tree and nexml.
with open(filepath) as fp:
    first_line = fp.readline()  ## remove the 1 | 1 edge
    for line in fp:
        fields = line.split("|")
        son = fields[0].replace("\t", "")
        dad = fields[1].replace("\t", "")
        rank = fields[2].replace("\t", "")  # rank of the son, not of the dad
        if dad not in T:
            T[dad] = Tree()
            T[dad].name = dad
            # No rank is set here: `rank` on this line belongs to the son.
            # The dad gets its own rank when its own line is reached.
            T[dad].taxid = dad
            T[dad].sci_name = ATTR[dad].sci_name
            T[dad].common_name = ATTR[dad].common_name
            T[dad].synonym = ATTR[dad].synonym
            T[dad].authority = ATTR[dad].authority
            T[dad].common_name_FR = ATTR[dad].common_name_FR
        if son not in T:
            T[son] = Tree()
            T[son].name = son
            T[son].rank = rank
            T[son].rank_FR = RANKS[rank]
            T[son].taxid = son
            T[son].sci_name = ATTR[son].sci_name
            T[son].common_name = ATTR[son].common_name
            T[son].synonym = ATTR[son].synonym
            T[son].authority = ATTR[son].authority
            T[son].common_name_FR = ATTR[son].common_name_FR
        elif not hasattr(T[son], 'rank'):
            # Node was created earlier as somebody's dad: add its rank now.
            # A node whose own line is the skipped first line never gets one.
            T[son].rank = rank
            T[son].rank_FR = RANKS[rank]
        T[dad].add_child(T[son])

# NOTE(review): placed after the loop; the original nesting of this statement
# cannot be recovered from the flattened source -- confirm.
nexml_project = nexml.Nexml()