コード例 #1
0
ファイル: count.py プロジェクト: virgile-tellier/RESOLVED2
          encoding="utf-8")

# Tally how often each drug name occurs. For every row yielded by `file`,
# the value at index 3 is split on ";" and only its first piece is counted.
counter = {}

for line in file.iter():
    drug = line[3].split(";")[0]
    if not drug:
        # Rows whose first drug piece is empty are not counted.
        continue
    counter[drug] = counter.get(drug, 0) + 1

print(counter)
print(len(counter))

# Stable ascending sort on the count, then reversed so the highest counts
# come first (ties therefore appear in reverse insertion order).
s = sorted(counter.items(), key=lambda pair: pair[1])
s.reverse()

print(s)

# Write the sorted (drug, count) pairs as one tab-separated line each.
save_file = FM("../PUBMED_DATA/drug_counter", extension=".txt")

# Use the file pointer as a context manager so it is closed even when a
# write raises part-way through the loop.
with save_file.get_filepointer() as fp:
    for name, count in s:
        fp.write("\t".join([name, str(count)]) + '\n')

# with open("../PUBMED_DATA/drug_counter.latest.txt", 'w', encoding = "utf8") as fp:
# 	for i in s:
# 		fp.write(("\t".join([i[0],str(i[1])])+'\n'))
コード例 #2
0
            compacted_fda_dict[new_key]["drug"].lower().split(";"),
            fda_dict[key]["drug"].split(";"))
        # Each statement below rebuilds one field of the compacted record by
        # passing two ";"-split lists to setify_lists: the value already
        # stored under new_key and the value of the current fda_dict entry.
        # NOTE(review): for "industry" and "indication" only the stored side
        # is lowercased; the incoming fda_dict side is passed as-is — confirm
        # that setify_lists normalises case, otherwise variants may survive.
        compacted_fda_dict[new_key]["industry"] = setify_lists(
            compacted_fda_dict[new_key]["industry"].lower().split(";"),
            fda_dict[key]["industry"].split(";"))
        # Dates are combined without any case change on either side.
        compacted_fda_dict[new_key]["date"] = setify_lists(
            compacted_fda_dict[new_key]["date"].split(";"),
            fda_dict[key]["date"].split(";"))
        compacted_fda_dict[new_key]["indication"] = setify_lists(
            compacted_fda_dict[new_key]["indication"].lower().split(";"),
            fda_dict[key]["indication"].split(";"))

# fda["drug"], fda["COMMON_DRUG_BANK_ALIAS"], fda["industry"], fda["date"], fda["min_date"]

# File_Maker instance for the pooled FDA table.
pool_file = FM(
    "../FDA/FDA_by_drugbank2606",
    extension=".txt",
    format="tsv",
    olddata_dir="../OLD_DATA/FDA",
)

# Collected output rows; the first row is the tab-separated column header.
pool_data = [
    "FDA_APPROUVED_DRUG	COMMON_DRUG_BANK_ALIAS	INDUSTRY	INDICATION	DATES_OF_APPROVAL	FIRST_DATE_OF_APPROVAL"
]


def try_parsing_date(text):
    for fmt in ("%B %d %Y", "%b. %d %Y", "%b %Y", "%B %Y", "%B %d%Y", "%B %Y",
                "%B%d %Y", "%B%d%Y"):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
コード例 #3
0
'''
Cleaning of the FDA database.
Specifically, extra trailing spaces and tabs.
'''

from utils import File_Reader as FR
from utils import File_Maker as FM

# Pattern matching runs of spaces at the start or at the end of a string;
# it is handed to the reader as its strip_chars_pattern.
charstrip = "^[ ]+|[ ]+$"
fda_file = FR(
    "../FDA/FDA_DRUG_DATABASE_cured.txt",
    encoding="utf-16",
    sep="\t",
    strip_chars_pattern=charstrip,
)

# Keep only the first five items of every row the reader yields.
fda = [row[:5] for row in fda_file.iter()]

print(fda)

# Save the truncated rows through File_Maker.
file = FM(
    "../FDA/FDA_DRUG_DATABASE_cured_cleaned",
    data_stream=fda,
    extension=".txt",
)
file.save()

# with open("../FDA/FDA_DRUG_DATABASE_cured_cleaned.latest.txt", 'w', encoding = 'utf-8') as fp:
# 	for i in fda:
# 		fp.write("\t".join(i)+'\n')
コード例 #4
0
			match[pmid][2].append(names)
			match[pmid] = (year, title, match[pmid][2], description)
		else:
			match[pmid] = (year, title, match[pmid][2] , description)
		
	# tf.step()


# Number of entries in match whose value equals the all-empty record tuple.
missing = sum(1 for record in match.values() if record == ("", "", [], ""))



# Open the output file and, for every entry of match that has a non-empty
# v[2], flatten and prune the names stored there. The pruning mutates the
# lists held in match in place.
# NOTE(review): nothing is written to fp in the lines visible here — the
# remainder of this block is presumably cut off; confirm against the full file.
pubmedNdrugs = FM("../PUBMED_DATA/pubmed2606_with_drugs",
	extension = ".txt", olddata_dir = "../OLD_DATA/PUBMED_DATA/")
with pubmedNdrugs.get_filepointer() as fp:
	for k,v in match.items():
		if v[2]:
			# NOTE(review): res is not used in the visible lines.
			res = ""
			# Replace each element of v[2] (an iterable of strings) by its
			# ";"-joined form, in place.
			for i in range(len(v[2])):
				v[2][i] = ";".join(v[2][i])

			# Collect every name i that should be dropped in favour of a
			# longer name j containing it.
			topop = []
			for i in v[2]:
				for j in v[2]:
					# " " not in j.replace(i, "")  and not " "+i+" " in v[1]
					# i is dropped when: it is a strictly shorter substring of j,
					# it is not already scheduled, what remains of j after
					# removing i contains a "-", and i occurs at most once in
					# v[1] (the title slot of the record tuple).
					if i in j and len(i) < len(j) and i not in topop  and "-" in j.replace(i, "") and not v[1].count(i)>1:
						topop.append(i)
			# Remove the first occurrence of each scheduled name.
			for to in topop:
				v[2].pop(v[2].index(to))
コード例 #5
0
            pool.append(item)

# Build final result from pool.
# Repeatedly take one group out of pool and grow it with every remaining
# group that shares an element with it; each grown group is appended to
# final_ref as a sorted list.
# NOTE(review): assumes pool is a list of lists (pop/append/in are used on
# both levels) — confirm against the code that fills pool.
while pool:
    # Seed group: its items are the work list of elements still to examine.
    elems = pool.pop()
    res = []
    while elems:
        searching = elems.pop()
        # Each distinct element is expanded only once.
        if searching not in res:
            res.append(searching)
            # pool is rebound inside this loop, but the for statement keeps
            # iterating over the list object it started with.
            for p in pool:
                if searching in p:
                    # Queue every member of the matching group for examination.
                    for i in p:
                        elems.append(i)
                    # Drop all groups equal to p from pool (new list object).
                    indices = [i for i, x in enumerate(pool) if x == p]
                    pool = [x for i, x in enumerate(pool) if i not in indices]
    final_ref.append(sorted(res))

# One ";"-joined line per group in final_ref, de-duplicated and sorted.
csv = sorted({";".join(group) for group in final_ref})

# Write every line of csv to the full drug list file, one per line.
full_drug_list = FM("../DRUG_LISTS/full_drug_list", extension=".txt")

# The with-block closes the file pointer on exit, so no explicit close()
# is needed inside it.
with full_drug_list.get_filepointer() as fp:
    for drug in csv:
        fp.write(drug + "\n")
コード例 #6
0
# Parse xml file.
print("parsing xml")
tree = ET.parse('../DRUGBANK/drugbank_db_schema.xml')
root = tree.getroot()

res = []

def tree_builder(node, depth):
	"""Record the tag of every descendant of ``node`` in the global ``res``.

	The children of ``node`` are visited depth-first, parents before their
	own children. Each visited tag is printed and appended to ``res``
	prefixed with ``depth`` tab characters; every nesting level below adds
	one more tab.
	"""
	indent = '\t' * depth
	for child in node:
		tag = child.tag
		print(tag)
		res.append(indent + tag)
		# Descend before moving on to the next sibling.
		tree_builder(child, depth + 1)

tree_builder(root, 0)

# Write the collected, tab-indented tag lines to the tree_tagging file.
tree_tagging = FM("../DRUGBANK/tree_tagging", extension = ".txt")

# The with-block closes the file pointer on exit, so no explicit close()
# is needed inside it.
with tree_tagging.get_filepointer() as fp:
	for t in res:
		fp.write(t+'\n')

# def tree_builder(node):
# 	children = []
# 	for elem in node:
# 		children.append(elem.tag)
# 		next_elem = tree_builder(elem)
# 		if next_elem:
# 			children.append(next_elem)

# 	return children
コード例 #7
0
from utils import File_Maker as FM

# Manual check of File_Maker: print what it reports about the target file,
# then write a few short strings through its file pointer.
file = FM("GRAPH/test78324264.txt", replace_old=True, version_control=False)

print(file.get_filename())
print(file.get_extension())
print(file.get_savedir())
print(file.original_dir)

# Use the file pointer as a context manager so it is closed even when one
# of the writes raises.
with file.get_filepointer() as fp:
    for chunk in ("adzaf", "sth", "rthter", "rthet"):
        fp.write(chunk)
コード例 #8
0
        d.extend(drugs[index].split(";"))
    drugs.append(";".join(sorted(list(set(d)))))

# Remove each entry of todel from drugs, but only when its first occurrence
# sits below the index threshold maxdel - len(todel).
for stale in todel:
    if stale not in drugs:
        continue
    position = drugs.index(stale)
    if position < maxdel - len(todel):
        del drugs[position]

# Put every entry in canonical form, in place: its ";"-separated names sorted.
for position, entry in enumerate(drugs):
    drugs[position] = ";".join(sorted(entry.split(";")))

# De-duplicate the canonical entries and order them.
drugs = sorted(set(drugs))

# File_Maker instance for the curated, cleaned drug list.
pool_file = FM(
    "../DRUG_LISTS/drug_list_2606_curated_cleaned",
    extension=".txt",
    format="tsv",
    olddata_dir="../OLD_DATA/DRUG_LISTS",
    encoding="utf-8",
)

# Hand over the rows, save them, then close the File_Maker instance.
pool_file.set_datastream(drugs)
pool_file.save()
pool_file.close()

# annotations = annotation_file.readlines()

# header = annotations.pop(0)
# drug_dict = {}
# for line in annotations:
# 	drug_dict[line[0]] = {}
# 	for i in range(len(header)):
# 		val = ""
コード例 #9
0
                found = True
        if found:
            match[pmid] = (year, title, match[pmid][2] + names, description)

    # tf.step()

print(match[28980060])

# Count the entries of match whose value equals the all-empty record tuple
# and report the total.
missing = sum(1 for record in match.values() if record == ("", "", [], ""))

print(missing)

# Write one tab-separated line per entry of match: the key, v[0], v[1] and
# the first element of v[2] (or an empty string when v[2] is empty).
pubmedNdrugs = FM("../PUBMED_DATA/pubmedNdrugs", extension=".txt")

# The with-block closes the file pointer on exit, so no explicit close()
# is needed inside it.
with pubmedNdrugs.get_filepointer() as fp:
    for k, v in match.items():
        first_drug = v[2][0] if v[2] else ""
        fp.write("\t".join([str(k), v[0], v[1], first_drug]) + "\n")
# with open("../PUBMED_DATA/pubmedNdrugs.latest.txt", "w", encoding = "utf-8") as fp:
# 	for k,v in match.items():
# 		if v[2]:
# 			fp.write("\t".join([str(k),v[0], v[1],v[2][0]])+"\n")
# 		else:
# 			fp.write("\t".join([str(k),v[0], v[1],""])+"\n")

noDrug = FM("../PUBMED_DATA/pubmed_data_2606_noDRUG", extension=".txt")