Ejemplo n.º 1
0
def cleanGhr():
	"""Run html2trec over the "ghr" data directory.

	Each field spec is ``[field_name, [selector, ...]]`` where a selector is
	``[kind, value]`` or ``[kind, value, index]``.
	NOTE(review): the third selector element looks like the occurrence index
	of the matching element ("0" = first, "1" = second) -- confirm against
	html2trec.
	"""
	title = ["title", [["id", "tab1"]]]
	# Both dates share the "datestamp" class and differ only in the index.
	review_date = ["reviewdate", [["class", "datestamp", "0"]]]
	publish_date = ["publishdate", [["class", "datestamp", "1"]]]
	description = ["description", [["class", "allcontent", "0"]]]

	# "mandatory" list, then "oneof" list; the numbers presumably index into
	# the field list below (title and description here) -- TODO confirm.
	mandatory = [0, 3]
	one_of = []

	html2trec(
		"../../data/ghr-data/",
		"ghr",
		"filelist",
		[title, review_date, publish_date, description],
		(mandatory, one_of),
	)
Ejemplo n.º 2
0
def cleanMadisons():
	"""Run html2trec over the "madisons" data directory.

	Each field spec is ``[field_name, [selector, ...]]`` where a selector is
	``[kind, value, index]``.
	NOTE(review): the third selector element looks like the occurrence index
	of the matching element -- confirm against html2trec.
	"""
	title = ["title", [["class", "headerpaneopen", "0"]]]
	create_date = ["createdate", [["class", "createdate", "0"]]]
	modify_date = ["modifydate", [["class", "modifydate", "0"]]]
	description = ["description", [["class", "mpowerwrapper", "0"]]]

	# "mandatory" list, then "oneof" list; the numbers presumably index into
	# the field list below (title and description here) -- TODO confirm.
	mandatory = [0, 3]
	one_of = []

	html2trec(
		"../../data/madisons-data/",
		"madisons",
		"filelist",
		[title, create_date, modify_date, description],
		(mandatory, one_of),
	)
Ejemplo n.º 3
0
def cleanGard():
	"""Run html2trec over the "gard" data directory.

	Each field spec is ``[field_name, [selector, ...]]``; the description
	spec carries an extra third element holding a boilerplate sentence.
	NOTE(review): that extra element is presumably the site's default
	description text, passed so html2trec can recognise it -- confirm how
	html2trec uses it.
	"""
	boilerplate = 'These Web pages are updated as the Genetic and Rare Diseases Information Center receives questions and as new information becomes available.'

	title = ["title", [["id", "lblTitle"]]]
	synonyms = ["synonyms", [["id", "divSynonymColumn"]]]
	description = ["description", [["id", "lblDescriptionQuestion"]], boilerplate]
	references = ["references", [["id", "divReferences"]]]

	# "mandatory" list, then "oneof" list; the numbers presumably index into
	# the field list below (title mandatory; synonyms/description as the
	# "oneof" group) -- TODO confirm.
	mandatory = [0]
	one_of = [1, 2]

	html2trec(
		"../../data/gard-data/",
		"gard",
		"filelist",
		[title, synonyms, description, references],
		(mandatory, one_of),
	)
Ejemplo n.º 4
0
def cleanHon():
	"""Run html2trec over the "hon" data directory.

	Each field spec is ``[field_name, [selector, ...]]`` where a selector is
	``[kind, value, index]``. The title spec lists two selectors.
	NOTE(review): the two title selectors are presumably applied in sequence
	(class match, then an xpath step within it) -- confirm against html2trec.
	"""
	title_selectors = [
		["class", "input", "5"],
		["xpath", "option", "0"],
	]
	title = ["title", title_selectors]
	description = ["description", [["class", "txtbeige", "0"]]]

	# "mandatory" list, then "oneof" list; the numbers presumably index into
	# the field list below (both fields here) -- TODO confirm.
	mandatory = [0, 1]
	one_of = []

	html2trec(
		"../../data/hon-data/",
		"hon",
		"filelist",
		[title, description],
		(mandatory, one_of),
	)