def corpusMenuInitializer(menu, prevMenu):
    """
    Prepare the corpus installation menu (BioNLP'09/'11 and DDI'11 corpora).

    Sets the menu text, makes sure the CORPUS_DIR local setting has a value
    and rebuilds the handler list of the "i" (install) option from the
    option toggles and from the corpora that checkCorpusInstall reports as
    missing.

    menu -- the menu being initialized; its text, default option, option
            toggles and the "i" option's handler/handlerArgs are updated
    prevMenu -- the previously shown menu; installation checks are only
            made when it differs from 'menu'
    """
    menu.text = """ The corpora are used for training new models and testing existing models. The corpora installable here are from the two BioNLP Shared Tasks (BioNLP'09 and BioNLP'11) on Event Extraction (organized by University of Tokyo), and the First Challenge Task: Drug-Drug Interaction Extraction (DDI'11, organized by Universidad Carlos III de Madrid). The corpora are downloaded as interaction XML files, generated from the original Shared Task files. If you need to convert the corpora from the original files, you can use the convertBioNLP.py and convertDDI.py programs located at Utils/Convert. It is also recommended to download the official BioNLP Shared Task evaluator programs, which will be used by TEES when training or testing on those corpora. """
    # Mark "skip" as default option, this will be re-marked as install
    # as soon as something is queued for installation
    menu.setDefault("s")

    installDir = menu.system.defaultInstallDir
    corpusInstallPath = os.path.join(installDir, "corpora")
    corpusDownloadPath = os.path.join(installDir, "corpora/download")
    evaluatorInstallPath = os.path.join(installDir, "tools/evaluators")
    evaluatorDownloadPath = os.path.join(installDir, "tools/download")

    # If the CORPUS_DIR setting is missing or empty, define it now
    if getattr(Settings, "CORPUS_DIR", None) is None:
        Settings.setLocal("CORPUS_DIR", corpusInstallPath)
        print >> sys.stderr

    handlers = []
    handlerArgs = []
    redownload = menu.optDict["1"].toggle

    def isMissing(*checkArgs):
        # Only probe the installation when arriving from a different menu
        return menu != prevMenu and not checkCorpusInstall(*checkArgs)

    def schedule(optionKey, handler, args):
        # Make "install" the default, tick the option and queue its handler
        menu.setDefault("i")
        menu.optDict[optionKey].toggle = True
        handlers.append(handler)
        handlerArgs.append(args)

    # BioNLP'11: one package covers all corpora, so a single missing corpus
    # (or the toggle) queues exactly one download
    bioNLP11Corpora = ["GE", "EPI", "ID", "BB", "BI", "CO", "REL", "REN"]
    if menu.optDict["2"].toggle or any(isMissing(corpus) for corpus in bioNLP11Corpora):
        schedule("2", convertBioNLP.installPreconverted,
                 ["BIONLP_11_CORPORA", corpusInstallPath, corpusDownloadPath, redownload, True])
    # BioNLP'09
    if menu.optDict["3"].toggle or isMissing("GE09"):
        schedule("3", convertBioNLP.installPreconverted,
                 ["BIONLP_09_CORPUS", corpusInstallPath, corpusDownloadPath, redownload, True])
    # DDI'11
    if menu.optDict["4"].toggle or isMissing("DDI", ("-train.xml", "-devel.xml")):
        schedule("4", convertBioNLP.installPreconverted,
                 ["DDI_11_CORPUS", corpusInstallPath, corpusDownloadPath, redownload, True])

    # A handler for installing BioNLP'11 evaluators
    if menu.optDict["5"].toggle or (
            menu != prevMenu and getattr(Settings, "BIONLP_EVALUATOR_DIR", None) is None):
        schedule("5", convertBioNLP.installEvaluators,
                 [evaluatorInstallPath, evaluatorDownloadPath, redownload, True])

    # Add the handlers to install option
    menu.optDict["i"].handler = handlers
    menu.optDict["i"].handlerArgs = handlerArgs
def installPreconverted(url="BIONLP_CORPORA", destPath=None, downloadPath=None, redownload=False, updateLocalSettings=False):
    """
    Download and extract a preconverted corpus package and record CORPUS_DIR.

    url -- key into Settings.URL selecting the package
    destPath -- destination passed to downloadAndExtract and stored as
            CORPUS_DIR; defaults to <Settings.DATAPATH>/corpora
    downloadPath -- download location passed to downloadAndExtract;
            defaults to <Settings.DATAPATH>/corpora/download
    redownload -- forwarded to Utils.Download.downloadAndExtract
    updateLocalSettings -- forwarded to Settings.setLocal
    """
    print >> sys.stderr, "---------------", "Downloading preconverted corpora", "---------------"
    if destPath is None:
        destPath = os.path.join(Settings.DATAPATH, "corpora")
    if downloadPath is None:
        downloadPath = os.path.join(Settings.DATAPATH, "corpora/download")
    Utils.Download.downloadAndExtract(Settings.URL[url], destPath, downloadPath, redownload=redownload)
    Settings.setLocal("CORPUS_DIR", destPath, updateLocalSettings)
def install(destPath=None, redownload=False, updateLocalSettings=True):
    """
    Download the SemEval 2010 Task 8 corpus and record its location.

    Does nothing when Settings already has a SE10T8_CORPUS attribute.

    destPath -- target file path; defaults to
            <Settings.DATAPATH>/resources/SemEval2010_task8_all_data.zip
    redownload -- passed to Utils.Download.download as 'clear'
    updateLocalSettings -- forwarded to Settings.setLocal
    """
    # NOTE(review): this early exit also applies when redownload is True,
    # so an existing setting can't be refreshed through this function --
    # confirm that is intended
    if hasattr(Settings, "SE10T8_CORPUS"):  # Already installed
        return
    print >> sys.stderr, "---------------", "Downloading the SemEval 2010 Task 8 corpus", "---------------"
    if destPath is None:
        destPath = os.path.join(Settings.DATAPATH, "resources/SemEval2010_task8_all_data.zip")
    Utils.Download.download(Settings.URL["SE10T8_CORPUS"], destPath, addName=False, clear=redownload)
    Settings.setLocal("SE10T8_CORPUS", destPath, updateLocalSettings)
def installBBData(destPath=None, downloadPath=None, redownload=False, updateLocalSettings=False):
    """
    Download and extract the TEES resource files used for the BB task.

    destPath -- destination passed to downloadAndExtract and stored as
            TEES_RESOURCES; defaults to <Settings.DATAPATH>/resources
    downloadPath -- download location passed to downloadAndExtract;
            defaults to <Settings.DATAPATH>/resources/download
    redownload -- forwarded to Utils.Download.downloadAndExtract
    updateLocalSettings -- forwarded to Settings.setLocal
    """
    print >> sys.stderr, "---------------", "Downloading TEES data files for BB", "---------------"
    print >> sys.stderr, "Bacteria tokens derived from LPSN (http://www.bacterio.cict.fr/)"
    if destPath is None:
        destPath = os.path.join(Settings.DATAPATH, "resources")
    if downloadPath is None:
        downloadPath = os.path.join(Settings.DATAPATH, "resources/download")
    Utils.Download.downloadAndExtract(Settings.URL["TEES_RESOURCES"], destPath, downloadPath, redownload=redownload)
    Settings.setLocal("TEES_RESOURCES", destPath, updateLocalSettings)
def installEvaluators(destPath=None, downloadPath=None, redownload=False, updateLocalSettings=False):
    """
    Download and extract the BioNLP Shared Task evaluator programs.

    Records two local settings: BIONLP_EVALUATOR_DIR (destPath) and
    BIONLP_EVALUATOR_GOLD_DIR (the "gold" subdirectory of destPath).

    destPath -- destination passed to downloadAndExtract; defaults to
            <Settings.DATAPATH>/tools/evaluators
    downloadPath -- download location passed to downloadAndExtract;
            defaults to <Settings.DATAPATH>/tools/download
    redownload -- forwarded to Utils.Download.downloadAndExtract
    updateLocalSettings -- forwarded to both Settings.setLocal calls
    """
    print >> sys.stderr, "---------------", "Downloading BioNLP Shared Task evaluators", "---------------"
    if destPath is None:
        destPath = os.path.join(Settings.DATAPATH, "tools/evaluators")
    if downloadPath is None:
        downloadPath = os.path.join(Settings.DATAPATH, "tools/download")
    Utils.Download.downloadAndExtract(Settings.URL["BIONLP11_EVALUATORS"], destPath, downloadPath, redownload=redownload)
    Settings.setLocal("BIONLP_EVALUATOR_DIR", destPath, updateLocalSettings)
    Settings.setLocal("BIONLP_EVALUATOR_GOLD_DIR", os.path.join(destPath, "gold"), updateLocalSettings)
def finalizeInstall(programs, testCommand=None, programDir=None, settings=None, updateLocalSettings=False):
    """
    Check a set of installed programs and record their local settings.

    The settings are always passed to Settings.setLocal in sorted key
    order; when checkPrograms reports a failure a warning is printed and
    setLocal is called with False instead of 'updateLocalSettings'.

    programs -- forwarded to checkPrograms
    testCommand -- forwarded to checkPrograms; an empty dict when omitted
    programDir -- forwarded to checkPrograms
    settings -- dict of setting name -> value; empty when omitted
    updateLocalSettings -- third argument for Settings.setLocal when the
            program check succeeds
    """
    # Fresh dicts per call instead of shared mutable default arguments
    if testCommand is None:
        testCommand = {}
    if settings is None:
        settings = {}
    if checkPrograms(programs, testCommand, programDir):
        setVariable = updateLocalSettings
    else:
        print >> sys.stderr, "All programs may not have been installed correctly"
        print >> sys.stderr, "Do not use the following settings if not sure:"
        setVariable = False
    for key in sorted(settings):
        Settings.setLocal(key, settings[key], setVariable)
def installRENData(destPath=None, downloadPath=None, redownload=False, updateLocalSettings=False):
    """
    Download and extract the TEES resource files used for the REN task.

    destPath -- destination passed to downloadAndExtract and stored as
            TEES_RESOURCES; defaults to <Settings.DATAPATH>/resources
    downloadPath -- download location passed to downloadAndExtract;
            defaults to <Settings.DATAPATH>/resources/download
    redownload -- forwarded to Utils.Download.downloadAndExtract
    updateLocalSettings -- forwarded to Settings.setLocal
    """
    print >> sys.stderr, "---------------", "Downloading TEES data files for REN", "---------------"
    print >> sys.stderr, "These files are derived from UniProt bacsu and SubtiWiki"
    if destPath is None:
        destPath = os.path.join(Settings.DATAPATH, "resources")
    if downloadPath is None:
        downloadPath = os.path.join(Settings.DATAPATH, "resources/download")
    Utils.Download.downloadAndExtract(Settings.URL["TEES_RESOURCES"], destPath, downloadPath, redownload=redownload)
    Settings.setLocal("TEES_RESOURCES", destPath, updateLocalSettings)
def installDrugBank(destPath=None, downloadPath=None, redownload=False, updateLocalSettings=False):
    """
    Download and extract the Drug Bank XML file and record its path.

    destPath -- destination passed to downloadAndExtract; defaults to
            <Settings.DATAPATH>/resources
    downloadPath -- download location passed to downloadAndExtract;
            defaults to <Settings.DATAPATH>/resources/download
    redownload -- forwarded to Utils.Download.downloadAndExtract
    updateLocalSettings -- forwarded to Settings.setLocal

    Raises AssertionError unless downloadAndExtract returns exactly one
    file name.
    """
    print >> sys.stderr, "---------------", "Downloading Drug Bank XML", "---------------"
    print >> sys.stderr, "See http://www.drugbank.ca/downloads for conditions of use"
    if destPath is None:
        destPath = os.path.join(Settings.DATAPATH, "resources")
    if downloadPath is None:
        downloadPath = os.path.join(Settings.DATAPATH, "resources/download")
    filenames = Utils.Download.downloadAndExtract(Settings.URL["DRUG_BANK_XML"], destPath, downloadPath, redownload=redownload)
    # The setting points at a single file, so the package must hold just one
    assert len(filenames) == 1
    Settings.setLocal("DRUG_BANK_XML", os.path.join(destPath, filenames[0]), updateLocalSettings)
def installBBData(destPath=None, downloadPath=None, redownload=False, updateLocalSettings=False):
    """
    Download and extract the TEES resource files used for the BB task.

    destPath -- destination passed to downloadAndExtract and stored as
            TEES_RESOURCES; defaults to <Settings.DATAPATH>/resources
    downloadPath -- download location passed to downloadAndExtract;
            defaults to <Settings.DATAPATH>/resources/download
    redownload -- forwarded to Utils.Download.downloadAndExtract
    updateLocalSettings -- forwarded to Settings.setLocal
    """
    print >> sys.stderr, "---------------", "Downloading TEES data files for BB", "---------------"
    print >> sys.stderr, "Bacteria tokens derived from LPSN (http://www.bacterio.cict.fr/)"
    if destPath is None:
        destPath = os.path.join(Settings.DATAPATH, "resources")
    if downloadPath is None:
        downloadPath = os.path.join(Settings.DATAPATH, "resources/download")
    Utils.Download.downloadAndExtract(Settings.URL["TEES_RESOURCES"], destPath, downloadPath, redownload=redownload)
    Settings.setLocal("TEES_RESOURCES", destPath, updateLocalSettings)
def finalizeInstall(programs, testCommand=None, programDir=None, settings=None, updateLocalSettings=False):
    """
    Check a set of installed programs and record their local settings.

    Every setting with a non-None value is passed to Settings.setLocal in
    sorted key order; None values only produce a warning. When
    checkPrograms reports a failure, setLocal is called with False instead
    of 'updateLocalSettings' and an exception is raised afterwards.

    programs -- forwarded to checkPrograms; also joined with ", " into the
            error message, so it has to be an iterable of strings
    testCommand -- forwarded to checkPrograms; an empty dict when omitted
    programDir -- forwarded to checkPrograms
    settings -- dict of setting name -> value; empty when omitted
    updateLocalSettings -- third argument for Settings.setLocal when the
            program check succeeds

    Raises Exception if checkPrograms returned a false value.
    """
    # Fresh dicts per call instead of shared mutable default arguments
    if testCommand is None:
        testCommand = {}
    if settings is None:
        settings = {}
    installOK = checkPrograms(programs, testCommand, programDir)
    if installOK:
        setVariable = updateLocalSettings
    else:
        print >> sys.stderr, "All programs may not have been installed correctly"
        print >> sys.stderr, "Do not use the following settings if not sure:"
        setVariable = False
    for key in sorted(settings):
        if settings[key] is not None:
            Settings.setLocal(key, settings[key], setVariable)
        else:
            print >> sys.stderr, "Warning, local setting " + str(key) + " is undefined"
    # Fail only after the settings have been listed above
    if not installOK:
        raise Exception("Error installing programs: " + ", ".join(programs))
def finalizeInstall(programs, testCommand=None, programDir=None, settings=None, updateLocalSettings=False):
    """
    Check a set of installed programs and record their local settings.

    Every setting with a non-None value is passed to Settings.setLocal in
    sorted key order; None values only produce a warning. When
    checkPrograms reports a failure, setLocal is called with False instead
    of 'updateLocalSettings' and an exception is raised afterwards.

    programs -- forwarded to checkPrograms; also joined with ", " into the
            error message, so it has to be an iterable of strings
    testCommand -- forwarded to checkPrograms; an empty dict when omitted
    programDir -- forwarded to checkPrograms
    settings -- dict of setting name -> value; empty when omitted
    updateLocalSettings -- third argument for Settings.setLocal when the
            program check succeeds

    Raises Exception if checkPrograms returned a false value.
    """
    # Fresh dicts per call instead of shared mutable default arguments
    if testCommand is None:
        testCommand = {}
    if settings is None:
        settings = {}
    installOK = checkPrograms(programs, testCommand, programDir)
    if installOK:
        setVariable = updateLocalSettings
    else:
        print >> sys.stderr, "All programs may not have been installed correctly"
        print >> sys.stderr, "Do not use the following settings if not sure:"
        setVariable = False
    for key in sorted(settings):
        if settings[key] is not None:
            Settings.setLocal(key, settings[key], setVariable)
        else:
            print >> sys.stderr, "Warning, local setting " + str(key) + " is undefined"
    # Fail only after the settings have been listed above
    if not installOK:
        raise Exception("Error installing programs: " + ", ".join(programs))
def setKey(key, value):
    """
    Validate a value for a known settings key and store it with setLocal.

    The value is rejected (a message is printed to stderr and nothing is
    stored) when it is None, when the key is not in Settings.KEY_TYPE, or,
    for keys whose type is "file", when the path does not exist, is not a
    regular file, or does not match the key's expected MD5 (if one is
    defined).

    key -- name of the setting
    value -- value to store; for "file" keys a path to an existing file
    """
    if value is None:
        print >> sys.stderr, "No value defined for key '" + str(key) + "'"
        return
    if key not in Settings.KEY_TYPE:
        print >> sys.stderr, "Unknown key '" + str(key) + "'"
        return
    keyInfo = Settings.KEY_TYPE[key]
    if keyInfo.get("type") == "file":
        fullPath = os.path.abspath(value)
        if not os.path.exists(fullPath):
            print >> sys.stderr, "No file at '" + fullPath + "'"
            return
        if not os.path.isfile(fullPath):
            print >> sys.stderr, "'" + fullPath + "' is not a file"
            return
        expectedMD5 = keyInfo.get("md5")
        if expectedMD5 is not None:
            print >> sys.stderr, "Determining MD5 for '" + fullPath + "'"
            md5 = FileUtils.getFileMd5(fullPath)
            if md5 != expectedMD5:
                print >> sys.stderr, "MD5 '" + md5 + "' does not match expected value '" + expectedMD5 + "'"
                return
    # The value is stored as given, not as the absolute path checked above
    Settings.setLocal(key, value)
def corpusMenuInitializer(menu, prevMenu):
    """
    Prepare the corpus installation menu (2009, 2011 and 2013 corpora).

    Sets the menu text, makes sure the CORPUS_DIR local setting has a value
    and rebuilds the handler list of the "i" (install) option from the
    option toggles and from the corpora that checkCorpusInstall reports as
    missing.

    menu -- the menu being initialized; its text, default option, option
            toggles and the "i" option's handler/handlerArgs are updated
    prevMenu -- the previously shown menu; installation checks are only
            made when it differs from 'menu'
    """
    menu.text = " The corpora are used for training new models and testing existing models. The corpora installable here are from the three BioNLP Shared Tasks (2009, 2011 and 2013) on Event Extraction (organized by University of Tokyo), and the two Drug-Drug Interaction Extraction tasks (DDI'11 and 13, organized by Universidad Carlos III de Madrid). The 2009 and 2011 corpora are downloaded as interaction XML files, generated from the original Shared Task files. If you need to convert the corpora from the original files, you can use the convertBioNLP.py, convertDDI.py and convertDDI13.py programs located at Utils/Convert. The 2013 corpora will be converted to interaction XML from the official corpus files, downloaded automatically from the task websites. Installing the BioNLP'13 corpora will take about 10 minutes. It is also recommended to download the official BioNLP Shared Task evaluator programs, which will be used by TEES when training or testing on those corpora. \n"
    # Mark "skip" as default option, this will be re-marked as install
    # as soon as something is queued for installation
    menu.setDefault("s")

    installDir = menu.system.defaultInstallDir
    corpusInstallPath = os.path.join(installDir, "corpora")
    corpusDownloadPath = os.path.join(installDir, "corpora/download")
    evaluatorInstallPath = os.path.join(installDir, "tools/evaluators")
    evaluatorDownloadPath = os.path.join(installDir, "tools/download")

    # If the CORPUS_DIR setting is missing or empty, define it now
    if getattr(Settings, "CORPUS_DIR", None) is None:
        Settings.setLocal("CORPUS_DIR", corpusInstallPath)
        print >> sys.stderr

    handlers = []
    handlerArgs = []
    redownload = menu.optDict["1"].toggle

    def isMissing(*checkArgs):
        # Only probe the installation when arriving from a different menu
        return menu != prevMenu and not checkCorpusInstall(*checkArgs)

    def schedule(optionKey, handler, args, toggle=True):
        # Make "install" the default, set the option's toggle and queue
        # its handler
        menu.setDefault("i")
        menu.optDict[optionKey].toggle = toggle
        handlers.append(handler)
        handlerArgs.append(args)

    # 2009-2011 corpora: each package is queued at most once, however many
    # of its corpora are missing
    bioNLP11Corpora = ["GE11", "EPI11", "ID11", "BB11", "BI11", "CO11", "REL11", "REN11"]
    if menu.optDict["2"].toggle or any(isMissing(corpus) for corpus in bioNLP11Corpora):
        schedule("2", convertBioNLP.installPreconverted,
                 ["BIONLP_11_CORPORA", corpusInstallPath, corpusDownloadPath, redownload, True])
    if menu.optDict["3"].toggle or isMissing("GE09"):
        schedule("3", convertBioNLP.installPreconverted,
                 ["BIONLP_09_CORPUS", corpusInstallPath, corpusDownloadPath, redownload, True])
    if menu.optDict["4"].toggle or isMissing("DDI11", ("-train.xml", "-devel.xml")):
        schedule("4", convertBioNLP.installPreconverted,
                 ["DDI_11_CORPUS", corpusInstallPath, corpusDownloadPath, redownload, True])

    # 2013 corpora: converted from the source files, all in a single call
    bioNLP13Corpora = ["GE13", "CG13", "PC13", "GRO13", "GRN13", "BB13T2", "BB13T3"]
    if menu.optDict["5"].toggle or any(isMissing(corpus) for corpus in bioNLP13Corpora):
        schedule("5", convertBioNLP.convert,
                 [bioNLP13Corpora, corpusInstallPath, corpusDownloadPath, redownload, False])
    # NOTE(review): the DDI'13 toggle is reset to False although its handler
    # is still queued (the original line carried a "#True" remark) --
    # confirm this is intended
    if menu.optDict["6"].toggle or isMissing("DDI13", ("-train.xml",)):
        schedule("6", convertDDI13.convertDDI13,
                 [corpusInstallPath, corpusDownloadPath, redownload], toggle=False)

    # A handler for installing BioNLP'11 evaluators
    if menu.optDict["7"].toggle or (
            menu != prevMenu and getattr(Settings, "BIONLP_EVALUATOR_DIR", None) is None):
        schedule("7", convertBioNLP.installEvaluators,
                 [evaluatorInstallPath, evaluatorDownloadPath, redownload, True])

    # Add the handlers to install option
    menu.optDict["i"].handler = handlers
    menu.optDict["i"].handlerArgs = handlerArgs
def corpusMenuInitializer(menu, prevMenu):
    """
    Prepare the corpus installation menu (2009, 2011 and 2013 corpora).

    Sets the menu text, makes sure the CORPUS_DIR local setting has a value
    and rebuilds the handler list of the "i" (install) option from the
    option toggles and from the corpora that checkCorpusInstall reports as
    missing.

    menu -- the menu being initialized; its text, default option, option
            toggles and the "i" option's handler/handlerArgs are updated
    prevMenu -- the previously shown menu; installation checks are only
            made when it differs from 'menu'
    """
    menu.text = " The corpora are used for training new models and testing existing models. The corpora installable here are from the three BioNLP Shared Tasks (2009, 2011 and 2013) on Event Extraction (organized by University of Tokyo), and the two Drug-Drug Interaction Extraction tasks (DDI'11 and 13, organized by Universidad Carlos III de Madrid). The 2009 and 2011 corpora are downloaded as interaction XML files, generated from the original Shared Task files. If you need to convert the corpora from the original files, you can use the convertBioNLP.py, convertDDI.py and convertDDI13.py programs located at Utils/Convert. The 2013 corpora will be converted to interaction XML from the official corpus files, downloaded automatically from the task websites. Installing the BioNLP'13 corpora will take about 10 minutes. It is also recommended to download the official BioNLP Shared Task evaluator programs, which will be used by TEES when training or testing on those corpora. \n"
    # Mark "skip" as default option, this will be re-marked as install
    # as soon as something is queued for installation
    menu.setDefault("s")

    installDir = menu.system.defaultInstallDir
    corpusInstallPath = os.path.join(installDir, "corpora")
    corpusDownloadPath = os.path.join(installDir, "corpora/download")
    evaluatorInstallPath = os.path.join(installDir, "tools/evaluators")
    evaluatorDownloadPath = os.path.join(installDir, "tools/download")

    # If the CORPUS_DIR setting is missing or empty, define it now
    if getattr(Settings, "CORPUS_DIR", None) is None:
        Settings.setLocal("CORPUS_DIR", corpusInstallPath)
        print >> sys.stderr

    handlers = []
    handlerArgs = []
    redownload = menu.optDict["1"].toggle

    def isMissing(*checkArgs):
        # Only probe the installation when arriving from a different menu
        return menu != prevMenu and not checkCorpusInstall(*checkArgs)

    def schedule(optionKey, handler, args, toggle=True):
        # Make "install" the default, set the option's toggle and queue
        # its handler
        menu.setDefault("i")
        menu.optDict[optionKey].toggle = toggle
        handlers.append(handler)
        handlerArgs.append(args)

    # 2009-2011 corpora: each package is queued at most once, however many
    # of its corpora are missing
    bioNLP11Corpora = ["GE11", "EPI11", "ID11", "BB11", "BI11", "CO11", "REL11", "REN11"]
    if menu.optDict["2"].toggle or any(isMissing(corpus) for corpus in bioNLP11Corpora):
        schedule("2", convertBioNLP.installPreconverted,
                 ["BIONLP_11_CORPORA", corpusInstallPath, corpusDownloadPath, redownload, True])
    if menu.optDict["3"].toggle or isMissing("GE09"):
        schedule("3", convertBioNLP.installPreconverted,
                 ["BIONLP_09_CORPUS", corpusInstallPath, corpusDownloadPath, redownload, True])
    if menu.optDict["4"].toggle or isMissing("DDI11", ("-train.xml", "-devel.xml")):
        schedule("4", convertBioNLP.installPreconverted,
                 ["DDI_11_CORPUS", corpusInstallPath, corpusDownloadPath, redownload, True])

    # 2013 corpora
    # NOTE(review): menu.text still says these corpora are converted from
    # the official files, while this branch installs a preconverted package
    # -- confirm which of the two is intended
    bioNLP13Corpora = ["GE13", "CG13", "PC13", "GRO13", "GRN13", "BB13T2", "BB13T3"]
    if menu.optDict["5"].toggle or any(isMissing(corpus) for corpus in bioNLP13Corpora):
        schedule("5", convertBioNLP.installPreconverted,
                 ["BIONLP_13_CORPORA", corpusInstallPath, corpusDownloadPath, redownload, True])
    # NOTE(review): the DDI'13 toggle is reset to False although its handler
    # is still queued (the original line carried a "#True" remark) --
    # confirm this is intended
    if menu.optDict["6"].toggle or isMissing("DDI13", ("-train.xml",)):
        schedule("6", convertDDI13.convertDDI13,
                 [corpusInstallPath, corpusDownloadPath,
                  ["DDI13_TRAIN", "DDI13_TEST_TASK_9.1", "DDI13_TEST_TASK_9.2"], redownload],
                 toggle=False)

    # A handler for installing BioNLP'11 evaluators
    if menu.optDict["7"].toggle or (
            menu != prevMenu and getattr(Settings, "BIONLP_EVALUATOR_DIR", None) is None):
        schedule("7", convertBioNLP.installEvaluators,
                 [evaluatorInstallPath, evaluatorDownloadPath, redownload, True])

    # Add the handlers to install option
    menu.optDict["i"].handler = handlers
    menu.optDict["i"].handlerArgs = handlerArgs