def main():
    """Parse the command line, configure logging and load the DDI corpus.

    The corpus is read from the pickle file given by ``--file`` when that
    file exists and ``--reload`` was not passed.  Otherwise it is loaded
    from the XML directory given by ``--dir`` through
    ``relations.loadCorpus`` and written to the pickle file for next time.

    ``--logfile``, ``--nfolds``, ``--action`` and ``--kernel`` are parsed
    but not used by this function.

    An unknown ``--log`` level aborts through ``parser.error`` (exit
    status 2) instead of being silently ignored.
    """
    parser = OptionParser(usage='train and evaluate ML model for DDI classification based on the DDI corpus')
    parser.add_option("-f", "--file", dest="file", action="store", default="pairs.pickle",
                      help="Pickle file to load/store the data")
    parser.add_option("-d", "--dir", action="store", dest="dir", type="string",
                      default="DDICorpus/Test/DDIextraction/MedLine/",
                      help="Corpus directory with XML files")
    parser.add_option("--reload", action="store_true", default=False, dest="reload",
                      help="Reload corpus")
    parser.add_option("--log", action="store", dest="loglevel", type="string", default="WARNING",
                      help="Log level")
    parser.add_option("--logfile", action="store", dest="logfile", type="string", default="kernel.log",
                      help="Log file")
    parser.add_option("--nfolds", action="store", dest="nfolds", type="int", default=10,
                      help="Number of cross-validation folds")
    parser.add_option("--action", action="store", dest="action", type="string", default="cv",
                      help="cv, train, test, or classify")
    parser.add_option("--kernel", action="store", dest="kernel", type="string", default="slk",
                      help="slk, svmtk")
    (options, args) = parser.parse_args()

    # Map the level name to logging's numeric constant; anything that is not
    # an integer level (typo, or a non-level attribute of the module) is a
    # usage error rather than something to pass on to basicConfig().
    numeric_level = getattr(logging, options.loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        parser.error("invalid log level: %s" % options.loglevel)

    # basicConfig() does nothing when the root logger already has handlers,
    # so drop any existing ones first.
    for handler in list(logging.root.handlers):
        logging.root.removeHandler(handler)
    logging.basicConfig(level=numeric_level, format='%(asctime)s %(levelname)s %(message)s')
    logging.debug("debug test")
    logging.info("info test")
    logging.warning("warning test")

    # isfile() also handles --file values that contain a directory part,
    # which a lookup in os.listdir(os.getcwd()) could never match.
    if os.path.isfile(options.file) and not options.reload:
        print("loading corpus pickle %s" % options.file)
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(options.file, 'rb') as pickle_file:
            docs = pickle.load(pickle_file)
    else:
        print("loading corpus %s" % options.dir)
        docs = relations.loadCorpus(options.dir)
        with open(options.file, 'wb') as pickle_file:
            pickle.dump(docs, pickle_file)
def main():
    """Load the DDI corpus and run the selected kernel pipeline.

    The corpus is read from the pickle file given by ``--file`` when that
    file exists and ``--reload`` was not passed.  Otherwise it is loaded
    from the XML directory given by ``--dir`` through
    ``relations.loadCorpus`` and written to the pickle file for next time.

    ``--kernel`` selects the data generator (``slk`` or ``svmtk``) and
    ``--action`` selects whether the matching train or test routine runs
    afterwards; any other action only generates the data.

    ``--logfile`` is parsed but not used here, and ``--nfolds`` is only
    referenced by a disabled call.

    An unknown ``--log`` level aborts through ``parser.error`` (exit
    status 2) instead of being silently ignored.
    """
    parser = OptionParser(
        usage='train and evaluate ML model for DDI classification based on the DDI corpus'
    )
    parser.add_option("-f", "--file", dest="file", action="store", default="pairs.pickle",
                      help="Pickle file to load/store the data")
    parser.add_option("-d", "--dir", action="store", dest="dir", type="string",
                      default="DDICorpus/Test/DDIextraction/MedLine/",
                      help="Corpus directory with XML files")
    parser.add_option("--reload", action="store_true", default=False, dest="reload",
                      help="Reload corpus")
    parser.add_option("--log", action="store", dest="loglevel", type="string", default="WARNING",
                      help="Log level")
    parser.add_option("--logfile", action="store", dest="logfile", type="string", default="kernel.log",
                      help="Log file")
    parser.add_option("--nfolds", action="store", dest="nfolds", type="int", default=10,
                      help="Number of cross-validation folds")
    parser.add_option("--action", action="store", dest="action", type="string", default="cv",
                      help="cv, train, test, or classify")
    parser.add_option("--kernel", action="store", dest="kernel", type="string", default="slk",
                      help="slk, svmtk")
    (options, args) = parser.parse_args()

    # Map the level name to logging's numeric constant; anything that is not
    # an integer level (typo, or a non-level attribute of the module) is a
    # usage error rather than something to pass on to basicConfig().
    numeric_level = getattr(logging, options.loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        parser.error("invalid log level: %s" % options.loglevel)

    # basicConfig() does nothing when the root logger already has handlers,
    # so drop any existing ones first.
    for handler in list(logging.root.handlers):
        logging.root.removeHandler(handler)
    logging.basicConfig(level=numeric_level, format='%(asctime)s %(levelname)s %(message)s')
    logging.debug("debug test")
    logging.info("info test")
    logging.warning("warning test")

    # isfile() also handles --file values that contain a directory part,
    # which a lookup in os.listdir(os.getcwd()) could never match.
    if os.path.isfile(options.file) and not options.reload:
        print("loading corpus pickle %s" % options.file)
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(options.file, 'rb') as pickle_file:
            docs = pickle.load(pickle_file)
    else:
        print("loading corpus %s" % options.dir)
        docs = relations.loadCorpus(options.dir)
        with open(options.file, 'wb') as pickle_file:
            pickle.dump(docs, pickle_file)

    # Disabled steps, kept for reference.
    # NOTE(review): the original layout was lost; the append line is assumed
    # to belong to this disabled block - confirm.
    # build_data_frame(docs)
    # if 'parsetree' not in docs['info']:
    #     for doc in docs:
    #         for s in docs[doc]:
    #             docs[doc][s]['parsetree'] = gettree(docs[doc][s]['tokens'])
    #     docs['info'].append('parsetree')
    # trainEvaluatePairs(docs, nfolds=options.nfolds)

    pairs_file = options.kernel + '_' + options.action + '_pairs.txt'
    results_file = options.kernel + '_' + "test_results.txt"

    if options.kernel == 'slk':
        # NOTE(review): the generator is given '<action>_pairs.txt' while
        # train/test read '<kernel>_<action>_pairs.txt'; presumably
        # generatejSREdata adds the kernel prefix itself - verify.
        generatejSREdata(docs, options.action + '_pairs.txt')
        if options.action == 'train':
            trainjSRE(pairs_file)
        elif options.action == 'test':
            testjSRE(pairs_file, results_file)
    elif options.kernel == 'svmtk':
        generateSVMTKdata(docs)
        if options.action == 'train':
            trainSVMTK(pairs_file)
        elif options.action == 'test':
            testSVMTK(pairs_file, results_file)

    # NOTE(review): the indentation of this trailing call was lost; it is kept
    # as an unconditional final step - confirm it is not meant to be nested
    # inside the svmtk branch.
    generateSVMTKdata(docs)
def main():
    """Parse the command line, configure logging and load the DDI corpus.

    The corpus is read from the pickle file given by ``--file`` when that
    file exists and ``--reload`` was not passed.  Otherwise it is loaded
    from the XML directory given by ``--dir`` through
    ``relations.loadCorpus`` and written to the pickle file for next time.

    ``--logfile``, ``--nfolds``, ``--action`` and ``--kernel`` are parsed
    but not used by this function.

    An unknown ``--log`` level aborts through ``parser.error`` (exit
    status 2) instead of being silently ignored.
    """
    parser = OptionParser(
        usage='train and evaluate ML model for DDI classification based on the DDI corpus'
    )
    parser.add_option("-f", "--file", dest="file", action="store", default="pairs.pickle",
                      help="Pickle file to load/store the data")
    parser.add_option("-d", "--dir", action="store", dest="dir", type="string",
                      default="DDICorpus/Test/DDIextraction/MedLine/",
                      help="Corpus directory with XML files")
    parser.add_option("--reload", action="store_true", default=False, dest="reload",
                      help="Reload corpus")
    parser.add_option("--log", action="store", dest="loglevel", type="string", default="WARNING",
                      help="Log level")
    parser.add_option("--logfile", action="store", dest="logfile", type="string", default="kernel.log",
                      help="Log file")
    parser.add_option("--nfolds", action="store", dest="nfolds", type="int", default=10,
                      help="Number of cross-validation folds")
    parser.add_option("--action", action="store", dest="action", type="string", default="cv",
                      help="cv, train, test, or classify")
    parser.add_option("--kernel", action="store", dest="kernel", type="string", default="slk",
                      help="slk, svmtk")
    (options, args) = parser.parse_args()

    # Map the level name to logging's numeric constant; anything that is not
    # an integer level (typo, or a non-level attribute of the module) is a
    # usage error rather than something to pass on to basicConfig().
    numeric_level = getattr(logging, options.loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        parser.error("invalid log level: %s" % options.loglevel)

    # basicConfig() does nothing when the root logger already has handlers,
    # so drop any existing ones first.
    for handler in list(logging.root.handlers):
        logging.root.removeHandler(handler)
    logging.basicConfig(level=numeric_level, format='%(asctime)s %(levelname)s %(message)s')
    logging.debug("debug test")
    logging.info("info test")
    logging.warning("warning test")

    # isfile() also handles --file values that contain a directory part,
    # which a lookup in os.listdir(os.getcwd()) could never match.
    if os.path.isfile(options.file) and not options.reload:
        print("loading corpus pickle %s" % options.file)
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(options.file, 'rb') as pickle_file:
            docs = pickle.load(pickle_file)
    else:
        print("loading corpus %s" % options.dir)
        docs = relations.loadCorpus(options.dir)
        with open(options.file, 'wb') as pickle_file:
            pickle.dump(docs, pickle_file)
def main():
    """Load the DDI corpus and run the selected kernel pipeline.

    The corpus is read from the pickle file given by ``--file`` when that
    file exists and ``--reload`` was not passed.  Otherwise it is loaded
    from the XML directory given by ``--dir`` through
    ``relations.loadCorpus`` and written to the pickle file for next time.

    ``--kernel`` selects the data generator (``slk`` or ``svmtk``) and
    ``--action`` selects whether the matching train or test routine runs
    afterwards; any other action only generates the data.

    ``--logfile`` is parsed but not used here, and ``--nfolds`` is only
    referenced by a disabled call.

    An unknown ``--log`` level aborts through ``parser.error`` (exit
    status 2) instead of being silently ignored.
    """
    parser = OptionParser(usage='train and evaluate ML model for DDI classification based on the DDI corpus')
    parser.add_option("-f", "--file", dest="file", action="store", default="pairs.pickle",
                      help="Pickle file to load/store the data")
    parser.add_option("-d", "--dir", action="store", dest="dir", type="string",
                      default="DDICorpus/Test/DDIextraction/MedLine/",
                      help="Corpus directory with XML files")
    parser.add_option("--reload", action="store_true", default=False, dest="reload",
                      help="Reload corpus")
    parser.add_option("--log", action="store", dest="loglevel", type="string", default="WARNING",
                      help="Log level")
    parser.add_option("--logfile", action="store", dest="logfile", type="string", default="kernel.log",
                      help="Log file")
    parser.add_option("--nfolds", action="store", dest="nfolds", type="int", default=10,
                      help="Number of cross-validation folds")
    parser.add_option("--action", action="store", dest="action", type="string", default="cv",
                      help="cv, train, test, or classify")
    parser.add_option("--kernel", action="store", dest="kernel", type="string", default="slk",
                      help="slk, svmtk")
    (options, args) = parser.parse_args()

    # Map the level name to logging's numeric constant; anything that is not
    # an integer level (typo, or a non-level attribute of the module) is a
    # usage error rather than something to pass on to basicConfig().
    numeric_level = getattr(logging, options.loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        parser.error("invalid log level: %s" % options.loglevel)

    # basicConfig() does nothing when the root logger already has handlers,
    # so drop any existing ones first.
    for handler in list(logging.root.handlers):
        logging.root.removeHandler(handler)
    logging.basicConfig(level=numeric_level, format='%(asctime)s %(levelname)s %(message)s')
    logging.debug("debug test")
    logging.info("info test")
    logging.warning("warning test")

    # isfile() also handles --file values that contain a directory part,
    # which a lookup in os.listdir(os.getcwd()) could never match.
    if os.path.isfile(options.file) and not options.reload:
        print("loading corpus pickle %s" % options.file)
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(options.file, 'rb') as pickle_file:
            docs = pickle.load(pickle_file)
    else:
        print("loading corpus %s" % options.dir)
        docs = relations.loadCorpus(options.dir)
        with open(options.file, 'wb') as pickle_file:
            pickle.dump(docs, pickle_file)

    # Disabled steps, kept for reference.
    # NOTE(review): the original layout was lost; the append line is assumed
    # to belong to this disabled block - confirm.
    # build_data_frame(docs)
    # if 'parsetree' not in docs['info']:
    #     for doc in docs:
    #         for s in docs[doc]:
    #             docs[doc][s]['parsetree'] = gettree(docs[doc][s]['tokens'])
    #     docs['info'].append('parsetree')
    # trainEvaluatePairs(docs, nfolds=options.nfolds)

    pairs_file = options.kernel + '_' + options.action + '_pairs.txt'
    results_file = options.kernel + '_' + "test_results.txt"

    if options.kernel == 'slk':
        # NOTE(review): the generator is given '<action>_pairs.txt' while
        # train/test read '<kernel>_<action>_pairs.txt'; presumably
        # generatejSREdata adds the kernel prefix itself - verify.
        generatejSREdata(docs, options.action + '_pairs.txt')
        if options.action == 'train':
            trainjSRE(pairs_file)
        elif options.action == 'test':
            testjSRE(pairs_file, results_file)
    elif options.kernel == 'svmtk':
        generateSVMTKdata(docs)
        if options.action == 'train':
            trainSVMTK(pairs_file)
        elif options.action == 'test':
            testSVMTK(pairs_file, results_file)

    # NOTE(review): the indentation of this trailing call was lost; it is kept
    # as an unconditional final step - confirm it is not meant to be nested
    # inside the svmtk branch.
    generateSVMTKdata(docs)