Ejemplo n.º 1
0
def main():
    """Parse the command line, configure logging and load the DDI corpus.

    The corpus is read from the pickle cache given by ``--file`` when that
    file exists and ``--reload`` was not passed; otherwise it is parsed from
    the ``--dir`` directory with ``relations.loadCorpus`` and the result is
    written back to the cache file.

    An unknown ``--log`` level aborts with a usage error instead of being
    silently ignored.

    NOTE(review): ``--logfile``, ``--nfolds``, ``--action`` and ``--kernel``
    are parsed here but not used anywhere in this function.
    """
    parser = OptionParser(usage='train and evaluate ML model for DDI classification based on the DDI corpus')
    parser.add_option("-f", "--file", dest="file", action="store", default="pairs.pickle",
                      help="Pickle file to load/store the data")
    parser.add_option("-d", "--dir", action="store", dest="dir", type="string",
                      default="DDICorpus/Test/DDIextraction/MedLine/",
                      help="Corpus directory with XML files")
    parser.add_option("--reload", action="store_true", default=False, dest="reload",
                      help="Reload corpus")
    parser.add_option("--log", action="store", dest="loglevel", type="string", default="WARNING",
                      help="Log level")
    parser.add_option("--logfile", action="store", dest="logfile", type="string", default="kernel.log",
                      help="Log file")
    parser.add_option("--nfolds", action="store", dest="nfolds", type="int", default=10,
                      help="Number of cross-validation folds")
    parser.add_option("--action", action="store", dest="action", type="string", default="cv",
                      help="cv, train, test, or classify")
    parser.add_option("--kernel", action="store", dest="kernel", type="string", default="slk",
                      help="slk, svmtk")
    options, _ = parser.parse_args()

    # Map the level name (e.g. "debug") to logging's numeric constant and
    # reject anything that is not a real level.
    numeric_level = getattr(logging, options.loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        parser.error("invalid log level: %r" % options.loglevel)

    # basicConfig() does nothing when the root logger already has handlers,
    # so remove any that were installed earlier.
    while len(logging.root.handlers) > 0:
        logging.root.removeHandler(logging.root.handlers[-1])
    logging.basicConfig(level=numeric_level, format='%(asctime)s %(levelname)s %(message)s')
    logging.debug("debug test")
    logging.info("info test")
    logging.warning("warning test")

    # os.path.isfile also accepts cache paths outside the current directory.
    if os.path.isfile(options.file) and not options.reload:
        print("loading corpus pickle %s" % options.file)
        # SECURITY: unpickling can execute arbitrary code; only load cache
        # files that this program produced.
        with open(options.file, 'rb') as cache_file:
            docs = pickle.load(cache_file)
    else:
        print("loading corpus %s" % options.dir)
        docs = relations.loadCorpus(options.dir)
        with open(options.file, 'wb') as cache_file:
            pickle.dump(docs, cache_file)
Ejemplo n.º 2
0
def main():
    """Parse the command line, load the DDI corpus and run the chosen kernel.

    The corpus is read from the pickle cache given by ``--file`` when that
    file exists and ``--reload`` was not passed; otherwise it is parsed from
    the ``--dir`` directory with ``relations.loadCorpus`` and the result is
    written back to the cache file.

    Depending on ``--kernel`` ("slk" or "svmtk") the matching data generator
    is called, followed by the train or test routine when ``--action`` is
    "train" or "test". Any other kernel value does nothing after loading.

    An unknown ``--log`` level aborts with a usage error instead of being
    silently ignored.

    NOTE(review): ``--logfile`` is parsed but never used, and ``--nfolds`` is
    only referenced by the disabled code below.
    """
    parser = OptionParser(
        usage=
        'train and evaluate ML model for DDI classification based on the DDI corpus'
    )
    parser.add_option("-f",
                      "--file",
                      dest="file",
                      action="store",
                      default="pairs.pickle",
                      help="Pickle file to load/store the data")
    parser.add_option("-d",
                      "--dir",
                      action="store",
                      dest="dir",
                      type="string",
                      default="DDICorpus/Test/DDIextraction/MedLine/",
                      help="Corpus directory with XML files")
    parser.add_option("--reload",
                      action="store_true",
                      default=False,
                      dest="reload",
                      help="Reload corpus")
    parser.add_option("--log",
                      action="store",
                      dest="loglevel",
                      type="string",
                      default="WARNING",
                      help="Log level")
    parser.add_option("--logfile",
                      action="store",
                      dest="logfile",
                      type="string",
                      default="kernel.log",
                      help="Log file")
    parser.add_option("--nfolds",
                      action="store",
                      dest="nfolds",
                      type="int",
                      default=10,
                      help="Number of cross-validation folds")
    parser.add_option("--action",
                      action="store",
                      dest="action",
                      type="string",
                      default="cv",
                      help="cv, train, test, or classify")
    parser.add_option("--kernel",
                      action="store",
                      dest="kernel",
                      type="string",
                      default="slk",
                      help="slk, svmtk")
    options, _ = parser.parse_args()

    # Map the level name (e.g. "debug") to logging's numeric constant and
    # reject anything that is not a real level.
    numeric_level = getattr(logging, options.loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        parser.error("invalid log level: %r" % options.loglevel)

    # basicConfig() does nothing when the root logger already has handlers,
    # so remove any that were installed earlier.
    while len(logging.root.handlers) > 0:
        logging.root.removeHandler(logging.root.handlers[-1])
    logging.basicConfig(level=numeric_level,
                        format='%(asctime)s %(levelname)s %(message)s')
    logging.debug("debug test")
    logging.info("info test")
    logging.warning("warning test")

    # os.path.isfile also accepts cache paths outside the current directory.
    if os.path.isfile(options.file) and not options.reload:
        print("loading corpus pickle %s" % options.file)
        # SECURITY: unpickling can execute arbitrary code; only load cache
        # files that this program produced.
        with open(options.file, 'rb') as cache_file:
            docs = pickle.load(cache_file)
    else:
        print("loading corpus %s" % options.dir)
        docs = relations.loadCorpus(options.dir)
        with open(options.file, 'wb') as cache_file:
            pickle.dump(docs, cache_file)

    # Disabled code kept for reference:
    #build_data_frame(docs)
    #if 'parsetree' not in docs['info']:
    #    for doc in docs:
    #        for s in docs[doc]:
    #            docs[doc][s]['parsetree'] = gettree(docs[doc][s]['tokens'])
    #        docs['info'].append('parsetree')

    #trainEvaluatePairs(docs, nfolds=options.nfolds)

    # File names shared by the train/test calls of both kernels.
    pairs_file = options.kernel + '_' + options.action + '_pairs.txt'
    results_file = options.kernel + '_' + "test_results.txt"

    if options.kernel == 'slk':
        # NOTE(review): the generator receives the name without the kernel
        # prefix while train/test use the prefixed one; presumably
        # generatejSREdata adds the prefix itself -- confirm.
        generatejSREdata(docs, options.action + '_pairs.txt')
        if options.action == 'train':
            trainjSRE(pairs_file)
        elif options.action == 'test':
            testjSRE(pairs_file, results_file)
    elif options.kernel == 'svmtk':
        generateSVMTKdata(docs)
        if options.action == 'train':
            trainSVMTK(pairs_file)
        elif options.action == 'test':
            testSVMTK(pairs_file, results_file)

        # NOTE(review): the data is generated a second time after train/test;
        # this looks redundant but is kept to preserve behavior -- confirm.
        generateSVMTKdata(docs)
Ejemplo n.º 3
0
def main():
    """Parse the command line, configure logging and load the DDI corpus.

    The corpus is read from the pickle cache given by ``--file`` when that
    file exists and ``--reload`` was not passed; otherwise it is parsed from
    the ``--dir`` directory with ``relations.loadCorpus`` and the result is
    written back to the cache file.

    An unknown ``--log`` level aborts with a usage error instead of being
    silently ignored.

    NOTE(review): ``--logfile``, ``--nfolds``, ``--action`` and ``--kernel``
    are parsed here but not used anywhere in this function.
    """
    parser = OptionParser(
        usage=
        'train and evaluate ML model for DDI classification based on the DDI corpus'
    )
    parser.add_option("-f",
                      "--file",
                      dest="file",
                      action="store",
                      default="pairs.pickle",
                      help="Pickle file to load/store the data")
    parser.add_option("-d",
                      "--dir",
                      action="store",
                      dest="dir",
                      type="string",
                      default="DDICorpus/Test/DDIextraction/MedLine/",
                      help="Corpus directory with XML files")
    parser.add_option("--reload",
                      action="store_true",
                      default=False,
                      dest="reload",
                      help="Reload corpus")
    parser.add_option("--log",
                      action="store",
                      dest="loglevel",
                      type="string",
                      default="WARNING",
                      help="Log level")
    parser.add_option("--logfile",
                      action="store",
                      dest="logfile",
                      type="string",
                      default="kernel.log",
                      help="Log file")
    parser.add_option("--nfolds",
                      action="store",
                      dest="nfolds",
                      type="int",
                      default=10,
                      help="Number of cross-validation folds")
    parser.add_option("--action",
                      action="store",
                      dest="action",
                      type="string",
                      default="cv",
                      help="cv, train, test, or classify")
    parser.add_option("--kernel",
                      action="store",
                      dest="kernel",
                      type="string",
                      default="slk",
                      help="slk, svmtk")
    options, _ = parser.parse_args()

    # Map the level name (e.g. "debug") to logging's numeric constant and
    # reject anything that is not a real level.
    numeric_level = getattr(logging, options.loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        parser.error("invalid log level: %r" % options.loglevel)

    # basicConfig() does nothing when the root logger already has handlers,
    # so remove any that were installed earlier.
    while len(logging.root.handlers) > 0:
        logging.root.removeHandler(logging.root.handlers[-1])
    logging.basicConfig(level=numeric_level,
                        format='%(asctime)s %(levelname)s %(message)s')
    logging.debug("debug test")
    logging.info("info test")
    logging.warning("warning test")

    # os.path.isfile also accepts cache paths outside the current directory.
    if os.path.isfile(options.file) and not options.reload:
        print("loading corpus pickle %s" % options.file)
        # SECURITY: unpickling can execute arbitrary code; only load cache
        # files that this program produced.
        with open(options.file, 'rb') as cache_file:
            docs = pickle.load(cache_file)
    else:
        print("loading corpus %s" % options.dir)
        docs = relations.loadCorpus(options.dir)
        with open(options.file, 'wb') as cache_file:
            pickle.dump(docs, cache_file)
Ejemplo n.º 4
0
def main():
    """Parse the command line, load the DDI corpus and run the chosen kernel.

    The corpus is read from the pickle cache given by ``--file`` when that
    file exists and ``--reload`` was not passed; otherwise it is parsed from
    the ``--dir`` directory with ``relations.loadCorpus`` and the result is
    written back to the cache file.

    Depending on ``--kernel`` ("slk" or "svmtk") the matching data generator
    is called, followed by the train or test routine when ``--action`` is
    "train" or "test". Any other kernel value does nothing after loading.

    An unknown ``--log`` level aborts with a usage error instead of being
    silently ignored.

    NOTE(review): ``--logfile`` is parsed but never used, and ``--nfolds`` is
    only referenced by the disabled code below.
    """
    parser = OptionParser(usage='train and evaluate ML model for DDI classification based on the DDI corpus')
    parser.add_option("-f", "--file", dest="file", action="store", default="pairs.pickle",
                      help="Pickle file to load/store the data")
    parser.add_option("-d", "--dir", action="store", dest="dir", type="string",
                      default="DDICorpus/Test/DDIextraction/MedLine/",
                      help="Corpus directory with XML files")
    parser.add_option("--reload", action="store_true", default=False, dest="reload",
                      help="Reload corpus")
    parser.add_option("--log", action="store", dest="loglevel", type="string", default="WARNING",
                      help="Log level")
    parser.add_option("--logfile", action="store", dest="logfile", type="string", default="kernel.log",
                      help="Log file")
    parser.add_option("--nfolds", action="store", dest="nfolds", type="int", default=10,
                      help="Number of cross-validation folds")
    parser.add_option("--action", action="store", dest="action", type="string", default="cv",
                      help="cv, train, test, or classify")
    parser.add_option("--kernel", action="store", dest="kernel", type="string", default="slk",
                      help="slk, svmtk")
    options, _ = parser.parse_args()

    # Map the level name (e.g. "debug") to logging's numeric constant and
    # reject anything that is not a real level.
    numeric_level = getattr(logging, options.loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        parser.error("invalid log level: %r" % options.loglevel)

    # basicConfig() does nothing when the root logger already has handlers,
    # so remove any that were installed earlier.
    while len(logging.root.handlers) > 0:
        logging.root.removeHandler(logging.root.handlers[-1])
    logging.basicConfig(level=numeric_level, format='%(asctime)s %(levelname)s %(message)s')
    logging.debug("debug test")
    logging.info("info test")
    logging.warning("warning test")

    # os.path.isfile also accepts cache paths outside the current directory.
    if os.path.isfile(options.file) and not options.reload:
        print("loading corpus pickle %s" % options.file)
        # SECURITY: unpickling can execute arbitrary code; only load cache
        # files that this program produced.
        with open(options.file, 'rb') as cache_file:
            docs = pickle.load(cache_file)
    else:
        print("loading corpus %s" % options.dir)
        docs = relations.loadCorpus(options.dir)
        with open(options.file, 'wb') as cache_file:
            pickle.dump(docs, cache_file)

    # Disabled code kept for reference:
    #build_data_frame(docs)
    #if 'parsetree' not in docs['info']:
    #    for doc in docs:
    #        for s in docs[doc]:
    #            docs[doc][s]['parsetree'] = gettree(docs[doc][s]['tokens'])
    #        docs['info'].append('parsetree')

    #trainEvaluatePairs(docs, nfolds=options.nfolds)

    # File names shared by the train/test calls of both kernels.
    pairs_file = options.kernel + '_' + options.action + '_pairs.txt'
    results_file = options.kernel + '_' + "test_results.txt"

    if options.kernel == 'slk':
        # NOTE(review): the generator receives the name without the kernel
        # prefix while train/test use the prefixed one; presumably
        # generatejSREdata adds the prefix itself -- confirm.
        generatejSREdata(docs, options.action + '_pairs.txt')
        if options.action == 'train':
            trainjSRE(pairs_file)
        elif options.action == 'test':
            testjSRE(pairs_file, results_file)
    elif options.kernel == 'svmtk':
        generateSVMTKdata(docs)
        if options.action == 'train':
            trainSVMTK(pairs_file)
        elif options.action == 'test':
            testSVMTK(pairs_file, results_file)

        # NOTE(review): the data is generated a second time after train/test;
        # this looks redundant but is kept to preserve behavior -- confirm.
        generateSVMTKdata(docs)