_exit(usage, 1, e) doer = DU_ABPTable(sModelName, sModelDir, C=options.crf_C, tol=options.crf_tol, njobs=options.crf_njobs, max_iter=options.max_iter, inference_cache=options.crf_inference_cache) if options.rm: doer.rm() sys.exit(0) lTrn, lTst, lRun, lFold = [ _checkFindColDir(lsDir) for lsDir in [options.lTrn, options.lTst, options.lRun, options.lFold] ] # if options.bAnnotate: # doer.annotateDocument(lTrn) # traceln('annotation done') # sys.exit(0) traceln("- classes: ", doer.getGraphClass().getLabelNameList()) ## use. a_mpxml files doer.sXmlFilenamePattern = doer.sLabeledXmlFilenamePattern if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish: if options.iFoldInitNum: """
# --- #parse the command line (options, args) = parser.parse_args() # --- try: sModelDir, sModelName = args except Exception as e: traceln("Specify a model folder and a model name!") _exit(usage, 1, e) doer = DU_ABPTableAnnotator(sModelName, sModelDir, C = options.crf_C, tol = options.crf_tol, njobs = options.crf_njobs, max_iter = options.crf_max_iter, inference_cache = options.crf_inference_cache) if options.rm: doer.rm() sys.exit(0) lTrn, lTst, lRun, lFold = [_checkFindColDir(lsDir) for lsDir in [options.lTrn, options.lTst, options.lRun, options.lFold]] doer.annotateDocument(lTrn) traceln('annotation done')
inference_cache=options.crf_inference_cache) if options.rm: doer.rm() sys.exit(0) traceln("- classes: ", DU_GRAPH.getLabelNameList()) if options.best_params: dBestParams = doer.getModelClass().loadBestParams( sModelDir, options.best_params) doer.setLearnerConfiguration(dBestParams) #Add the "out" subdir if needed lTrn, lTst, lRun, lFold = [ _checkFindColDir(lsDir, "out") for lsDir in [options.lTrn, options.lTst, options.lRun, options.lFold] ] if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish: if options.iFoldInitNum: """ initialization of a cross-validation """ splitter, ts_trn, lFilename_trn = doer._nfold_Init( lFold, options.iFoldInitNum, bStoreOnDisk=True) elif options.iFoldRunNum: """ Run one fold """ oReport = doer._nfold_RunFoldFromDisk(options.iFoldRunNum,
def main(sModelDir, sModelName, options):
    """
    Run the DU_ABPTableR task requested by the parsed command-line options.

    sModelDir  -- folder of the model (also receives the report files)
    sModelName -- name of the model
    options    -- parsed options object; this function reads crf_C, crf_tol,
                  crf_njobs, max_iter, crf_inference_cache, rm, lTrn, lTst,
                  lRun, lFold, iFoldInitNum, iFoldRunNum, bFoldFinish, warm,
                  pkl, bDetailedReport, storeX, applyY and bRevertEdges.

    Returns None. Raises SystemExit(0) once a step-by-step cross-validation
    action (init / run one fold / finish) has been performed.

    NOTE(review): relies on `graph.GraphModel` being imported at module
    level; no import is visible in this block -- confirm.
    """
    doer = DU_ABPTableR(sModelName, sModelDir,
                        C=options.crf_C,
                        tol=options.crf_tol,
                        njobs=options.crf_njobs,
                        max_iter=options.max_iter,
                        inference_cache=options.crf_inference_cache)

    if options.rm:
        doer.rm()
        return

    lTrn, lTst, lRun, lFold = [
        _checkFindColDir(lsDir, bAbsolute=False)
        for lsDir in (options.lTrn, options.lTst, options.lRun, options.lFold)
    ]

    traceln("- classes: ", doer.getGraphClass().getLabelNameList())

    # The labelled-XML filename pattern override is deliberately disabled here:
    # doer.sXmlFilenamePattern = doer.sLabeledXmlFilenamePattern

    if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
        if options.iFoldInitNum:
            # initialization of a cross-validation
            doer._nfold_Init(lFold, options.iFoldInitNum, bStoreOnDisk=True)
        elif options.iFoldRunNum:
            # run one fold
            oReport = doer._nfold_RunFoldFromDisk(options.iFoldRunNum,
                                                  options.warm, options.pkl)
            traceln(oReport)
        elif options.bFoldFinish:
            tstReport = doer._nfold_Finish()
            traceln(tstReport)
        else:
            assert False, "Internal error"
        # No more processing. SystemExit is raised directly because the
        # `exit` builtin is injected by the `site` module and is not
        # guaranteed to exist (python -S, embedded/frozen interpreters).
        raise SystemExit(0)

    if lFold:
        loTstRpt = doer.nfold_Eval(lFold, 3, .25, None, options.pkl)
        sReportPickleFilename = os.path.join(sModelDir,
                                             sModelName + "__report.txt")
        traceln("Results are in %s" % sReportPickleFilename)
        graph.GraphModel.GraphModel.gzip_cPickle_dump(sReportPickleFilename,
                                                      loTstRpt)
    elif lTrn:
        doer.train_save_test(lTrn, lTst, options.warm, options.pkl)
        try:
            # only available after a grid search
            traceln("Baseline best estimator: %s"
                    % doer.bsln_mdl.best_params_)
        except Exception:
            # best-effort trace; was a bare except, which also swallowed
            # KeyboardInterrupt and SystemExit
            pass
        traceln(" --- CRF Model ---")
        traceln(doer.getModel().getModelInfo())
    elif lTst:
        doer.load()
        tstReport = doer.test(lTst)
        traceln(tstReport)
        if options.bDetailedReport:
            traceln(tstReport.getDetailledReport())
            # NOTE(review): the dump is assumed to belong to the
            # detailed-report branch (nesting reconstructed) -- confirm.
            sReportPickleFilename = os.path.join(
                sModelDir, sModelName + "__detailled_report.txt")
            graph.GraphModel.GraphModel.gzip_cPickle_dump(
                sReportPickleFilename, tstReport)

    if lRun:
        if options.storeX or options.applyY:
            try:
                doer.load()
            except Exception:
                # we only need the transformer, so a missing model is fine
                pass
            lsOutputFilename = doer.runForExternalMLMethod(
                lRun, options.storeX, options.applyY, options.bRevertEdges)
        else:
            doer.load()
            lsOutputFilename = doer.predict(lRun)
        traceln("Done, see in:\n %s" % lsOutputFilename)
def test_test_ensemble(self):
    """
    Load the 'UT_mod2_ensemble' ECN ensemble from 'UT_MODELS', test it on
    './abp_test' and require an accuracy above 0.5.

    The test fails outright when the checkpoint index file of the second
    ensemble member is not found on disk.
    """
    sModelDir = 'UT_MODELS'
    sModelName = 'UT_mod2_ensemble'
    sCheckpointIndex = os.path.join('UT_MODELS',
                                    'UT_mod2_m2_bestmodel.ckpt.index')
    lTestDir = _checkFindColDir('./abp_test')

    # Settings common to both members; the None entries are overridden per
    # member below (dict() keeps the key order of this template).
    dTemplate = {
        "type": "ecn",
        "name": None,
        "dropout_rate_edge": 0.0,
        "dropout_rate_edge_feat": 0.0,
        "dropout_rate_node": 0.0,
        "lr": 0.001,
        "mu": 0.0,
        "nb_iter": 50,
        "nconv_edge": 1,
        "node_indim": -1,
        "num_layers": 8,
        "ratio_train_val": 0.1,
        "patience": None,
        "activation_name": None,
        "stack_convolutions": True,
    }
    dFirstMember = dict(dTemplate, name="UT_mod2_m1", patience=500,
                        activation_name="tanh")
    dSecondMember = dict(dTemplate, name="UT_mod2_m2", patience=50,
                         activation_name="relu")
    dEnsembleConfig = {"ecn_ensemble": [dFirstMember, dSecondMember]}

    if not os.path.exists(sCheckpointIndex):
        self.fail('UT_mod1 was not trained')
        return

    # Do the test
    oTask = DU_ABPTable.DU_ABPTable_ECN(sModelName, sModelDir,
                                        dLearnerConfigArg=dEnsembleConfig)
    oTask.load()
    oReport = oTask.test(lTestDir)
    fAccuracy, _ = oReport.getClassificationReport()
    print(fAccuracy)
    self.assertTrue(fAccuracy > 0.5)
def main(sModelDir, sModelName, options):
    """
    Run the DU_ABPTable_TypedCRF task requested by the parsed options.

    sModelDir  -- folder of the model (also receives the n-fold report)
    sModelName -- name of the model
    options    -- parsed options object; this function reads crf_C, crf_tol,
                  crf_njobs, max_iter, crf_inference_cache, docid, rm, lTrn,
                  lTst, lRun, lFold, iFoldInitNum, iFoldRunNum, bFoldFinish
                  and warm.

    Returns None. Raises SystemExit(0) once a step-by-step cross-validation
    action (init / run one fold / finish) has been performed.
    """
    doer = DU_ABPTable_TypedCRF(sModelName, sModelDir,
                                C=options.crf_C,
                                tol=options.crf_tol,
                                njobs=options.crf_njobs,
                                max_iter=options.max_iter,
                                inference_cache=options.crf_inference_cache)

    # any falsy docid (None, "") means "no document filter"
    sDocId = options.docid or None

    if options.rm:
        doer.rm()
        return

    lTrn, lTst, lRun, lFold = [
        _checkFindColDir(lsDir)
        for lsDir in (options.lTrn, options.lTst, options.lRun, options.lFold)
    ]

    # use the labelled XML files
    doer.sXmlFilenamePattern = doer.sLabeledXmlFilenamePattern

    if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
        if options.iFoldInitNum:
            # initialization of a cross-validation
            doer._nfold_Init(lFold, options.iFoldInitNum, bStoreOnDisk=True)
        elif options.iFoldRunNum:
            # run one fold
            oReport = doer._nfold_RunFoldFromDisk(options.iFoldRunNum,
                                                  options.warm)
            traceln(oReport)
        elif options.bFoldFinish:
            tstReport = doer._nfold_Finish()
            traceln(tstReport)
        else:
            assert False, "Internal error"
        # No more processing. SystemExit is raised directly because the
        # `exit` builtin is injected by the `site` module and is not
        # guaranteed to exist (python -S, embedded/frozen interpreters).
        raise SystemExit(0)

    if lFold:
        loTstRpt = doer.nfold_Eval(lFold, 3, .25, None)
        import graph.GraphModel
        sReportPickleFilename = os.path.join(sModelDir,
                                             sModelName + "__report.txt")
        traceln("Results are in %s" % sReportPickleFilename)
        graph.GraphModel.GraphModel.gzip_cPickle_dump(sReportPickleFilename,
                                                      loTstRpt)
    elif lTrn:
        doer.train_save_test(lTrn, lTst, options.warm)
        try:
            # only available after a grid search
            traceln("Baseline best estimator: %s"
                    % doer.bsln_mdl.best_params_)
        except Exception:
            # best-effort trace; was a bare except, which also swallowed
            # KeyboardInterrupt and SystemExit
            pass
        traceln(" --- CRF Model ---")
        traceln(doer.getModel().getModelInfo())
    elif lTst:
        doer.load()
        tstReport = doer.test(lTst)
        traceln(tstReport)

    if lRun:
        doer.load()
        lsOutputFilename = doer.predict(lRun, sDocId)
        traceln("Done, see in:\n %s" % lsOutputFilename)
def main(sModelDir, sModelName, options):
    """
    Run an ABP table task (ECN, GAT or CRF flavour) as requested by options.

    sModelDir  -- folder of the model (also receives the n-fold report)
    sModelName -- name of the model
    options    -- parsed options object; this function reads use_ecn,
                  ecn_json_config, use_gat, gat_json_config, crf_C, crf_tol,
                  crf_njobs, crf_max_iter, crf_inference_cache, rm, lTrn,
                  lTst, lRun, lFold, iFoldInitNum, iFoldRunNum, bFoldFinish,
                  warm, pkl, bDetailedReport, storeX, applyY, bRevertEdges.

    The learner is selected as follows:
      - use_ecn: DU_ABPTable_ECN, configured from the first file listed in
        ecn_json_config when given (either its "ecn_learner_config" entry,
        or the whole document when it has an "ecn_ensemble" key);
      - use_gat: DU_ABPTable_GAT, configured from the "gat_learner_config"
        entry of the first file listed in gat_json_config when given;
      - otherwise: DU_ABPTable with the CRF options.

    Returns None. Raises SystemExit(0) once a step-by-step cross-validation
    action has been performed, and ValueError when an ECN configuration file
    has neither of the expected keys.
    """
    if options.use_ecn:
        # Truthiness test: the former `is not []` identity check was always
        # true, so an empty list reached `[0]` and raised IndexError.
        if options.ecn_json_config:
            sConfigFile = options.ecn_json_config[0]
            # `with` closes the file on every path, including errors
            with open(sConfigFile) as f:
                djson = json.load(f)
            if "ecn_learner_config" in djson:
                dLearnerConfig = djson["ecn_learner_config"]
            elif "ecn_ensemble" in djson:
                dLearnerConfig = djson
            else:
                # previously fell through with `doer` unbound and the file
                # left open; fail with an explicit message instead
                raise ValueError(
                    "No 'ecn_learner_config' or 'ecn_ensemble' entry in %s"
                    % sConfigFile)
            doer = DU_ABPTable_ECN(sModelName, sModelDir,
                                   dLearnerConfigArg=dLearnerConfig)
        else:
            doer = DU_ABPTable_ECN(sModelName, sModelDir)
    elif options.use_gat:
        if options.gat_json_config:
            with open(options.gat_json_config[0]) as f:
                djson = json.load(f)
            dLearnerConfig = djson["gat_learner_config"]
            doer = DU_ABPTable_GAT(sModelName, sModelDir,
                                   dLearnerConfigArg=dLearnerConfig)
        else:
            doer = DU_ABPTable_GAT(sModelName, sModelDir)
    else:
        doer = DU_ABPTable(sModelName, sModelDir,
                           C=options.crf_C,
                           tol=options.crf_tol,
                           njobs=options.crf_njobs,
                           max_iter=options.crf_max_iter,
                           inference_cache=options.crf_inference_cache)

    if options.rm:
        doer.rm()
        return

    lTrn, lTst, lRun, lFold = [
        _checkFindColDir(lsDir)
        for lsDir in (options.lTrn, options.lTst, options.lRun, options.lFold)
    ]

    traceln("- classes: ", doer.getGraphClass().getLabelNameList())

    # use the labelled XML files
    doer.sXmlFilenamePattern = doer.sLabeledXmlFilenamePattern

    if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
        if options.iFoldInitNum:
            # initialization of a cross-validation
            doer._nfold_Init(lFold, options.iFoldInitNum, test_size=0.25,
                             random_state=None, bStoreOnDisk=True)
        elif options.iFoldRunNum:
            # run one fold
            oReport = doer._nfold_RunFoldFromDisk(options.iFoldRunNum,
                                                  options.warm, options.pkl)
            traceln(oReport)
        elif options.bFoldFinish:
            tstReport = doer._nfold_Finish()
            traceln(tstReport)
        else:
            assert False, "Internal error"
        # No more processing. SystemExit is raised directly because the
        # `exit` builtin is injected by the `site` module and is not
        # guaranteed to exist (python -S, embedded/frozen interpreters).
        raise SystemExit(0)

    if lFold:
        loTstRpt = doer.nfold_Eval(lFold, 3, .25, None, options.pkl)
        import crf.Model
        sReportPickleFilename = os.path.join(sModelDir,
                                             sModelName + "__report.txt")
        traceln("Results are in %s" % sReportPickleFilename)
        crf.Model.Model.gzip_cPickle_dump(sReportPickleFilename, loTstRpt)
    elif lTrn:
        doer.train_save_test(lTrn, lTst, options.warm, options.pkl)
        try:
            # only available after a grid search
            traceln("Baseline best estimator: %s"
                    % doer.bsln_mdl.best_params_)
        except Exception:
            # best-effort trace; was a bare except, which also swallowed
            # KeyboardInterrupt and SystemExit
            pass
        traceln(" --- CRF Model ---")
        traceln(doer.getModel().getModelInfo())
    elif lTst:
        doer.load()
        tstReport = doer.test(lTst)
        traceln(tstReport)
        if options.bDetailedReport:
            traceln(tstReport.getDetailledReport())
            # NOTE(review): the per-folder dump is assumed to belong to the
            # detailed-report branch (nesting reconstructed) -- confirm.
            import crf.Model
            for test in lTst:
                sReportPickleFilename = os.path.join(
                    '..', test, sModelName + "__report.pkl")
                traceln('Report dumped into %s' % sReportPickleFilename)
                crf.Model.Model.gzip_cPickle_dump(sReportPickleFilename,
                                                  tstReport)

    if lRun:
        if options.storeX or options.applyY:
            try:
                doer.load()
            except Exception:
                # we only need the transformer, so a missing model is fine
                pass
            lsOutputFilename = doer.runForExternalMLMethod(
                lRun, options.storeX, options.applyY, options.bRevertEdges)
        else:
            doer.load()
            lsOutputFilename = doer.predict(lRun)
        traceln("Done, see in:\n %s" % lsOutputFilename)
def main(DU_BAR):
    """
    Parse the command line and run the task implemented by class DU_BAR.

    DU_BAR -- task class, instantiated as
              DU_BAR(sModelName, sModelDir, C=..., tol=..., njobs=...,
                     max_iter=..., inference_cache=...)

    The command line must provide exactly two positional arguments, the
    model folder and the model name; otherwise _exit(usage, 1, e) is called.

    Returns None. Calls sys.exit(0) after --rm handling and after a
    step-by-step cross-validation action (init / run one fold / finish).

    NOTE(review): the --docid option is declared here but its value is not
    forwarded to any call in this function (predict() receives only lRun)
    -- confirm whether it should be passed on.
    """
    version = "v.01"
    usage, description, parser = DU_CRF_Task.getBasicTrnTstRunOptionParser(
        sys.argv[0], version)
    parser.add_option("--docid", dest='docid', action="store", default=None,
                      help="only process docid")

    # parse the command line
    (options, args) = parser.parse_args()

    try:
        sModelDir, sModelName = args
    except ValueError as e:
        # wrong number of positional arguments
        traceln("Specify a model folder and a model name!")
        _exit(usage, 1, e)

    doer = DU_BAR(sModelName, sModelDir,
                  C=options.crf_C,
                  tol=options.crf_tol,
                  njobs=options.crf_njobs,
                  max_iter=options.max_iter,
                  inference_cache=options.crf_inference_cache)

    if options.rm:
        doer.rm()
        sys.exit(0)

    lTrn, lTst, lRun, lFold = [
        _checkFindColDir(lsDir)
        for lsDir in (options.lTrn, options.lTst, options.lRun, options.lFold)
    ]

    # use the labelled XML files
    doer.sXmlFilenamePattern = doer.sLabeledXmlFilenamePattern

    if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
        if options.iFoldInitNum:
            # initialization of a cross-validation
            doer._nfold_Init(lFold, options.iFoldInitNum, bStoreOnDisk=True)
        elif options.iFoldRunNum:
            # run one fold
            oReport = doer._nfold_RunFoldFromDisk(options.iFoldRunNum,
                                                  options.warm)
            traceln(oReport)
        elif options.bFoldFinish:
            tstReport = doer._nfold_Finish()
            traceln(tstReport)
        else:
            assert False, "Internal error"
        # No more processing. sys.exit, as used above, replaces the `exit`
        # builtin, which is injected by the `site` module and is not
        # guaranteed to exist.
        sys.exit(0)

    if lFold:
        loTstRpt = doer.nfold_Eval(lFold, 3, .25, None, options.pkl)
        import graph.GraphModel
        sReportPickleFilename = os.path.join(sModelDir,
                                             sModelName + "__report.txt")
        traceln("Results are in %s" % sReportPickleFilename)
        graph.GraphModel.GraphModel.gzip_cPickle_dump(sReportPickleFilename,
                                                      loTstRpt)
    elif lTrn:
        doer.train_save_test(lTrn, lTst, options.warm, options.pkl)
        try:
            # only available after a grid search
            traceln("Baseline best estimator: %s"
                    % doer.bsln_mdl.best_params_)
        except Exception:
            # best-effort trace; was a bare except, which also swallowed
            # KeyboardInterrupt and SystemExit
            pass
        traceln(" --- CRF Model ---")
        traceln(doer.getModel().getModelInfo())
    elif lTst:
        doer.load()
        tstReport = doer.test(lTst)
        traceln(tstReport)

    if lRun:
        if options.storeX or options.applyY:
            try:
                doer.load()
            except Exception:
                # we only need the transformer, so a missing model is fine
                pass
            lsOutputFilename = doer.runForExternalMLMethod(
                lRun, options.storeX, options.applyY)
        else:
            doer.load()
            lsOutputFilename = doer.predict(lRun)
        traceln("Done, see in:\n %s" % lsOutputFilename)
def standardDo(self, options):
    """
    Do whatever is requested by the options parsed from the command line.

    options -- parsed options object; this method reads rm, lTrn, lTst,
               lRun, lFold, lVld, iFoldInitNum, iFoldRunNum, bFoldFinish,
               warm, bPkl, bDetailedReport and bGraph.

    options.lVld is interpreted in one of two ways: if its first item
    converts to a float strictly between 0 and 1, it is the train/validation
    ratio; otherwise the whole value is resolved with _checkFindColDir.

    return None
    """
    if options.rm:
        self.rm()
        return

    lTrn, lTst, lRun, lFold = [
        _checkFindColDir(lsDir)
        for lsDir in (options.lTrn, options.lTst, options.lRun, options.lFold)
    ]

    # Validation set, if any
    try:
        ratio_train_val = float(options.lVld[0])
        if ratio_train_val <= 0 or 1.0 <= ratio_train_val:
            raise ValueError("Bad ratio, not in ]0, 1[")
        lVld = []
    except Exception:
        # not a usable ratio (missing, not a number, or out of range).
        # Was a bare except, which also swallowed KeyboardInterrupt and
        # SystemExit.
        ratio_train_val = None
        lVld = _checkFindColDir(options.lVld)

    if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
        if options.iFoldInitNum:
            # initialization of a cross-validation
            self._nfold_Init(lFold, options.iFoldInitNum, test_size=0.25,
                             random_state=None, bStoreOnDisk=True)
        elif options.iFoldRunNum:
            # run one fold
            oReport = self._nfold_RunFoldFromDisk(options.iFoldRunNum,
                                                  options.warm, options.bPkl)
            traceln(oReport)
        elif options.bFoldFinish:
            tstReport = self._nfold_Finish()
            traceln(tstReport)
        else:
            assert False, "Internal error"
        return

    if lFold:
        loTstRpt = self.nfold_Eval(lFold, 3, .25, None, options.bPkl)
        sReportPickleFilename = os.path.join(self.sModelDir,
                                             self.sModelName + "__report.txt")
        traceln("Results are in %s" % sReportPickleFilename)
        GraphModel.gzip_cPickle_dump(sReportPickleFilename, loTstRpt)
    elif lTrn or lTst or lRun:
        if lTrn:
            tstReport = self.train_save_test(lTrn, lTst, lVld,
                                             options.warm, options.bPkl,
                                             ratio_train_val=ratio_train_val)
            try:
                # only available after a grid search
                traceln("Baseline best estimator: %s"
                        % self.bsln_mdl.best_params_)
            except Exception:
                # best-effort trace
                pass
            traceln(self.getModel().getModelInfo())
            if lTst:
                traceln(tstReport)
                if options.bDetailedReport:
                    traceln(tstReport.getDetailledReport())
        elif lTst:
            self.load()
            tstReport = self.test(lTst)
            traceln(tstReport)
            if options.bDetailedReport:
                traceln(tstReport.getDetailledReport())
                # NOTE(review): the per-folder dump is assumed to belong to
                # the detailed-report branch (nesting reconstructed) --
                # confirm.
                for test in lTst:
                    sReportPickleFilename = os.path.join(
                        '..', test, self.sModelName + "__report.pkl")
                    traceln('Report dumped into %s' % sReportPickleFilename)
                    GraphModel.gzip_cPickle_dump(sReportPickleFilename,
                                                 tstReport)

        if lRun:
            # The external-ML path (storeX / applyY / bRevertEdges) is
            # disabled in this method; prediction is always used.
            self.load()
            lsOutputFilename = self.predict(lRun, bGraph=options.bGraph)
            traceln("Done, see in:\n %s" % lsOutputFilename)
    else:
        traceln("No action specified in command line. Doing nothing... :)")

    return