コード例 #1
0
        _exit(usage, 1, e)

    doer = DU_ABPTable(sModelName,
                       sModelDir,
                       C=options.crf_C,
                       tol=options.crf_tol,
                       njobs=options.crf_njobs,
                       max_iter=options.max_iter,
                       inference_cache=options.crf_inference_cache)

    if options.rm:
        doer.rm()
        sys.exit(0)

    lTrn, lTst, lRun, lFold = [
        _checkFindColDir(lsDir)
        for lsDir in [options.lTrn, options.lTst, options.lRun, options.lFold]
    ]
    #     if options.bAnnotate:
    #         doer.annotateDocument(lTrn)
    #         traceln('annotation done')
    #         sys.exit(0)

    traceln("- classes: ", doer.getGraphClass().getLabelNameList())

    ## use. a_mpxml files
    doer.sXmlFilenamePattern = doer.sLabeledXmlFilenamePattern

    if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
        if options.iFoldInitNum:
            """
コード例 #2
0
    
    # --- 
    #parse the command line
    (options, args) = parser.parse_args()
    
    # --- 
    try:
        sModelDir, sModelName = args
    except Exception as e:
        traceln("Specify a model folder and a model name!")
        _exit(usage, 1, e)
        
    doer = DU_ABPTableAnnotator(sModelName, sModelDir,
                      C                 = options.crf_C,
                      tol               = options.crf_tol,
                      njobs             = options.crf_njobs,
                      max_iter          = options.crf_max_iter,
                      inference_cache   = options.crf_inference_cache)
    
    
    
    if options.rm:
        doer.rm()
        sys.exit(0)

    lTrn, lTst, lRun, lFold = [_checkFindColDir(lsDir) for lsDir in [options.lTrn, options.lTst, options.lRun, options.lFold]] 
    doer.annotateDocument(lTrn)
    traceln('annotation done')    
    
コード例 #3
0
                      inference_cache=options.crf_inference_cache)

    if options.rm:
        doer.rm()
        sys.exit(0)

    traceln("- classes: ", DU_GRAPH.getLabelNameList())

    if options.best_params:
        dBestParams = doer.getModelClass().loadBestParams(
            sModelDir, options.best_params)
        doer.setLearnerConfiguration(dBestParams)

    #Add the "out" subdir if needed
    lTrn, lTst, lRun, lFold = [
        _checkFindColDir(lsDir, "out")
        for lsDir in [options.lTrn, options.lTst, options.lRun, options.lFold]
    ]

    if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
        if options.iFoldInitNum:
            """
            initialization of a cross-validation
            """
            splitter, ts_trn, lFilename_trn = doer._nfold_Init(
                lFold, options.iFoldInitNum, bStoreOnDisk=True)
        elif options.iFoldRunNum:
            """
            Run one fold
            """
            oReport = doer._nfold_RunFoldFromDisk(options.iFoldRunNum,
コード例 #4
0
def main(sModelDir, sModelName, options):
    """
    Run the DU_ABPTableR task selected by the parsed command-line options.

    sModelDir   -- model folder; also used to build the report filenames
    sModelName  -- model name
    options     -- parsed options object. Attributes read here: crf_C, crf_tol,
                   crf_njobs, max_iter, crf_inference_cache, rm, lTrn, lTst,
                   lRun, lFold, iFoldInitNum, iFoldRunNum, bFoldFinish, warm,
                   pkl, bDetailedReport, storeX, applyY, bRevertEdges

    Returns None. Raises SystemExit(0) once a cross-validation step
    (init / run one fold / finish) has been performed.
    """
    # Function-local import: sys.exit() replaces the exit() helper, which is
    # only injected by the site module and is meant for interactive use.
    import sys

    doer = DU_ABPTableR(sModelName, sModelDir,
                        C=options.crf_C,
                        tol=options.crf_tol,
                        njobs=options.crf_njobs,
                        max_iter=options.max_iter,
                        inference_cache=options.crf_inference_cache)

    if options.rm:
        doer.rm()
        return

    lTrn, lTst, lRun, lFold = [
        _checkFindColDir(lsDir, bAbsolute=False)
        for lsDir in [options.lTrn, options.lTst, options.lRun, options.lFold]
    ]

    traceln("- classes: ", doer.getGraphClass().getLabelNameList())

    if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
        if options.iFoldInitNum:
            # initialization of a cross-validation
            splitter, ts_trn, lFilename_trn = doer._nfold_Init(
                lFold, options.iFoldInitNum, bStoreOnDisk=True)
        elif options.iFoldRunNum:
            # run one fold
            oReport = doer._nfold_RunFoldFromDisk(options.iFoldRunNum,
                                                  options.warm, options.pkl)
            traceln(oReport)
        elif options.bFoldFinish:
            tstReport = doer._nfold_Finish()
            traceln(tstReport)
        else:
            assert False, "Internal error"
        # no more processing!!
        sys.exit(0)

    if lFold:
        loTstRpt = doer.nfold_Eval(lFold, 3, .25, None, options.pkl)
        sReportPickleFilename = os.path.join(sModelDir,
                                             sModelName + "__report.txt")
        traceln("Results are in %s" % sReportPickleFilename)
        graph.GraphModel.GraphModel.gzip_cPickle_dump(sReportPickleFilename,
                                                      loTstRpt)
    elif lTrn:
        doer.train_save_test(lTrn, lTst, options.warm, options.pkl)
        try:
            # best_params_ is only expected after a grid search (see the
            # original "for GridSearch" remark); tracing it is best-effort.
            traceln("Baseline best estimator: %s" % doer.bsln_mdl.best_params_)
        except Exception:
            pass
        traceln(" --- CRF Model ---")
        traceln(doer.getModel().getModelInfo())
    elif lTst:
        doer.load()
        tstReport = doer.test(lTst)
        traceln(tstReport)
        if options.bDetailedReport:
            traceln(tstReport.getDetailledReport())
            sReportPickleFilename = os.path.join(
                sModelDir, sModelName + "__detailled_report.txt")
            graph.GraphModel.GraphModel.gzip_cPickle_dump(
                sReportPickleFilename, tstReport)

    if lRun:
        if options.storeX or options.applyY:
            try:
                doer.load()
            except Exception:
                # we only need the transformer, so a failed load is tolerated;
                # Ctrl-C and SystemExit are no longer swallowed here
                pass
            lsOutputFilename = doer.runForExternalMLMethod(
                lRun, options.storeX, options.applyY, options.bRevertEdges)
        else:
            doer.load()
            lsOutputFilename = doer.predict(lRun)

        traceln("Done, see in:\n  %s" % lsOutputFilename)
コード例 #5
0
ファイル: UT_gcn.py プロジェクト: kapitsa2811/TranskribusDU
    def test_test_ensemble(self):
        """
        Load the 'UT_mod2_ensemble' ECN ensemble from UT_MODELS, test it on
        ./abp_test and require the first value of its classification report
        (named acc in the original, presumably the accuracy) to exceed 0.5.

        The test fails outright when the checkpoint index file of the
        UT_mod2_m2 member does not exist.
        """
        sModelDir = 'UT_MODELS'
        sModelName = 'UT_mod2_ensemble'
        sCheckpointIndex = os.path.join('UT_MODELS',
                                        'UT_mod2_m2_bestmodel.ckpt.index')

        lColDir = _checkFindColDir('./abp_test')

        def _ecnMember(sName, iPatience, sActivation):
            # The two ensemble members differ only by name, patience and
            # activation function; everything else is shared.
            return {
                "type": "ecn",
                "name": sName,
                "dropout_rate_edge": 0.0,
                "dropout_rate_edge_feat": 0.0,
                "dropout_rate_node": 0.0,
                "lr": 0.001,
                "mu": 0.0,
                "nb_iter": 50,
                "nconv_edge": 1,
                "node_indim": -1,
                "num_layers": 8,
                "ratio_train_val": 0.1,
                "patience": iPatience,
                "activation_name": sActivation,
                "stack_convolutions": True,
            }

        dEnsembleConfig = {
            "ecn_ensemble": [
                _ecnMember("UT_mod2_m1", 500, "tanh"),
                _ecnMember("UT_mod2_m2", 50, "relu"),
            ],
        }

        # Guard clause: self.fail() raises, so nothing below runs without
        # the checkpoint.
        if not os.path.exists(sCheckpointIndex):
            self.fail('UT_mod1 was not trained')

        # Do the test
        oEnsemble = DU_ABPTable.DU_ABPTable_ECN(
            sModelName, sModelDir, dLearnerConfigArg=dEnsembleConfig)
        oEnsemble.load()
        oReport = oEnsemble.test(lColDir)

        fAcc, _ = oReport.getClassificationReport()
        print(fAcc)
        self.assertTrue(fAcc > 0.5)
コード例 #6
0
def main(sModelDir, sModelName, options):
    """
    Run the DU_ABPTable_TypedCRF task selected by the parsed command-line
    options.

    sModelDir   -- model folder; also used to build the report filename
    sModelName  -- model name
    options     -- parsed options object. Attributes read here: crf_C, crf_tol,
                   crf_njobs, max_iter, crf_inference_cache, docid, rm, lTrn,
                   lTst, lRun, lFold, iFoldInitNum, iFoldRunNum, bFoldFinish,
                   warm

    Returns None. Raises SystemExit(0) once a cross-validation step
    (init / run one fold / finish) has been performed.
    """
    # Function-local import: sys.exit() replaces the exit() helper, which is
    # only injected by the site module and is meant for interactive use.
    import sys

    doer = DU_ABPTable_TypedCRF(sModelName,
                                sModelDir,
                                C=options.crf_C,
                                tol=options.crf_tol,
                                njobs=options.crf_njobs,
                                max_iter=options.max_iter,
                                inference_cache=options.crf_inference_cache)

    # Any falsy docid (None, empty string) means "no document filter".
    sDocId = options.docid if options.docid else None

    if options.rm:
        doer.rm()
        return

    lTrn, lTst, lRun, lFold = [
        _checkFindColDir(lsDir)
        for lsDir in [options.lTrn, options.lTst, options.lRun, options.lFold]
    ]

    # use the labeled XML filename pattern (a_mpxml files)
    doer.sXmlFilenamePattern = doer.sLabeledXmlFilenamePattern

    if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
        if options.iFoldInitNum:
            # initialization of a cross-validation
            splitter, ts_trn, lFilename_trn = doer._nfold_Init(
                lFold, options.iFoldInitNum, bStoreOnDisk=True)
        elif options.iFoldRunNum:
            # run one fold
            oReport = doer._nfold_RunFoldFromDisk(options.iFoldRunNum,
                                                  options.warm)
            traceln(oReport)
        elif options.bFoldFinish:
            tstReport = doer._nfold_Finish()
            traceln(tstReport)
        else:
            assert False, "Internal error"
        # no more processing!!
        sys.exit(0)

    if lFold:
        loTstRpt = doer.nfold_Eval(lFold, 3, .25, None)
        import graph.GraphModel
        sReportPickleFilename = os.path.join(sModelDir,
                                             sModelName + "__report.txt")
        traceln("Results are in %s" % sReportPickleFilename)
        graph.GraphModel.GraphModel.gzip_cPickle_dump(sReportPickleFilename,
                                                      loTstRpt)
    elif lTrn:
        doer.train_save_test(lTrn, lTst, options.warm)
        try:
            # best_params_ is only expected after a grid search (see the
            # original "for GridSearch" remark); tracing it is best-effort.
            traceln("Baseline best estimator: %s" %
                    doer.bsln_mdl.best_params_)
        except Exception:
            pass
        traceln(" --- CRF Model ---")
        traceln(doer.getModel().getModelInfo())
    elif lTst:
        doer.load()
        tstReport = doer.test(lTst)
        traceln(tstReport)

    if lRun:
        doer.load()
        lsOutputFilename = doer.predict(lRun, sDocId)
        traceln("Done, see in:\n  %s" % lsOutputFilename)
コード例 #7
0
def main(sModelDir, sModelName, options):
    """
    Build the learner selected on the command line (ECN, GAT or the CRF-based
    DU_ABPTable) and run the requested action(s).

    sModelDir   -- model folder; also used to build the n-fold report filename
    sModelName  -- model name
    options     -- parsed options object. Attributes read here: use_ecn,
                   ecn_json_config, use_gat, gat_json_config, crf_C, crf_tol,
                   crf_njobs, crf_max_iter, crf_inference_cache, rm, lTrn,
                   lTst, lRun, lFold, iFoldInitNum, iFoldRunNum, bFoldFinish,
                   warm, pkl, bDetailedReport, storeX, applyY, bRevertEdges

    Returns None. Raises SystemExit(0) once a cross-validation step
    (init / run one fold / finish) has been performed.
    Raises ValueError when the ECN JSON configuration holds neither an
    "ecn_learner_config" nor an "ecn_ensemble" entry, and KeyError when the
    GAT JSON configuration has no "gat_learner_config" entry.
    """
    # Function-local import: sys.exit() replaces the exit() helper, which is
    # only injected by the site module and is meant for interactive use.
    import sys

    if options.use_ecn:
        # Truthiness test: the former "is not []" identity check was always
        # true, so an empty list reached [0] and raised IndexError.
        if options.ecn_json_config:
            with open(options.ecn_json_config[0]) as f:
                djson = json.load(f)

            if "ecn_learner_config" in djson:
                dLearnerConfig = djson["ecn_learner_config"]
            elif "ecn_ensemble" in djson:
                # an ensemble configuration is passed as a whole
                dLearnerConfig = djson
            else:
                # formerly left 'doer' unbound and failed later with an
                # unrelated UnboundLocalError
                raise ValueError(
                    "No 'ecn_learner_config' or 'ecn_ensemble' entry in %s"
                    % options.ecn_json_config[0])
            doer = DU_ABPTable_ECN(sModelName,
                                   sModelDir,
                                   dLearnerConfigArg=dLearnerConfig)
        else:
            doer = DU_ABPTable_ECN(sModelName, sModelDir)
    elif options.use_gat:
        if options.gat_json_config:
            with open(options.gat_json_config[0]) as f:
                djson = json.load(f)
            dLearnerConfig = djson["gat_learner_config"]
            doer = DU_ABPTable_GAT(sModelName,
                                   sModelDir,
                                   dLearnerConfigArg=dLearnerConfig)
        else:
            doer = DU_ABPTable_GAT(sModelName, sModelDir)
    else:
        doer = DU_ABPTable(sModelName,
                           sModelDir,
                           C=options.crf_C,
                           tol=options.crf_tol,
                           njobs=options.crf_njobs,
                           max_iter=options.crf_max_iter,
                           inference_cache=options.crf_inference_cache)

    if options.rm:
        doer.rm()
        return

    lTrn, lTst, lRun, lFold = [
        _checkFindColDir(lsDir)
        for lsDir in [options.lTrn, options.lTst, options.lRun, options.lFold]
    ]

    traceln("- classes: ", doer.getGraphClass().getLabelNameList())

    # use the labeled XML filename pattern (a_mpxml files)
    doer.sXmlFilenamePattern = doer.sLabeledXmlFilenamePattern

    if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
        if options.iFoldInitNum:
            # initialization of a cross-validation
            splitter, ts_trn, lFilename_trn = doer._nfold_Init(
                lFold,
                options.iFoldInitNum,
                test_size=0.25,
                random_state=None,
                bStoreOnDisk=True)
        elif options.iFoldRunNum:
            # run one fold
            oReport = doer._nfold_RunFoldFromDisk(options.iFoldRunNum,
                                                  options.warm, options.pkl)
            traceln(oReport)
        elif options.bFoldFinish:
            tstReport = doer._nfold_Finish()
            traceln(tstReport)
        else:
            assert False, "Internal error"
        # no more processing!!
        sys.exit(0)

    if lFold:
        loTstRpt = doer.nfold_Eval(lFold, 3, .25, None, options.pkl)
        import crf.Model
        sReportPickleFilename = os.path.join(sModelDir,
                                             sModelName + "__report.txt")
        traceln("Results are in %s" % sReportPickleFilename)
        crf.Model.Model.gzip_cPickle_dump(sReportPickleFilename, loTstRpt)
    elif lTrn:
        doer.train_save_test(lTrn, lTst, options.warm, options.pkl)
        try:
            # best_params_ is only expected after a grid search (see the
            # original "for GridSearch" remark); tracing it is best-effort.
            traceln("Baseline best estimator: %s" %
                    doer.bsln_mdl.best_params_)
        except Exception:
            pass
        traceln(" --- CRF Model ---")
        traceln(doer.getModel().getModelInfo())
    elif lTst:
        doer.load()
        tstReport = doer.test(lTst)
        traceln(tstReport)
        if options.bDetailedReport:
            traceln(tstReport.getDetailledReport())
            import crf.Model
            # the same report object is dumped once per test collection
            for test in lTst:
                sReportPickleFilename = os.path.join(
                    '..', test, sModelName + "__report.pkl")
                traceln('Report dumped into %s' % sReportPickleFilename)
                crf.Model.Model.gzip_cPickle_dump(sReportPickleFilename,
                                                  tstReport)

    if lRun:
        if options.storeX or options.applyY:
            try:
                doer.load()
            except Exception:
                # we only need the transformer, so a failed load is tolerated;
                # Ctrl-C and SystemExit are no longer swallowed here
                pass
            lsOutputFilename = doer.runForExternalMLMethod(
                lRun, options.storeX, options.applyY, options.bRevertEdges)
        else:
            doer.load()
            lsOutputFilename = doer.predict(lRun)

        traceln("Done, see in:\n  %s" % lsOutputFilename)
コード例 #8
0
def main(DU_BAR):
    """
    Command-line entry point: parse sys.argv, instantiate DU_BAR and run the
    requested action(s).

    DU_BAR -- callable (presumably a task class; confirm against callers)
              invoked as DU_BAR(sModelName, sModelDir, C=..., tol=...,
              njobs=..., max_iter=..., inference_cache=...)

    The two positional command-line arguments are the model folder and the
    model name. When they cannot be unpacked, a message is traced and
    _exit(usage, 1, e) is called.

    Returns None. Raises SystemExit(0) after --rm handling and once a
    cross-validation step (init / run one fold / finish) has been performed.
    """
    version = "v.01"
    usage, description, parser = DU_CRF_Task.getBasicTrnTstRunOptionParser(
        sys.argv[0], version)
    parser.add_option("--docid",
                      dest='docid',
                      action="store",
                      default=None,
                      help="only process docid")
    # ---
    # parse the command line
    (options, args) = parser.parse_args()

    # ---
    try:
        sModelDir, sModelName = args
    except Exception as e:
        traceln("Specify a model folder and a model name!")
        _exit(usage, 1, e)

    doer = DU_BAR(sModelName,
                  sModelDir,
                  C=options.crf_C,
                  tol=options.crf_tol,
                  njobs=options.crf_njobs,
                  max_iter=options.max_iter,
                  inference_cache=options.crf_inference_cache)

    # NOTE(review): sDocId is computed but never passed to predict() below,
    # so --docid currently has no effect in this function. Confirm whether
    # doer.predict accepts a document id before wiring it in.
    sDocId = options.docid if options.docid else None

    if options.rm:
        doer.rm()
        sys.exit(0)

    lTrn, lTst, lRun, lFold = [
        _checkFindColDir(lsDir)
        for lsDir in [options.lTrn, options.lTst, options.lRun, options.lFold]
    ]

    # use the labeled XML filename pattern (a_mpxml files)
    doer.sXmlFilenamePattern = doer.sLabeledXmlFilenamePattern

    if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
        if options.iFoldInitNum:
            # initialization of a cross-validation
            splitter, ts_trn, lFilename_trn = doer._nfold_Init(
                lFold, options.iFoldInitNum, bStoreOnDisk=True)
        elif options.iFoldRunNum:
            # run one fold
            oReport = doer._nfold_RunFoldFromDisk(options.iFoldRunNum,
                                                  options.warm)
            traceln(oReport)
        elif options.bFoldFinish:
            tstReport = doer._nfold_Finish()
            traceln(tstReport)
        else:
            assert False, "Internal error"
        # no more processing!!
        # sys.exit rather than the site-injected exit() helper, consistent
        # with the --rm branch above
        sys.exit(0)

    if lFold:
        loTstRpt = doer.nfold_Eval(lFold, 3, .25, None, options.pkl)
        import graph.GraphModel
        sReportPickleFilename = os.path.join(sModelDir,
                                             sModelName + "__report.txt")
        traceln("Results are in %s" % sReportPickleFilename)
        graph.GraphModel.GraphModel.gzip_cPickle_dump(sReportPickleFilename,
                                                      loTstRpt)
    elif lTrn:
        doer.train_save_test(lTrn, lTst, options.warm, options.pkl)
        try:
            # best_params_ is only expected after a grid search (see the
            # original "for GridSearch" remark); tracing it is best-effort.
            traceln("Baseline best estimator: %s" %
                    doer.bsln_mdl.best_params_)
        except Exception:
            pass
        traceln(" --- CRF Model ---")
        traceln(doer.getModel().getModelInfo())
    elif lTst:
        doer.load()
        tstReport = doer.test(lTst)
        traceln(tstReport)

    if lRun:
        if options.storeX or options.applyY:
            try:
                doer.load()
            except Exception:
                # we only need the transformer, so a failed load is tolerated;
                # Ctrl-C and SystemExit are no longer swallowed here
                pass
            lsOutputFilename = doer.runForExternalMLMethod(
                lRun, options.storeX, options.applyY)
        else:
            doer.load()
            lsOutputFilename = doer.predict(lRun)
        traceln("Done, see in:\n  %s" % lsOutputFilename)
コード例 #9
0
    def standardDo(self, options):
        """
        Do whatever is requested by the options of the parsed command line.

        options -- parsed options object. Attributes read here: rm, lTrn,
                   lTst, lRun, lFold, lVld, iFoldInitNum, iFoldRunNum,
                   bFoldFinish, warm, bPkl, bDetailedReport, bGraph

        Order of precedence:
          1. rm: remove the model and stop
          2. one cross-validation step (init / run one fold / finish), then stop
          3. lFold: n-fold evaluation, report dumped in the model folder
          4. otherwise train and/or test, then predict on lRun if given
        When none of lFold, lTrn, lTst, lRun is set, a message is traced and
        nothing is done.

        return None
        """
        if options.rm:
            self.rm()
            return

        lTrn, lTst, lRun, lFold = [
            _checkFindColDir(lsDir)
            for lsDir in [options.lTrn, options.lTst, options.lRun, options.lFold]
        ]

        # Validation set if any: options.lVld holds either a single ratio in
        # ]0, 1[ or a list of collection folders.
        try:
            ratio_train_val = float(options.lVld[0])
            lVld            = []
            if ratio_train_val <= 0 or 1.0 <= ratio_train_val:
                raise Exception("Bad ratio, not in ]0, 1[")
        except Exception:
            # Not a usable ratio (missing, not a number, or out of range):
            # treat lVld as folders. Ctrl-C and SystemExit are not caught.
            ratio_train_val = None
            lVld            = _checkFindColDir(options.lVld)

        if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
            if options.iFoldInitNum:
                # initialization of a cross-validation
                splitter, ts_trn, lFilename_trn = self._nfold_Init(
                    lFold, options.iFoldInitNum,
                    test_size=0.25, random_state=None, bStoreOnDisk=True)
            elif options.iFoldRunNum:
                # run one fold
                oReport = self._nfold_RunFoldFromDisk(options.iFoldRunNum,
                                                      options.warm, options.bPkl)
                traceln(oReport)
            elif options.bFoldFinish:
                tstReport = self._nfold_Finish()
                traceln(tstReport)
            else:
                assert False, "Internal error"

            return

        if lFold:
            loTstRpt = self.nfold_Eval(lFold, 3, .25, None, options.bPkl)
            sReportPickleFilename = os.path.join(self.sModelDir,
                                                 self.sModelName + "__report.txt")
            traceln("Results are in %s" % sReportPickleFilename)
            GraphModel.gzip_cPickle_dump(sReportPickleFilename, loTstRpt)
        elif lTrn or lTst or lRun:
            if lTrn:
                tstReport = self.train_save_test(lTrn, lTst, lVld,
                                                 options.warm, options.bPkl,
                                                 ratio_train_val=ratio_train_val)
                try:
                    # best_params_ is only expected after a grid search (see
                    # the original "for GridSearch" remark); best-effort trace.
                    traceln("Baseline best estimator: %s" % self.bsln_mdl.best_params_)
                except Exception:
                    pass
                traceln(self.getModel().getModelInfo())
                if lTst:
                    traceln(tstReport)
                    if options.bDetailedReport:
                        traceln(tstReport.getDetailledReport())
            elif lTst:
                self.load()
                tstReport = self.test(lTst)
                traceln(tstReport)
                if options.bDetailedReport:
                    traceln(tstReport.getDetailledReport())
                    # the same report object is dumped once per test collection
                    for test in lTst:
                        sReportPickleFilename = os.path.join(
                            '..', test, self.sModelName + "__report.pkl")
                        traceln('Report dumped into %s' % sReportPickleFilename)
                        GraphModel.gzip_cPickle_dump(sReportPickleFilename, tstReport)

            if lRun:
                self.load()
                lsOutputFilename = self.predict(lRun, bGraph=options.bGraph)

                traceln("Done, see in:\n  %s" % lsOutputFilename)
        else:
            traceln("No action specified in command line. Doing nothing... :)")

        return