Example #1
            gridPointDir = "grid/gridpoint-" + pId
            assert gridCSC.exists(gridPointDir)
            if gridCSC.exists(gridPointDir + "/results.csv"):
                print >> sys.stderr, "Downloading results"
                gridCSC.download(gridPointDir + "/results.csv",
                                 "results" + pId + ".csv")
            else:
                print >> sys.stderr, "Run not yet finished"
                finished = False
        time.sleep(60)

if options.mode in ["ALL", "GRID_EVALUATE"]:
    bestResult = (-1, None, None)
    for filename in os.listdir(WORKDIR):
        if filename[-4:] == ".csv" and os.path.getsize(filename) != 0:
            gridRows = TableUtils.readCSV(filename)
            fscore = None
            for row in gridRows:
                if row["eval"] == "approximate" and row[
                        "event_class"] == "ALL-TOTAL":
                    fscore = row["fscore"]
                    break
            assert fscore != None, row
            if fscore > bestResult[0]:
                bestResult = (fscore, gridRows, filename)
    print bestResult

#if options.mode in ["]
#    print >> sys.stderr, "Grid search complete"
#    print >> sys.stderr, "Tested", count - options.startFrom, "out of", count, "combinations"
#    print >> sys.stderr, "Best parameter combination:", bestResults[0]
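If TableUtils.readCSV returns the CSV cell values as strings (as csv.DictReader would), the comparison fscore > bestResult[0] above mixes strings and numbers, and os.path.getsize(filename) only works when the process is already running inside WORKDIR. A minimal reworked sketch of the same selection step, assuming readCSV returns a list of dicts keyed by column name; findBestGridResult is a hypothetical helper, not part of the original script:

import os
import Utils.TableUtils as TableUtils

def findBestGridResult(workdir):
    # Hypothetical helper: pick the grid point whose "approximate"/"ALL-TOTAL"
    # row has the highest f-score, casting the CSV string to float first.
    best = (-1.0, None, None)
    for filename in os.listdir(workdir):
        path = os.path.join(workdir, filename)
        if filename.endswith(".csv") and os.path.getsize(path) != 0:
            gridRows = TableUtils.readCSV(path)
            for row in gridRows:
                if row["eval"] == "approximate" and row["event_class"] == "ALL-TOTAL":
                    fscore = float(row["fscore"])  # CSV values arrive as strings
                    if fscore > best[0]:
                        best = (fscore, gridRows, filename)
                    break
    return best

# usage, mirroring the snippet: bestResult = findBestGridResult(WORKDIR)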
Example #2
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"
    sys.path.append("..")
    from Utils.ProgressCounter import ProgressCounter
    from Utils.Parameters import splitParameters
    from optparse import OptionParser
    import Core.ExampleUtils as ExampleUtils
    from Core.IdSet import IdSet
    import Utils.TableUtils as TableUtils
    optparser = OptionParser(usage="%prog [options]\nCalculate f-score and other statistics.")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Input file in csv-format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output file for the statistics")
    optparser.add_option("-e", "--evaluator", default="BinaryEvaluator", dest="evaluator", help="Prediction evaluator class")
    (options, args) = optparser.parse_args()

    print >> sys.stderr, "Importing modules"
    exec "from Evaluators." + options.evaluator + " import " + options.evaluator + " as EvaluatorClass"
    
    if options.output != None and os.path.exists(options.output):
        print >> sys.stderr, "Output file exists, removing", options.output
        os.remove(options.output)

    # Read input data
    fieldnames = ["class","prediction","id","fold"]
    rows = TableUtils.readCSV(options.input, fieldnames)
    evaluateCSV(rows, options, EvaluatorClass)
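The exec line above assembles an import statement from the --evaluator option. importlib.import_module gives the same dynamic lookup without building source text; a sketch under the assumption, implied by the exec string, that each evaluator class lives in a same-named module inside the Evaluators package (loadEvaluatorClass is a hypothetical helper):

import importlib

def loadEvaluatorClass(evaluatorName):
    # Hypothetical helper: "BinaryEvaluator" -> Evaluators.BinaryEvaluator.BinaryEvaluator
    module = importlib.import_module("Evaluators." + evaluatorName)
    return getattr(module, evaluatorName)

# EvaluatorClass = loadEvaluatorClass(options.evaluator)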
Example #4
            classNameDict[classId] = className
        classNameFile.close()
        #classSet = IdSet(idDict=classNameDict, locked=True)

    if options.output != None and os.path.exists(options.output):
        print >> sys.stderr, "Output file exists, removing", options.output
        os.remove(options.output)
    
    print >> sys.stderr, "Importing modules"
    exec "from Evaluators." + options.evaluator + " import " + options.evaluator + " as EvaluatorClass"
    fieldnames = ["class","prediction","id","fold","c"]
    
    # Find best c-parameter from parameter estimation data
    print >> sys.stderr, "Finding optimal c-parameters from", options.parameters    
    rows = TableUtils.readCSV(options.parameters, fieldnames)
    folds = sorted(list(TableUtils.getValueSet(rows, "fold")))
    cParameterByFold = {}
    for fold in folds:
        print >> sys.stderr, "  Processing fold", fold
        foldRows = TableUtils.selectRowsCSV(rows, {"fold":fold})
        cParameters = sorted(list(TableUtils.getValueSet(foldRows, "c")))
        evaluators = []
        cParameterByEvaluator = {}
        for cParameter in cParameters:
            print >> sys.stderr, "    Processing c-parameter", cParameter, 
            paramRows = TableUtils.selectRowsCSV(foldRows, {"c":cParameter})
            evaluator = Evaluator.calculateFromCSV(paramRows, EvaluatorClass)
            #print evaluator.toStringConcise()
            cParameterByEvaluator[evaluator] = cParameter
            evaluators.append(evaluator)
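The loop above slices the parameter-estimation rows first by fold and then by c via TableUtils.getValueSet and TableUtils.selectRowsCSV. A plain-Python sketch of that grouping step, assuming the rows are dicts keyed by column name; groupRows is a hypothetical stand-in for those two calls:

from collections import defaultdict

def groupRows(rows, key):
    # Hypothetical stand-in for getValueSet + selectRowsCSV:
    # map each distinct value of "key" to the rows carrying that value.
    groups = defaultdict(list)
    for row in rows:
        groups[row[key]].append(row)
    return groups

# byFold = groupRows(rows, "fold")
# for fold, foldRows in sorted(byFold.items()):
#     for cParameter, paramRows in sorted(groupRows(foldRows, "c").items()):
#         evaluator = Evaluator.calculateFromCSV(paramRows, EvaluatorClass)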
Example #5
            classNameDict[classId] = className
        classNameFile.close()
        #classSet = IdSet(idDict=classNameDict, locked=True)

    if options.output != None and os.path.exists(options.output):
        print >> sys.stderr, "Output file exists, removing", options.output
        os.remove(options.output)

    print >> sys.stderr, "Importing modules"
    exec "from Evaluators." + options.evaluator + " import " + options.evaluator + " as EvaluatorClass"
    fieldnames = ["class", "prediction", "id", "fold", "c"]

    # Find best c-parameter from parameter estimation data
    print >> sys.stderr, "Finding optimal c-parameters from", options.parameters
    rows = TableUtils.readCSV(options.parameters, fieldnames)
    folds = sorted(list(TableUtils.getValueSet(rows, "fold")))
    cParameterByFold = {}
    for fold in folds:
        print >> sys.stderr, "  Processing fold", fold
        foldRows = TableUtils.selectRowsCSV(rows, {"fold": fold})
        cParameters = sorted(list(TableUtils.getValueSet(foldRows, "c")))
        evaluators = []
        cParameterByEvaluator = {}
        for cParameter in cParameters:
            print >> sys.stderr, "    Processing c-parameter", cParameter,
            paramRows = TableUtils.selectRowsCSV(foldRows, {"c": cParameter})
            evaluator = Evaluator.calculateFromCSV(paramRows, EvaluatorClass)
            #print evaluator.toStringConcise()
            cParameterByEvaluator[evaluator] = cParameter
            evaluators.append(evaluator)
Example #6
            pId = getCombinationString(params) #"-boost_"+str(param)[0:3] # param id
            gridPointDir = "grid/gridpoint-"+pId
            assert gridCSC.exists(gridPointDir)
            if gridCSC.exists(gridPointDir + "/results.csv"):
                print >> sys.stderr, "Downloading results"
                gridCSC.download(gridPointDir + "/results.csv", "results"+pId+".csv")
            else:
                print >> sys.stderr, "Run not yet finished"
                finished = False
        time.sleep(60)

if options.mode in ["ALL", "GRID_EVALUATE"]:
    bestResult = (-1, None, None)
    for filename in os.listdir(WORKDIR):
        if filename[-4:] == ".csv" and os.path.getsize(filename) != 0:
            gridRows = TableUtils.readCSV(filename)
            fscore = None
            for row in gridRows:
                if row["eval"] == "approximate" and row["event_class"] == "ALL-TOTAL":
                    fscore = row["fscore"]
                    break
            assert fscore != None, row
            if fscore > bestResult[0]:
                bestResult = (fscore, gridRows, filename)
    print bestResult
            

#if options.mode in ["]
#    print >> sys.stderr, "Grid search complete"
#    print >> sys.stderr, "Tested", count - options.startFrom, "out of", count, "combinations"
#    print >> sys.stderr, "Best parameter combination:", bestResults[0]
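Example #6, like Example #1, polls a remote grid directory until every parameter combination has written its results.csv, sleeping 60 seconds between passes. A compact sketch of that polling pattern, using only the exists() and download() methods the snippet itself calls on gridCSC; waitForGridResults is a hypothetical helper, not part of the original script:

import sys
import time

def waitForGridResults(connection, pointIds, sleepSeconds=60):
    # Poll remote grid-point directories until each one has a results.csv,
    # downloading finished results as results<pId>.csv, as in the snippet above.
    pending = list(pointIds)
    while pending:
        stillPending = []
        for pId in pending:
            remoteResults = "grid/gridpoint-" + pId + "/results.csv"
            if connection.exists(remoteResults):
                connection.download(remoteResults, "results" + pId + ".csv")
            else:
                stillPending.append(pId)
        if stillPending:
            sys.stderr.write("%d grid run(s) not yet finished\n" % len(stillPending))
            time.sleep(sleepSeconds)
        pending = stillPending

# usage sketch: waitForGridResults(gridCSC, pointIds)  # pointIds: the getCombinationString(...) ids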