Esempio n. 1
0
def String2VariableExpr(s, reject_internal):
    """Parse ``s`` as a variable expression, or return None if it is not one.

    After ``sstrip``, the text is split on "$" into one to three tokens read
    as ``label[$gid[$vtype]]``. The label may end in ``_eid_<int>``, in which
    case an already registered expression with that index is looked up
    instead of creating a new one.

    Args:
        s: Text to parse.
        reject_internal: When truthy, a non-negative gid is asserted. The
            flag is also forwarded to ``EXPR.VariableExpr``.

    Returns:
        None when ``s`` parses as a constant or the label starts with a
        decimal digit; the entry of ``EXPR.ALL_VariableExprs`` whose
        ``index`` equals the expression id when one is given; otherwise a
        new ``EXPR.VariableExpr``.
    """
    s = sstrip(s)

    # Anything that parses as a constant is not a variable.
    if String2ConstantExpr(s) is not None:
        return None

    tokens = tft_utils.String2Tokens(s, "$")
    n_tokens = len(tokens)
    assert (n_tokens in [1, 2, 3])

    # Split an optional "_eid_<int>" suffix off the label.
    label = tokens[0]
    eid = None
    label_parts = tft_utils.String2Tokens(label, "_eid_")
    assert (len(label_parts) in [1, 2])
    if len(label_parts) == 2:
        label, eid = label_parts[0], int(label_parts[1])

    # A label must not start with a decimal digit.
    if label[0] in "0123456789":
        return None

    # Group id: second token when present, -1 otherwise.
    gid = int(tokens[1]) if n_tokens >= 2 else -1

    # Value type: third token when present ("Int" or "Real"), Fraction
    # otherwise.
    vtype = Fraction
    if n_tokens == 3:
        type_name = tokens[2]
        if type_name == "Int":
            vtype = int
        else:
            assert (type_name == "Real")

    if reject_internal:
        assert (0 <= gid)

    # Without an expression id a fresh variable is created.
    if eid is None:
        return EXPR.VariableExpr(label, vtype, gid, reject_internal)

    # With an expression id the variable must already be registered.
    for known in EXPR.ALL_VariableExprs:
        if known.index == eid:
            return known

    print("ERROR: variable labeled with [" + label + "] " +
          "with expression id [" + str(eid) + "] was not defined...")
    assert (False)
Esempio n. 2
0
    def loadFromShortString(self, ss):
        """Fill ``self.gid2eps`` from a space-separated short string.

        Every space-separated item of ``ss`` must split on ">" into exactly
        two parts: an integer group id and a value accepted by ``Fraction``.
        A group id that is already present in ``self.gid2eps`` trips an
        assertion.
        """
        for item in tft_utils.String2Tokens(ss, " "):
            parts = tft_utils.String2Tokens(item, ">")
            assert (len(parts) == 2)

            group_id = int(parts[0])
            epsilon = Fraction(parts[1])

            # Each group id may be given only once.
            assert (group_id not in self.gid2eps.keys())
            self.gid2eps[group_id] = epsilon
Esempio n. 3
0
def scikitDecisionTreeTraining(tdata):
    """Train the module-level decision tree from a training-data file.

    Each non-blank line of ``tdata`` is split on spaces into an integer class
    label followed by features written as ``<position>:<value>``, with
    positions counted from 1. Every line must carry the same number of
    features as the first one.

    Side effects: rebinds the global ``DT_MODEL`` to the fitted classifier
    and calls ``scikitExportDecisionTree2Dot(FNAME_DT_DOT)``.

    Args:
        tdata: Path of the training-data file; asserted to exist.
    """
    global DT_MODEL

    assert (os.path.isfile(tdata))

    l_feat = None
    feats = []
    labels = []

    # -- load training data --
    # The context manager closes the file even when a malformed line trips
    # one of the assertions or conversions below.
    with open(tdata, "r") as ftrain:
        for aline in ftrain:
            aline = aline.strip()

            if (aline == ""):
                continue

            tokens = tft_utils.String2Tokens(aline, " ")
            assert (len(tokens) >= 2)
            if (l_feat is None):
                l_feat = len(tokens) - 1
            else:
                assert (len(tokens) == l_feat + 1)

            # The first token is the label.
            labels.append(int(tokens[0]))

            # The remaining tokens are "<position>:<value>" features.
            this_feat = []
            for position, token in enumerate(tokens[1:], 1):
                prefix = str(position) + ":"
                assert (token.startswith(prefix))
                this_feat.append(float(token[len(prefix):]))

            assert (len(this_feat) == l_feat)
            feats.append(this_feat)

    assert (len(labels) == len(feats))

    # -- train the model --
    DT_MODEL = tree.DecisionTreeClassifier()
    DT_MODEL.max_depth = MAX_DT_DEPTH
    DT_MODEL = DT_MODEL.fit(feats, labels)

    # -- export --
    scikitExportDecisionTree2Dot(FNAME_DT_DOT)
Esempio n. 4
0
def String2BoundedVariableExpr(s):
    """Parse ``<variable> in [<lower>,<upper>]`` and return the variable.

    ``s`` is split on "in" into the variable text and a bracketed range. The
    two bounds are converted with the variable's own type and wrapped in
    ``EXPR.ConstantExpr``. A variable without bounds receives them; a
    variable that already has bounds must agree with the parsed ones to
    within 1e-07.

    Returns:
        The ``EXPR.VariableExpr`` obtained from ``String2Expr``.
    """
    parts = tft_utils.String2Tokens(s, "in")
    assert (len(parts) == 2)

    var_text = parts[0]
    range_text = parts[1]
    assert (range_text.startswith("[") and range_text.endswith("]"))

    var = String2Expr(var_text, True)
    assert (isinstance(var, EXPR.VariableExpr))

    # Drop the surrounding brackets and split the two bounds.
    bounds = tft_utils.String2Tokens(range_text[1:-1], ",")
    assert (len(bounds) == 2)

    vlb = EXPR.ConstantExpr(var.type()(bounds[0]))
    vub = EXPR.ConstantExpr(var.type()(bounds[1]))
    assert (isinstance(vlb, EXPR.ConstantExpr))
    assert (isinstance(vub, EXPR.ConstantExpr))

    if var.hasBounds():
        # Existing bounds are compared with a tolerance rather than for
        # exact equality.
        tolerance = float(1e-07)
        assert (abs(var.lb().value() - vlb.value()) <= tolerance)
        assert (abs(var.ub().value() - vub.value()) <= tolerance)
    else:
        var.setBounds(vlb, vub)

    return var
Esempio n. 5
0
def String2Op(s, op_class, op_labels=[]):
    """Parse ``s`` as an operator, or return None if it is not one.

    After ``sstrip``, the text is split on "$" into ``label[$gid]``.

    Args:
        s: Text to parse.
        op_class: Callable invoked as ``op_class(gid, label)`` on success.
        op_labels: Labels accepted for this operator class.

    Returns:
        ``op_class(gid, label)`` when the label is in ``op_labels`` (gid is
        -1 when not given); None when the token count is not 1 or 2 or the
        label is not accepted.
    """
    tokens = tft_utils.String2Tokens(sstrip(s), "$")
    n_tokens = len(tokens)

    if n_tokens != 1 and n_tokens != 2:
        return None

    label = tokens[0]
    if label not in op_labels:
        return None

    gid = int(tokens[1]) if n_tokens == 2 else -1
    return op_class(gid, label)
Esempio n. 6
0
def SolveExprs(fname_exprs, optimizers={}):
    """Parse an .exprs file, build its error forms and solve them.

    The file is read as a fixed sequence of sections, each introduced by a
    header line, in this order: ``options:``, ``upper-bound:``,
    ``var-ranges:``, ``group-epsilons:``, ``eq-gids:``, ``gid-counts:``,
    ``casting-counts:``, ``gid-weight:``, ``exprs:`` and ``constraints:``.
    Blank lines and lines starting with "#" are ignored.

    Side effects: rebinds the globals EFORMS, E_UPPER_BOUND and M2, and
    OPT_ERROR_FORM when the options section sets it; adds entries to
    GID_EPSS, EQ_GIDS, GID_COUNTS, CASTING_MAP, GID_WEIGHT, TARGET_EXPRS and
    CONSTRAINT_EXPRS; adds the elapsed parsing time to
    ``tft_utils.TIME_PARSING``; calls ``sys.exit`` on an unknown option.

    Args:
        fname_exprs: Path of the .exprs file; asserted to exist.
        optimizers: Forwarded unchanged to ``SolveErrorForms``.

    Returns:
        ``(EFORMS, target_alloc)`` as produced by ``SolveErrorForms``;
        ``target_alloc`` is None when no allocation is available.
    """
    global EFORMS
    global E_UPPER_BOUND
    global M2

    global GID_EPSS
    global GID_COUNTS
    global GID_WEIGHT
    global CASTING_MAP
    global EQ_GIDS
    global CONSTRAINT_EXPRS
    global OPT_ERROR_FORM
    global TARGET_EXPRS

    EFORMS = None
    E_UPPER_BOUND = None
    M2 = None

    time_parsing = time.time()

    tft_utils.VerboseMessage("parsing input expression...")
    tft_utils.DebugMessage("reading .exprs file...")

    assert (os.path.isfile(fname_exprs))
    assert (ERROR_TYPE in ["abs", "rel"])

    # variables
    input_vars = []

    # Keep only meaningful lines; the context manager closes the file even
    # if reading fails.
    ilines = []
    with open(fname_exprs, "r") as efile:
        for aline in efile:
            aline = aline.strip()
            if (aline == ""):
                continue
            if (aline.startswith("#")):
                continue
            ilines.append(aline)

    # options
    assert (len(ilines) > 0)
    assert (ilines[0] == "options:")
    ilines = ilines[1:]
    while True:
        assert (len(ilines) > 0)
        if (ilines[0] == "upper-bound:"):
            break

        tokens = tft_utils.String2Tokens(ilines[0], ":")
        assert (len(tokens) == 2)

        if (tokens[0] == "opt-error-form"):
            OPT_ERROR_FORM = tft_utils.String2Bool(tokens[1])

        else:
            sys.exit("ERROR: unknown option setting: " + ilines[0])

        ilines = ilines[1:]

    # read error bound
    assert (ilines[0] == "upper-bound:")
    ilines = ilines[1:]
    assert (len(ilines) > 0)
    E_UPPER_BOUND = tft_expr.ConstantExpr(float(ilines[0]))
    ilines = ilines[1:]

    # M2
    M2 = tft_expr.ConstantExpr(0.0)

    # read variable ranges
    assert (len(ilines) > 0)
    assert (ilines[0] == "var-ranges:")
    ilines = ilines[1:]
    while True:
        assert (len(ilines) > 0)
        if (ilines[0] == "group-epsilons:"):
            break

        var = tft_parser.String2BoundedVariableExpr(ilines[0])

        assert (var not in input_vars)
        input_vars.append(var)

        ilines = ilines[1:]

    # get groups' epsilons
    assert (ilines[0] == "group-epsilons:")
    ilines = ilines[1:]
    while True:
        assert (len(ilines) > 0)
        if (ilines[0] == "eq-gids:"):
            break

        tokens = tft_utils.String2Tokens(ilines[0], ":")
        assert (len(tokens) == 2)

        gid = int(tokens[0])
        str_epss = tokens[1]

        assert (str_epss.startswith("[") and str_epss.endswith("]"))
        str_epss = str_epss[1:len(str_epss) - 1]
        str_eps_list = tft_utils.String2Tokens(str_epss, ",")

        eps_list = []

        for str_eps in str_eps_list:
            # An epsilon is either one of the known labels or a constant
            # expression. Only the label lookup sits inside the try, and the
            # lookup result has its own name, so the fallback always parses
            # the string that failed the lookup.
            try:
                label_index = tft_alloc.EpsLabels_String().index(str_eps)
            except ValueError:
                expr_eps = tft_parser.String2Expr(str_eps, True)
                assert (isinstance(expr_eps, tft_expr.ConstantExpr))
                eps_list.append(expr_eps)
            else:
                eps_list.append(
                    tft_expr.ConstantExpr(
                        tft_alloc.EPSILONS[label_index].value))

        assert (gid not in GID_EPSS.keys())
        GID_EPSS[gid] = eps_list

        ilines = ilines[1:]

    # get equal bit-width groups
    assert (len(ilines) > 0)
    assert (ilines[0] == "eq-gids:")
    ilines = ilines[1:]
    while True:
        assert (len(ilines) > 0)
        if (ilines[0] == "gid-counts:"):
            break

        tokens = tft_utils.String2Tokens(ilines[0], "=")
        assert (len(tokens) == 2)

        gid_1 = int(tokens[0])
        gid_2 = int(tokens[1])

        # A group is trivially equal to itself; nothing to record.
        if (gid_1 == gid_2):
            ilines = ilines[1:]
            continue

        assert (gid_1 in GID_EPSS.keys())
        assert (gid_2 in GID_EPSS.keys())
        assert (GID_EPSS[gid_1] == GID_EPSS[gid_2])

        gp12 = (gid_1, gid_2)
        gp21 = (gid_2, gid_1)

        # Record each unordered pair only once.
        if ((gp12 not in EQ_GIDS) and (gp21 not in EQ_GIDS)):
            EQ_GIDS.append(gp12)

        ilines = ilines[1:]

    # get GID_COUNTS
    assert (len(ilines) > 0)
    assert (ilines[0] == "gid-counts:")
    ilines = ilines[1:]
    while True:
        assert (len(ilines) > 0)
        if (ilines[0] == "casting-counts:"):
            break

        tokens = tft_utils.String2Tokens(ilines[0], ":")
        assert (len(tokens) == 2)

        gid = int(tokens[0])
        c = int(tokens[1])

        assert (gid >= 0)
        assert (c >= 0)
        assert (gid not in GID_COUNTS.keys())

        # Zero counts are accepted but not stored.
        if (c > 0):
            GID_COUNTS[gid] = c

        ilines = ilines[1:]

    # get CASTING_MAP
    assert (len(ilines) > 0)
    assert (ilines[0] == "casting-counts:")
    ilines = ilines[1:]
    while True:
        assert (len(ilines) > 0)
        if (ilines[0] == "gid-weight:"):
            break

        tokens = tft_utils.String2Tokens(ilines[0], ":")
        assert (len(tokens) == 2)

        p = tokens[0]
        c = int(tokens[1])

        assert (c > 0)
        assert (p.startswith("(") and p.endswith(")"))
        p = p[1:len(p) - 1]

        tokens = tft_utils.String2Tokens(p, ",")
        assert (len(tokens) == 2)

        gid_from = int(tokens[0])
        gid_to = int(tokens[1])

        p = (gid_from, gid_to)

        assert (p not in CASTING_MAP.keys())
        CASTING_MAP[p] = c

        ilines = ilines[1:]

    # get gid-weight mapping
    assert (len(ilines) > 0)
    assert (ilines[0] == "gid-weight:")
    ilines = ilines[1:]
    while True:
        assert (len(ilines) > 0)
        if (ilines[0] == "exprs:"):
            break

        tokens = tft_utils.String2Tokens(ilines[0], ":")
        assert (len(tokens) == 2)

        gid = int(tokens[0])
        weight = float(tokens[1])
        assert (0 <= gid)
        assert (0 <= weight)

        GID_WEIGHT[gid] = weight

        ilines = ilines[1:]

    # get expressions
    assert (len(ilines) > 0)
    assert (ilines[0] == "exprs:")
    ilines = ilines[1:]
    while True:
        assert (len(ilines) > 0)
        if (ilines[0] == "constraints:"):
            break

        target_expr = tft_parser.String2Expr(ilines[0], False)
        assert (isinstance(target_expr, tft_expr.ArithmeticExpr))

        TARGET_EXPRS.append(target_expr)

        ilines = ilines[1:]

    assert (len(TARGET_EXPRS) > 0)
    assert (all([isinstance(te, tft_expr.Expr) for te in TARGET_EXPRS]))

    # get constraints (the last section: runs to the end of the file)
    assert (len(ilines) > 0)
    assert (ilines[0] == "constraints:")
    ilines = ilines[1:]
    while True:
        if (len(ilines) == 0):
            break

        pred_expr = tft_parser.String2Expr(ilines[0], False)
        assert (isinstance(pred_expr, tft_expr.Predicate))

        CONSTRAINT_EXPRS.append(pred_expr)

        ilines = ilines[1:]

    # ---- generate the Error Forms ----
    tft_utils.DebugMessage(".exprs file read")
    tft_utils.DebugMessage("generating ErrorForms...")

    EFORMS = []
    for te in TARGET_EXPRS:
        ef = GenerateErrorFormFromExpr(te, ERROR_TYPE, E_UPPER_BOUND, M2,
                                       EQ_GIDS, CONSTRAINT_EXPRS)
        EFORMS.append(ef)

    assert (len(EFORMS) == len(TARGET_EXPRS))

    # ---- solve from the ErrorForms ----
    tft_utils.DebugMessage("ErrorForms generated")

    # Parsing time covers everything up to error-form generation.
    tft_utils.TIME_PARSING = tft_utils.TIME_PARSING + (time.time() -
                                                       time_parsing)

    EFORMS, target_alloc = SolveErrorForms(EFORMS, optimizers)

    if (VERBOSE):
        print("---- Error Forms after solving ----")
        for ef in EFORMS:
            print(str(ef))
            print("------------")

    if (target_alloc is None):
        print("TFT: no available allocation for the main expr...")
        return EFORMS, None

    # ---- some finalize before return ----
    if (VERBOSE):
        stat = {}
        for te in TARGET_EXPRS:
            tft_expr.ExprStatistics(te, stat)

        assert ("# constants" in stat.keys())
        assert ("# variables" in stat.keys())
        assert ("# operations" in stat.keys())
        assert ("groups" in stat.keys())

        stat["groups"].sort()

        print("---- # constants:  " + str(stat["# constants"]) +
              "  (# of appearances)")
        print("---- # variables:  " + str(stat["# variables"]) +
              "  (# of appearances)")
        print("---- # operations: " + str(stat["# operations"]))
        print("---- groups: " + str(stat["groups"]))

    # ---- return ----
    return EFORMS, target_alloc
Esempio n. 7
0
def Cluster():
    """Cluster the tuned sub-domains and export training/testing data.

    Steps, in order:
      1. Read ``DEF.FNAME_Feature_Allocation``; each non-blank line is
         ``[f0,f1,...]:<alloc short string>``. At most ``DEF.N_CTT_Samples``
         lines are used when that value is set; otherwise it is set to the
         number of lines read.
      2. Turn every allocation into a label with ``DEF.Alloc2HDLabel`` and
         write ``DEF.HDLID_GID`` to ``DEF.FNAME_HDLID_GID``.
      3. Cluster the labels with ``treecluster`` and cut the tree into
         ``DEF.N_Clusters`` clusters.
      4. Merge the labels of each cluster into ``DEF.CID_HDLabel`` and fold
         clusters whose merged labels are equal into one.
      5. Write ``DEF.CID_HDLabel``, ``DEF.CID_Training_Counts`` and the
         feature-to-cluster training/testing files (SVM-style and CSV).

    Side effects: rebinds ``DEF.CID_HDLabel`` and
    ``DEF.CID_Training_Counts``, may set ``DEF.N_CTT_Samples``, prints
    progress and writes the files named above.
    """
    assert ((DEF.N_CTT_Samples is None)
            or ((type(DEF.N_CTT_Samples) is int) and (DEF.N_CTT_Samples > 0)))

    print("==== clustering sub-domains ====")

    # ---- load feature --> allocation ----
    Features = []
    Allocations = []  # distinct allocations, in order of first appearance
    FID_AID = []  # FID_AID[fid] is the index into Allocations
    assert (os.path.isfile(DEF.FNAME_Feature_Allocation))

    with open(DEF.FNAME_Feature_Allocation, "r") as file_fa:
        for aline in file_fa:
            aline = aline.strip()

            if (aline == ""):
                continue

            # Stop once the requested number of samples has been read.
            if (DEF.N_CTT_Samples is not None):
                assert (len(Features) <= DEF.N_CTT_Samples)
                if (len(Features) == DEF.N_CTT_Samples):
                    break

            feat_alloc = tft_utils.String2Tokens(aline, ":")
            assert (len(feat_alloc) == 2)

            assert (feat_alloc[0].startswith("[")
                    and feat_alloc[0].endswith("]"))
            str_feat = feat_alloc[0][1:len(feat_alloc[0]) - 1]
            feats = tft_utils.String2Tokens(str_feat, ",")
            this_feat = [float(feat) for feat in feats]

            this_alloc = tft_alloc.Alloc()
            this_alloc.loadFromShortString(feat_alloc[1])

            Features.append(this_feat)

            # Reuse the index of an equal allocation seen before, if any.
            this_aid = -1
            for aid, known_alloc in enumerate(Allocations):
                if (this_alloc == known_alloc):
                    this_aid = aid
                    break
            if (this_aid == -1):
                Allocations.append(this_alloc)
                this_aid = len(Allocations) - 1

            FID_AID.append(this_aid)

    if (DEF.N_CTT_Samples is None):
        DEF.N_CTT_Samples = len(Features)

    # check the validity of the parameters
    assert (0 < len(Features))
    assert (len(Features) == len(FID_AID))
    assert (len(Features) == DEF.N_CTT_Samples)
    assert (DEF.N_Clusters <= len(Features))

    # build FID -> HDLabel mapping for clustering
    fid_hdlabel = []
    for fid in range(0, len(Features)):
        assert (fid < len(FID_AID))
        aid = FID_AID[fid]

        assert (aid < len(Allocations))
        alloc = Allocations[aid]

        hdlabel = DEF.Alloc2HDLabel(alloc)
        assert (DEF.isTypedList(int, hdlabel))

        fid_hdlabel.append(hdlabel)

    assert (len(fid_hdlabel) == len(Features))

    # -- export HDLID to GID --
    # NOTE: this mapping is built by function DEF.Alloc2HDLabel
    assert (DEF.HDLID_GID is not None)
    with open(DEF.FNAME_HDLID_GID, "w") as file_hdlid_gid:
        for hdlid, gid in DEF.HDLID_GID.items():
            assert ((type(hdlid) is int) and (type(gid) is int))
            file_hdlid_gid.write(str(hdlid) + " " + str(gid) + "\n")

    # clustering
    rel_tree = treecluster(fid_hdlabel, None, None, 0, 'm', 'b', None)
    fid_cid = rel_tree.cut(DEF.N_Clusters)

    FID_CID = {}
    for fid in range(0, len(fid_cid)):
        cid = fid_cid[fid]
        assert (fid not in FID_CID.keys())
        FID_CID[fid] = cid

    # -- merge the allocs classified to the same class --
    print("==== merging tuning results based on clustering ====")

    assert (len(Features) == len(FID_AID))
    assert (len(Features) == len(FID_CID))

    DEF.CID_HDLabel = {}
    for fid in range(0, len(FID_CID)):
        cid = FID_CID[fid]
        # The first label of a cluster is merged with None.
        merged_so_far = DEF.CID_HDLabel.setdefault(cid, None)
        DEF.CID_HDLabel[cid] = DEF.MergeHDLsTowardTop(merged_so_far,
                                                      fid_hdlabel[fid])

    # -- fixing the repeated HDLabels (which are allocations) --
    # Map every cluster to the highest-numbered cluster with an equal label.
    EQ_CID = {}

    for cid in range(0, DEF.N_Clusters):
        same_cid = cid
        for other_cid in range(0, DEF.N_Clusters):
            if (DEF.CID_HDLabel[cid] == DEF.CID_HDLabel[other_cid]):
                same_cid = other_cid
        if (same_cid > cid):
            assert (cid not in EQ_CID.keys())
            EQ_CID[cid] = same_cid

    # Iterate over a snapshot because values are reassigned in the loop.
    for fid, cid in list(FID_CID.items()):
        if (cid in EQ_CID.keys()):
            FID_CID[fid] = EQ_CID[cid]
            if (cid in DEF.CID_HDLabel.keys()):
                del DEF.CID_HDLabel[cid]

    # After folding, the remaining labels must be pairwise different.
    for cid1 in DEF.CID_HDLabel.keys():
        for cid2 in DEF.CID_HDLabel.keys():
            if (cid1 == cid2):
                continue
            assert (DEF.CID_HDLabel[cid1] != DEF.CID_HDLabel[cid2])

    # -- print out the allocations --
    for cid in range(0, DEF.N_Clusters):
        if (cid in DEF.CID_HDLabel.keys()):
            print("---- class alloc (" + str(cid) + ") ----")
            DEF.PrintHDLabel(DEF.CID_HDLabel[cid])

    # -- export CID_HDLabel --
    with open(DEF.FNAME_CID_HDLabel, "w") as file_cid_hdlabel:
        for cid in range(0, DEF.N_Clusters):
            if (cid in DEF.CID_HDLabel.keys()):
                file_cid_hdlabel.write(
                    str(cid) + " : " + str(DEF.CID_HDLabel[cid]) + "\n")

    # -- build DEF.CID_Training_Counts --
    n_features = len(Features)
    DEF.CID_Training_Counts = {}
    for fid in range(0, n_features):
        if (not DEF.isTrainingID(fid, n_features)):
            continue

        cid = FID_CID[fid]
        assert ((0 <= cid) and (cid < DEF.N_Clusters))

        if (cid not in DEF.CID_Training_Counts.keys()):
            DEF.CID_Training_Counts[cid] = 0

        DEF.CID_Training_Counts[cid] = DEF.CID_Training_Counts[cid] + 1

    assert (sum(DEF.CID_Training_Counts.values()) == (int(
        float(n_features) * float(DEF.RATE_Trains_Samples))))

    if (VERBOSE):
        print("---- cid to # of training partitions ----")
        for cid in range(0, DEF.N_Clusters):
            if (cid in DEF.CID_Training_Counts.keys()):
                print("CID: " + str(cid) + " : " +
                      str(DEF.CID_Training_Counts[cid]))

    # -- export CID_Training_Counts --
    with open(DEF.FNAME_CID_Training_Counts, "w") as file_ctc:
        for cid, tcounts in DEF.CID_Training_Counts.items():
            file_ctc.write(str(cid) + " " + str(tcounts) + "\n")

    # -- export training and testing data --
    # ( export feature -> cluster )
    assert ((0 < DEF.RATE_Trains_Samples) and (DEF.RATE_Trains_Samples < 1))
    assert (n_features > 0)
    l_feat = len(Features[0])

    # All four outputs are closed (and therefore flushed) on exit, including
    # the two CSV files.
    with open(DEF.FNAME_SVM_TRAIN_FEATURE_CLUSTER, "w") as file_train_f2c, \
            open(DEF.FNAME_SVM_TEST_FEATURE_CLUSTER, "w") as file_test_f2c, \
            open(DEF.FNAME_CSV_TRAIN_FEATURE_CLUSTER, "w") \
            as file_train_f2c_csv, \
            open(DEF.FNAME_CSV_TEST_FEATURE_CLUSTER, "w") \
            as file_test_f2c_csv:

        # CSV header: an unnamed first column (the feature id), then "cid",
        # then one column per feature.
        csv_header = ",cid"
        for f in range(0, l_feat):
            csv_header = csv_header + ",f" + str(f)
        file_train_f2c_csv.write(csv_header + "\n")
        file_test_f2c_csv.write(csv_header + "\n")

        for fid in range(0, n_features):
            assert (len(Features[fid]) == l_feat)

            cid = FID_CID[fid]

            # Training and testing rows share one format and differ only in
            # the destination files.
            if (DEF.isTrainingID(fid, n_features)):
                file_f2c = file_train_f2c
                file_f2c_csv = file_train_f2c_csv
            else:
                file_f2c = file_test_f2c
                file_f2c_csv = file_test_f2c_csv

            # SVM-style row: "<cid> 1:<v1> 2:<v2> ..."
            # CSV row:       "<fid>,<cid>,<v1>,<v2>,..."
            svm_fields = [str(cid)]
            csv_fields = [str(fid), str(cid)]
            for findex in range(0, l_feat):
                value = str(Features[fid][findex])
                svm_fields.append(str((findex + 1)) + ":" + value)
                csv_fields.append(value)

            file_f2c.write(" ".join(svm_fields) + "\n")
            file_f2c_csv.write(",".join(csv_fields) + "\n")
Esempio n. 8
0
def main():
    """Command-line entry point: tune one expression specification.

    Parses the command line, copies the options into the ``tft_utils``,
    ``tft_tuning`` and ``IR`` module settings, loads the expression
    specification (a ``.py`` file is loaded as a module, anything else is
    translated to Python source and executed), then tunes the target
    expression once per requested error bound. For every bound that yields
    an allocation a ``<base>.<bound>.cpp`` file is exported, and one row of
    timings is appended to ``<base>.timers.csv``, where ``<base>`` is the
    input file name without directory and extension.
    """
    # ==== get parameters ====
    parser = argparse.ArgumentParser()
    parser.add_argument("expr_spec", help="Expression Specification")

    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        default=False,
                        help="Verbose mode")

    parser.add_argument("-d",
                        "--debug",
                        action="store_true",
                        default=False,
                        help="Debug mode")

    parser.add_argument("-n",
                        "--no-m2-check",
                        action="store_true",
                        default=False,
                        help="Skip m2 check")

    parser.add_argument("-m",
                        "--maxc",
                        type=int,
                        help="Maximum number of type casts")

    parser.add_argument("--linear-tc",
                        action="store_true",
                        default=False,
                        help="Use linear type casting constraints")

    parser.add_argument(
        "--aopt",
        type=str,
        default="gurobi",
        help=
        "Allocation optimization solver: \"gurobi\" for Gurobi and \"glpk\" for GLPK (must work with --linear-tc)"
    )

    parser.add_argument("--gopt-timeout",
                        type=int,
                        default=120,
                        help="Timeout of the global optimization")

    parser.add_argument("--gopt-tolerance",
                        type=float,
                        default=5e-02,
                        help="Tolerance of the global optimization")

    parser.add_argument("--optm",
                        type=str,
                        default="max-benefit",
                        choices=tft_utils.OPT_METHODS,
                        help="Optimization method")

    parser.add_argument("-e",
                        "--error-bounds",
                        type=str,
                        required=True,
                        help="Error bounds")

    parser.add_argument("-b",
                        "--bitwidths",
                        type=str,
                        default="32 64",
                        help="Bit-width candidates")

    parser.add_argument("--fix-const-type",
                        action="store_true",
                        default=False,
                        help="Fix the constant type to the highest bit-width")

    args = parser.parse_args()

    INPUT_FILE = args.expr_spec
    if not os.path.isfile(INPUT_FILE):
        error("Input expression file doesn't exist: {}".format(INPUT_FILE))

    # Name shared by all output files. It is computed once here so that it
    # is defined even when no error bound yields an allocation.
    base = os.path.splitext(os.path.basename(INPUT_FILE))[0]

    # Debug mode implies verbose mode.
    tft_utils.FPTUNER_VERBOSE = args.verbose or args.debug

    tft_utils.FPTUNER_DEBUG = args.debug

    tft_utils.NO_M2_CHECK = args.no_m2_check

    if args.maxc is not None:
        if args.maxc < 0:
            error("maxc must be >= 0")
        tft_utils.N_MAX_CASTINGS = args.maxc

    if args.gopt_timeout <= 0:
        error("gopt-timeout must be > 0")
    tft_utils.GOPT_TIMEOUT = args.gopt_timeout

    if args.gopt_tolerance < 0.0:
        error("gopt-tolerance must be >= 0.0")
    tft_utils.GOPT_TOLERANCE = args.gopt_tolerance

    tft_utils.OPT_METHOD = args.optm

    # Error bounds: a whitespace-separated list of strictly positive floats.
    err_list = args.error_bounds.split()
    try:
        err_list = [float(e) for e in err_list]
        if len(err_list) == 0 or not all([e > 0.0 for e in err_list]):
            raise ValueError
    except ValueError:
        error("The error bounds must all be positive floats.")
    tft_tuning.ERROR_BOUNDS = err_list

    # Bit-widths: separated by commas and/or whitespace; duplicates are
    # dropped and the result is sorted before validation.
    bit_widths = args.bitwidths.replace(',', ' ').split()
    try:
        bit_widths = [int(b) for b in bit_widths]
        bit_widths = sorted(set(bit_widths))
        if bit_widths not in ([32, 64, 128], [32, 64], [64, 128]):
            raise ValueError
    except ValueError:
        error(
            "Accepted bitwidth candidates are: '32 64', '64 128', '32 64 128'")
    IR.PREC_CANDIDATES = ["e{}".format(b) for b in bit_widths]

    tft_utils.FIX_CONST_TYPE = args.fix_const_type

    tft_utils.LINEAR_TYPE_CASTING_CONSTRAINTS = args.linear_tc

    tft_tuning.OPTIMIZERS["alloc"] = args.aopt
    if (tft_tuning.OPTIMIZERS['alloc'] == 'glpk'):
        assert (tft_utils.LINEAR_TYPE_CASTING_CONSTRAINTS
                ), "Solver GLPK must work with --linear-tc"

    # ==== load the input file as a module ====
    if INPUT_FILE.endswith(".py"):
        tokens = tft_utils.String2Tokens(INPUT_FILE, "/")
        assert (len(tokens) >= 1)

        module_name = tokens[-1]
        assert (module_name.endswith(".py"))
        module_name = module_name[:-3]

        IR.LOAD_CPP_INSTS = True
        # NOTE(review): the ``imp`` module is deprecated and was removed in
        # Python 3.12; consider an importlib-based loader.
        imp.load_source(module_name, INPUT_FILE)
        if (IR.TARGET_EXPR is None):
            error("no tuning target expression was specified.")
        IR.LOAD_CPP_INSTS = False

    else:  # New frontend
        with open(INPUT_FILE, 'r') as f:
            data = f.read()
        processed_data = get_runmain_input(data)
        py_source = translate(processed_data)

        code_obj = compile(py_source, '<string>', 'exec')

        IR.LOAD_CPP_INSTS = True
        # NOTE(review): exec runs code generated from the input file with
        # the privileges of this process; only use trusted specifications.
        exec(code_obj)
        if (IR.TARGET_EXPR is None):
            error("No tuning target expression was specified.")
        IR.LOAD_CPP_INSTS = False

    # ==== tune the targeted expression ====
    # reset the timers
    tft_utils.TIME_PARSING = 0
    tft_utils.TIME_FIRST_DERIVATIVES = 0
    tft_utils.TIME_GLOBAL_OPT = 0
    tft_utils.TIME_ALLOCATION = 0
    tft_utils.TIME_CHECK_M2 = 0

    # possibly remove the .exprs file
    EXPRS_NAME = INPUT_FILE + ".exprs"
    if (os.path.isfile(EXPRS_NAME)):
        tft_utils.VerboseMessage("Warning: overwriting existing file: " +
                                 EXPRS_NAME)
        # os.remove avoids passing the file name through a shell.
        os.remove(EXPRS_NAME)

    # go tuning
    for i, error_bound in enumerate(tft_tuning.ERROR_BOUNDS):
        eforms = None
        alloc = None

        # Tune for the first error bound.
        # Need to generate the .exprs file first.
        if (i == 0):
            tft_ir_backend.ExportExpr2ExprsFile(IR.TARGET_EXPR, error_bound,
                                                EXPRS_NAME)

            # tune!
            eforms, alloc = tft_tuning.TFTRun(EXPRS_NAME)

        # otherwise, reuse the error forms with the new upper bound
        else:
            tft_sol_exprs.ReadyToTune()

            new_eup = tft_expr.ConstantExpr(error_bound)

            for ef in tft_sol_exprs.EFORMS:
                ef.upper_bound = new_eup

            # solve the error form
            eforms, alloc = tft_sol_exprs.SolveErrorForms(
                tft_sol_exprs.EFORMS, tft_tuning.OPTIMIZERS)

        # show the allocation
        print("==== error bound : " + str(error_bound) + " ====")
        tft_tuning.PrintAlloc(alloc, eforms)
        print("")
        tft_ir_backend.ExportColorInsts(alloc)
        print("")

        # -- synthesize the mixed precision cpp file --
        if (alloc is None):
            print(
                "Warning: no allocation was generated... Thus no .cpp file will be generated..."
            )
        else:
            assert (isinstance(alloc, tft_alloc.Alloc))
            assert (eforms is not None)

            str_error_bound = str(float(error_bound))
            fname_cpp = base + "." + str_error_bound + ".cpp"

            if (os.path.isfile(fname_cpp)):
                tft_utils.VerboseMessage(
                    "Warning: overwrite the existed .cpp file: " + fname_cpp)

            tft_ir_backend.ExportCppInsts(alloc, fname_cpp)

    # show the timers
    timer_fname = base + ".timers.csv"
    write_header = (not os.path.isfile(timer_fname))

    # A new file gets the header line; an existing file is appended to.
    with open(timer_fname, "w" if write_header else "a") as timer_file:
        if (write_header):
            timer_file.write(
                "Total Parsing Time,First Derivatives,Global Optimization,QCQP,Check Higher-order Errors\n"
            )

        timer_file.write(
            str(float(tft_utils.TIME_PARSING)) + "," +
            str(float(tft_utils.TIME_FIRST_DERIVATIVES)) + "," +
            str(float(tft_utils.TIME_GLOBAL_OPT)) + "," +
            str(float(tft_utils.TIME_ALLOCATION)) + "," +
            str(float(tft_utils.TIME_CHECK_M2)) + "\n")

    tft_utils.VerboseMessage("Total Parsing time          : " +
                             str(float(tft_utils.TIME_PARSING)))
    tft_utils.VerboseMessage("    First Dev.              : " +
                             str(float(tft_utils.TIME_FIRST_DERIVATIVES)))
    tft_utils.VerboseMessage("Time for global optimization: " +
                             str(float(tft_utils.TIME_GLOBAL_OPT)))
    tft_utils.VerboseMessage("Time for solving QCQP       : " +
                             str(float(tft_utils.TIME_ALLOCATION)))
    tft_utils.VerboseMessage("Time for checking M2        : " +
                             str(float(tft_utils.TIME_CHECK_M2)))
Esempio n. 9
0
def Testing (): 
    """Run the small-scale test of the trained cluster predictor.

    Steps, in order:
      1. Lazily load (only when the corresponding DEF attribute is None)
         the CID -> training-count table, the HDLID -> GID table and the
         CID -> HD-label table from the files named in DEF.
      2. Count DEF.N_CTT_Samples as the number of non-empty lines in the
         SVM train and test feature/cluster files, if not already set.
      3. Read the first DEF.N_CTT_Samples non-empty lines of the partition
         file and keep those for which DEF.isTrainingID(...) is false.
      4. Read the test feature/cluster file (one "cid idx:val idx:val ..."
         record per line).
      5. For every test record: re-derive the feature from the partition
         as a sanity check, predict a CID with CIDPredict, turn the CID's
         HD-label into an allocation, and try to solve with it.
      6. Print the success ratio and the exact-CID-match ratio.

    Takes no arguments and returns nothing; results go to stdout and the
    loaded tables are stored on DEF.  All validation is done with asserts.
    """
    print ("==== testing in small-scale ====") 

    # turn off tft_solver.LIMIT_N_CASTINGS
    tft_solver.LIMIT_N_CASTINGS = False 

    # load CID_Training_Counts 
    # File format: one "<cid> <count>" pair per non-empty line.
    if (DEF.CID_Training_Counts is None): 
        DEF.CID_Training_Counts = {} 

        assert(os.path.isfile(DEF.FNAME_CID_Training_Counts)) 
        file_ctc = open(DEF.FNAME_CID_Training_Counts, "r")

        for aline in file_ctc: 
            aline = aline.strip() 
            if (aline == ""): 
                continue 
            tokens = tft_utils.String2Tokens(aline, " ") 
            assert(len(tokens) == 2) 
            cid = int(tokens[0]) 
            tcounts = int(tokens[1]) 

            print ("CID Counts : " + str(cid) + " : " + str(tcounts)) 

            # each cid may appear only once in the file
            assert(cid not in DEF.CID_Training_Counts.keys()) 
            DEF.CID_Training_Counts[cid] = tcounts 
            
#        assert(sum(DEF.CID_Training_Counts.values()) == DEF.N_Samples) 
        
        file_ctc.close() 

    # load HDLID_GID 
    # File format: one "<hdlid> <gid>" pair per non-empty line.
    if (DEF.HDLID_GID is None): 
        DEF.HDLID_GID = {} 

        assert(os.path.isfile(DEF.FNAME_HDLID_GID)) 
        fild_hdlid_gid = open(DEF.FNAME_HDLID_GID, "r") 
        
        for aline in fild_hdlid_gid: 
            aline = aline.strip() 
            if (aline == ""): 
                continue 
            tokens = tft_utils.String2Tokens(aline, " ") 
            assert(len(tokens) == 2) 
            hdlid = int(tokens[0]) 
            gid = int(tokens[1]) 

            print ("HDLID: " + str(hdlid) + " : GID: " + str(gid)) 
            
            # each hdlid may appear only once in the file
            assert(hdlid not in DEF.HDLID_GID.keys()) 
            DEF.HDLID_GID[hdlid] = gid

        fild_hdlid_gid.close() 

    # load CID_HDLabel 
    # File format: one "<cid>:[v0,v1,...]" record per non-empty line.
    if (DEF.CID_HDLabel is None): 
        DEF.CID_HDLabel = {} 
        # DIM_HDL must still be 0 here; it is reset to None so that the
        # first label read below fixes the dimension for all later ones.
        assert(DEF.DIM_HDL == 0) 
        DEF.DIM_HDL = None 

        assert(os.path.isfile(DEF.FNAME_CID_HDLabel)) 
        file_cid_hdlabel = open(DEF.FNAME_CID_HDLabel, "r") 

        for aline in file_cid_hdlabel: 
            aline = aline.strip() 
            if (aline == ""): 
                continue 
            tokens = tft_utils.String2Tokens(aline, ":") 
            assert(len(tokens) == 2) 
            cid = int(tokens[0]) 
            str_hdlabel = tokens[1] 

            assert((0 <= cid) and (cid < DEF.N_Clusters)) 
            assert(cid not in DEF.CID_HDLabel.keys()) 
            assert(str_hdlabel.startswith("[") and str_hdlabel.endswith("]")) 
            # drop the surrounding brackets
            str_hdlabel = str_hdlabel[1:len(str_hdlabel)-1] 
            
            tokens = tft_utils.String2Tokens(str_hdlabel, ",")
            hdlabel = [int(tokens[i]) for i in range(0, len(tokens))] 

            print ("CID: " + str(cid) + " : " + str(hdlabel))

            # every label must have the same length as the first one
            if (DEF.DIM_HDL is None): 
                DEF.DIM_HDL = len(hdlabel) 
            else: 
                assert(DEF.DIM_HDL == len(hdlabel)) 

            # store a copy of the label list
            DEF.CID_HDLabel[cid] = hdlabel[:]

        file_cid_hdlabel.close() 

    
    # count DEF.N_CTT_Samples 
    # = number of non-empty lines in the train file plus the test file
    if (DEF.N_CTT_Samples is None): 
        DEF.N_CTT_Samples = 0 

        file_fc = open(DEF.FNAME_SVM_TRAIN_FEATURE_CLUSTER, "r") 

        for aline in file_fc: 
            aline = aline.strip() 

            if (aline == ""): 
                continue 

            DEF.N_CTT_Samples = DEF.N_CTT_Samples + 1 

        file_fc.close() 

        file_fc = open(DEF.FNAME_SVM_TEST_FEATURE_CLUSTER, "r") 
        
        for aline in file_fc: 
            aline = aline.strip() 

            if (aline == ""): 
                continue 

            DEF.N_CTT_Samples = DEF.N_CTT_Samples + 1 

        file_fc.close() 

    # load testing partitions 
    assert((type(DEF.N_CTT_Samples) is int) and (DEF.N_CTT_Samples > 0)) 
    
    file_parts = open(DEF.FNAME_Partitions, "r") 

    String_Partitions = [] 
    
    # keep only the first DEF.N_CTT_Samples non-empty lines of the file
    for aline in file_parts: 
        aline = aline.strip() 

        if (aline == ""): 
            continue 

        assert(len(String_Partitions) <= DEF.N_CTT_Samples) 
        if (len(String_Partitions) == DEF.N_CTT_Samples): 
            break 

        String_Partitions.append(aline)

    file_parts.close() 

    # the file must contain at least DEF.N_CTT_Samples records
    assert(len(String_Partitions) == DEF.N_CTT_Samples) 

    Testing_Partitions = [] 
    pid = -1 

    for i in range(0, DEF.N_CTT_Samples): 
        aline = String_Partitions[i] 

        pid = pid + 1 

        # partitions flagged by DEF.isTrainingID are not tested
        if (DEF.isTrainingID(pid, DEF.N_CTT_Samples)): 
            continue 

        # a partition line is a space-separated list of "<lb>~<ub>" bounds
        tokens = tft_utils.String2Tokens(aline, " ") 
        this_part = [] 
        
        # NOTE(review): this inner loop reuses the outer loop's name `i`.
        # The outer `for` rebinds `i` from its own range on the next
        # iteration, so the iteration count is unaffected.
        for i in range(0, len(tokens)): 
            bs = tft_utils.String2Tokens(tokens[i], "~") 
            assert(len(bs) == 2) 
            
            lb = float(bs[0]) 
            ub = float(bs[1]) 

            this_part.append((lb, ub)) 

        Testing_Partitions.append(this_part) 

    String_Partitions = [] # release the space ... 

    # load testing feature -> CID
    # File format: "<cid> 1:<v1> 2:<v2> ..." per non-empty line; the index
    # before each ':' must equal the token's 1-based position.
    Testing_Features = []
    Testing_CIDs = [] 
    assert(os.path.isfile(DEF.FNAME_SVM_TEST_FEATURE_CLUSTER)) 
    file_fc = open(DEF.FNAME_SVM_TEST_FEATURE_CLUSTER, "r")

    for aline in file_fc: 
        aline = aline.strip()

        if (aline == ""): 
            continue 

        tokens = tft_utils.String2Tokens(aline, " ") 

        cid = int(tokens[0]) 
        assert(cid in DEF.CID_HDLabel.keys()) 
        
        this_feature = [] 
        for i in range(1, len(tokens)): 
            fv = tft_utils.String2Tokens(tokens[i], ":") 
            assert(len(fv) == 2) 
            assert(i == int(fv[0])) 
            
            this_feature.append(float(fv[1])) 

        Testing_CIDs.append(cid) 
        Testing_Features.append(this_feature) 
    
    file_fc.close() 

    # check the validity of the data 
    # the three parallel lists must describe the same number of tests
    n_tests = len(Testing_Partitions) 
    assert(n_tests == len(Testing_Features)) 
    assert(n_tests == len(Testing_CIDs)) 

    # go testing 
    n_test_success = 0 
    n_exact_allocs = 0 
    CID_Testing_Counts = {} # [0 for i in range(0, DEF.N_Clusters)] 

    # The three lists are consumed from the front; each iteration handles
    # element 0 and deletes it at the end of the loop body.
    while (len(Testing_Partitions) > 0): 
        assert(len(Testing_Partitions) == len(Testing_Features)) 
        assert(len(Testing_Features) == len(Testing_CIDs)) 

        # print out the testing progress 
        sys.stdout.write("\rTest [" + str(n_tests - len(Testing_Partitions)) + "] : ") 

        # get this partition, feature, and the cid 
        this_part = Testing_Partitions[0] 
        this_dvec = DEF.InverseSampleInputPartitionFromVec(this_part) 

        this_feature = Testing_Features[0] 

        exact_cid = Testing_CIDs[0] 

        # sanitation check 
        # the feature recomputed from the partition must match the one read
        # from the file, element by element, within 1e-8
        this_feature_2 = DEF.InputPartition2Feature([DEF.Feature_Option, []], this_dvec)
        assert(len(this_feature) == len(this_feature_2)) 

        for i in range(0, len(this_feature)): 
            f = this_feature[i] 
            f2 = this_feature_2[i] 
            assert(abs(f - f2) < 0.00000001) 

        # predict the alloc 
        this_cid = CIDPredict(this_feature)
        assert(this_cid in DEF.CID_HDLabel.keys()) 

        if (this_cid not in CID_Testing_Counts.keys()): 
            CID_Testing_Counts[this_cid] = 0 

        sys.stdout.write("predicted/exact CID : [" + str(this_cid) + " / " + str(exact_cid) + "] ") 
        sys.stdout.flush() 

        CID_Testing_Counts[this_cid] = CID_Testing_Counts[this_cid] + 1 

        predicted_alloc = DEF.HDLabel2Alloc(DEF.CID_HDLabel[this_cid]) 
        assert(predicted_alloc is not None) 

        # count exact prediction 
        if (this_cid == exact_cid): 
            n_exact_allocs = n_exact_allocs + 1 

        # solve the alloc 
        this_eforms = None 
        this_alloc = None 

        if (DEF.REUSE_EFORMS): 
            # Reuse the already-built error forms: temporarily replace their
            # per-group epsilons with the single predicted value, solve, and
            # then put the original epsilons back.
            assert(DEF.BASE_EFORMS is not None) 

            original_gid_epss = None 
            new_gid_epss      = {} 
            
            # record and overwrite epsilons 
            for ef in DEF.BASE_EFORMS: 
                # all error forms must share the same gid -> epsilons table
                if (original_gid_epss is None): 
                    original_gid_epss = ef.gid2epsilons.copy() 
                else: 
                    assert(original_gid_epss.keys() == ef.gid2epsilons.keys()) 
                    for gid,epss in original_gid_epss.items():
                        assert(ef.gid2epsilons[gid] == epss) 

                for et in ef.terms: 
                    
                    # clear the stored over-approximation expression of the term
                    et.stored_overapprox_expr = None 

                    etgid = et.getGid() 
                    assert(etgid >= 0) 
                    assert(predicted_alloc.isAssigned(etgid)) 

                    assert(etgid in original_gid_epss.keys()) 

                    ow_epss = [tft_expr.ConstantExpr(predicted_alloc[etgid])] 

                    if (etgid in new_gid_epss.keys()): 
                        assert(new_gid_epss[etgid] == ow_epss) 
                    else:
                        new_gid_epss[etgid] = ow_epss 

            # make sure every original gid has an overriding entry, including
            # gids not referenced by any term above
            for gid in original_gid_epss.keys(): 
                assert(predicted_alloc.isAssigned(gid))
                new_gid_epss[gid] = [tft_expr.ConstantExpr(predicted_alloc[gid])] 

            assert(new_gid_epss.keys() == original_gid_epss.keys()) 

            for ef in DEF.BASE_EFORMS: 
                ef.gid2epsilons = new_gid_epss.copy() 

            # solve alloc. 
            tft_tuning.TFTSystemReset() 
            DEF.RewriteVarBounds(this_part) 
            this_eforms, this_alloc = tft_sol_exprs.SolveErrorForms(DEF.BASE_EFORMS, tft_tuning.OPTIMIZERS) 

            # restore the original epsilons 
            for ef in DEF.BASE_EFORMS: 
                ef.gid2epsilons = original_gid_epss.copy() 
            
        else: 
            # create the exprs file 
            # NOTE(review): `id_feat` is never assigned inside this function.
            # Unless a module-level `id_feat` exists elsewhere in the file,
            # this branch raises NameError -- confirm the intended index.
            fname_part = tft_dat_sampling.FNameExprs(tft_dat_sampling.FNAME_EXPRS, id_feat) 
            
            # solve alloc. 
            tft_dat_sampling.WriteExprsFile(fname_part, this_part, predicted_alloc)     
            tft_tuning.TFTSystemReset() 
            this_eforms, this_alloc = tft_sol_exprs.SolveExprs(fname_part, tft_tuning.OPTIMIZERS) 
            # remove the temporary exprs file through the shell
            os.system("rm " + fname_part) 

        # count the correct prediction 
        # a test succeeds when the solver returns an allocation at all; that
        # allocation is then required to equal the predicted one
        if (this_alloc is not None): 
            assert(this_alloc == predicted_alloc) 

            n_test_success = n_test_success + 1 
        
            if (VERBOSE): 
                sys.stdout.write(" ---- prediction successed!!") 
                sys.stdout.flush() 
                
        else: 
            if (VERBOSE): 
                print (" ---- prediction failed...") 

        # finalizing 
        del Testing_Partitions[0] 
        del Testing_Features[0] 
        del Testing_CIDs[0]

    print ("") 
    print ("Small-scale Testing Result: " + str(n_test_success) + " / " + str(n_tests) + " (" + str(float(n_test_success)/float(n_tests)) + ")") 
    print ("    Exact Result : " + str(n_exact_allocs) + " / " + str(n_tests) + " (" + str(float(n_exact_allocs)/float(n_tests)) + ")") 

    # show CID_Testing_Counts 
    assert(sum(CID_Testing_Counts.values()) == n_tests) 
    if (VERBOSE): 
        # NOTE(review): the banner says "training" but the values printed
        # are the per-CID *testing* counts collected above.
        print ("---- cid to # of training partitions ----") 
        for cid in range(0, DEF.N_Clusters): 
            if (cid in CID_Testing_Counts.keys()): 
                print ("CID: " + str(cid) + " : " + str(CID_Testing_Counts[cid])) 
Esempio n. 10
0
def LoadExprsFile(fname):
    """Split an ``.exprs`` file into its sections and record them globally.

    The file is read line by line through four stages, switched by header
    lines that must appear in this order:

      * before ``var-ranges:``    -> raw lines appended to FILE_PREFIX
      * after ``var-ranges:``     -> each non-empty line is parsed as a
        bounded variable; non-constant variables are appended to
        DEF.VarExprs / DEF.VNames / DEF.VRanges
      * after ``group-epsilons:`` -> each non-empty ``gid:epss`` line is
        stored in FILE_GID2EPSS (epss is kept as the raw string)
      * from ``eq-gids:`` onward  -> raw lines (including the ``eq-gids:``
        header itself, preceded by a blank line) appended to FILE_POSTFIX

    Finally DEF.N_Var_Intervals is set to one DEF.N_Partitions entry per
    loaded variable name.

    Args:
        fname: path of the file to load; must end with ``.exprs``.

    Raises:
        AssertionError: on a wrong extension, out-of-order section header,
            duplicate variable label, malformed or duplicate group line.
    """
    global FNAME_EXPRS
    global FILE_PREFIX
    global FILE_GID2EPSS
    global FILE_POSTFIX

    assert (fname.endswith(".exprs"))
    FNAME_EXPRS = fname

    f_stage = "prefix"

    # `with` guarantees the handle is closed even when an assertion or the
    # parser raises in the middle of the file.
    with open(fname, "r") as efile:
        for aline in efile:
            stripped = aline.strip()

            # decide loading stage
            if (stripped == "var-ranges:"):
                assert (f_stage == "prefix")
                f_stage = "var-ranges"
                continue

            if (stripped == "group-epsilons:"):
                assert (f_stage == "var-ranges")
                f_stage = "group-epsilons"
                continue

            if (stripped == "eq-gids:"):
                assert (f_stage == "group-epsilons")
                f_stage = "postfix"
                FILE_POSTFIX.append("\n")
                # no `continue`: the header line itself is stored as the
                # first postfix line by the stage handling below

            # handle based on stage
            if (f_stage == "prefix"):
                FILE_PREFIX.append(aline)

            elif (f_stage == "var-ranges"):
                if (stripped != ""):
                    ve = tft_parser.String2BoundedVariableExpr(stripped)
                    assert (ve.label() not in DEF.VNames)

                    # constant variables are validated but not recorded
                    if (not tft_expr.isConstVar(ve)):
                        DEF.VarExprs.append(ve)
                        DEF.VNames.append(ve.label())
                        DEF.VRanges.append(
                            (float(ve.lb().value()), float(ve.ub().value())))

            elif (f_stage == "group-epsilons"):
                if (stripped != ""):
                    tokens = tft_utils.String2Tokens(stripped, ":")
                    assert (len(tokens) == 2)

                    gid = int(tokens[0])
                    epss = tokens[1]

                    assert (gid not in FILE_GID2EPSS.keys())
                    FILE_GID2EPSS[gid] = epss

            elif (f_stage == "postfix"):
                FILE_POSTFIX.append(aline)

            else:
                assert (False)

    # generate N_Var_Intervals
    DEF.assert_VNames()
    DEF.assert_VRanges()
    DEF.N_Var_Intervals = [DEF.N_Partitions for _ in range(0, len(DEF.VNames))]
Esempio n. 11
0
def LoadConfig(fname_config):
    """Apply the tool settings found in a ``OPTION=value`` config file.

    Several VERBOSE flags are first reset to False.  Then every line that
    is neither empty nor a ``#`` comment is split on ``=`` and dispatched
    on the option name; the value is stored on the matching module
    attribute (or in OPTIMIZERS for ``OPT_VRANGE`` / ``OPT_ALLOC``).
    Options starting with ``DAT_`` are ignored here.

    Args:
        fname_config: path of the configuration file.

    Raises:
        AssertionError: when a line does not split into exactly two
            tokens or a value violates the range check for its option.
        SystemExit: on an unrecognised option name.
    """
    global OPTIMIZERS

    # default settings
    tft_ask_gurobi.VERBOSE = False
    tft_ask_markian.VERBOSE = False
    tft_solver.VERBOSE = False
    tft_parser.VERBOSE = False
    tft_get_first_derivations.VERBOSE = False

    # -- beginning of loading config. file --
    # `with` closes the handle even when an assert fails or sys.exit is
    # raised for an invalid option.
    with open(fname_config, "r") as cfile:
        for aline in cfile:
            aline = aline.strip()
            if (aline == "" or aline.startswith("#")):
                continue

            tokens = tft_utils.String2Tokens(aline, "=")
            assert (len(tokens) == 2)

            opt = tokens[0]
            val = tokens[1]

            if (opt == "OPT_VRANGE"):
                assert (val in tft_solver.ALL_OPTIMIZERS)
                OPTIMIZERS["vrange"] = val
            elif (opt == "OPT_ALLOC"):
                assert (val in tft_solver.ALL_OPTIMIZERS)
                OPTIMIZERS["alloc"] = val

            elif (opt == "VERBOSE_GUROBI"):
                tft_ask_gurobi.VERBOSE = tft_utils.String2Bool(val)
            elif (opt == "VERBOSE_SAMPLER"):
                tft_ask_sampler.VERBOSE = tft_utils.String2Bool(val)
            elif (opt == "VERBOSE_SAMPLERS"):
                tft_ask_samplers.VERBOSE = tft_utils.String2Bool(val)
            elif (opt == "VERBOSE_SOLVER"):
                tft_solver.VERBOSE = tft_utils.String2Bool(val)
            elif (opt == "VERBOSE_PARSER"):
                tft_parser.VERBOSE = tft_utils.String2Bool(val)
            elif (opt == "VERBOSE_GET_FIRST_DERIVATIONS"):
                tft_get_first_derivations.VERBOSE = tft_utils.String2Bool(val)
            elif (opt == "ERROR_TYPE"):
                tft_sol_exprs.ERROR_TYPE = val
            elif (opt == "VERBOSE_SOL_EXPRS"):
                tft_sol_exprs.VERBOSE = tft_utils.String2Bool(val)

            elif (opt == "GELPIA_TIMEOUT"):
                tft_ask_gelpia.TIMEOUT = tft_utils.String2Int(val)
                assert (tft_ask_gelpia.TIMEOUT > 0)
            elif (opt == "GELPIA_TOLERANCE"):
                tft_ask_gelpia.DEFAULT_TOLERANCE = tft_utils.String2Float(val)
                assert (tft_ask_gelpia.DEFAULT_TOLERANCE >= 1e-07)

            elif (opt == "N_GELPIAS"):
                tft_ask_gelpias.N_GELPIAS = tft_utils.String2Int(val)
                assert (tft_ask_gelpias.N_GELPIAS > 0)

            elif (opt == "SOLVER_N_SAMPLES"):
                tft_solver.N_SAMPLES = tft_utils.String2Int(val)
                assert (tft_solver.N_SAMPLES > 0)
            elif (opt == "SOLVER_ADDRESS_CASTINGS"):
                tft_solver.ADDRESS_CASTINGS = tft_utils.String2Bool(val)
            elif (opt == "SOLVER_LIMIT_N_CASTINGS"):
                tft_solver.LIMIT_N_CASTINGS = tft_utils.String2Bool(val)
            elif (opt == "SOLVER_N_MAX_CASTINGS"):
                tft_solver.N_MAX_CASTINGS = tft_utils.String2Int(val)
                assert (0 <= tft_solver.N_MAX_CASTINGS)

            elif (opt == "SAMPLERS_N_SAMPLERS"):
                tft_ask_samplers.N_SAMPLERS = tft_utils.String2Int(val)
                assert (tft_ask_samplers.N_SAMPLERS > 0)

            elif (opt.startswith("DAT_")):
                # DAT_* options are not handled by this loader
                continue

            else:
                sys.exit("ERROR: invalid option: " + opt)
Esempio n. 12
0
def LoadDATConfig(fname_config):
    """Load the ``DAT_*`` options of a config file into module globals.

    Every line that is neither empty nor a ``#`` comment is split on
    ``=``.  Options whose name does not start with ``DAT_`` are skipped;
    recognised ``DAT_*`` options are converted and stored in the matching
    module-level variable.  After the file is read, a few of the resulting
    settings are sanity-checked.

    Args:
        fname_config: path of the configuration file.

    Raises:
        AssertionError: when a line does not split into exactly two
            tokens, or when N_Samples / N_Partitions are not positive, or
            Feature_Option / Sampling_Method are not among the available
            choices.
        SystemExit: on an unrecognised ``DAT_*`` option name.
    """
    global N_Samples
    global N_CTT_Samples
    global N_Clusters
    global N_Partitions
    global VERBOSE
    global Feature_Option
    global TRAIN_MODE
    global TEST_MODE
    global STEP_SAMPLE
    global STEP_CLUSTER
    global STEP_TRAIN
    global STEP_TEST
    global Sampling_Method

    # `with` closes the handle even when an assert fails or sys.exit is
    # raised for an invalid option.
    with open(fname_config, "r") as fconf:
        for aline in fconf:
            aline = aline.strip()
            if (aline == "" or aline.startswith("#")):
                continue

            tokens = tft_utils.String2Tokens(aline, "=")
            assert (len(tokens) == 2)

            opt = tokens[0]
            val = tokens[1]

            # options without the DAT_ prefix are not handled by this loader
            if (not opt.startswith("DAT_")):
                continue

            if (opt == "DAT_VERBOSE"):
                VERBOSE = tft_utils.String2Bool(val)
            elif (opt == "DAT_N_SAMPLES"):
                N_Samples = tft_utils.String2Int(val)
            elif (opt == "DAT_N_CTT_SAMPLES"):
                N_CTT_Samples = tft_utils.String2Int(val)
            elif (opt == "DAT_N_CLUSTERS"):
                N_Clusters = tft_utils.String2Int(val)
            elif (opt == "DAT_N_PARTITIONS"):
                N_Partitions = tft_utils.String2Int(val)
            elif (opt == "DAT_FEATURE_OPT"):
                Feature_Option = val

            elif (opt == "DAT_SAMPLING_METHOD"):
                Sampling_Method = val

            elif (opt == "DAT_STEP_SAMPLE"):
                STEP_SAMPLE = tft_utils.String2Bool(val)
            elif (opt == "DAT_STEP_CLUSTER"):
                STEP_CLUSTER = tft_utils.String2Bool(val)
            elif (opt == "DAT_STEP_TRAIN"):
                STEP_TRAIN = tft_utils.String2Bool(val)
            elif (opt == "DAT_STEP_TEST"):
                STEP_TEST = tft_utils.String2Bool(val)

            elif (opt == "DAT_TRAIN_MODE"):
                TRAIN_MODE = val
            elif (opt == "DAT_TEST_MODE"):
                TEST_MODE = val

            else:
                sys.exit("ERROR: invalid DAT option: " + opt)

    # checking the correct of the settings
    assert (0 < N_Samples)
    assert (0 < N_Partitions)
    assert (Feature_Option in Available_Feature_Options)
    assert (Sampling_Method in Available_Sampling_Methods)
Esempio n. 13
0
def DecisionTreeTraining(tdata):
    """Train a ``DT.DecisionTree`` on a CSV file and store it on DEF.

    The CSV is scanned once to validate its layout: the first non-empty
    line is the header, whose first token must be ``cid`` and whose
    remaining tokens become DEF.DT_FLABELS; every later non-empty line
    must have exactly one more token than the header.  The tree is then
    built with column 1 as the class and columns 2..(1 + n_feats) as the
    features.  The model goes to DEF.DT_MODEL and its root to DEF.DT_ROOT.

    Args:
        tdata: path of an existing ``.csv`` training file.

    Raises:
        AssertionError: on a bad path/extension, an unexpected header,
            a row with the wrong number of columns, an empty file, or
            when DEF.DT_FLABELS was already set.
    """
    assert (type(tdata) is str)
    assert (os.path.isfile(tdata))
    assert (tdata.endswith(".csv"))

    # calculate the # of features
    n_feats = None

    # `with` closes the handle even when a layout assertion fails mid-scan.
    with open(tdata, "r") as tfile:
        for aline in tfile:
            aline = aline.strip()

            if (aline == ""):
                continue

            tokens = tft_utils.String2Tokens(aline, ",")

            if (n_feats is None):
                # header line
                assert (DEF.DT_FLABELS is None)
                assert (len(tokens) > 1)
                assert (tokens[0] == "cid")

                n_feats = len(tokens) - 1
                DEF.DT_FLABELS = tokens[1:]

            else:
                # data rows carry one token more than the header
                assert ((n_feats + 2) == len(tokens))

    assert ((n_feats is not None) and (n_feats > 0))

    # go training
    print("==== DecisionTree training model ====")

    DEF.DT_MODEL = DT.DecisionTree(
        training_datafile=tdata,
        csv_class_column_index=1,
        csv_columns_for_features=range(2, (2 + n_feats)),
        entropy_threshold=0.01,
        max_depth_desired=3,
        symbolic_to_numeric_cardinality_threshold=0.00001,
    )

    # NOTE: an optional DT.EvalTrainingData scoring pass over the same
    # file and parameters used to sit here as commented-out code.

    DEF.DT_MODEL.get_training_data()
    DEF.DT_MODEL.calculate_first_order_probabilities()
    DEF.DT_MODEL.calculate_class_priors()
    DEF.DT_MODEL.show_training_data()

    DEF.DT_ROOT = DEF.DT_MODEL.construct_decision_tree_classifier()

    if (VERBOSE):
        print("-- dump the trained decision tree --")
        DEF.DT_ROOT.display_decision_tree("    ")