Beispiel #1
0
def main():
    """Check a condensed itemset file against the full likely-itemset file.

    Command line: ``<likely_binary> <condensed_binary> closed|maximal``.

    Both files are loaded with ``readLikelyBin.likelyProbs``.  Their
    thresholds, variable counts and per-variable value counts must agree,
    otherwise the script reports a mismatch and exits with status 1.  The
    condensed key set is then compared with the result of ``extractClosed``
    (for ``closed``) or ``extractMaximal`` (for ``maximal``) applied to the
    full file.  ``SUCCESS!`` is printed when both sets are equal; otherwise
    the common itemsets and the symmetric difference are listed.
    """
    if len(sys.argv) != 4:
        print(
            "Usage: {} [Likely_Binary_Output] [Condensed_Binary_Output] [closed|maximal]"
            .format(sys.argv[0]))
        sys.exit(1)

    lFileName, cFileName, condType = sys.argv[1:4]
    if condType not in ('closed', 'maximal'):
        print("third argument should be either 'closed' or 'maximal'")
        sys.exit(1)

    (l_lipmap, l_threshold, l_numVars,
     l_numValsList) = readLikelyBin.likelyProbs(lFileName)
    (c_lipmap, c_threshold, c_numVars,
     c_numValsList) = readLikelyBin.likelyProbs(cFileName)

    if (l_threshold != c_threshold or l_numVars != c_numVars
            or l_numValsList != c_numValsList):
        print("Two itemsets do not match")
        sys.exit(1)

    cSet = set(c_lipmap.keys())
    if condType == 'closed':
        c2Set = extractClosed(l_lipmap)
    else:
        c2Set = extractMaximal(set(l_lipmap.keys()))

    diffSet = cSet.symmetric_difference(c2Set)
    if not diffSet:
        print("SUCCESS!")
        return

    def print_items(itemset):
        """Print an itemset as ``{b, b, ...}``: value 2 shows as 0, others as 1."""
        # Compare by value: `is 2` only worked through CPython's small-int
        # cache and is always False for non-`int` integer types.
        bits = ("0" if value == 2 else "1" for value in itemset)
        print("{" + ", ".join(bits) + "}")

    print("CONDENSED SET:")
    for itemset in cSet.intersection(c2Set):
        print_items(itemset)

    print("REDUNDANT ITEMSETS:")
    for itemset in diffSet:
        print_items(itemset)
Beispiel #2
0
def main():
    """Print every itemset of the first miner output with both minimum probabilities.

    Four required options name two (binary output, names file) pairs.  For
    each key of the first probability map, the variables whose value differs
    from that variable's domain size are printed as ``(var=val)`` pairs inside
    braces, followed by the minimum probability from the first and from the
    second map.  A key absent from the second map raises ``KeyError``.
    """
    arg_parser = argparse.ArgumentParser(fromfile_prefix_chars='@')
    option_specs = (
        ("-b", "--binary-file", "the binary output file of cp itemset miner"),
        ("-n", "--names-file", "the 'names' file produced by cp itemset miner"),
        ("-d", "--binary-file2", "the binary output file of cp itemset miner"),
        ("-m", "--names-file2", "the 'names' file produced by cp itemset miner"),
    )
    for short_flag, long_flag, help_text in option_specs:
        arg_parser.add_argument(short_flag,
                                long_flag,
                                help=help_text,
                                required=True)
    args = arg_parser.parse_args()

    # First output: probability map plus the names needed to label it.
    (lipmap, threshold, numVars,
     numValsList) = readLikelyBin.likelyProbs(args.binary_file)
    (lVarNames, lValNames) = readVarVals(args.names_file)
    domain_sizes = [len(names_of_var) for names_of_var in lValNames]

    # Second output: only its probability map is consulted below; the names
    # file is still read so that a bad path fails the same way as before.
    (lipmap2, threshold2, numVars2,
     numValsList2) = readLikelyBin.likelyProbs(args.binary_file2)
    (lVarNames2, lValNames2) = readVarVals(args.names_file2)
    domain_sizes2 = [len(names_of_var) for names_of_var in lValNames2]
    #TODO use 'pairs' file to generate output accordingly

    for itemset, (min_prob, max_prob) in lipmap.items():
        (min_prob2, max_prob2) = lipmap2[itemset]
        print('{', end='')
        delimiter = ''
        for position, value in enumerate(itemset):
            # A value equal to the domain size is skipped (not printed).
            if value == domain_sizes[position]:
                continue
            print(delimiter, end='')
            print('({}={})'.format(lVarNames[position],
                                   lValNames[position][value]),
                  end='')
            delimiter = ','
        print('}} -- {} -- {}'.format(min_prob, min_prob2))
Beispiel #3
0
def main():
    """Print the likely itemsets ordered by their map value, largest first.

    Command line: ``<likely_sets_binary> <name_file>``.

    The probability map is loaded with ``readLikelyBin.likelyProbs`` and its
    entries are sorted in descending order of their value.  Each entry is
    printed on one line as ``{ var=val , var=val } value``; only variables
    whose value id is a valid index into that variable's value-name list are
    shown.

    The original used Python 2 ``print`` statements and ``dict.iteritems``;
    this version runs on Python 2 and 3 and keeps the exact spacing the
    trailing-comma ``print`` statements produced (tokens separated by one
    space).
    """
    if len(sys.argv) != 3:
        print("Usage: {} [Likely_Sets_Binary_Output] [name_file] ".format(
            sys.argv[0]))
        sys.exit(1)
    (lipmap, threshold, numVars,
     numValsList) = readLikelyBin.likelyProbs(sys.argv[1])
    liplist = sorted(lipmap.items(),
                     key=operator.itemgetter(1),
                     reverse=True)
    (lVarNames, lValNames) = readVarVals(sys.argv[2])

    for (key, prob) in liplist:
        tokens = ["{"]
        for varId, valId in enumerate(key):
            if valId < len(lValNames[varId]):
                # Every pair after the first carries a leading ", ".
                prefix = ", " if len(tokens) > 1 else ""
                tokens.append("{}{}={}".format(prefix, lVarNames[varId],
                                               lValNames[varId][valId]))
        tokens.append("}} {}".format(prob))
        # Single-argument print behaves the same on Python 2 and 3.
        print(" ".join(tokens))
def main():
    """Write the closed or maximal itemsets of a likely-itemset file in sparse form.

    Command line: ``<likely_binary> <condensed_sparse_output> closed|maximal``.

    The input is loaded with ``readLikelyBin.likelyProbs``; the condensed set
    comes from ``extractClosed`` or ``extractMaximal``.  The output file
    starts with the threshold, the number of variables and one line per entry
    of the value-count list.  Each itemset then takes three lines: the
    space-separated indices whose value equals 1, the minimum probability and
    the maximum probability.

    Exits with status 1 on a wrong argument count or an unknown type.
    """
    if len(sys.argv) != 4:
        print(
            "Usage: {} [Likely_Binary_Output] [Condensed_Sparse_Output] [closed|maximal]"
            .format(sys.argv[0]))
        sys.exit(1)

    lFileName, cFileName, condType = sys.argv[1:4]
    if condType not in ('closed', 'maximal'):
        print("third argument should be either 'closed' or 'maximal'")
        sys.exit(1)

    (l_lipmap, l_threshold, l_numVars,
     l_numValsList) = readLikelyBin.likelyProbs(lFileName)

    if condType == 'closed':
        c2Set = extractClosed(l_lipmap)
    else:
        c2Set = extractMaximal(set(l_lipmap.keys()))

    def write_itemset(out_file, itemset):
        """Append one itemset record (indices, min prob, max prob) to out_file."""
        # Value comparison instead of `is 1`, which depended on CPython's
        # small-int cache.  Joining also avoids the trailing space the old
        # separator logic emitted when the last set index was not the last
        # position, and everything now goes through `out_file` rather than
        # the enclosing `c_file` variable.
        present = [str(i) for i, value in enumerate(itemset) if value == 1]
        out_file.write(' '.join(present) + '\n')
        (min_prob, max_prob) = l_lipmap[itemset]
        out_file.write(str(min_prob) + '\n')
        out_file.write(str(max_prob) + '\n')

    with open(cFileName, 'w') as c_file:
        c_file.write(str(l_threshold) + '\n')
        c_file.write(str(l_numVars) + '\n')
        for num_vals in l_numValsList:
            c_file.write(str(num_vals) + '\n')
        for itemset in c2Set:
            write_itemset(c_file, itemset)
def main():
    """Print every likely itemset with readable variable and value names.

    Command line: ``<likely_sets_binary> <name_file>``.

    Each entry of the probability map is printed on one line as
    ``{(var=val), (var=val)} value``.  A variable is shown only when its
    value id is a valid index into that variable's list of value names.
    """
    if len(sys.argv) != 3:
        usage = "Usage: {} [Likely_Sets_Binary_Output] [name_file] ".format(
            sys.argv[0])
        print(usage)
        exit(1)
    (lipmap, threshold, numVars,
     numValsList) = readLikelyBin.likelyProbs(sys.argv[1])
    (lVarNames, lValNames) = readVarVals(sys.argv[2])

    for itemset, prob in lipmap.items():
        print("{", end='')
        separator = ""
        for var_id, val_id in enumerate(itemset):
            if val_id >= len(lValNames[var_id]):
                # Out-of-range value id: the variable is not shown.
                continue
            print("{}({}={})".format(separator, lVarNames[var_id],
                                     lValNames[var_id][val_id]),
                  end='')
            separator = ", "
        print("}} {}".format(prob))
Beispiel #6
0
def main():
    """Compare CP itemset-miner output with the output of the ALL enumerator.

    Command line:
    ``<likely_sets_binary> <name_file> <attribute_file> <variable_value_file>``.

    Keys of the ALL map are translated into the CP miner's variable and
    value numbering by matching names from the two name files.  Entries whose
    value reaches the CP threshold and whose translated key contains no 0 are
    kept.  The script then prints:

    * itemsets present in only one of the two results, and
    * itemsets present in both whose ALL value lies outside the CP
      ``[min, max]`` interval.
    """
    if len(sys.argv) != 5:
        print ("Usage: {} [Likely_Sets_Binary_Output] [name_file] [attribute_file] [variable_value_file]".format(sys.argv[0]))
        sys.exit(1)
    (lipmap, threshold, numVars, numValsList) = readLikelyBin.likelyProbs(sys.argv[1])
    (aipmap, numVars2, numValsList2) = readAllBin.allProbs(sys.argv[3], sys.argv[4])

    namesFileName = sys.argv[2]
    varvalFileName = sys.argv[4]

    (lVarNames, lValNames) = readVarVals(namesFileName)
    (aVarNames, aValNames) = readVarVals(varvalFileName)

    # Name -> id lookup tables for the CP miner's numbering.
    lVarNameToId = dict()
    lValNameToId = dict()
    for varIdx, varName in enumerate(lVarNames):
        lVarNameToId[varName] = varIdx
        for valIdx, valName in enumerate(lValNames[varIdx]):
            lValNameToId[(varName, valName)] = valIdx

    def convertKey(key):
        """Translate a key from ALL numbering into CP numbering via names."""
        key2 = [0] * len(aVarNames)
        for varId, varName in enumerate(aVarNames):
            valId = key[varId]
            varId2 = lVarNameToId[varName]
            if valId < len(aValNames[varId]):
                valId2 = lValNameToId[(varName, aValNames[varId][valId])]
            else:
                # Value ids past the named values are carried over unchanged.
                valId2 = valId
            key2[varId2] = valId2
        return tuple(key2)

    # create another dictionary for likely itemsets from `all'
    lipmap2 = dict()
    for key, t in aipmap.items():
        if t >= threshold:
            key2 = convertKey(key)
            # Keys containing a 0 after translation are left out.
            if 0 not in key2:
                lipmap2[key2] = t

    # Q1. Are all likely itemsets enumerated?
    diff1 = set(lipmap2.keys()).difference(set(lipmap.keys()))
    if diff1:
        print ("Itemsets found by ALL and missed by CP:")
        for key in diff1:
            print ("{} --> {}".format(key, lipmap2[key]))

    diff2 = set(lipmap.keys()).difference(set(lipmap2.keys()))
    if diff2:
        print ("Itemsets found by CP and missed by ALL:")
        for key in diff2:
            (minprob, maxprob) = lipmap[key]
            print ("{} --> [{} .. {}]".format(key, minprob, maxprob))

    # Q2. How close probabilities are?
    for key in set(lipmap.keys()).intersection(set(lipmap2.keys())):
        (minprob, maxprob) = lipmap[key][0], lipmap[key][1]
        trueprob = lipmap2[key]
        if trueprob < minprob or trueprob > maxprob:
            print ("{} min:{} max:{} true:{}".format(key, minprob, maxprob, trueprob))
def main():
    """Validate CP itemset-miner output against brute-force enumeration.

    Options: ``-b`` CP binary output, ``-a`` enumerator binary output,
    ``-n`` CP names file, ``-v`` enumerator varval file, ``-t`` itemset type
    (``probable``, ``closed`` or ``maximal``; default ``probable``) and an
    optional ``-f`` report file.

    Enumerator keys are translated to the CP numbering by name, filtered by
    the CP threshold and, for ``closed``/``maximal``, reduced with
    ``extractClosed``/``extractMaximal``.  Two ``[OK]``/``[FAIL]`` summary
    lines go to standard output: one for equality of the key sets and one for
    whether every enumerated value lies within the CP ``[min, max]`` range.
    Details are written to the report file when one was requested.

    Exits with status 1 when the itemset type is not recognised.
    """
    parser = argparse.ArgumentParser(fromfile_prefix_chars='@')
    parser.add_argument("-b",
                        "--cp-binary",
                        help="the binary output file of cp itemset miner",
                        required=True)
    parser.add_argument(
        "-a",
        "--enum-binary",
        help="the binary output of brute-force itemset enumerator",
        required=True)
    parser.add_argument("-n",
                        "--names-file",
                        help="the 'names' file produced by cp itemset miner",
                        required=True)
    parser.add_argument(
        "-v",
        "--varval-file",
        help="the 'varval' file produced by brute-force itemset enumerator",
        required=True)
    parser.add_argument("-t",
                        "--itemset-type",
                        help="itemset types (probable, maximal, or closed)",
                        type=str,
                        default='probable')
    parser.add_argument(
        "-f",
        "--report-file",
        help="a file to which a detailed report will be written")
    args = parser.parse_args()

    if args.itemset_type not in {'probable', 'closed', 'maximal'}:
        print("Itemset type should be 'probable', 'maximal', or 'closed'")
        exit(1)

    # `rpf` stays None when no report was requested; every report write
    # below is guarded on it.
    rpf = None
    if args.report_file is not None:
        rpf = open(args.report_file, 'w')

    # try/finally guarantees the report file is closed even when reading or
    # comparing the inputs raises.
    try:
        (lipmap, threshold, numVars,
         numValsList) = readLikelyBin.likelyProbs(args.cp_binary)
        (aipmap, numVars2,
         numValsList2) = readAllBin.allProbs(args.enum_binary,
                                             args.varval_file)

        (lVarNames, lValNames) = readVarVals(args.names_file)
        (aVarNames, aValNames) = readVarVals(args.varval_file)

        # Name -> id lookup tables for the CP miner's numbering.
        lVarNameToId = dict()
        lValNameToId = dict()
        for varIdx, varName in enumerate(lVarNames):
            lVarNameToId[varName] = varIdx
            for valIdx, valName in enumerate(lValNames[varIdx]):
                lValNameToId[(varName, valName)] = valIdx

        def convertKey(key):
            """Translate an enumerator key into CP numbering via names."""
            key2 = [0] * len(aVarNames)
            for varId, varName in enumerate(aVarNames):
                valId = key[varId]
                varId2 = lVarNameToId[varName]
                if valId < len(aValNames[varId]):
                    valId2 = lValNameToId[(varName, aValNames[varId][valId])]
                else:
                    # Value ids past the named values are carried over.
                    valId2 = valId
                key2[varId2] = valId2
            return tuple(key2)

        # create another dictionary for likely itemsets from `all'
        lipmap2 = dict()
        for key, t in aipmap.items():
            if t >= threshold:
                lipmap2[convertKey(key)] = t

        lipkeys = lipmap.keys()
        lip2keys = lipmap2.keys()

        domain_sizes = [len(val_names) for val_names in lValNames]
        if args.itemset_type == 'closed':
            lip2keys = extractClosed(lipmap2, domain_sizes)
        elif args.itemset_type == 'maximal':
            lip2keys = extractMaximal(set(lip2keys), domain_sizes)

        # Q1. Are all likely itemsets enumerated?
        diff1 = set(lip2keys).difference(set(lipkeys))
        if diff1 and rpf is not None:
            print("Itemsets found by ALL and missed by CP:", file=rpf)
            for key in diff1:
                print("{} --> {}".format(key, lipmap2[key]), file=rpf)

        diff2 = set(lipkeys).difference(set(lip2keys))
        if diff2 and rpf is not None:
            print("Itemsets found by CP and missed by ALL:", file=rpf)
            for key in diff2:
                (minprob, maxprob) = lipmap[key]
                print("{} --> [{} .. {}]".format(key, minprob, maxprob),
                      file=rpf)

        intersect = set(lipkeys).intersection(set(lip2keys))

        if not diff1 and not diff2:
            print("[OK]\tEquivalent sets of itemsets")
        else:
            print("[FAIL]\tCP:{} Enum:{} intersection:{}".format(
                len(lipkeys), len(lip2keys), len(intersect)))

        # Q2. How close probabilities are?
        out_count = 0
        for key in intersect:
            minprob = lipmap[key][0]
            maxprob = lipmap[key][1]
            trueprob = lipmap2[key]
            if trueprob < minprob or trueprob > maxprob:
                if rpf is not None:
                    if out_count == 0:
                        # Section header, written once before the first entry.
                        print(
                            "Itemsets with different probabilities frome CP than brute-force enumeration",
                            file=rpf)
                    print("{} min:{} max:{} true:{}".format(
                        key, minprob, maxprob, trueprob),
                          file=rpf)
                out_count += 1
        if out_count == 0:
            print(
                "[OK]\tAll probabilities in intersection ({}) within range".
                format(len(intersect)))
        else:
            print("[FAIL]\tintersection:{} in_range:{}".format(
                len(intersect),
                len(intersect) - out_count))
    finally:
        if rpf is not None:
            rpf.close()