def main():
    """Check a condensed itemset file against one recomputed from the full set.

    Command line: ``<likely_binary> <condensed_binary> <closed|maximal>``.

    Both files are loaded with ``readLikelyBin.likelyProbs``. The closed or
    maximal family is recomputed from the first file (``extractClosed`` /
    ``extractMaximal``) and compared with the keys stored in the second file.
    Prints ``SUCCESS!`` when both families are equal. Otherwise prints the
    itemsets both sides agree on under ``CONDENSED SET:`` and the symmetric
    difference under ``REDUNDANT ITEMSETS:``.

    Exits with status 1 on a usage error, on an unknown third argument, or
    when the headers of the two files (threshold, number of variables, value
    counts) differ.
    """
    if len(sys.argv) != 4:
        print(
            "Usage: {} [Likely_Binary_Output] [Condensed_Binary_Output] [closed|maximal]"
            .format(sys.argv[0]))
        sys.exit(1)

    lFileName, cFileName, condType = sys.argv[1:4]
    if condType not in ('closed', 'maximal'):
        print("third argument should be either 'closed' or 'maximal'")
        sys.exit(1)

    (l_lipmap, l_threshold, l_numVars,
     l_numValsList) = readLikelyBin.likelyProbs(lFileName)
    (c_lipmap, c_threshold, c_numVars,
     c_numValsList) = readLikelyBin.likelyProbs(cFileName)

    # The comparison is only meaningful when both files describe the same
    # problem instance.
    if (l_threshold != c_threshold or l_numVars != c_numVars
            or l_numValsList != c_numValsList):
        print("Two itemsets do not match")
        sys.exit(1)

    cSet = set(c_lipmap.keys())
    if condType == 'closed':
        c2Set = extractClosed(l_lipmap)
    else:
        c2Set = extractMaximal(set(l_lipmap.keys()))

    diffSet = cSet.symmetric_difference(c2Set)
    if not diffSet:
        print("SUCCESS!")
        return

    def print_items(itemset):
        # An entry equal to 2 is rendered as "0", every other value as "1".
        # Compare by value (==): identity comparison against an int literal
        # is implementation-dependent.
        print("{" + ", ".join("0" if value == 2 else "1"
                              for value in itemset) + "}")

    print("CONDENSED SET:")
    for itemset in cSet.intersection(c2Set):
        print_items(itemset)
    print("REDUNDANT ITEMSETS:")
    for itemset in diffSet:
        print_items(itemset)
def main():
    """Print each itemset of the first miner output with both minimum probabilities.

    Options (all required): ``-b``/``-n`` give the first binary output and its
    names file, ``-d``/``-m`` give the second pair. For every key of the first
    map one line is printed: the assignments ``(var=value)`` whose value id
    differs from the variable's domain size, wrapped in braces, followed by
    the minimum probability from the first and from the second file.

    NOTE(review): a key that is absent from the second file raises
    ``KeyError``; confirm whether both files are guaranteed to hold the same
    itemsets.
    """
    parser = argparse.ArgumentParser(fromfile_prefix_chars='@')
    option_specs = (
        ("-b", "--binary-file", "the binary output file of cp itemset miner"),
        ("-n", "--names-file", "the 'names' file produced by cp itemset miner"),
        ("-d", "--binary-file2", "the binary output file of cp itemset miner"),
        ("-m", "--names-file2", "the 'names' file produced by cp itemset miner"),
    )
    for short_flag, long_flag, help_text in option_specs:
        parser.add_argument(short_flag, long_flag, help=help_text,
                            required=True)
    args = parser.parse_args()

    # First miner output and the names that go with it.
    first_map, threshold, numVars, numValsList = readLikelyBin.likelyProbs(
        args.binary_file)
    var_names, val_names = readVarVals(args.names_file)
    sizes = [len(names) for names in val_names]

    # Second miner output; only its probability map is consulted below.
    second_map, threshold2, numVars2, numValsList2 = readLikelyBin.likelyProbs(
        args.binary_file2)
    var_names2, val_names2 = readVarVals(args.names_file2)
    sizes2 = [len(names) for names in val_names2]

    #TODO use 'pairs' file to generate output accordingly
    for key, (min_prob, max_prob) in first_map.items():
        min_prob2, max_prob2 = second_map[key]

        print('{', end='')
        separator = ''
        for position, value_id in enumerate(key):
            # A value id equal to the domain size is skipped.
            if value_id == sizes[position]:
                continue
            print('{}({}={})'.format(separator, var_names[position],
                                     val_names[position][value_id]),
                  end='')
            separator = ','
        print('}} -- {} -- {}'.format(min_prob, min_prob2))
def main():
    """Print the itemsets of a miner output, sorted by decreasing probability.

    Command line: ``<likely_sets_binary> <name_file>``.

    The map returned by ``readLikelyBin.likelyProbs`` is sorted by value in
    descending order. Each entry becomes one line of space-separated tokens:
    an opening brace, the ``var=value`` assignments whose value id is smaller
    than the number of value names of that variable, a closing brace and the
    stored probability, e.g. ``{ a=x , b=y } 0.5``.

    Exits with status 1 on a usage error.
    """
    if len(sys.argv) != 3:
        print("Usage: {} [Likely_Sets_Binary_Output] [name_file] ".format(
            sys.argv[0]))
        sys.exit(1)

    (lipmap, threshold, numVars,
     numValsList) = readLikelyBin.likelyProbs(sys.argv[1])
    # items() replaces the Python 2-only iteritems().
    liplist = sorted(lipmap.items(), key=operator.itemgetter(1), reverse=True)
    (lVarNames, lValNames) = readVarVals(sys.argv[2])

    for key, prob in liplist:
        # The original used Python 2 print statements with trailing commas,
        # which put one space between consecutive pieces. Joining the pieces
        # with a single space reproduces that output and works on Python 2
        # and Python 3 alike.
        tokens = ["{"]
        for varId, valId in enumerate(key):
            if valId < len(lValNames[varId]):
                assignment = "{}={}".format(lVarNames[varId],
                                            lValNames[varId][valId])
                if len(tokens) > 1:
                    assignment = ", " + assignment
                tokens.append(assignment)
        tokens.append("}} {}".format(prob))
        print(" ".join(tokens))
def main():
    """Compute the closed or maximal itemsets and write them in sparse form.

    Command line: ``<likely_binary> <condensed_sparse_output> <closed|maximal>``.

    The full itemset map is read with ``readLikelyBin.likelyProbs``, reduced
    with ``extractClosed`` or ``extractMaximal``, and written to the output
    file as text: threshold, number of variables, one line per entry of the
    value-count list, then three lines per itemset (positions whose entry
    equals 1, minimum probability, maximum probability).

    Exits with status 1 on a usage error or an unknown third argument.
    """
    if len(sys.argv) != 4:
        print(
            "Usage: {} [Likely_Binary_Output] [Condensed_Sparse_Output] [closed|maximal]"
            .format(sys.argv[0]))
        sys.exit(1)

    lFileName, cFileName, condType = sys.argv[1:4]
    if condType not in ('closed', 'maximal'):
        print("third argument should be either 'closed' or 'maximal'")
        sys.exit(1)

    (l_lipmap, l_threshold, l_numVars,
     l_numValsList) = readLikelyBin.likelyProbs(lFileName)

    if condType == 'closed':
        c2Set = extractClosed(l_lipmap)
    else:
        c2Set = extractMaximal(set(l_lipmap.keys()))

    def write_itemset(out_file, itemset):
        """Write one itemset record (positions, min prob, max prob)."""
        last = len(itemset) - 1
        for position, flag in enumerate(itemset):
            # Compare by value, and write to the file that was passed in
            # rather than to the enclosing scope's handle.
            if flag == 1:
                out_file.write(str(position))
            # NOTE(review): a separator is written after every position but
            # the last, whether or not an index was written, so lines can
            # contain leading, doubled or trailing spaces. Kept byte-for-byte
            # because the consumer of this file is not visible here.
            if position < last:
                out_file.write(' ')
        out_file.write('\n')
        (min_prob, max_prob) = l_lipmap[itemset]
        out_file.write(str(min_prob) + '\n')
        out_file.write(str(max_prob) + '\n')

    with open(cFileName, 'w') as c_file:
        c_file.write(str(l_threshold) + '\n')
        c_file.write(str(l_numVars) + '\n')
        for num_vals in l_numValsList:
            c_file.write(str(num_vals) + '\n')
        for itemset in c2Set:
            write_itemset(c_file, itemset)
def main():
    """Print every itemset of a miner output in readable form.

    Command line: ``<likely_sets_binary> <name_file>``.

    For each key of the map returned by ``readLikelyBin.likelyProbs`` one
    line is printed: the ``(var=value)`` assignments whose value id is smaller
    than the number of value names of that variable, separated by ``", "`` and
    wrapped in braces, followed by the value stored for that key.
    """
    argv = sys.argv
    if len(argv) != 3:
        print("Usage: {} [Likely_Sets_Binary_Output] [name_file] ".format(
            argv[0]))
        exit(1)

    itemset_probs, threshold, numVars, numValsList = readLikelyBin.likelyProbs(
        argv[1])
    var_names, val_names = readVarVals(argv[2])

    for itemset, prob in itemset_probs.items():
        print("{", end='')
        lead = ""
        for var_id, val_id in enumerate(itemset):
            # Value ids outside the named range are left out of the output.
            if val_id < len(val_names[var_id]):
                print("{}({}={})".format(lead, var_names[var_id],
                                         val_names[var_id][val_id]),
                      end='')
                lead = ", "
        print("}} {}".format(prob))
def main():
    """Compare CP miner output with the exhaustive ("ALL") enumeration.

    Command line:
    ``<likely_sets_binary> <name_file> <attribute_file> <variable_value_file>``.

    Itemsets from the exhaustive enumeration whose probability reaches the
    CP threshold are re-keyed into the CP variable/value numbering. Keys that
    contain a 0 after conversion are dropped. The function then reports

    * itemsets found by only one of the two methods, and
    * common itemsets whose enumerated probability falls outside the
      ``[min, max]`` interval reported by CP.

    Exits with status 1 on a usage error.
    """
    if len(sys.argv) != 5:
        print ("Usage: {} [Likely_Sets_Binary_Output] [name_file] [attribute_file] [variable_value_file]".format(sys.argv[0]))
        sys.exit(1)

    (lipmap, threshold, numVars,
     numValsList) = readLikelyBin.likelyProbs(sys.argv[1])
    (aipmap, numVars2, numValsList2) = readAllBin.allProbs(sys.argv[3],
                                                           sys.argv[4])
    namesFileName = sys.argv[2]
    varvalFileName = sys.argv[4]
    (lVarNames, lValNames) = readVarVals(namesFileName)
    (aVarNames, aValNames) = readVarVals(varvalFileName)

    # Name -> id lookup tables for the CP numbering.
    lVarNameToId = {name: varId for varId, name in enumerate(lVarNames)}
    lValNameToId = dict()
    for varId, varName in enumerate(lVarNames):
        for valId, valName in enumerate(lValNames[varId]):
            lValNameToId[(varName, valName)] = valId

    def convertKey(key):
        """Translate a key from the ALL numbering into the CP numbering."""
        key2 = [0] * len(aVarNames)
        for varId, varName in enumerate(aVarNames):
            valId = key[varId]
            varId2 = lVarNameToId[varName]
            if valId < len(aValNames[varId]):
                valId2 = lValNameToId[(varName, aValNames[varId][valId])]
            else:
                # Ids beyond the named values are carried over unchanged.
                valId2 = valId
            key2[varId2] = valId2
        return tuple(key2)

    # Create another dictionary for likely itemsets from `all'.
    # The previous version also set a `hasZero` flag inside convertKey, but
    # that only bound a local of the helper and never reached this loop; the
    # effective filter has always been the check on the converted key below.
    lipmap2 = dict()
    for key, t in aipmap.items():
        if t >= threshold:
            key2 = convertKey(key)
            if 0 not in key2:
                lipmap2[key2] = t

    # Q1. Are all likely itemsets enumerated?
    diff1 = set(lipmap2.keys()).difference(set(lipmap.keys()))
    if diff1:
        print ("Itemsets found by ALL and missed by CP:")
        for key in diff1:
            print ("{} --> {}".format(key, lipmap2[key]))

    diff2 = set(lipmap.keys()).difference(set(lipmap2.keys()))
    if diff2:
        print ("Itemsets found by CP and missed by ALL:")
        for key in diff2:
            (minprob, maxprob) = lipmap[key]
            print ("{} --> [{} .. {}]".format(key, minprob, maxprob))

    # Q2. How close are the probabilities?
    for key in set(lipmap.keys()).intersection(set(lipmap2.keys())):
        minprob = lipmap[key][0]
        maxprob = lipmap[key][1]
        trueprob = lipmap2[key]
        if trueprob < minprob or trueprob > maxprob:
            print ("{} min:{} max:{} true:{}".format(key, minprob, maxprob,
                                                     trueprob))
def main():
    """Validate CP itemset-miner output against brute-force enumeration.

    Options:
        -b/--cp-binary     binary output of the CP miner (required)
        -a/--enum-binary   binary output of the brute-force enumerator (required)
        -n/--names-file    'names' file of the CP miner (required)
        -v/--varval-file   'varval' file of the enumerator (required)
        -t/--itemset-type  'probable' (default), 'closed' or 'maximal'
        -f/--report-file   optional file that receives the detailed report

    Enumerated itemsets whose probability reaches the CP threshold are
    re-keyed into the CP numbering and, depending on the itemset type,
    reduced with ``extractClosed`` / ``extractMaximal``. Two ``[OK]`` /
    ``[FAIL]`` summary lines go to stdout: one for set equality and one for
    whether each enumerated probability lies inside the CP ``[min, max]``
    interval. Details are written to the report file only.

    Exits with status 1 when the itemset type is not recognised.
    """
    parser = argparse.ArgumentParser(fromfile_prefix_chars='@')
    parser.add_argument("-b", "--cp-binary",
                        help="the binary output file of cp itemset miner",
                        required=True)
    parser.add_argument(
        "-a", "--enum-binary",
        help="the binary output of brute-force itemset enumerator",
        required=True)
    parser.add_argument("-n", "--names-file",
                        help="the 'names' file produced by cp itemset miner",
                        required=True)
    parser.add_argument(
        "-v", "--varval-file",
        help="the 'varval' file produced by brute-force itemset enumerator",
        required=True)
    parser.add_argument("-t", "--itemset-type",
                        help="itemset types (probable, maximal, or closed)",
                        type=str, default='probable')
    parser.add_argument(
        "-f", "--report-file",
        help="a file to which a detailed report will be written")
    args = parser.parse_args()

    if args.itemset_type not in {'probable', 'closed', 'maximal'}:
        print("Itemset type should be 'probable', 'maximal', or 'closed'")
        raise SystemExit(1)

    rpf = None
    if args.report_file is not None:
        rpf = open(args.report_file, 'w')
    reporting = rpf is not None

    # try/finally guarantees the report file is closed even when loading or
    # comparing the inputs raises.
    try:
        (lipmap, threshold, numVars,
         numValsList) = readLikelyBin.likelyProbs(args.cp_binary)
        (aipmap, numVars2, numValsList2) = readAllBin.allProbs(
            args.enum_binary, args.varval_file)
        (lVarNames, lValNames) = readVarVals(args.names_file)
        (aVarNames, aValNames) = readVarVals(args.varval_file)

        # Name -> id lookup tables for the CP numbering.
        lVarNameToId = {name: varId for varId, name in enumerate(lVarNames)}
        lValNameToId = dict()
        for varId, varName in enumerate(lVarNames):
            for valId, valName in enumerate(lValNames[varId]):
                lValNameToId[(varName, valName)] = valId

        def convertKey(key):
            """Translate a key from the enumerator numbering to CP numbering."""
            key2 = [0] * len(aVarNames)
            for varId, varName in enumerate(aVarNames):
                valId = key[varId]
                varId2 = lVarNameToId[varName]
                if valId < len(aValNames[varId]):
                    valId2 = lValNameToId[(varName, aValNames[varId][valId])]
                else:
                    # Ids beyond the named values are carried over unchanged.
                    valId2 = valId
                key2[varId2] = valId2
            return tuple(key2)

        # Create another dictionary for likely itemsets from `all'.
        lipmap2 = dict()
        for key, t in aipmap.items():
            if t >= threshold:
                lipmap2[convertKey(key)] = t

        lipkeys = lipmap.keys()
        lip2keys = lipmap2.keys()
        domain_sizes = [len(val_names) for val_names in lValNames]
        if args.itemset_type == 'closed':
            lip2keys = extractClosed(lipmap2, domain_sizes)
        elif args.itemset_type == 'maximal':
            lip2keys = extractMaximal(set(lip2keys), domain_sizes)

        # Q1. Are all likely itemsets enumerated?
        diff1 = set(lip2keys).difference(set(lipkeys))
        if diff1 and reporting:
            print("Itemsets found by ALL and missed by CP:", file=rpf)
            for key in diff1:
                print("{} --> {}".format(key, lipmap2[key]), file=rpf)

        diff2 = set(lipkeys).difference(set(lip2keys))
        if diff2 and reporting:
            print("Itemsets found by CP and missed by ALL:", file=rpf)
            for key in diff2:
                (minprob, maxprob) = lipmap[key]
                print("{} --> [{} .. {}]".format(key, minprob, maxprob),
                      file=rpf)

        intersect = set(lipkeys).intersection(set(lip2keys))
        if not diff1 and not diff2:
            print("[OK]\tEquivalent sets of itemsets")
        else:
            print("[FAIL]\tCP:{} Enum:{} intersection:{}".format(
                len(lipkeys), len(lip2keys), len(intersect)))

        # Q2. How close are the probabilities?
        out_count = 0
        for key in intersect:
            minprob = lipmap[key][0]
            maxprob = lipmap[key][1]
            trueprob = lipmap2[key]
            if trueprob < minprob or trueprob > maxprob:
                if reporting:
                    if out_count == 0:
                        # Header goes out once, before the first offender.
                        print(
                            "Itemsets with different probabilities from CP than brute-force enumeration",
                            file=rpf)
                    print("{} min:{} max:{} true:{}".format(
                        key, minprob, maxprob, trueprob), file=rpf)
                out_count += 1

        if out_count == 0:
            print(
                "[OK]\tAll probabilities in intersection ({}) within range".format(
                    len(intersect)))
        else:
            print("[FAIL]\tintersection:{} in_range:{}".format(
                len(intersect), len(intersect) - out_count))
    finally:
        if rpf is not None:
            rpf.close()