Esempio n. 1
0
def main():
    """Command-line entry point: run ``get_trueFIs`` and report its results.

    Expects exactly six command-line arguments, in this order:

    1. ``use_additional_knowledge`` -- parsed with ``int``.
    2. ``delta`` -- parsed with ``float``.
    3. ``min_freq`` -- parsed with ``float``.
    4. ``gap`` -- parsed with ``float``.
    5. ``dataset`` -- handed to ``getDatasetInfo.get_ds_stats``.
    6. ``results_filename`` -- ``~`` is expanded; must be an existing file.

    The itemsets returned by ``get_trueFIs`` are printed through
    ``utils.print_itemsets``. Four lines are then written to stderr: two
    lines of ``key=value`` pairs, a CSV header, and a CSV row carrying the
    same values.

    A wrong argument count, a missing results file, or an unparsable number
    is reported through ``utils.error_exit``.
    """

    def parse_number(position, convert):
        """Convert ``sys.argv[position]`` with ``convert`` or report an error.

        NOTE(review): ``utils.error_exit`` presumably terminates the process;
        if it ever returned, this helper would return ``None``.
        """
        raw = sys.argv[position]
        try:
            return convert(raw)
        except ValueError:
            utils.error_exit("{} is not a number\n".format(raw))

    if len(sys.argv) != 7:
        # Only the first fragment goes through str.format, so the braces in
        # the remaining fragments must be written singly (a doubled brace
        # would be printed verbatim).
        utils.error_exit(" ".join(
            ("USAGE: {}".format(os.path.basename(sys.argv[0])),
             "use_additional_knowledge={0|1} delta min_freq gap dataset",
             "results_filename\n")))

    dataset = sys.argv[5]
    res_filename = os.path.expanduser(sys.argv[6])
    if not os.path.isfile(res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(res_filename))

    use_additional_knowledge = parse_number(1, int)
    delta = parse_number(2, float)
    min_freq = parse_number(3, float)
    gap = parse_number(4, float)

    ds_stats = getDatasetInfo.get_ds_stats(dataset)

    (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq, delta,
                                   gap, use_additional_knowledge)

    utils.print_itemsets(trueFIs, ds_stats['size'])

    res_basename = os.path.basename(res_filename)

    # Human-readable summary: run parameters first, then the statistics.
    sys.stderr.write(",".join(
        ("res_file={}".format(res_basename),
         "use_add_knowl={}".format(use_additional_knowledge),
         "e1={},e2={}".format(stats['epsilon_1'], stats['epsilon_2']),
         "d={}".format(delta),
         "min_freq={},trueFIs={}\n".format(min_freq, len(trueFIs)))))
    sys.stderr.write(",".join(
        ("base_set={}".format(stats['base_set']),
         "maximal_itemsets={}".format(stats['maximal_itemsets']),
         "negbor={}".format(stats['negative_border']),
         "emp_vc_dim={}".format(stats['emp_vc_dim']),
         "not_emp_vc_dim={}\n".format(stats['not_emp_vc_dim']))))

    # Same values again as a CSV header plus one CSV row.
    sys.stderr.write(",".join(
        ("res_file,add_knowl,e1,e2,delta,min_freq,trueFIs",
         "base_set,maximal_itemsets,negative_border,emp_vc_dim",
         "not_emp_vc_dim\n")))
    csv_row = (res_basename, use_additional_knowledge,
               stats['epsilon_1'], stats['epsilon_2'], delta, min_freq,
               len(trueFIs), stats['base_set'], stats['maximal_itemsets'],
               stats['negative_border'], stats['emp_vc_dim'],
               stats['not_emp_vc_dim'])
    sys.stderr.write("{}\n".format(",".join(str(field) for field in csv_row)))
Esempio n. 2
0
def main():
    """Command-line entry point: run ``get_trueFIs`` with a p-value mode.

    Expects exactly six command-line arguments, in this order:

    1. ``use_additional_knowledge`` -- parsed with ``int``.
    2. ``delta`` -- parsed with ``float``.
    3. ``min_freq`` -- parsed with ``float``.
    4. ``mode`` -- one of ``c``, ``e`` or ``w`` (case-insensitive); it is
       upper-cased before being passed on.
    5. ``dataset`` -- handed to ``getDatasetInfo.get_ds_stats``.
    6. ``results_filename`` -- must be an existing file.

    The itemsets returned by ``get_trueFIs`` are printed through
    ``utils.print_itemsets``. Four lines are then written to stderr: two
    lines of ``key=value`` pairs, a CSV header, and a CSV row carrying the
    same values.

    Invalid arguments are reported through ``utils.error_exit``.
    """

    def parse_number(position, convert):
        """Convert ``sys.argv[position]`` with ``convert`` or report an error.

        NOTE(review): ``utils.error_exit`` presumably terminates the process;
        if it ever returned, this helper would return ``None``.
        """
        raw = sys.argv[position]
        try:
            return convert(raw)
        except ValueError:
            utils.error_exit("{} is not a number\n".format(raw))

    # Verify arguments
    if len(sys.argv) != 7:
        # Only the first fragment goes through str.format, so braces in the
        # other fragments are written singly. The mode list mirrors the
        # values accepted by the validation below.
        utils.error_exit(" ".join(
            ("Usage: {}".format(os.path.basename(sys.argv[0])),
             "use_additional_knowledge={0|1} delta min_freq mode={c|e|w}",
             "dataset results_filename\n")))

    dataset = sys.argv[5]
    res_filename = sys.argv[6]
    if not os.path.isfile(res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(res_filename))

    pvalue_mode = sys.argv[4].upper()
    if pvalue_mode not in ("C", "E", "W"):
        # Echo the argument exactly as typed, not its upper-cased form.
        utils.error_exit(
            "p-value mode must be 'c', 'e', or 'w'. You passed {}\n".format(
                sys.argv[4]))

    use_additional_knowledge = parse_number(1, int)
    delta = parse_number(2, float)
    min_freq = parse_number(3, float)

    ds_stats = getDatasetInfo.get_ds_stats(dataset)

    (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq, delta,
                                   pvalue_mode, use_additional_knowledge)

    utils.print_itemsets(trueFIs, ds_stats['size'])

    res_basename = os.path.basename(res_filename)

    # Human-readable summary: run parameters first, then the statistics.
    sys.stderr.write(",".join(
        ("res_file={}".format(res_basename),
         "use_add_knowl={}".format(use_additional_knowledge),
         "pvalue_mode={}".format(pvalue_mode),
         "d={}".format(delta),
         "min_freq={}".format(min_freq),
         "trueFIs={}\n".format(len(trueFIs)))))
    sys.stderr.write(",".join(
        ("union_bound_factor={}".format(stats['union_bound_factor']),
         "critical_value={}".format(stats['critical_value']),
         "removed={}".format(stats['removed']),
         "epsilon={}\n".format(stats['epsilon']))))

    # Same values again as a CSV header plus one CSV row.
    sys.stderr.write(",".join(
        ("res_file,add_knowl,pvalue_mode,delta,min_freq,trueFIs",
         "union_bound_factor,critical_value,removed,epsilon\n")))
    csv_row = (res_basename, use_additional_knowledge, pvalue_mode, delta,
               min_freq, len(trueFIs), stats['union_bound_factor'],
               stats['critical_value'], stats['removed'], stats['epsilon'])
    sys.stderr.write("{}\n".format(",".join(str(field) for field in csv_row)))
Esempio n. 3
0
def main():
    """Command-line entry point: print the values computed by ``epsilon_dataset``.

    Expects exactly three command-line arguments, in this order:

    1. ``use_additional_knowledge`` -- parsed with ``int``.
    2. ``delta`` -- parsed with ``float``.
    3. ``dataset`` -- handed to ``getDatasetInfo.get_ds_stats``.

    Two lines are printed to stdout: the two epsilon values separated by a
    space, then the smaller of the two and the third value returned by
    ``epsilon_dataset`` separated by a tab.

    Invalid arguments are reported through ``utils.error_exit``.
    """
    if len(sys.argv) != 4:
        # The whole string is formatted here, so the doubled braces are the
        # correct escape for literal "{0|1}".
        utils.error_exit(
            "Usage: {} use_additional_knowledge={{0|1}} delta dataset\n".format(
                sys.argv[0]))
    try:
        use_additional_knowledge = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not an integer\n".format(sys.argv[1]))
    try:
        delta = float(sys.argv[2])
    except ValueError:
        # delta is parsed as a float, so "not an integer" would be misleading.
        utils.error_exit("{} is not a number\n".format(sys.argv[2]))

    ds_stats = getDatasetInfo.get_ds_stats(sys.argv[3])

    (eps_vc_dim, eps_emp_vc_dim, returned) = epsilon_dataset(
        delta, ds_stats, use_additional_knowledge)

    print("{} {}".format(eps_vc_dim, eps_emp_vc_dim))
    print("{}\t{}".format(min(eps_vc_dim, eps_emp_vc_dim), returned))
Esempio n. 4
0
def main():
    """Print the epsilon values that ``epsilon_dataset`` yields for a dataset.

    Command-line arguments (exactly three are required):

    * ``use_additional_knowledge`` -- converted with ``int``.
    * ``delta`` -- converted with ``float``.
    * ``dataset`` -- passed to ``getDatasetInfo.get_ds_stats``.

    Output on stdout is two lines. The first holds both epsilon values
    separated by a space. The second holds their minimum and the third
    element returned by ``epsilon_dataset``, separated by a tab.

    Argument problems are reported through ``utils.error_exit``.
    """
    if len(sys.argv) != 4:
        # Doubled braces are intentional: this string is passed to format().
        utils.error_exit(
            "Usage: {} use_additional_knowledge={{0|1}} delta dataset\n".
            format(sys.argv[0]))
    try:
        use_additional_knowledge = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not an integer\n".format(sys.argv[1]))
    try:
        delta = float(sys.argv[2])
    except ValueError:
        # A float is expected here, so report "not a number" rather than
        # "not an integer".
        utils.error_exit("{} is not a number\n".format(sys.argv[2]))

    ds_stats = getDatasetInfo.get_ds_stats(sys.argv[3])

    (eps_vc_dim, eps_emp_vc_dim,
     returned) = epsilon_dataset(delta, ds_stats, use_additional_knowledge)

    print("{} {}".format(eps_vc_dim, eps_emp_vc_dim))
    print("{}\t{}".format(min(eps_vc_dim, eps_emp_vc_dim), returned))
Esempio n. 5
0
def main():
    """Run ``get_trueFIs`` in the requested p-value mode and report the outcome.

    Command-line arguments (exactly six are required), in order:

    * ``use_additional_knowledge`` -- converted with ``int``.
    * ``delta`` -- converted with ``float``.
    * ``min_freq`` -- converted with ``float``.
    * ``mode`` -- ``c``, ``e`` or ``w`` in either case; upper-cased before
      use.
    * ``dataset`` -- passed to ``getDatasetInfo.get_ds_stats``.
    * ``results_filename`` -- must name an existing file.

    The resulting itemsets go to ``utils.print_itemsets``. stderr receives
    two ``key=value`` summary lines followed by a CSV header and a CSV row
    with the same values.

    Argument problems are reported through ``utils.error_exit``.
    """

    def parse_number(position, convert):
        """Return ``convert(sys.argv[position])``, reporting failures.

        NOTE(review): ``utils.error_exit`` presumably does not return; if it
        did, ``None`` would be returned from here.
        """
        raw = sys.argv[position]
        try:
            return convert(raw)
        except ValueError:
            utils.error_exit("{} is not a number\n".format(raw))

    # Verify arguments
    if len(sys.argv) != 7:
        # The second and third fragments are never passed to str.format, so
        # their braces are single. 'w' is listed because the validation
        # below accepts it.
        utils.error_exit(
            " ".join((
                "Usage: {}".format(os.path.basename(sys.argv[0])),
                "use_additional_knowledge={0|1} delta min_freq mode={c|e|w}",
                "dataset results_filename\n")))

    dataset = sys.argv[5]
    res_filename = sys.argv[6]
    if not os.path.isfile(res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(res_filename))

    pvalue_mode = sys.argv[4].upper()
    if pvalue_mode not in ("C", "E", "W"):
        # Report the argument as the user typed it.
        utils.error_exit(
            "p-value mode must be 'c', 'e', or 'w'. You passed {}\n".format(
                sys.argv[4]))

    use_additional_knowledge = parse_number(1, int)
    delta = parse_number(2, float)
    min_freq = parse_number(3, float)

    ds_stats = getDatasetInfo.get_ds_stats(dataset)

    (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq, delta,
                                   pvalue_mode, use_additional_knowledge)

    utils.print_itemsets(trueFIs, ds_stats['size'])

    res_basename = os.path.basename(res_filename)

    # key=value summary: parameters, then statistics.
    sys.stderr.write(
        ",".join(
            ("res_file={}".format(res_basename),
             "use_add_knowl={}".format(use_additional_knowledge),
             "pvalue_mode={}".format(pvalue_mode),
             "d={}".format(delta),
             "min_freq={}".format(min_freq),
             "trueFIs={}\n".format(len(trueFIs)))))
    sys.stderr.write(
        ",".join(
            ("union_bound_factor={}".format(stats['union_bound_factor']),
             "critical_value={}".format(stats['critical_value']),
             "removed={}".format(stats['removed']),
             "epsilon={}\n".format(stats['epsilon']))))

    # CSV header and matching row.
    sys.stderr.write(
        ",".join(
            ("res_file,add_knowl,pvalue_mode,delta,min_freq,trueFIs",
             "union_bound_factor,critical_value,removed,epsilon\n")))
    csv_row = (res_basename, use_additional_knowledge, pvalue_mode, delta,
               min_freq, len(trueFIs), stats['union_bound_factor'],
               stats['critical_value'], stats['removed'], stats['epsilon'])
    sys.stderr.write("{}\n".format(",".join(str(field) for field in csv_row)))
Esempio n. 6
0
def main():
    """Run ``get_trueFIs`` with a gap parameter and report the outcome.

    Command-line arguments (exactly six are required), in order:

    * ``use_additional_knowledge`` -- converted with ``int``.
    * ``delta`` -- converted with ``float``.
    * ``min_freq`` -- converted with ``float``.
    * ``gap`` -- converted with ``float``.
    * ``dataset`` -- passed to ``getDatasetInfo.get_ds_stats``.
    * ``results_filename`` -- ``~`` is expanded; must name an existing file.

    The resulting itemsets go to ``utils.print_itemsets``. stderr receives
    two ``key=value`` summary lines followed by a CSV header and a CSV row
    with the same values.

    Argument problems are reported through ``utils.error_exit``.
    """

    def parse_number(position, convert):
        """Return ``convert(sys.argv[position])``, reporting failures.

        NOTE(review): ``utils.error_exit`` presumably does not return; if it
        did, ``None`` would be returned from here.
        """
        raw = sys.argv[position]
        try:
            return convert(raw)
        except ValueError:
            utils.error_exit("{} is not a number\n".format(raw))

    if len(sys.argv) != 7:
        # Only the "USAGE" fragment is formatted; the other fragments are
        # emitted verbatim, so their braces must not be doubled.
        utils.error_exit(
            " ".join(
                ("USAGE: {}".format(os.path.basename(sys.argv[0])),
                 "use_additional_knowledge={0|1} delta min_freq gap dataset",
                 "results_filename\n")))

    dataset = sys.argv[5]
    res_filename = os.path.expanduser(sys.argv[6])
    if not os.path.isfile(res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(res_filename))

    use_additional_knowledge = parse_number(1, int)
    delta = parse_number(2, float)
    min_freq = parse_number(3, float)
    gap = parse_number(4, float)

    ds_stats = getDatasetInfo.get_ds_stats(dataset)

    (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq,
                                   delta, gap, use_additional_knowledge)

    utils.print_itemsets(trueFIs, ds_stats['size'])

    res_basename = os.path.basename(res_filename)

    # key=value summary: parameters, then statistics.
    sys.stderr.write(
        ",".join(
            ("res_file={}".format(res_basename),
             "use_add_knowl={}".format(use_additional_knowledge),
             "e1={},e2={}".format(stats['epsilon_1'], stats['epsilon_2']),
             "d={}".format(delta),
             "min_freq={},trueFIs={}\n".format(min_freq, len(trueFIs)))))
    sys.stderr.write(
        ",".join(
            ("base_set={}".format(stats['base_set']),
             "maximal_itemsets={}".format(stats['maximal_itemsets']),
             "negbor={}".format(stats['negative_border']),
             "emp_vc_dim={}".format(stats['emp_vc_dim']),
             "not_emp_vc_dim={}\n".format(stats['not_emp_vc_dim']))))

    # CSV header and matching row.
    sys.stderr.write(
        ",".join(
            ("res_file,add_knowl,e1,e2,delta,min_freq,trueFIs",
             "base_set,maximal_itemsets,negative_border,emp_vc_dim",
             "not_emp_vc_dim\n")))
    csv_row = (res_basename, use_additional_knowledge,
               stats['epsilon_1'], stats['epsilon_2'], delta,
               min_freq, len(trueFIs), stats['base_set'],
               stats['maximal_itemsets'], stats['negative_border'],
               stats['emp_vc_dim'], stats['not_emp_vc_dim'])
    sys.stderr.write("{}\n".format(",".join(str(field) for field in csv_row)))