Example #1
0
def main():
    """Validate the command line, call ``get_trueFIs`` and report the result.

    Positional arguments, in order: ``do_filter`` (int), ``delta`` (float),
    ``min_freq`` (float), ``pvalue_mode`` (``c``, ``e`` or ``w``, matched
    case-insensitively), the exploratory results file and the evaluation
    results file. Both files must already exist.

    Validation order is: argument count, the two files, the p-value mode and
    finally the numeric arguments. Every failure is reported through
    ``utils.error_exit``.

    On success the itemsets returned by ``get_trueFIs`` are printed with
    ``utils.print_itemsets``. Run statistics are then written to standard
    error: first as ``key=value`` lines, then as a CSV header followed by a
    single CSV row holding the same values.
    """
    # Verify arguments
    if len(sys.argv) != 7:
        # The usage text advertises 'w' too, because the mode check below
        # accepts it.
        usage = ("Usage: {} do_filter={{0|numitems}} delta min_freq "
                 "pvalue_mode={{e|c|w}} exploreres evalres\n")
        utils.error_exit(usage.format(os.path.basename(sys.argv[0])))
    exp_res_filename = sys.argv[5]
    if not os.path.isfile(exp_res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(exp_res_filename))
    eval_res_filename = sys.argv[6]
    if not os.path.isfile(eval_res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(eval_res_filename))
    pvalue_mode = sys.argv[4].upper()
    if pvalue_mode not in ("C", "E", "W"):
        # Echo the argument exactly as it was typed rather than its
        # upper-cased form, so the message matches what the user passed.
        utils.error_exit(
            "p-value mode must be 'c', 'e', or 'w'. You passed {}\n".format(
                sys.argv[4]))
    try:
        do_filter = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))
    try:
        delta = float(sys.argv[2])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[2]))
    try:
        min_freq = float(sys.argv[3])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[3]))

    (trueFIs, stats) = get_trueFIs(
        exp_res_filename, eval_res_filename, min_freq, delta, pvalue_mode,
        do_filter)

    utils.print_itemsets(trueFIs, stats['orig_size'])

    # Human-readable key=value summary, one group of statistics per line.
    sys.stderr.write(
        ("exp_res_file={},eval_res_file={},do_filter={},pvalue_mode={},"
         "d={},min_freq={},trueFIs={}\n").format(
            os.path.basename(exp_res_filename),
            os.path.basename(eval_res_filename), do_filter, pvalue_mode,
            delta, min_freq, len(trueFIs)))
    sys.stderr.write("orig_size={},exp_size={},eval_size={}\n".format(
        stats['orig_size'], stats['exp_size'], stats['eval_size']))
    sys.stderr.write("exp_res={},exp_res_filtered={},eval_res={}\n".format(
        stats['exp_res'], stats['exp_res_filtered'], stats['eval_res']))
    sys.stderr.write(
        "filter_critical_value={},filter_epsilon={},tfis_from_exp={}\n".format(
            stats['filter_critical_value'], stats['filter_epsilon'],
            stats['tfis_from_exp']))
    sys.stderr.write(
        "holdout_intersection={},holdout_false_negatives={}\n".format(
            stats['holdout_intersection'], stats['holdout_false_negatives']))
    sys.stderr.write("critical_value={},removed={},epsilon={}\n".format(
        stats['critical_value'], stats['removed'], stats['epsilon']))

    # Same values again as a CSV header plus one CSV row.
    sys.stderr.write(
        "exp_res_file,eval_res_file,do_filter,pvalue_mode,delta,min_freq,"
        "trueFIs,orig_size,exp_size,eval_size,exp_res,exp_res_filtered,"
        "eval_res,filter_critical_value,filter_epsilon,tfis_from_exp,"
        "holdout_intersection,holdout_false_negatives,critical_value,"
        "removed,epsilon\n")
    sys.stderr.write("{}\n".format(",".join(
        str(i) for i in (
            os.path.basename(exp_res_filename),
            os.path.basename(eval_res_filename),
            do_filter, pvalue_mode, delta, min_freq, len(trueFIs),
            stats['orig_size'], stats['exp_size'], stats['eval_size'],
            stats['exp_res'], stats['exp_res_filtered'], stats['eval_res'],
            stats['filter_critical_value'], stats['filter_epsilon'],
            stats['tfis_from_exp'], stats['holdout_intersection'],
            stats['holdout_false_negatives'], stats['critical_value'],
            stats['removed'], stats['epsilon']))))
Example #2
0
def main():
    """Validate the command line, call ``get_trueFIs`` and report the result.

    Positional arguments, in order: ``use_additional_knowledge`` (int),
    ``delta`` (float), ``min_freq`` (float), ``gap`` (float), the dataset
    (handed to ``getDatasetInfo.get_ds_stats``) and the results file. The
    results file name is passed through ``os.path.expanduser`` and must
    exist.

    Every validation failure is reported through ``utils.error_exit``.

    On success the itemsets returned by ``get_trueFIs`` are printed with
    ``utils.print_itemsets``. Run statistics are then written to standard
    error: two ``key=value`` lines, a CSV header and a single CSV row.
    """
    if len(sys.argv) != 7:
        # Only the first piece goes through str.format; the others are
        # joined verbatim, so their braces must be single. Doubled braces
        # would be shown to the user literally.
        utils.error_exit(" ".join(
            ("USAGE: {}".format(os.path.basename(sys.argv[0])),
             "use_additional_knowledge={0|1} delta min_freq gap dataset",
             "results_filename\n")))
    dataset = sys.argv[5]
    res_filename = os.path.expanduser(sys.argv[6])
    if not os.path.isfile(res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(res_filename))
    try:
        use_additional_knowledge = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))
    try:
        delta = float(sys.argv[2])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[2]))
    try:
        min_freq = float(sys.argv[3])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[3]))
    try:
        gap = float(sys.argv[4])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[4]))

    ds_stats = getDatasetInfo.get_ds_stats(dataset)

    (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq, delta,
                                   gap, use_additional_knowledge)

    utils.print_itemsets(trueFIs, ds_stats['size'])

    # Human-readable key=value summary.
    sys.stderr.write(",".join(
        ("res_file={}".format(os.path.basename(res_filename)),
         "use_add_knowl={}".format(use_additional_knowledge),
         "e1={},e2={}".format(stats['epsilon_1'], stats['epsilon_2']),
         "d={}".format(delta),
         "min_freq={},trueFIs={}\n".format(min_freq, len(trueFIs)))))
    sys.stderr.write(",".join(
        ("base_set={}".format(stats['base_set']),
         "maximal_itemsets={}".format(stats['maximal_itemsets']),
         "negbor={}".format(stats['negative_border']),
         "emp_vc_dim={}".format(stats['emp_vc_dim']),
         "not_emp_vc_dim={}\n".format(stats['not_emp_vc_dim']))))

    # Same values again as a CSV header plus one CSV row.
    sys.stderr.write(",".join(
        ("res_file,add_knowl,e1,e2,delta,min_freq,trueFIs",
         "base_set,maximal_itemsets,negative_border,emp_vc_dim",
         "not_emp_vc_dim\n")))
    sys.stderr.write("{}\n".format(",".join(
        str(i)
        for i in (os.path.basename(res_filename), use_additional_knowledge,
                  stats['epsilon_1'], stats['epsilon_2'], delta, min_freq,
                  len(trueFIs), stats['base_set'], stats['maximal_itemsets'],
                  stats['negative_border'], stats['emp_vc_dim'],
                  stats['not_emp_vc_dim']))))
Example #3
0
def main():
    """Validate the command line, call ``get_trueFIs`` and report the result.

    Positional arguments, in order: ``use_additional_knowledge`` (int),
    ``delta`` (float), ``min_freq`` (float), the p-value mode (``c``, ``e``
    or ``w``, matched case-insensitively), the dataset (handed to
    ``getDatasetInfo.get_ds_stats``) and the results file, which must exist.

    Validation order is: argument count, results file, p-value mode and then
    the numeric arguments. Every failure is reported through
    ``utils.error_exit``.

    On success the itemsets returned by ``get_trueFIs`` are printed with
    ``utils.print_itemsets``. Run statistics are then written to standard
    error: two ``key=value`` lines, a CSV header and a single CSV row.
    """
    # Verify arguments
    if len(sys.argv) != 7:
        # Only the first piece goes through str.format; the others are
        # joined verbatim, so their braces must be single. 'w' is listed
        # because the mode check below accepts it.
        utils.error_exit(" ".join(
            ("Usage: {}".format(os.path.basename(sys.argv[0])),
             "use_additional_knowledge={0|1} delta min_freq mode={c|e|w}",
             "dataset results_filename\n")))
    dataset = sys.argv[5]
    res_filename = sys.argv[6]
    if not os.path.isfile(res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(res_filename))
    pvalue_mode = sys.argv[4].upper()
    if pvalue_mode not in ("C", "E", "W"):
        # Echo the argument exactly as it was typed rather than its
        # upper-cased form, so the message matches what the user passed.
        utils.error_exit(
            "p-value mode must be 'c', 'e', or 'w'. You passed {}\n".format(
                sys.argv[4]))
    try:
        use_additional_knowledge = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))
    try:
        delta = float(sys.argv[2])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[2]))
    try:
        min_freq = float(sys.argv[3])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[3]))

    ds_stats = getDatasetInfo.get_ds_stats(dataset)

    (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq, delta,
                                   pvalue_mode, use_additional_knowledge)

    utils.print_itemsets(trueFIs, ds_stats['size'])

    # Human-readable key=value summary.
    sys.stderr.write(",".join(
        ("res_file={}".format(os.path.basename(res_filename)),
         "use_add_knowl={}".format(use_additional_knowledge),
         "pvalue_mode={}".format(pvalue_mode), "d={}".format(delta),
         "min_freq={}".format(min_freq), "trueFIs={}\n".format(len(trueFIs)))))
    sys.stderr.write(",".join(
        ("union_bound_factor={}".format(stats['union_bound_factor']),
         "critical_value={}".format(stats['critical_value']),
         "removed={}".format(stats['removed']),
         "epsilon={}\n".format(stats['epsilon']))))

    # Same values again as a CSV header plus one CSV row.
    sys.stderr.write(",".join(
        ("res_file,add_knowl,pvalue_mode,delta,min_freq,trueFIs",
         "union_bound_factor,critical_value,removed,epsilon\n")))
    sys.stderr.write("{}\n".format(",".join(
        str(i)
        for i in (os.path.basename(res_filename), use_additional_knowledge,
                  pvalue_mode, delta, min_freq, len(trueFIs),
                  stats['union_bound_factor'], stats['critical_value'],
                  stats['removed'], stats['epsilon']))))
Example #4
0
def main():
    """Command-line driver: check the arguments, run ``get_trueFIs``, report.

    Seven positional arguments are expected: ``vcdim`` (int),
    ``first_epsilon``, ``delta``, ``min_freq`` and ``gap`` (floats), followed
    by the exploratory and evaluation results files, both of which must
    exist. Problems are reported through ``utils.error_exit``.

    The resulting itemsets go to ``utils.print_itemsets``; the statistics go
    to standard error as ``key=value`` lines, then a CSV header and one CSV
    row.
    """
    # Verify arguments
    if len(sys.argv) != 8:
        usage = ("Usage: {} vcdim first_epsilon delta min_freq gap "
                 "exploreres evalres\n")
        utils.error_exit(usage.format(os.path.basename(sys.argv[0])))

    # Both result files are checked before any number is parsed.
    exp_res_filename = sys.argv[6]
    if not os.path.isfile(exp_res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(exp_res_filename))
    eval_res_filename = sys.argv[7]
    if not os.path.isfile(eval_res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(eval_res_filename))

    # Convert argv[1..5] in order; the first is an int, the rest floats.
    numbers = []
    for position, convert in ((1, int), (2, float), (3, float), (4, float),
                              (5, float)):
        text = sys.argv[position]
        try:
            numbers.append(convert(text))
        except ValueError:
            utils.error_exit("{} is not a number\n".format(text))
    vcdim, first_epsilon, delta, min_freq, gap = numbers

    trueFIs, stats = get_trueFIs(exp_res_filename, eval_res_filename,
                                 min_freq, delta, gap, first_epsilon, vcdim)

    utils.print_itemsets(trueFIs, stats['orig_size'])

    # key=value summary lines
    sys.stderr.write(
        "exp_res_file={},eval_res_file={},d={},min_freq={},trueFIs={}\n".format(
            os.path.basename(exp_res_filename),
            os.path.basename(eval_res_filename),
            delta, min_freq, len(trueFIs)))
    sys.stderr.write("orig_size={},exp_size={},eval_size={}\n".format(
        stats['orig_size'], stats['exp_size'], stats['eval_size']))
    sys.stderr.write("exp_res={},exp_res_filtered={},eval_res={}\n".format(
        stats['exp_res'], stats['exp_res_filtered'], stats['eval_res']))
    sys.stderr.write(
        ("holdout_intersection={},holdout_false_positives={},"
         "holdout_false_negatives={},holdout_jaccard={}\n").format(
            stats['holdout_intersection'],
            stats['holdout_false_positives'],
            stats['holdout_false_negatives'],
            stats['holdout_jaccard']))
    sys.stderr.write("e1={},e2={},vcdim={}\n".format(
        stats['epsilon_1'], stats['epsilon_2'], stats['vcdim']))

    # CSV header followed by the matching CSV row
    sys.stderr.write(
        "exp_res_file,eval_res_file,delta,min_freq,trueFIs,"
        "orig_size,exp_size,eval_size,exp_res,eval_res,"
        "holdout_intersection,holdout_false_positives,"
        "holdout_false_negatives,holdout_jaccard,e1,e2,vcdim\n")
    row = (os.path.basename(exp_res_filename),
           os.path.basename(eval_res_filename), delta, min_freq,
           len(trueFIs), stats['orig_size'], stats['exp_size'],
           stats['eval_size'], stats['exp_res'], stats['eval_res'],
           stats['holdout_intersection'],
           stats['holdout_false_positives'],
           stats['holdout_false_negatives'], stats['holdout_jaccard'],
           stats['epsilon_1'], stats['epsilon_2'], stats['vcdim'])
    sys.stderr.write(",".join(map(str, row)) + "\n")
Example #5
0
def main():
    """Validate the command line, call ``get_trueFIs`` and report the result.

    Positional arguments, in order: ``use_additional_knowledge`` (int),
    ``delta`` (float), ``min_freq`` (float), the p-value mode (``c``, ``e``
    or ``w``, matched case-insensitively), the dataset (handed to
    ``getDatasetInfo.get_ds_stats``) and the results file, which must exist.

    Validation order is: argument count, results file, p-value mode and then
    the numeric arguments. Every failure is reported through
    ``utils.error_exit``.

    On success the itemsets returned by ``get_trueFIs`` are printed with
    ``utils.print_itemsets``. Run statistics are then written to standard
    error: two ``key=value`` lines, a CSV header and a single CSV row.
    """
    # Verify arguments
    if len(sys.argv) != 7:
        # The whole message is one format string, so the doubled braces
        # collapse to single ones in the output. 'w' is listed because the
        # mode check below accepts it.
        usage = ("Usage: {} use_additional_knowledge={{0|1}} delta min_freq "
                 "mode={{c|e|w}} dataset results_filename\n")
        utils.error_exit(usage.format(os.path.basename(sys.argv[0])))
    dataset = sys.argv[5]
    res_filename = sys.argv[6]
    if not os.path.isfile(res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(res_filename))
    pvalue_mode = sys.argv[4].upper()
    if pvalue_mode not in ("C", "E", "W"):
        # Echo the argument exactly as it was typed rather than its
        # upper-cased form, so the message matches what the user passed.
        utils.error_exit(
            "p-value mode must be 'c', 'e', or 'w'. You passed {}\n".format(
                sys.argv[4]))
    try:
        use_additional_knowledge = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))
    try:
        delta = float(sys.argv[2])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[2]))
    try:
        min_freq = float(sys.argv[3])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[3]))

    ds_stats = getDatasetInfo.get_ds_stats(dataset)

    (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq, delta,
                                   pvalue_mode, use_additional_knowledge)

    utils.print_itemsets(trueFIs, ds_stats['size'])

    # Human-readable key=value summary.
    sys.stderr.write(
        ",".join(
            ("res_file={}".format(os.path.basename(res_filename)),
             "use_add_knowl={}".format(use_additional_knowledge),
             "pvalue_mode={}".format(pvalue_mode), "d={}".format(delta),
             "min_freq={}".format(min_freq),
             "trueFIs={}\n".format(len(trueFIs)))))
    sys.stderr.write(
        ",".join(
            ("union_bound_factor={}".format(stats['union_bound_factor']),
             "critical_value={}".format(stats['critical_value']),
             "removed={}".format(stats['removed']),
             "epsilon={}\n".format(stats['epsilon']))))

    # Same values again as a CSV header plus one CSV row.
    sys.stderr.write(
        ",".join(
            ("res_file,add_knowl,pvalue_mode,delta,min_freq,trueFIs",
             "union_bound_factor,critical_value,removed,epsilon\n")))
    sys.stderr.write("{}\n".format(
        ",".join(
            str(i) for i in (
                os.path.basename(res_filename), use_additional_knowledge,
                pvalue_mode, delta, min_freq, len(trueFIs),
                stats['union_bound_factor'], stats['critical_value'],
                stats['removed'], stats['epsilon']))))
Example #6
0
def main():
    """Validate the command line, call ``get_trueFIs`` and report the result.

    Positional arguments, in order: ``use_additional_knowledge`` (int),
    ``delta`` (float), ``min_freq`` (float), ``gap`` (float), the dataset
    (handed to ``getDatasetInfo.get_ds_stats``) and the results file. The
    results file name is passed through ``os.path.expanduser`` and must
    exist.

    Every validation failure is reported through ``utils.error_exit``.

    On success the itemsets returned by ``get_trueFIs`` are printed with
    ``utils.print_itemsets``. Run statistics are then written to standard
    error: two ``key=value`` lines, a CSV header and a single CSV row.
    """
    if len(sys.argv) != 7:
        # The whole message is one format string, so the doubled braces
        # collapse to single ones in the output. Previously the brace-bearing
        # piece bypassed str.format and "{{0|1}}" was shown literally.
        usage = ("USAGE: {} use_additional_knowledge={{0|1}} delta min_freq "
                 "gap dataset results_filename\n")
        utils.error_exit(usage.format(os.path.basename(sys.argv[0])))
    dataset = sys.argv[5]
    res_filename = os.path.expanduser(sys.argv[6])
    if not os.path.isfile(res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(res_filename))
    try:
        use_additional_knowledge = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))
    try:
        delta = float(sys.argv[2])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[2]))
    try:
        min_freq = float(sys.argv[3])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[3]))
    try:
        gap = float(sys.argv[4])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[4]))

    ds_stats = getDatasetInfo.get_ds_stats(dataset)

    (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq,
                                   delta, gap, use_additional_knowledge)

    utils.print_itemsets(trueFIs, ds_stats['size'])

    # Human-readable key=value summary.
    sys.stderr.write(
        ",".join(
            ("res_file={}".format(os.path.basename(res_filename)),
             "use_add_knowl={}".format(use_additional_knowledge),
             "e1={},e2={}".format(stats['epsilon_1'], stats['epsilon_2']),
             "d={}".format(delta),
             "min_freq={},trueFIs={}\n".format(min_freq, len(trueFIs)))))
    sys.stderr.write(
        ",".join(
            ("base_set={}".format(stats['base_set']),
             "maximal_itemsets={}".format(stats['maximal_itemsets']),
             "negbor={}".format(stats['negative_border']),
             "emp_vc_dim={}".format(stats['emp_vc_dim']),
             "not_emp_vc_dim={}\n".format(stats['not_emp_vc_dim']))))

    # Same values again as a CSV header plus one CSV row.
    sys.stderr.write(
        ",".join(
            ("res_file,add_knowl,e1,e2,delta,min_freq,trueFIs",
             "base_set,maximal_itemsets,negative_border,emp_vc_dim",
             "not_emp_vc_dim\n")))
    sys.stderr.write("{}\n".format(
        ",".join(str(i) for i in (
            os.path.basename(res_filename), use_additional_knowledge,
            stats['epsilon_1'], stats['epsilon_2'], delta,
            min_freq, len(trueFIs), stats['base_set'],
            stats['maximal_itemsets'], stats['negative_border'],
            stats['emp_vc_dim'], stats['not_emp_vc_dim']))))
Example #7
0
def main():
    """Script entry point: validate argv, run ``get_trueFIs``, emit results.

    Takes seven positional arguments: ``vcdim`` (int), ``first_epsilon``,
    ``delta``, ``min_freq`` and ``gap`` (floats), then the exploratory and
    evaluation results files, which must both exist. Invalid input is
    reported through ``utils.error_exit``.

    The itemsets are printed with ``utils.print_itemsets``. Statistics are
    written to standard error as ``key=value`` lines followed by a CSV header
    and a single CSV row.
    """

    def _numeric_arg(position, convert):
        # Convert one command-line argument, reporting it if it is malformed.
        raw = sys.argv[position]
        try:
            return convert(raw)
        except ValueError:
            utils.error_exit("{} is not a number\n".format(raw))

    def _kv_line(pairs):
        # Render (label, value) pairs as "label=value,...\n".
        return ",".join("{}={}".format(label, value) for label, value in pairs) + "\n"

    # Verify arguments
    if len(sys.argv) != 8:
        program = os.path.basename(sys.argv[0])
        utils.error_exit(
            "Usage: {} vcdim first_epsilon delta min_freq gap exploreres evalres\n".format(program)
        )

    not_a_file = "{} does not exist, or is not a file\n"
    exp_res_filename = sys.argv[6]
    if not os.path.isfile(exp_res_filename):
        utils.error_exit(not_a_file.format(exp_res_filename))
    eval_res_filename = sys.argv[7]
    if not os.path.isfile(eval_res_filename):
        utils.error_exit(not_a_file.format(eval_res_filename))

    vcdim = _numeric_arg(1, int)
    first_epsilon = _numeric_arg(2, float)
    delta = _numeric_arg(3, float)
    min_freq = _numeric_arg(4, float)
    gap = _numeric_arg(5, float)

    trueFIs, stats = get_trueFIs(exp_res_filename, eval_res_filename, min_freq, delta, gap, first_epsilon, vcdim)

    utils.print_itemsets(trueFIs, stats["orig_size"])

    # key=value summary, one group of statistics per line
    sys.stderr.write(
        _kv_line(
            (
                ("exp_res_file", os.path.basename(exp_res_filename)),
                ("eval_res_file", os.path.basename(eval_res_filename)),
                ("d", delta),
                ("min_freq", min_freq),
                ("trueFIs", len(trueFIs)),
            )
        )
    )
    sys.stderr.write(
        _kv_line(
            (
                ("orig_size", stats["orig_size"]),
                ("exp_size", stats["exp_size"]),
                ("eval_size", stats["eval_size"]),
            )
        )
    )
    sys.stderr.write(
        _kv_line(
            (
                ("exp_res", stats["exp_res"]),
                ("exp_res_filtered", stats["exp_res_filtered"]),
                ("eval_res", stats["eval_res"]),
            )
        )
    )
    sys.stderr.write(
        _kv_line(
            (
                ("holdout_intersection", stats["holdout_intersection"]),
                ("holdout_false_positives", stats["holdout_false_positives"]),
                ("holdout_false_negatives", stats["holdout_false_negatives"]),
                ("holdout_jaccard", stats["holdout_jaccard"]),
            )
        )
    )
    sys.stderr.write(
        _kv_line(
            (
                ("e1", stats["epsilon_1"]),
                ("e2", stats["epsilon_2"]),
                ("vcdim", stats["vcdim"]),
            )
        )
    )

    # CSV header, then the matching row of values
    columns = (
        "exp_res_file",
        "eval_res_file",
        "delta",
        "min_freq",
        "trueFIs",
        "orig_size",
        "exp_size",
        "eval_size",
        "exp_res",
        "eval_res",
        "holdout_intersection",
        "holdout_false_positives",
        "holdout_false_negatives",
        "holdout_jaccard",
        "e1",
        "e2",
        "vcdim",
    )
    sys.stderr.write(",".join(columns) + "\n")
    values = (
        os.path.basename(exp_res_filename),
        os.path.basename(eval_res_filename),
        delta,
        min_freq,
        len(trueFIs),
        stats["orig_size"],
        stats["exp_size"],
        stats["eval_size"],
        stats["exp_res"],
        stats["eval_res"],
        stats["holdout_intersection"],
        stats["holdout_false_positives"],
        stats["holdout_false_negatives"],
        stats["holdout_jaccard"],
        stats["epsilon_1"],
        stats["epsilon_2"],
        stats["vcdim"],
    )
    sys.stderr.write(",".join(map(str, values)) + "\n")