def main(db_filename, cl_suppress, geo_suppress, suppress_out, k_val):
    cr = dbOpen(db_filename)
    yob_gentable = build_numeric_dict(cr, 'YoB_bins')
    forum_gentable = build_numeric_dict(cr, 'nforum_posts_bins')
    cgtable = get_pickled_table(geo_suppress)
    class_suppress = get_pickled_table(cl_suppress)
    prop_dict = make_list_dict(cr, yob_gentable, forum_gentable, cgtable, class_suppress)
    count_dict = make_count_dict(prop_dict)
    full_suppress_list = class_suppress
    suppress_total = len(class_suppress)
    print 'Number of suppressed records due to class identification is', suppress_total
    for i in range(1, k_val):
        count = 0
        if i not in count_dict:
            print 'No properties with only ', str(i), 'records'
            continue
        for id_pair in count_dict[i]:
            for e in prop_dict[id_pair]:
                full_suppress_list.add(e)
            count += len(prop_dict[id_pair])
            suppress_total += len(prop_dict[id_pair])
        print 'Suppress records for value ', str(i), 'is', str(count)
    print 'Total suppressed records is ', suppress_total
    outf = open(suppress_out, 'w')
    pickle.dump(full_suppress_list, outf)
    outf.close()
Esempio n. 2
0
def main(db_filename, cl_suppress, geo_suppress, suppress_out, k_val):
    cr = dbOpen(db_filename)
    yob_gentable = build_numeric_dict(cr, 'YoB_bins')
    forum_gentable = build_numeric_dict(cr, 'nforum_posts_bins')
    cgtable = get_pickled_table(geo_suppress)
    class_suppress = get_pickled_table(cl_suppress)
    prop_dict = make_list_dict(cr, yob_gentable, forum_gentable, cgtable,
                               class_suppress)
    count_dict = make_count_dict(prop_dict)
    full_suppress_list = class_suppress
    suppress_total = len(class_suppress)
    print 'Number of suppressed records due to class identification is', suppress_total
    for i in range(1, k_val):
        count = 0
        if i not in count_dict:
            print 'No properties with only ', str(i), 'records'
            continue
        for id_pair in count_dict[i]:
            for e in prop_dict[id_pair]:
                full_suppress_list.add(e)
            count += len(prop_dict[id_pair])
            suppress_total += len(prop_dict[id_pair])
        print 'Suppress records for value ', str(i), 'is', str(count)
    print 'Total suppressed records is ', suppress_total
    outf = open(suppress_out, 'w')
    pickle.dump(full_suppress_list, outf)
    outf.close()
Esempio n. 3
0
def main(db_filename, cl_suppress, geo_suppress, suppress_out, k_val):
    cr = dbOpen(db_filename)
    yob_gentable = build_numeric_dict(cr, "YoB_bins")
    forum_gentable = build_numeric_dict(cr, "nforum_posts_bins")
    cgtable = get_pickled_table(geo_suppress)
    class_suppress = get_pickled_table(cl_suppress)
    prop_dict = make_list_dict(cr, yob_gentable, forum_gentable, cgtable, class_suppress)
    full_suppress_list = list(class_suppress)
    suppress_total = len(class_suppress)
    print "Number of suppressed records due to class identification is", suppress_total
    for k, v in prop_dict.iteritems():
        if len(v) < k_val:
            full_suppress_list.extend(v)
    print "Total number of records suppressed =", str(len(full_suppress_list))
    outf = open(suppress_out, "w")
    full_suppress_set = set(full_suppress_list)
    pickle.dump(full_suppress_set, outf)
    outf.close()
Esempio n. 4
0
def main(db_filename, cl_suppress, geo_suppress, suppress_out, k_val):
    cr = dbOpen(db_filename)
    yob_gentable = build_numeric_dict(cr, 'YoB_bins')
    forum_gentable = build_numeric_dict(cr, 'nforum_posts_bins')
    cgtable = get_pickled_table(geo_suppress)
    class_suppress = get_pickled_table(cl_suppress)
    prop_dict = make_list_dict(cr, yob_gentable, forum_gentable, cgtable,
                               class_suppress)
    full_suppress_list = list(class_suppress)
    suppress_total = len(class_suppress)
    print 'Number of suppressed records due to class identification is', suppress_total
    for k, v in prop_dict.iteritems():
        if len(v) < k_val:
            full_suppress_list.extend(v)
    print 'Total number of records suppressed =', str(len(full_suppress_list))
    outf = open(suppress_out, 'w')
    full_suppress_set = set(full_suppress_list)
    pickle.dump(full_suppress_set, outf)
    outf.close()
Esempio n. 5
0
def main(dbfname, suppress_fname, outfname):
    """
    Driver for the program; creates a triple of dictionaries for percentage participation by class and writes them
    to a single csv file

    The dictionaries created are for participation in the course in the full data set, in the de-identified data set,
    and in the records that are suppressed by the de-identification. All three are written to the same csv file, in
    the order suppressed, non-suppressed, original.
    :param dbfname: The sqlite database containing the original data set
    :param suppress_fname: The file containing the set of suppressed records for de-identification
    :param outfname: The name of the file to create for the output. Note that if the file already exists, it will be
    overwritten
    :return: None
    """
    cr = dbOpen(dbfname)
    # try/finally so the database handle is released even if loading the
    # suppression set, the query, or writing the csv file fails.
    try:
        suppress_set = get_pickled_table(suppress_fname)

        select_str = 'Select course_id, user_id, registered, viewed, explored, certified from source'
        cr.execute(select_str)

        orig_dict = {}
        suppress_dict = {}
        anon_dict = {}
        for i in cr.fetchall():
            # Entries of the suppression set are looked up by course_id
            # concatenated with user_id; the dictionaries are keyed by
            # course_id alone.
            key = i[0] + i[1]
            ckey = i[0]
            # Every row counts towards the original totals, and towards exactly
            # one of the suppressed / non-suppressed totals.
            add_participation(orig_dict, ckey, i)
            if key in suppress_set:
                add_participation(suppress_dict, ckey, i)
            else:
                add_participation(anon_dict, ckey, i)

        # The context manager closes the output file even when a write raises.
        with open(outfname, 'w') as outf:
            csout = csv.writer(outf)
            csout.writerow(['Course ID', '% registered', '% viewed', '% explored', '% certified'])
            csout.writerow(['Suppressed records'])
            write_dictionary(csout, suppress_dict)
            csout.writerow(['Non-suppressed records'])
            write_dictionary(csout, anon_dict)
            csout.writerow(['Original records'])
            write_dictionary(csout, orig_dict)
    finally:
        cr.close()