Example #1
0
def get_counts_augment_R1_all(R1_name, type_of_tuples):
    aug_q = []
    for i in type_of_tuples:
        l = [i[0], [i[1]], [i[2]]]
        l.extend([-1, -1])
        aug_q.append(l)
    
    aug_counts = []
    try:
        connect_str = conn_str.get_conn_str()
        conn = psycopg2.connect(connect_str)
        cursor = conn.cursor()

        for tup in aug_q:
            q = "SELECT COUNT(*) FROM " + R1_name + " WHERE AGEP>=" + str(tup[0][0]) + " AND AGEP<=" + str(tup[0][1]) +\
                " AND RELP=" + str(tup[1][0]) + " AND SEX=" + str(tup[2][0]) + ";"
            cursor.execute(q)
            aug_counts.append(cursor.fetchone()[0])

        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?\n")
        print(e)
    return aug_q, aug_counts
def update_V1_root(S_CC, T, r):
    try:
        connect_str = conn_str.get_conn_str()
        conn = psycopg2.connect(connect_str)
        cursor = conn.cursor()

        cc = S_CC[r]
        b = T.nodes[r]['target']
        b_sum_children = 0

        query = "WITH num_tuples_per_x AS (SELECT * FROM V1 WHERE PUMA10 = -1 AND TEN = -1"
        for i in range(num_R1_non_key_attrs):
            if cc[i] != -1:
                attr = attr_names[i]
                if i == 0:
                    query += " AND " + attr + " >= " + str(
                        cc[i][0]) + " AND " + attr + " <= " + str(cc[i][1])
                else:
                    query += " AND " + attr + " = " + str(cc[i][0])
        # When not at the leaf node, must update the SELECT condition
        for child in T.successors(
                r
        ):  # Path graph == 1 successor; OTHERWISE CC at root in subtree
            cc_child = S_CC[child]

            query += " AND NOT("
            for i in range(num_R1_non_key_attrs):
                if cc_child[i] != -1:
                    attr = attr_names[i]
                    if i == 0:
                        query += attr + " >= " + str(
                            cc_child[i][0]) + " AND " + attr + " <= " + str(
                                cc_child[i][1]) + " AND "
                    else:
                        query += attr + " = " + str(cc_child[i][0]) + " AND "
            query = query[:-5] + ")"

            b_sum_children += T.nodes[child]['target']
        # Must use difference between targets of the current CC and it's successor CC in T
        query += " LIMIT " + str(b - b_sum_children) + ") UPDATE V1 SET "
        for j in range(num_R2_non_key_attrs):
            if cc[num_R1_non_key_attrs + j] != -1:
                query += attr_names[num_R1_non_key_attrs + j] + " = " + str(
                    cc[num_R1_non_key_attrs + j][0]) + ", "
        query = query[:-2] + " FROM num_tuples_per_x WHERE V1.p_id=num_tuples_per_x.p_id"

        print("\t\t", query)
        cursor.execute(query)

        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?\n")
        print(e)
    return
def get_counts_augment_R1_3way(S_CC, agep_intervals_used_in_S_CC,
                               num_R1_non_key_attrs, R1_name):
    aug_q = []
    aug_counts = []

    # for constraint in S_CC: use constraint[:3] to track combos of AGEP-RELP-SEX values to use when augmenting S_CC
    combos = []
    for constraint in S_CC:
        k = constraint[:num_R1_non_key_attrs]
        if k not in combos:
            combos.append(k)

            agep_vals = [k[0]]
            if k[0] == -1:
                agep_vals = agep_intervals_used_in_S_CC  # [[l_1, u_1], [l_2, u_2], ...]

            relp_vals = k[1]
            if k[1] == -1:
                relp_vals = attr_intervalization[1]

            sex_vals = k[2]
            if k[2] == -1:
                sex_vals = attr_intervalization[2]

            print("\n%s" % (k))
            for a1 in agep_vals:
                for a2 in relp_vals:
                    for a3 in sex_vals:
                        q = [a1, [a2], [a3], -1, -1]
                        if q not in aug_q:
                            aug_q.append(q)
                            print("\t%s" % (aug_q[-1]))
    try:
        connect_str = conn_str.get_conn_str()
        conn = psycopg2.connect(connect_str)
        cursor = conn.cursor()

        for tup in aug_q:
            q = "SELECT COUNT(*) FROM " + R1_name + " WHERE AGEP>=" + str(tup[0][0]) + " AND AGEP<=" + str(tup[0][1]) +\
                " AND RELP=" + str(tup[1][0]) + " AND SEX=" + str(tup[2][0]) + ";"
            cursor.execute(q)
            aug_counts.append(cursor.fetchone()[0])

        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?\n")
        print(e)
    return aug_q, aug_counts
Example #4
0
def get_existing_tuple_types(R1_name):
    type_of_tuples = []
    try:
        connect_str = conn_str.get_conn_str()
        conn = psycopg2.connect(connect_str)
        cursor = conn.cursor()

        cursor.execute("SELECT AGEP, RELP, SEX FROM " + R1_name + " GROUP BY AGEP, RELP, SEX ORDER BY SEX, AGEP, RELP")
        distinct_tuples = cursor.fetchall()

        for i in distinct_tuples:
            type_of_tuples.append([int(i[0]), int(i[1]), int(i[2])])

        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?\n")
        print(e)
    return type_of_tuples
def update_V1_all_disjoint(S_CC, T):
    try:
        connect_str = conn_str.get_conn_str()
        conn = psycopg2.connect(connect_str)
        cursor = conn.cursor()

        for vx in T:
            cc = S_CC[vx]
            b = T.nodes[vx]['target']

            query = "WITH num_tuples_per_x AS (SELECT * FROM V1 WHERE PUMA10 = -1 AND TEN = -1"
            for i in range(num_R1_non_key_attrs):
                if cc[i] != -1:
                    attr = attr_names[i]
                    if i == 0:
                        query += " AND " + attr + " >= " + str(
                            cc[i][0]) + " AND " + attr + " <= " + str(cc[i][1])
                    else:
                        query += " AND " + attr + " = " + str(cc[i][0])
            query += " LIMIT " + str(b) + ") UPDATE V1 SET "
            for j in range(num_R2_non_key_attrs):
                if cc[i + 1 + j] != -1:
                    query += attr_names[i + 1 + j] + " = " + str(
                        cc[i + 1 + j][0]) + ", "
            query = query[:-2] + " FROM num_tuples_per_x WHERE V1.p_id=num_tuples_per_x.p_id"

            print("\t\t", query)
            cursor.execute(query)

        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?\n")
        print(e)
    return
def main(CC_q_filename, V1_CC_filename, output_filename):
    ''' ----------------------------------------------- List vars ------------------------------------------------ '''
    S_CC = []
    b_S_CC = []
    total = 0
    num_CCs = 0

    disjoint_CC = {}
    intersect_with = {}
    T = nx.DiGraph()

    intersecting_S_CC = []
    intersecting_b_S_CC = []

    edges_to_remove = []

    used_tens = []  # TEN vals in S_CC
    used_tens_without_puma = []
    tens_onlyWithPUMA10_or_unused = []

    used_pumas = []  # PUMA10 vals in S_CC
    used_pumas_without_ten = []
    pumas_onlyWithTEN_or_unused = []

    ten_puma_combos_in_CCs = []  # TEN-PUMA10 combinations in S_CC
    ten_dict = {}  # Combinations "indexed" by TEN vals
    puma_dict = {}  # Combinations "indexed" by PUMA10 vals

    unused_combos_in_CCs_by_puma = [
    ]  # Candidate PUMA10 vals for tuples missing assignment after lp.py

    err = -1

    l_ppl_in_diff_puma_ten = {}
    l_hhs_in_diff_puma_ten = {}

    io_stats = open(output_filename, "a+")
    io_stats.write(
        "-------------------------------------------------------------------------------------------------------------------------------------\n"
    )
    io_stats.write("Create V2 ---- start time %s\n" %
                   (datetime.datetime.now()))
    try:
        connect_str = conn_str.get_conn_str()
        conn = psycopg2.connect(connect_str)
        cursor = conn.cursor()

        cursor.execute("DROP TABLE IF EXISTS V2 CASCADE")
        cursor.execute("CREATE TABLE V2 AS SELECT h_id, PUMA10, TEN FROM " +
                       h_table)

        cursor.execute("DROP TABLE IF EXISTS V1 CASCADE")
        cursor.execute("CREATE TABLE V1 AS SELECT * FROM " + p_table)
        cursor.execute("ALTER TABLE V1 ADD COLUMN PUMA10 INTEGER DEFAULT -1")
        cursor.execute("ALTER TABLE V1 ADD COLUMN TEN INTEGER DEFAULT -1")

        # Also, create the final relation, i.e., p_with_hid(p_id, AGEP, RELP, SEX, h_id)
        cursor.execute("DROP TABLE IF EXISTS p_with_hid CASCADE")
        cursor.execute("CREATE TABLE p_with_hid AS SELECT * FROM " + p_table)
        cursor.execute(
            "ALTER TABLE p_with_hid ADD COLUMN h_id INTEGER DEFAULT -1")

        cursor.execute("SELECT COUNT(*) FROM " + h_table)
        n_hh = cursor.fetchone()[0]
        cursor.execute("SELECT COUNT(*) FROM " + p_table)
        n_ppl = cursor.fetchone()[0]
        lp.save_num_HHs_ppl(n_hh, n_ppl)
        '''cursor.execute("SELECT array(SELECT DISTINCT TEN FROM " + h_table + " ORDER BY TEN)")
		active_tens = list(cursor.fetchone()[0])
		
		cursor.execute("SELECT array(SELECT DISTINCT PUMA10 FROM " + h_table + " ORDER BY PUMA10)")
		active_pumas = list(cursor.fetchone()[0])'''

        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?\n")
        print(e)
    io_stats.write("Create V2 ---- end time %s\n" % (datetime.datetime.now()))
    io_stats.write(
        "----------------------------------------------------------------------------\n\n"
    )
    ''' --------------------------------------------------------------------------------------------------------------
	                                					Read S_CC
	-------------------------------------------------------------------------------------------------------------- '''
    V1_CCs = open(V1_CC_filename, "r")

    for line in V1_CCs:
        k_v = line.rstrip().split(":")
        total += int(k_v[1])
        l = k_v[0].split(",")

        constraint_l = []
        t_p_combo = []
        for i in range(5):
            if l[i] == '-1':
                constraint_l.append(-1)
                if i == 3 or i == 4:
                    t_p_combo.append(-1)
            else:
                l[i] = l[i][1:-1].split("-")  # Remove "[", "]"
                l[i] = [int(j) for j in l[i]]
                constraint_l.append(l[i])
                # Populate used_tens and used_pumas
                if i == 3:
                    if l[i][0] not in used_tens:
                        used_tens.append(l[i][0])
                    t_p_combo.append(l[i][0])
                elif i == 4:
                    if l[i][0] not in used_pumas:
                        used_pumas.append(l[i][0])
                    t_p_combo.append(l[i][0])
        if -1 not in t_p_combo and t_p_combo not in ten_puma_combos_in_CCs:
            ten_puma_combos_in_CCs.append(t_p_combo)
        else:
            ten = t_p_combo[0]
            puma = t_p_combo[1]
            if ten != -1 and ten not in used_tens_without_puma:
                used_tens_without_puma.append(ten)
            if puma != -1 and puma not in used_pumas_without_ten:
                used_pumas_without_ten.append(puma)

        S_CC.append(constraint_l)
        b_S_CC.append(int(k_v[1]))
    V1_CCs.close()
    num_CCs = len(S_CC)

    for i in range(num_CCs):
        print("%5d. %s" % (i, S_CC[i]))
    print("\nTotal CC target = %s\n" % (total))

    unused_tens = sorted(list(set(dom_ten).difference(set(used_tens))))
    unused_pumas = sorted(list(set(dom_puma).difference(set(used_pumas))))
    print("\nTEN values NOT used in S_CC: %s\n" % (unused_tens))
    print("PUMA10 values NOT used in S_CC: %s\n" % (unused_pumas))
    print("TEN values used WITHOUT PUMA10 in some CC: %s\n" %
          (used_tens_without_puma))
    print("PUMA10 values used WITHOUT TEN in some CC: %s\n\n" %
          (used_pumas_without_ten))

    for [t, p] in ten_puma_combos_in_CCs:
        #if t not in used_tens_without_puma:
        if t not in ten_dict:
            ten_dict[t] = [p]
        else:
            ten_dict[t].append(p)

        #if p not in used_pumas_without_ten:
        if p not in puma_dict:
            puma_dict[p] = [t]
        else:
            puma_dict[p].append(t)

    pumas_onlyWithTEN_or_unused = list(
        set(dom_puma).difference(set(used_pumas_without_ten)))
    tens_onlyWithPUMA10_or_unused = list(
        set(dom_ten).difference(set(used_tens_without_puma)))
    for p in pumas_onlyWithTEN_or_unused:
        # If p does not appear with every TEN val OR p is not used and there exist TEN vals that appear ONLY WITH PUMA10 in S_CC
        if (p in puma_dict and len(puma_dict[p]) < size_dom_ten) or (
                p in unused_pumas and len(tens_onlyWithPUMA10_or_unused) > 0):
            unused_combos_in_CCs_by_puma.append(p)
    ''' --------------------------------------------------------------------------------------------------------------
	                                				Construct T and \cap_i
	-------------------------------------------------------------------------------------------------------------- '''
    io_stats.write(
        "Pairwise comparisons on S_CC and constructing T ---- start time %s\n"
        % (datetime.datetime.now()))
    lp.save_time(datetime.datetime.now())

    for i in range(num_CCs):
        T.add_node(i, name="CC" + str(i), target=b_S_CC[i])

    disjoint_CC = {i: [] for i in range(num_CCs)}
    for i in range(num_CCs):
        for j in range(num_CCs):
            if i != j:
                R1_i = S_CC[i][:num_R2_non_key_attrs + 1]
                R1_j = S_CC[j][:num_R2_non_key_attrs + 1]

                R2_i = S_CC[i][-1 * num_R2_non_key_attrs:]
                R2_j = S_CC[j][-1 * num_R2_non_key_attrs:]

                disjoint = False
                contain = True
                # R1
                for k in range(num_R1_non_key_attrs):
                    R1_i_attr = R1_i[k]
                    R1_j_attr = R1_j[k]

                    if R1_i_attr != -1 and R1_j_attr != -1:
                        # Disjoint on R1? Enough to disagree on ONE attr used in the CCs
                        if (k == 0 and (R1_i_attr[1] < R1_j_attr[0] or R1_i_attr[0] > R1_j_attr[1])) or \
                         (k != 0 and R1_i_attr != R1_j_attr):
                            disjoint = True
                        # Contained on R1? Then check if contained on R2!
                        if not((k == 0 and R1_i_attr[0] >= R1_j_attr[0] and R1_i_attr[1] <= R1_j_attr[1]) or \
                         (k != 0 and R1_i_attr == R1_j_attr)):
                            contain = False
                    elif R1_j_attr != -1 and R1_i_attr == -1:
                        contain = False
                # R2
                if contain:
                    for k in range(num_R2_non_key_attrs):
                        R2_i_attr = R2_i[k]
                        R2_j_attr = R2_j[k]

                        # If R1 preds match but disjoint R2 preds ----> Pair of CCs is disjoint, and NOT intersecting
                        if R1_i == R1_j:
                            if R2_i_attr != -1 and R2_j_attr != -1 and R2_i_attr != R2_j_attr:
                                disjoint = True
                                contain = False
                                break
                        # If R1_i is more restrictive than R1_j, BUT R2_i is not more restrictive than R2_j ----> NOT contained
                        else:
                            if (R2_i_attr != -1 and R2_j_attr != -1 and R2_i_attr != R2_j_attr) or \
                             (R2_j_attr != -1 and R2_i_attr == -1):
                                contain = False
                                break
                # (Disjoint) Add pairs to dict
                if disjoint:
                    disjoint_CC[i].append(j)
                # (Containment) Add edge from CC_j to CC_i IF CC_i\subseteq CC_j AND they do NOT contradict on R2
                elif not disjoint and contain:
                    print("Add edge %s -> %s" % (j, i))
                    T.add_edge(j, i)
    print()

    for i in range(num_CCs):
        for j in range(num_CCs):
            if i != j:
                if j not in disjoint_CC[i] and not (T.has_edge(i, j)
                                                    or T.has_edge(j, i)):
                    if i not in intersect_with:
                        intersect_with[i] = [j]
                    else:
                        intersect_with[i].append(j)
    ''' ----------------------------------- Find and remove redundant edges from T ----------------------------------- '''
    #plot_forest(T)
    list_of_edges = copy.deepcopy(T.edges())
    for (u, v) in list_of_edges:
        T.remove_edge(u, v)
        #plot_forest(T)
        if not nx.has_path(
                T, u,
                v):  # If removing the edge disconnects v from u, add edge back
            T.add_edge(u, v)
    #plot_forest(T)
    io_stats.write(
        "Pairwise comparisons on S_CC and constructing T ---- end time %s\n\n"
        % (datetime.datetime.now()))
    lp.save_time(datetime.datetime.now())
    ''' --------------------------------------------------------------------------------------------------------------
	                                			ViewCompletion (hierarchical)
	-------------------------------------------------------------------------------------------------------------- '''
    # Find "good" trees T_i in the forest T to recurse on
    vxs = copy.deepcopy(T.nodes())
    for vx in vxs:
        if vx in T and T.in_degree(vx) == 0:  # At ROOT!
            bad_T_i = False

            comp_nodes = list(nx.dfs_postorder_nodes(
                T, vx))  # Get nodes in root's tree
            for vx in comp_nodes:
                if vx in intersect_with:
                    bad_T_i = True
                    break
            if bad_T_i:  # T_i bad ----> Remove T_i from T
                for vx in comp_nodes:
                    T.remove_node(vx)
                    ''' --------------------------- S_CC and b_S_CC for intersecting CCs ------------------------- '''
                    intersecting_S_CC.append(S_CC[vx])
                    intersecting_b_S_CC.append(b_S_CC[vx])
                    print("%5d %s" %
                          (intersecting_b_S_CC[-1], intersecting_S_CC[-1]))

    #plot_forest(T)

    # START V1 COMPLETION ...
    io_stats.write("Algo 1.1 ---- start time %s\n" % (datetime.datetime.now()))
    lp.save_time(datetime.datetime.now())
    view_completion(S_CC, T)

    try:
        connect_str = conn_str.get_conn_str()
        conn = psycopg2.connect(connect_str)
        cursor = conn.cursor()

        #--------------------------------- FIX tuples with TEN but no PUMA10 value -----------------------------------
        cursor.execute(
            "SELECT p_id, TEN FROM V1 WHERE PUMA10 = -1 AND TEN != -1")
        y_ten_n_puma = list(cursor.fetchall())
        for tup in y_ten_n_puma:
            id = tup[0]
            ten = tup[1]

            puma = -1
            if ten in ten_dict:
                puma_candidates = list(
                    set(dom_puma).difference(set(ten_dict[ten])))
                if len(puma_candidates) > 0:
                    puma = random.choice(puma_candidates)
                else:
                    print("\n\nNo TEN options\n\n")
            else:
                puma = random.choice(pumas_onlyWithTEN_or_unused)

            query = "UPDATE V1 SET PUMA10 = " + str(
                puma) + " WHERE p_id = " + str(id)
            cursor.execute(query)

        #---------------------------------- FIX tuples with PUMA10 but no TEN value ----------------------------------
        cursor.execute(
            "SELECT p_id, PUMA10 FROM V1 WHERE PUMA10 != -1 AND TEN = -1")
        n_ten_y_puma = list(cursor.fetchall())
        for tup in n_ten_y_puma:
            id = tup[0]
            puma = tup[1]

            ten = -1
            if puma in puma_dict:
                ten_candidates = list(
                    set(dom_ten).difference(set(puma_dict[puma])))
                if len(ten_candidates) > 0:
                    ten = random.choice(ten_candidates)
                else:
                    print("\n\nNo PUMA10 options\n\n")
            else:
                ten = random.choice(tens_onlyWithPUMA10_or_unused)

            query = "UPDATE V1 SET TEN = " + str(ten) + " WHERE p_id = " + str(
                id)
            cursor.execute(query)

        io_stats.write("Algo 1.1 ---- end time %s\n\n" %
                       (datetime.datetime.now()))
        lp.save_time(datetime.datetime.now())

        # CHECK CC VIOLATIONS
        err = 0
        with open(CC_q_filename, "r") as q:
            lines = q.readlines()
            for i in range(len(lines)):
                l = lines[i]
                k_v = l.rstrip().split(":")
                pred = k_v[0]
                target_count = int(k_v[1])

                cursor.execute("SELECT COUNT(*) FROM V1 WHERE " + pred)
                ans = cursor.fetchone()[0]

                if i in T and target_count != ans:
                    if err == 0:
                        io_stats.write(
                            "\t(Unsatisfied disjoint or contained) CCs with target counts minus counts in V1\n"
                        )
                    io_stats.write(
                        "\t%5d - %5d = %5d \t%s\n" %
                        (target_count, ans, target_count - ans, pred))
                    err += abs(target_count - ans)
        lp.save_tot_L1_err("%s\n" % (err))

        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?")
        print(e)

    io_stats.write(
        "----------------------------------------------------------------------------\n\n"
    )
    io_stats.close()
    ''' --------------------------------------------------------------------------------------------------------------
	                                		Pass any intersecting CCs to lp.py
	-------------------------------------------------------------------------------------------------------------- '''
    if len(intersecting_S_CC) > 0:
        lp.main(num_R1_non_key_attrs, intersecting_S_CC, intersecting_b_S_CC,
                p_table, h_table, CC_q_filename, output_filename)
    ''' --------------------------------------------------------------------------------------------------------------
	1.					Assign unused TEN-PUMA10 combo to tuples in V1 to complete assignment
	------------------------------------------------------------------------------------------------------------------
	2.												Check CC violations
	-------------------------------------------------------------------------------------------------------------- '''
    io_stats = open(output_filename, "a+")
    try:
        connect_str = conn_str.get_conn_str()
        conn = psycopg2.connect(connect_str)
        cursor = conn.cursor()

        io_stats.write(
            "\n\nPartition people and households by PUMA10-TEN ---- start time %s\n"
            % (datetime.datetime.now()))
        lp.save_time("%s" % (datetime.datetime.now()))
        # CREATE A DICTIONARY THAT PARTITIONS PEOPLE INTO (PUMA, TEN)
        cursor.execute(
            "SELECT p_id, TEN, PUMA10 FROM V1 WHERE PUMA10 != -1 AND TEN != -1"
        )
        rows = cursor.fetchall()
        for row in rows:
            p_id = row[0]
            ten = int(row[1])
            puma = row[2]
            if (puma, ten) not in l_ppl_in_diff_puma_ten:
                l_ppl_in_diff_puma_ten[(puma, ten)] = [p_id]
            else:
                l_ppl_in_diff_puma_ten[(puma, ten)].append(p_id)
        # CREATE A DICTIONARY THAT PARTITIONS HHs INTO (PUMA, TEN)
        cursor.execute("SELECT h_id, PUMA10, TEN FROM V2")
        rows = cursor.fetchall()
        for row in rows:
            h_id = row[0]
            puma = row[1]
            ten = int(row[2])
            if (puma, ten) not in l_hhs_in_diff_puma_ten:
                l_hhs_in_diff_puma_ten[(puma, ten)] = [h_id]
            else:
                l_hhs_in_diff_puma_ten[(puma, ten)].append(h_id)

        #----------------------------------- Complete partially filled tuples in V1 ----------------------------------
        cursor.execute("SELECT p_id FROM V1 WHERE PUMA10 = -1 AND TEN = -1")
        miss_assign_V1 = list(cursor.fetchall())
        if len(miss_assign_V1) > 0 and len(unused_combos_in_CCs_by_puma) > 0:
            print("\n\nNumber of tuples without assignment = %s\n\n" %
                  (len(miss_assign_V1)))
            io_stats.write("\tComplete V1 assignment ---- start time %s\n" %
                           (datetime.datetime.now()))
            #------------------------------------ Create list of TEN-PUMA10 combos -----------------------------------
            combos = []
            for p in unused_combos_in_CCs_by_puma:
                if p in puma_dict:
                    ts = list(set(dom_ten).difference(set(puma_dict[p])))
                else:
                    ts = tens_onlyWithPUMA10_or_unused
                for t in ts:
                    combos.append([t, p])
            print(
                "Combinations valid for tuples missing assignment after lp.py: %s\n\n"
                % (combos))
            io_stats.write("\t\tConstructing combos (end) %s\n" %
                           (datetime.datetime.now()))

            for tup in miss_assign_V1:
                p_id = tup[0]
                [ten, puma] = random.choice(combos)
                if (puma, ten) not in l_ppl_in_diff_puma_ten:
                    l_ppl_in_diff_puma_ten[(puma, ten)] = [p_id]
                else:
                    l_ppl_in_diff_puma_ten[(puma, ten)].append(p_id)

            io_stats.write("\tComplete V1 assignment ---- end time %s\n" %
                           (datetime.datetime.now()))
        else:
            print("\n\nAll TEN-PUMA10 combinations used in S_CC\n\n")

        io_stats.write(
            "Partition people and households by PUMA10-TEN ---- end time %s\n"
            % (datetime.datetime.now()))
        lp.save_time("%s\n" % (datetime.datetime.now()))

        # CHECK CC VIOLATIONS
        io_stats.write("\n\nCCs with target counts minus counts in solution\n")
        with open(CC_q_filename, "r") as q:
            err = 0
            for l in q:
                k_v = l.rstrip().split(":")
                pred = k_v[0]
                target_count = int(k_v[1])

                cursor.execute("SELECT COUNT(*) FROM V1 WHERE " + pred)
                ans = cursor.fetchone()[0]

                io_stats.write("%5d - %5d = %5d \t%s\n" %
                               (target_count, ans, target_count - ans, pred))
                err += abs(target_count - ans)
        lp.save_tot_L1_err("%s (Step 1 final)\n" % (err))

        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?")
        print(e)

    io_stats.close()
    return l_ppl_in_diff_puma_ten, l_hhs_in_diff_puma_ten
Example #7
0
def main(CC_q_filename, V1_CC_filename, output_filename):
    ''' ----------------------------------------------- List vars ------------------------------------------------ '''
    type_of_tuples = []
    num_types = 0
    n_men = 0
    n = 0

    n_age = 0
    n_ten = 0
    n_puma = 0

    aug_q = []
    aug_counts = []
    S_CC = []
    b_S_CC = []

    A = []

    io_stats = open(output_filename, "a")
    io_stats.write(
        "-------------------------------------------------------------------------------------------------------------------------------------\n"
    )
    io_stats.write("\nCreate V2 ---- start time %s\n" %
                   (datetime.datetime.now()))
    try:
        connect_str = conn_str.get_conn_str()
        conn = psycopg2.connect(connect_str)
        cursor = conn.cursor()

        cursor.execute("DROP TABLE IF EXISTS V2 CASCADE")
        cursor.execute("CREATE TABLE V2 AS SELECT h_id, PUMA10, TEN FROM " +
                       h_table)

        cursor.execute("DROP TABLE IF EXISTS V1 CASCADE")
        cursor.execute("CREATE TABLE V1 AS SELECT * FROM " + p_table)
        cursor.execute("ALTER TABLE V1 ADD COLUMN PUMA10 INTEGER DEFAULT -1")
        cursor.execute("ALTER TABLE V1 ADD COLUMN TEN INTEGER DEFAULT -1")

        # Also, create the final relation, i.e., p_with_hid(p_id, AGEP, RELP, SEX, h_id)
        cursor.execute("DROP TABLE IF EXISTS p_with_hid CASCADE")
        cursor.execute("CREATE TABLE p_with_hid AS SELECT * FROM " + p_table)
        cursor.execute(
            "ALTER TABLE p_with_hid ADD COLUMN h_id INTEGER DEFAULT -1")

        cursor.execute("SELECT COUNT(*) FROM " + h_table)
        n_hh = cursor.fetchone()[0]
        cursor.execute("SELECT COUNT(*) FROM " + p_table)
        n_ppl = cursor.fetchone()[0]

        save_num_HHs_ppl(n_hh, n_ppl)

        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?\n")
        print(e)
    io_stats.write("Create V2 ---- end time %s\n" % (datetime.datetime.now()))
    io_stats.write(
        "----------------------------------------------------------------------------\n"
    )

    io_stats.write("CCs to Ax=b ---- start time %s\n" %
                   (datetime.datetime.now()))
    save_time(datetime.datetime.now())

    V1_CCs = open(V1_CC_filename, "r")
    for line in V1_CCs:
        k_v = line.rstrip().split(":")

        l = k_v[0].split(",")
        constraint_l = []
        for i in range(5):
            if l[i] == '-1':
                constraint_l.append(-1)
            else:
                l[i] = l[i][1:-1].split("-")  # Remove "[", "]"
                l[i] = [int(j) for j in l[i]]
                constraint_l.append(l[i])
        S_CC.append(constraint_l)
        b_S_CC.append(int(k_v[1]))
    V1_CCs.close()

    set_num_S_CCs(len(b_S_CC))
    ''' --------------------------------------------------------------------------------------------------------------
                                    Get attr_intervalization for attributes in R1
    -------------------------------------------------------------------------------------------------------------- '''
    # 1. Store breaking points from the domain of attributes used in S_CC
    for constraint in S_CC:
        for i in range(num_R1_non_key_attrs):
            if constraint[i] != -1:
                for j in range(len(constraint[i])):
                    val = constraint[i][j]
                    if j == 1:
                        val += 1  # [0, 17] makes list [0, 18] for [0, 18), [18, ...)
                    if val not in attr_intervalization[i]:
                        attr_intervalization[i].append(val)
    # 2. Sort and adjust AGEP's intervals
    for i in range(num_R1_non_key_attrs):
        attr_intervalization[i].sort()
        if i == 0:  # AGEP domain is [0, 114]
            if 0 not in attr_intervalization[i]:
                l = [0]
                l.extend(attr_intervalization[i])
                attr_intervalization[i] = l
            if 115 not in attr_intervalization[i]:
                attr_intervalization[i].append(115)
    n_age = len(attr_intervalization[0])
    ''' --------------------------------------------------------------------------------------------------------------
                            Get tuple types and merge using attrs that allow intervals in S_{CC}
    -------------------------------------------------------------------------------------------------------------- '''
    type_of_tuples = get_existing_tuple_types(p_table)
    print("\nType of tuples before binning by intervals = %s" %
          (len(type_of_tuples)))
    io_stats.write("\t\tType of tuples before binning by intervals = %s\n" %
                   (len(type_of_tuples)))

    # Update the agep value to the corresponding interval from agep intervalization
    first_woman = False
    i = 0
    for tup in type_of_tuples:
        if tup[2] == 2 and not first_woman:
            i = 0
            first_woman = True
        u = attr_intervalization[0][i + 1]
        while tup[0] >= u:
            i += 1
            u = attr_intervalization[0][i + 1]
        l = attr_intervalization[0][i]
        u = attr_intervalization[0][i + 1]
        tup[0] = [l, u - 1]
    # Remove duplicates below
    no_dup = []
    for tup in type_of_tuples:
        if tup not in no_dup:
            no_dup.append(tup)
            if tup[2] == 1:
                n_men += 1
    type_of_tuples = no_dup
    num_types = len(type_of_tuples)
    print("Number of tuple types in R1 (using AGEP intervalization) = %s" %
          (num_types))
    io_stats.write(
        "\t\tNumber of tuple types in R1 (using AGEP intervalization) = %s\n" %
        (num_types))
    n = num_types
    ''' --------------------------------------------------------------------------------------------------------------
                            Get active domain for attributes in R2, and total number of vars in IP
    -------------------------------------------------------------------------------------------------------------- '''
    try:
        connect_str = conn_str.get_conn_str()
        conn = psycopg2.connect(connect_str)
        cursor = conn.cursor()

        cursor.execute("SELECT array(SELECT DISTINCT TEN FROM " + h_table +
                       " ORDER BY TEN)")
        attr_intervalization[3] = list(cursor.fetchone()[0])
        n_ten = len(attr_intervalization[3])
        if n_ten > 0:
            n = n * n_ten

        cursor.execute("SELECT array(SELECT DISTINCT PUMA10 FROM " + h_table +
                       " ORDER BY PUMA10)")
        attr_intervalization[4] = list(cursor.fetchone()[0])
        n_puma = len(attr_intervalization[4])
        if n_puma > 0:
            n = n * n_puma

        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?\n")
        print(e)

    print("\t\tDistinct values (TEN, PUMA10):\t%s\n\t\t%s\n" %
          (attr_intervalization[3], attr_intervalization[4]))
    print("Number of variables = %s" % (n))
    io_stats.write("\n\t\tNumber of variables = %2d\n" % (n))
    ''' --------------------------------------------------------------------------------------------------------------
                            Use S_CC and add the corresponding x_i's to populate rows in A and b
    ------------------------------------------------------------------------------------------------------------------
                            DO NOT augment input set of CCs with AGEP-RELP-SEX marginals from R_1
    -------------------------------------------------------------------------------------------------------------- '''
    aug_q = S_CC
    aug_counts = b_S_CC

    for i in aug_q:
        row = []

        if i[4] != -1:
            idx_puma = attr_intervalization[4].index(i[4][0])
            row = [0] * num_types * n_ten * idx_puma

        if i[0] == -1:
            row_sub = no_agep(i[1], i[2], type_of_tuples, n_men)
        else:
            row_sub = given_agep(i[0], i[1], i[2], type_of_tuples, n_men)

        if i[3] == -1:
            row.extend(row_sub * n_ten)
        elif i[3][0] == 1:
            row.extend(row_sub)
            if n_ten >= 2:
                row.extend([0] * int(n / (n_puma * n_ten) *
                                     (n_ten - 1)))  # Ignore other TENs
        elif i[3][0] == 2:
            row.extend([0] * int(n / (n_puma * n_ten)))
            row.extend(row_sub)
            if n_ten >= 3:
                row.extend([0] * int(n / (n_puma * n_ten) *
                                     (n_ten - 2)))  # Ignore other TENs
        elif i[3][0] == 3:
            row.extend([0] * int(2 * n / (n_puma * n_ten)))
            row.extend(row_sub)
            if n_ten == 4:
                row.extend([0] * int(n / (n_puma * n_ten))
                           )  # Ignore other TENs ... n_ten - 3 is 1 here
        elif i[3][0] == 4:
            row.extend([0] * int(3 * n / (n_puma * n_ten)))
            row.extend(row_sub)

        if i[4] != -1:
            num_pumas_rem = n_puma - attr_intervalization[4].index(i[4][0]) - 1
            row.extend([0] * int(n / n_puma) * num_pumas_rem)
        else:
            row.extend(row * (n_puma - 1))

        A.append(row)

    print("\nSolving IP ....")
    prob = LpProblem("myProblem", LpMinimize)
    ppl_types = [i for i in range(n)]

    ppl_vars = LpVariable.dicts("person", ppl_types, lowBound=0, cat='Integer')
    #print(ppl_vars)

    prob += 0

    for i in range(len(A)):
        prob += lpSum([A[i][j] * ppl_vars[j]
                       for j in ppl_types]) == aug_counts[i], "CC" + str(i)

    status = prob.solve()
    io_stats.write("\t\t******** STATUS = %s\n" % (LpStatus[prob.status]))
    print("\t\t******** STATUS = %s\n" % (LpStatus[prob.status]))

    x = [0 for i in range(n)]
    for v in prob.variables():
        #print(v.name, "=", v.varValue)
        if v.name != '__dummy':
            x[int(v.name.split("_")[1])] = int(v.varValue)

    io_stats.write("\tCCs to Ax=b ---- end time %s\n\n" %
                   (datetime.datetime.now()))
    save_time("%s\n" % (datetime.datetime.now()))

    io_stats.write("\tAx=b to V1 ---- start time %s\n" %
                   (datetime.datetime.now()))
    save_time(datetime.datetime.now())

    # Create UPDATE queries to run on V_1 in PostgreSQL on the basis of x. We know the values x[i]'s encode.
    try:
        connect_str = conn_str.get_conn_str()
        conn = psycopg2.connect(connect_str)
        cursor = conn.cursor()

        # AGEP, RELP, SEX, TEN, PUMA10
        for p in range(len(attr_intervalization[4])):
            for t in range(len(attr_intervalization[3])):
                for i in range(num_types):
                    x_i_val = x[p * n_ten * num_types + t * num_types + i]
                    if x_i_val > 0:
                        agep_val = type_of_tuples[i][0]
                        relp_val = str(type_of_tuples[i][1])
                        sex_val = str(type_of_tuples[i][2])
                        ten_val = str(attr_intervalization[3][t])
                        puma_val = str(attr_intervalization[4][p])

                        query = "WITH num_tuples_per_x AS (SELECT * FROM V1 WHERE PUMA10=-1 AND AGEP>=" + str(agep_val[0]) + \
                        " AND AGEP<=" + str(agep_val[1]) + " AND RELP=" + relp_val + " AND SEX=" + sex_val + " LIMIT " + \
                        str(x_i_val) + ") UPDATE V1 SET PUMA10=" + puma_val + ", TEN=" + ten_val + \
                        " FROM num_tuples_per_x WHERE V1.p_id=num_tuples_per_x.p_id"

                        cursor.execute(query)

                        print(
                            query.split("AS ")[1].split(
                                " FROM num_tuples_per_x")[0])
        save_time(datetime.datetime.now())
        # COMPLETE TUPLES WITHOUT ANY ASSIGNMENT IN V1
        cursor.execute(
            "SELECT array(SELECT p_id FROM V1 WHERE PUMA10 = -1 AND TEN = -1)")
        tups_no_assign = list(cursor.fetchone()[0])
        print("\nNumber of tuples without assignment = %s\n" %
              (len(tups_no_assign)))
        for p_id in tups_no_assign:
            p = random.choice(dom_puma)
            t = random.choice(dom_ten)
            cursor.execute("UPDATE V1 SET PUMA10 = " + str(p) + ", TEN = " +
                           str(t) + " WHERE p_id = " + str(p_id))

        io_stats.write("\tAx=b to V1 ---- end time %s\n\n" %
                       (datetime.datetime.now()))
        save_time(datetime.datetime.now())

        # CHECK CC VIOLATIONS FOR CCs
        err = 0
        with open(CC_q_filename, "r") as q:
            for l in q:
                k_v = l.rstrip().split(":")
                pred = k_v[0]
                target_count = int(k_v[1])

                cursor.execute("SELECT COUNT(*) FROM V1 WHERE " + pred)
                ans = cursor.fetchone()[0]

                if target_count != ans:
                    if err == 0:
                        io_stats.write(
                            "\tCCs with target counts minus counts in V1\n")
                    io_stats.write(
                        "\t%5d - %5d = %5d \t%s\n" %
                        (target_count, ans, target_count - ans, pred))
                    err += abs(target_count - ans)
        save_tot_L1_err("%s (CC)\n" % (err))

        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?\n")
        print(e)

    io_stats.write(
        "----------------------------------------------------------------------------\n\n\n\n\n"
    )
    io_stats.close()
def main(num_R1_non_key_attrs, S_CC, b_S_CC, p_table, h_table, CC_q_filename,
         output_filename):
    io_stats = open(output_filename, "a")

    io_stats.write("Algo 1.2 ---- start time %s\n" % (datetime.datetime.now()))
    for i in range(len(S_CC)):
        io_stats.write("\t%5d %s\n" % (b_S_CC[i], S_CC[i]))
    io_stats.write("\n\tCCs to Ax=b ---- start time %s\n" %
                   (datetime.datetime.now()))
    save_time(datetime.datetime.now())

    set_num_S_CCs(len(S_CC))
    ''' ----------------------------------------------- List vars ------------------------------------------------ '''
    type_of_tuples = []
    num_types = 0
    n_men = 0
    n = 0

    n_age = 0
    n_ten = 0
    n_puma = 0

    aug_q = []
    aug_counts = []

    A = []
    ''' --------------------------------------------------------------------------------------------------------------
                                    Get attr_intervalization for attributes in R1
    -------------------------------------------------------------------------------------------------------------- '''
    # 1. Store breaking points from the domain of attributes used in S_CC
    for constraint in S_CC:
        for i in range(num_R1_non_key_attrs):
            if constraint[i] != -1:
                for j in range(len(constraint[i])):
                    val = constraint[i][j]
                    if j == 1:
                        val += 1  # [0, 17] makes list [0, 18] for [0, 18), [18, ...)
                    if val not in attr_intervalization[i]:
                        attr_intervalization[i].append(val)
    # 2. Sort and adjust AGEP's intervals
    for i in range(num_R1_non_key_attrs):
        attr_intervalization[i].sort()
        if i == 0:  # AGEP domain is [0, 114]
            if 0 not in attr_intervalization[i]:
                l = [0]
                l.extend(attr_intervalization[i])
                attr_intervalization[i] = l
            if 115 not in attr_intervalization[i]:
                attr_intervalization[i].append(115)
    n_age = len(attr_intervalization[0])

    # Mark which intervals given by AGEP's intervalization are used in S_CC
    agep_intervals_used_in_S_CC = []
    if n_age == 2:  # No AGEP value or [0, 114] specified in S_CC
        agep_intervals_used_in_S_CC.append([0, 114])
    else:
        for i in range(n_age - 1):
            age = attr_intervalization[0][i]
            next_age = attr_intervalization[0][i + 1]
            for constraint in S_CC:
                if (constraint[0] != -1 and age >= constraint[0][0]
                        and age <= constraint[0][1]) and ([
                            age, next_age - 1
                        ] not in agep_intervals_used_in_S_CC):
                    agep_intervals_used_in_S_CC.append([age, next_age - 1])
                    break
    print(
        "\n\nAGEP intervals (post intervalization) that are used in the input set of CCs: %s\n\n"
        % (agep_intervals_used_in_S_CC))
    ''' --------------------------------------------------------------------------------------------------------------
                            Get tuple types and merge using attrs that allow intervals in S_{CC}
    -------------------------------------------------------------------------------------------------------------- '''
    type_of_tuples = get_existing_tuple_types(p_table)
    print("\nType of tuples before binning by intervals = %s" %
          (len(type_of_tuples)))
    io_stats.write("\t\tType of tuples before binning by intervals = %s\n" %
                   (len(type_of_tuples)))

    # Update the agep value to the corresponding interval from agep intervalization
    first_woman = False
    i = 0
    for tup in type_of_tuples:
        if tup[2] == 2 and not first_woman:
            i = 0
            first_woman = True
        u = attr_intervalization[0][i + 1]
        while tup[0] >= u:
            i += 1
            u = attr_intervalization[0][i + 1]
        l = attr_intervalization[0][i]
        u = attr_intervalization[0][i + 1]
        tup[0] = [l, u - 1]
    # Remove duplicates below
    no_dup = []
    for tup in type_of_tuples:
        if tup not in no_dup:
            no_dup.append(tup)
            if tup[2] == 1:
                n_men += 1
    type_of_tuples = no_dup
    num_types = len(type_of_tuples)
    print("Number of tuple types in R1 (using AGEP intervalization) = %s" %
          (num_types))
    io_stats.write(
        "\t\tNumber of tuple types in R1 (using AGEP intervalization) = %s\n" %
        (num_types))
    n = num_types
    ''' --------------------------------------------------------------------------------------------------------------
                            Get active domain for attributes in R2, and total number of vars in IP
    -------------------------------------------------------------------------------------------------------------- '''
    # Once we know the distinct values in TEN and PUMA10 (from S_{CC}), calculate number of variables
    try:
        connect_str = conn_str.get_conn_str()
        conn = psycopg2.connect(connect_str)
        cursor = conn.cursor()

        if len(attr_intervalization[1]) == 0:  # No RELP value used in S_CC
            cursor.execute("SELECT array(SELECT DISTINCT RELP FROM " +
                           p_table + " ORDER BY RELP)")
            attr_intervalization[1] = list(cursor.fetchone()[0])

        if len(attr_intervalization[2]) == 0:  # No SEX value used in S_CC
            cursor.execute("SELECT array(SELECT DISTINCT SEX FROM " + p_table +
                           " ORDER BY SEX)")
            attr_intervalization[2] = list(cursor.fetchone()[0])

        cursor.execute("SELECT array(SELECT DISTINCT TEN FROM " + h_table +
                       " ORDER BY TEN)")
        attr_intervalization[3] = list(cursor.fetchone()[0])
        n_ten = len(attr_intervalization[3])
        if n_ten > 0:
            n = n * n_ten

        cursor.execute("SELECT array(SELECT DISTINCT PUMA10 FROM " + h_table +
                       " ORDER BY PUMA10)")
        attr_intervalization[4] = list(cursor.fetchone()[0])
        n_puma = len(attr_intervalization[4])
        if n_puma > 0:
            n = n * n_puma

        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?\n")
        print(e)

    print("\t\tDistinct values (TEN, PUMA10):\t%s\n\t\t%s\n" %
          (attr_intervalization[3], attr_intervalization[4]))
    print("Number of variables = %s" % (n))
    io_stats.write("\n\t\tNumber of variables = %2d\n" % (n))
    ''' --------------------------------------------------------------------------------------------------------------
                                Augment S_{CC} using 3-way (AGEP-RELP-SEX) marginal from R1
    -------------------------------------------------------------------------------------------------------------- '''
    aug_q, aug_counts = get_counts_augment_R1_3way(
        S_CC, agep_intervals_used_in_S_CC, num_R1_non_key_attrs, p_table)
    aug_q.extend(S_CC)
    aug_counts.extend(b_S_CC)
    ''' --------------------------------------------------------------------------------------------------------------
                            Use S_CC and add the corresponding x_i's to populate rows in A and b
    -------------------------------------------------------------------------------------------------------------- '''
    for i in aug_q:
        row = []

        if i[4] != -1:
            idx_puma = attr_intervalization[4].index(i[4][0])
            row = [0] * num_types * n_ten * idx_puma

        if i[0] == -1:
            row_sub = no_agep(i[1], i[2], type_of_tuples, n_men)
        else:
            row_sub = given_agep(i[0], i[1], i[2], type_of_tuples, n_men)

        if i[3] == -1:
            row.extend(row_sub * n_ten)
        elif i[3][0] == 1:
            row.extend(row_sub)
            if n_ten >= 2:
                row.extend([0] * int(n / (n_puma * n_ten) *
                                     (n_ten - 1)))  # Ignore other TENs
        elif i[3][0] == 2:
            row.extend([0] * int(n / (n_puma * n_ten)))
            row.extend(row_sub)
            if n_ten >= 3:
                row.extend([0] * int(n / (n_puma * n_ten) *
                                     (n_ten - 2)))  # Ignore other TENs
        elif i[3][0] == 3:
            row.extend([0] * int(2 * n / (n_puma * n_ten)))
            row.extend(row_sub)
            if n_ten == 4:
                row.extend([0] * int(n / (n_puma * n_ten))
                           )  # Ignore other TENs ... n_ten - 3 is 1 here
        elif i[3][0] == 4:
            row.extend([0] * int(3 * n / (n_puma * n_ten)))
            row.extend(row_sub)

        if i[4] != -1:
            num_pumas_rem = n_puma - attr_intervalization[4].index(i[4][0]) - 1
            row.extend([0] * int(n / n_puma) * num_pumas_rem)
        else:
            row.extend(row * (n_puma - 1))

        A.append(row)

    print("\nSolving IP ....")
    prob = LpProblem("myProblem", LpMinimize)
    ppl_types = [i for i in range(n)]

    ppl_vars = LpVariable.dicts("person", ppl_types, lowBound=0, cat='Integer')
    #print(ppl_vars)

    prob += 0

    for i in range(len(A)):
        prob += lpSum([A[i][j] * ppl_vars[j]
                       for j in ppl_types]) == aug_counts[i], "CC" + str(i)

    status = prob.solve()
    io_stats.write("\t\t******** STATUS = %s\n" % (LpStatus[prob.status]))
    print("\t\t******** STATUS = %s\n" % (LpStatus[prob.status]))

    x = [0 for i in range(n)]
    for v in prob.variables():
        #print(v.name, "=", v.varValue)
        if v.name != '__dummy':
            x[int(v.name.split("_")[1])] = int(v.varValue)

    io_stats.write("\tCCs to Ax=b ---- end time %s\n\n" %
                   (datetime.datetime.now()))
    save_time(datetime.datetime.now())

    io_stats.write("\tAx=b to V1 ---- start time %s\n" %
                   (datetime.datetime.now()))
    save_time(datetime.datetime.now())

    # Create UPDATE queries to run on V_1 in PostgreSQL on the basis of x. We know the values x[i]'s encode.
    try:
        connect_str = conn_str.get_conn_str()
        conn = psycopg2.connect(connect_str)
        cursor = conn.cursor()

        # AGEP, RELP, SEX, TEN, PUMA10
        for p in range(len(attr_intervalization[4])):
            for t in range(len(attr_intervalization[3])):
                for i in range(num_types):
                    x_i_val = x[p * n_ten * num_types + t * num_types + i]
                    if x_i_val > 0:
                        agep_val = type_of_tuples[i][0]
                        relp_val = str(type_of_tuples[i][1])
                        sex_val = str(type_of_tuples[i][2])
                        ten_val = str(attr_intervalization[3][t])
                        puma_val = str(attr_intervalization[4][p])

                        query = "WITH num_tuples_per_x AS (SELECT * FROM V1 WHERE PUMA10=-1 AND AGEP>=" + str(agep_val[0]) + \
                        " AND AGEP<=" + str(agep_val[1]) + " AND RELP=" + relp_val + " AND SEX=" + sex_val + " LIMIT " + \
                        str(x_i_val) + ") UPDATE V1 SET PUMA10=" + puma_val + ", TEN=" + ten_val + \
                        " FROM num_tuples_per_x WHERE V1.p_id=num_tuples_per_x.p_id"

                        cursor.execute(query)

                        print(
                            query.split("AS ")[1].split(
                                " FROM num_tuples_per_x")[0])
        #cursor.execute("SELECT COUNT(*) FROM V1 WHERE PUMA10=-1 OR TEN = -1")
        #print("\n\nNumber of tuples with missing/incomplete V1 assignment = %s\n\n" %(cursor.fetchone()[0]))

        io_stats.write("\tAx=b to V1 ---- end time %s\n\n" %
                       (datetime.datetime.now()))

        # CHECK CC VIOLATIONS FOR CCs
        err = 0
        with open(CC_q_filename, "r") as q:
            for l in q:
                k_v = l.rstrip().split(":")
                pred = k_v[0]
                target_count = int(k_v[1])

                cursor.execute("SELECT COUNT(*) FROM V1 WHERE " + pred)
                ans = cursor.fetchone()[0]

                if target_count != ans:
                    if err == 0:
                        io_stats.write(
                            "\tCCs with target counts minus counts in V1\n")
                    io_stats.write(
                        "\t%5d - %5d = %5d \t%s\n" %
                        (target_count, ans, target_count - ans, pred))
                    err += abs(target_count - ans)
        io_stats.write("Algo 1.2 ---- end time %s\n" %
                       (datetime.datetime.now()))
        save_time(datetime.datetime.now())
        save_tot_L1_err("%s\n" % (err))

        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?\n")
        print(e)

    io_stats.write(
        "----------------------------------------------------------------------------\n\n\n\n\n"
    )
    io_stats.close()
Example #9
0
V1_CC_filename = "V1_CC_dict_all_10_good.txt"
output_filename = "details.txt"

lp.main(CC_q_filename, V1_CC_filename, output_filename)

io_stats = open(output_filename, "a")
io_stats.write(
    "\n\n\tPartition people and households by PUMA10-TEN ---- start time %s\n"
    % (datetime.datetime.now()))
'''------------------------------------------------------------------------------------------------------------------------
                                                h_id UNKNOWN for our problem
------------------------------------------------------------------------------------------------------------------------'''
# CREATE A DICTIONARY THAT PARTITIONS PEOPLE INTO (PUMA, TEN)
l_ppl_in_diff_puma_ten = {}
try:
    connect_str = conn_str.get_conn_str()
    conn = psycopg2.connect(connect_str)
    cursor = conn.cursor()

    cursor.execute("SELECT p_id, TEN, PUMA10 FROM V1")
    rows = cursor.fetchall()
    for row in rows:
        p_id = row[0]
        ten = int(row[1])
        puma = row[2]
        if (puma, ten) not in l_ppl_in_diff_puma_ten:
            l_ppl_in_diff_puma_ten[(puma, ten)] = [p_id]
        else:
            l_ppl_in_diff_puma_ten[(puma, ten)].append(p_id)

    conn.commit()