Ejemplo n.º 1
0
def main(db_conn_str, group_1_id, group_2_id):
    """
    Compare two groups and print the combined statistics as JSON.

    For each group, in the order given, the rows produced by the
    ``group_queries_and_judgments_query`` query are loaded into a DataFrame
    and summarized with ``stats`` against the ideals DataFrame. The result
    dict printed to stdout contains:

    - one entry per group, keyed by "<group name> <group id>", holding that
      group's stats
    - "num_total_diffs" and "num_unique_qrps", the two values returned by
      ``_count_num_diff`` for the two groups' DataFrames
    - "ndcg_delta", the second group's "avg_ndcg_at_5" minus the first's

    Args:
        db_conn_str: connection string passed to ``psycopg2.connect``.
        group_1_id: ID of the first group.
        group_2_id: ID of the second group.
    """
    db_conn = psycopg2.connect(db_conn_str)

    experiment_stats = {}
    group_data = []
    ndcgs = []

    try:
        ideals_df = pd.read_sql(query_ideals_query(), db_conn)

        for group_id in (group_1_id, group_2_id):
            data_df = pd.read_sql(
                group_queries_and_judgments_query(db_conn, group_id, "domain_catalog"),
                db_conn)

            name = group_name(db_conn, group_id) + " " + str(group_id)

            group_data.append(data_df)
            group_stats = stats(data_df, ideals_df)
            experiment_stats[name] = group_stats
            ndcgs.append(group_stats["avg_ndcg_at_5"])
    finally:
        # Everything below works on in-memory DataFrames only, so release the
        # connection as soon as the reads are done (and also on failure).
        db_conn.close()

    total_differences, unique_qrps = _count_num_diff(group_data[0], group_data[1])
    experiment_stats["num_total_diffs"] = total_differences
    experiment_stats["num_unique_qrps"] = unique_qrps
    experiment_stats["ndcg_delta"] = (ndcgs[1] - ndcgs[0])

    print(simplejson.dumps(experiment_stats, indent=4 * ' '))
Ejemplo n.º 2
0
def main(db_conn_str, group_1_id, group_2_id):
    """
    Compare two groups and print the combined statistics as JSON.

    For each group, in the order given, the rows produced by the
    ``group_queries_and_judgments_query`` query are loaded into a DataFrame
    and summarized with ``stats`` against the ideals DataFrame. The result
    dict printed to stdout contains:

    - one entry per group, keyed by "<group name> <group id>", holding that
      group's stats
    - "num_total_diffs" and "num_unique_qrps", the two values returned by
      ``_count_num_diff`` for the two groups' DataFrames
    - "ndcg_delta", the second group's "avg_ndcg_at_5" minus the first's

    Args:
        db_conn_str: connection string passed to ``psycopg2.connect``.
        group_1_id: ID of the first group.
        group_2_id: ID of the second group.
    """
    db_conn = psycopg2.connect(db_conn_str)

    experiment_stats = {}
    group_data = []
    ndcgs = []

    try:
        ideals_df = pd.read_sql(query_ideals_query(), db_conn)

        for group_id in (group_1_id, group_2_id):
            data_df = pd.read_sql(
                group_queries_and_judgments_query(db_conn, group_id,
                                                  "domain_catalog"), db_conn)

            name = group_name(db_conn, group_id) + " " + str(group_id)

            group_data.append(data_df)
            group_stats = stats(data_df, ideals_df)
            experiment_stats[name] = group_stats
            ndcgs.append(group_stats["avg_ndcg_at_5"])
    finally:
        # Everything below works on in-memory DataFrames only, so release the
        # connection as soon as the reads are done (and also on failure).
        db_conn.close()

    total_differences, unique_qrps = _count_num_diff(group_data[0],
                                                     group_data[1])
    experiment_stats["num_total_diffs"] = total_differences
    experiment_stats["num_unique_qrps"] = unique_qrps
    experiment_stats["ndcg_delta"] = (ndcgs[1] - ndcgs[0])

    print(simplejson.dumps(experiment_stats, indent=4 * ' '))
Ejemplo n.º 3
0
    parser.add_argument('-o', '--outfile', dest='outfile', type=str, required=True,
                        help='Name of CSV file to which data will be written.')
    parser.add_argument('-D', '--db_conn_str', required=True, help='Database connection string')

    args = parser.parse_args()

    db_conn = psycopg2.connect(args.db_conn_str)

    print("Reading metadata")

    fxf_metadata_dict = get_fxf_metadata_mapping(db_conn)

    print("Reading all judged data for group")

    data_df = pd.read_sql(
        group_queries_and_judgments_query(db_conn, args.group_id, "domain_catalog"),
        db_conn)

    print("Counting irrelevants")

    data_df["num_irrelevants"] = data_df["raw_judgments"].apply(
        lambda js: sum([1 for j in js if "judgment" in j and j["judgment"] < 1]))

    data_df = data_df[data_df["num_irrelevants"] >= 2]

    print("Adding metadata to dataframe")

    data_df["metadata"] = data_df["result_fxf"].apply(
        lambda fxf: fxf_metadata_dict.get(fxf, {}))

    print("Extracting dataset names")
Ejemplo n.º 4
0
                        '--db_conn_str',
                        required=True,
                        help='Database connection string')

    args = parser.parse_args()

    db_conn = psycopg2.connect(args.db_conn_str)

    print("Reading metadata")

    fxf_metadata_dict = get_fxf_metadata_mapping(db_conn)

    print("Reading all judged data for group")

    data_df = pd.read_sql(
        group_queries_and_judgments_query(db_conn, args.group_id,
                                          "domain_catalog"), db_conn)

    print("Counting irrelevants")

    data_df["num_irrelevants"] = data_df["raw_judgments"].apply(lambda js: sum(
        [1 for j in js if "judgment" in j and j["judgment"] < 1]))

    data_df = data_df[data_df["num_irrelevants"] >= 2]

    print("Adding metadata to dataframe")

    data_df["metadata"] = data_df["result_fxf"].apply(
        lambda fxf: fxf_metadata_dict.get(fxf, {}))

    print("Extracting dataset names")