def main(db_conn_str, group_1_id, group_2_id):
    """Compare two judged groups and print the comparison as JSON.

    For each group the queries-and-judgments data is read from the
    database and summarised with ``stats``.  The printed JSON object
    contains:

    - one entry per group, keyed by ``"<group name> <group id>"``,
      holding that group's stats;
    - ``num_total_diffs`` and ``num_unique_qrps`` as returned by
      ``_count_num_diff`` for the two groups' data;
    - ``ndcg_delta``: group 2's ``avg_ndcg_at_5`` minus group 1's.

    Args:
        db_conn_str: Connection string passed to ``psycopg2.connect``.
        group_1_id: ID of the first (baseline) group.
        group_2_id: ID of the second group, compared against the first.
    """
    db_conn = psycopg2.connect(db_conn_str)
    try:
        experiment_stats = {}
        ideals_df = pd.read_sql(query_ideals_query(), db_conn)

        # Resolve each group's name once and reuse it below instead of
        # querying for it a second time inside the loop.
        groups = [
            (group_id, group_name(db_conn, group_id))
            for group_id in (group_1_id, group_2_id)
        ]

        group_data = []
        ndcgs = []
        for group_id, name in groups:
            data_df = pd.read_sql(
                group_queries_and_judgments_query(
                    db_conn, group_id, "domain_catalog"),
                db_conn)
            group_data.append(data_df)

            group_stats = stats(data_df, ideals_df)
            experiment_stats[name + " " + str(group_id)] = group_stats
            ndcgs.append(group_stats["avg_ndcg_at_5"])

        total_differences, unique_qrps = _count_num_diff(
            group_data[0], group_data[1])
    finally:
        # psycopg2 connections are not closed automatically (even their
        # ``with`` form only ends the transaction), so close explicitly
        # on both the success and the error path.
        db_conn.close()

    experiment_stats["num_total_diffs"] = total_differences
    experiment_stats["num_unique_qrps"] = unique_qrps
    # Positive delta means group 2 scored higher than group 1.
    experiment_stats["ndcg_delta"] = ndcgs[1] - ndcgs[0]

    print(simplejson.dumps(experiment_stats, indent=4 * ' '))
parser.add_argument('-o', '--outfile', dest='outfile', type=str, required=True, help='Name of CSV file to which data will be written.') parser.add_argument('-D', '--db_conn_str', required=True, help='Database connection string') args = parser.parse_args() db_conn = psycopg2.connect(args.db_conn_str) print("Reading metadata") fxf_metadata_dict = get_fxf_metadata_mapping(db_conn) print("Reading all judged data for group") data_df = pd.read_sql( group_queries_and_judgments_query(db_conn, args.group_id, "domain_catalog"), db_conn) print("Counting irrelevants") data_df["num_irrelevants"] = data_df["raw_judgments"].apply( lambda js: sum([1 for j in js if "judgment" in j and j["judgment"] < 1])) data_df = data_df[data_df["num_irrelevants"] >= 2] print("Adding metadata to dataframe") data_df["metadata"] = data_df["result_fxf"].apply( lambda fxf: fxf_metadata_dict.get(fxf, {})) print("Extracting dataset names")
'--db_conn_str', required=True, help='Database connection string') args = parser.parse_args() db_conn = psycopg2.connect(args.db_conn_str) print("Reading metadata") fxf_metadata_dict = get_fxf_metadata_mapping(db_conn) print("Reading all judged data for group") data_df = pd.read_sql( group_queries_and_judgments_query(db_conn, args.group_id, "domain_catalog"), db_conn) print("Counting irrelevants") data_df["num_irrelevants"] = data_df["raw_judgments"].apply(lambda js: sum( [1 for j in js if "judgment" in j and j["judgment"] < 1])) data_df = data_df[data_df["num_irrelevants"] >= 2] print("Adding metadata to dataframe") data_df["metadata"] = data_df["result_fxf"].apply( lambda fxf: fxf_metadata_dict.get(fxf, {})) print("Extracting dataset names")