def cmpcvr_by_one_tasklist(argsdict, tasklist_name):
    """Compare one marks chunk against the CVR and save the per-chunk results.

    This is the primary function to be run inside lambda for cmpcvr.

    Args:
        argsdict: job arguments; passed through to CVR.load_cvrs_to_df() and
            compare_chunk_with_cvr().
        tasklist_name: chunk identifier, like "{archive_root}_chunk_{chunk_idx}".

    Returns:
        None. Three result files are written under cmpcvr/chunks.

    Exits the process with status 1 (after logging and printing the stack)
    if the marks chunk for tasklist_name does not exist.
    """
    # set s3 vs local mode -- this probably better done long before this point.
    DB.set_DB_mode()

    contests_dod = DB.load_data('styles', 'contests_dod.json')

    # Only load the CVR data when the shared data frame is still empty.
    if CVR.data_frame.empty:
        CVR.load_cvrs_to_df(argsdict)

    # marks/chunks/{archive_root}_chunk_{chunk_idx}.csv
    # individual marks chunks. These are kept for cmpcvr
    if not DB.file_exists(file_name=tasklist_name + '.csv', dirname='marks', subdir="chunks"):
        utils.sts(f"Logic Error: no marks df missing: {tasklist_name}")
        traceback.print_stack()
        sys.exit(1)

    audit_df = DB.load_data(dirname='marks', subdir="chunks", name=tasklist_name, format='.csv')

    # ---------------------------------------
    # primary call of this function performs chunk comparison
    overvotes_results, disagreed_results, blank_results = compare_chunk_with_cvr(
        argsdict=argsdict,
        contests_dod=contests_dod,
        cvr_df=CVR.data_frame,
        audit_df=audit_df,
        chunk_name=tasklist_name,
    )
    # ---------------------------------------

    # Files written (one per result kind):
    #   cmpcvr/chunks/disagreed-{archive_root}_chunk_{chunk_idx}.csv   # individual cmpcvr disagreed chunks
    #   cmpcvr/chunks/overvotes-{archive_root}_chunk_{chunk_idx}.csv   # individual cmpcvr overvote chunks
    #   cmpcvr/chunks/blanks-{archive_root}_chunk_{chunk_idx}.csv      # individual cmpcvr blank chunks
    #
    # Each result is paired with its own file prefix so the overvotes file
    # receives overvotes_results (it was previously written with
    # disagreed_results by mistake).
    results_by_prefix = (
        ("disagreed", disagreed_results),
        ("overvotes", overvotes_results),
        ("blanks", blank_results),
    )
    for prefix, results in results_by_prefix:
        DB.save_data(
            data_item=results,
            dirname='cmpcvr',
            subdir='chunks',
            name=f"{prefix}-{tasklist_name}.csv",
        )
def is_dirname_chunk_built(dirname, group_name, chunk_idx: int, s3flag=None):
    """Report whether the chunk file for (dirname, group_name, chunk_idx) exists.

    The file name comes from create_dirname_chunk_filename() and is looked up
    in the 'chunks' subdir of dirname via DB.file_exists(); s3flag is passed
    through to that call unchanged.
    """
    return DB.file_exists(
        create_dirname_chunk_filename(dirname, group_name, chunk_idx),
        dirname,
        subdir='chunks',
        s3flag=s3flag,
    )