def genbif_from_ballots(argsdict: dict):
    """
    This function is used when no cvr exists and we need to scan all the
    ballots to create bifs. This is a slow process, so we create
    tasklist for lambdas processing.
    """

    if argsdict['use_s3_results']:
        DB.delete_dirname_files_filtered(dirname='bif', s3flag=True, file_pat=None)
        DB.delete_dirname_files_filtered(dirname='bif', subdir='chunks', s3flag=True, file_pat=None)

    # Clear the lambda tracker cache
    if argsdict.get('use_lambdas'):
        LambdaTracker.clear_requests()

    max_chunk_size = argsdict.get('genbif_ballots_per_chunk', 200)
    max_concurrency = argsdict.get('max_lambda_concurrency', 1000)
    chunk_limit = argsdict.get('genbif_chunk_limit', None)
    num_archives = len(argsdict['source'])
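    # apportion the overall lambda concurrency budget across the source archives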
    max_concurrency = max_concurrency // num_archives

    utils.sts('Generating tasklists to scan ballots to create bifs')
    for archive_idx, source in enumerate(argsdict['source']):
        archive_basename = os.path.basename(source)
        archive = open_archive(argsdict, archive_basename) # will open on s3 directly if using s3
        file_paths = get_image_file_paths_from_archive(archive)
        utils.sts(f"Total of {len(file_paths)} image files in the archive")

        filelist = []
        for index, _file_path in enumerate(file_paths):
            _, ballot_file_paths = get_next_ballot_paths(index, archive, file_paths)
            #_, _, ballot_id = analyze_ballot_filepath(ballot_file_paths[0])

            filelist.append(';'.join(ballot_file_paths))
        utils.sts(f"Total of {len(filelist)} ballots in the archive")
        archive.close()

        chunks_lol = utils.split_list_into_chunks_lol(item_list=filelist, max_chunk_size=max_chunk_size, max_concurrency=max_concurrency)
        num_chunks = len(chunks_lol)
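        # The exact chunking rule lives in utils.split_list_into_chunks_lol and is not shown
        # here; a plausible sketch (an assumption, not verified) is that the chunk size grows
        # past max_chunk_size only when needed to keep the chunk count within max_concurrency:
        #   chunk_size = max(max_chunk_size, math.ceil(len(filelist) / max_concurrency))
        #   chunks_lol = [filelist[i:i + chunk_size] for i in range(0, len(filelist), chunk_size)]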
        utils.sts(f"Split into {num_chunks} chunks with a maximum of {max_chunk_size} ballots each.")

        # The loop below may delegate processing to lambdas.
        # Consistency checks should be performed here (or before this point) to avoid costly
        # errors, such as:
        #   1. the specified output bucket exists and is writeable.
        # Ideally these checks would be made as the settings file is initially processed.
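        # A minimal sketch of such a check, assuming boto3 is available and that the
        # destination bucket name is carried in argsdict (the 'results_bucket' key here
        # is hypothetical, not an actual setting of this codebase):
        #
        #   import boto3
        #   s3_client = boto3.client('s3')
        #   bucket = argsdict.get('results_bucket')
        #   try:
        #       s3_client.put_object(Bucket=bucket, Key='__write_test__.tmp', Body=b'')
        #       s3_client.delete_object(Bucket=bucket, Key='__write_test__.tmp')
        #   except Exception as err:
        #       utils.exception_report(f"Output bucket '{bucket}' is not writeable: {err}")
        #       sys.exit(1)
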
        for chunk_idx, filelist in enumerate(chunks_lol):
            if chunk_limit and chunk_idx >= chunk_limit:
                break
            utils.sts(f"Processing chunk #{chunk_idx} with {len(filelist)} ballots", 3)
            
            build_one_chunk(
                argsdict=argsdict,
                dirname='bif',
                subdir='chunks',
                chunk_idx=chunk_idx, 
                filelist=filelist, 
                group_name=archive_basename, 
                task_name='bif',
                incremental=argsdict['incremental_genbif'],
                )   # this may delegate to one lambda
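            # With 'one_lambda_first' set, the very first chunk of the first archive is run
            # alone and awaited, to verify lambda delegation works before fanning out fully.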
            if argsdict['use_lambdas'] and not archive_idx and not chunk_idx and argsdict['one_lambda_first']:
                if not wait_for_lambdas(argsdict, task_name='bif'):
                    utils.exception_report("task 'bif' failed delegation to lambdas.")
                    sys.exit(1)           


    wait_for_lambdas(argsdict, task_name='bif')      # @@ wait_for_lambdas should be enhanced to track specific tasks or better use SQS messaging.
    
    for archive_idx, source in enumerate(argsdict['source']):
        archive_rootname = os.path.splitext(os.path.basename(source))[0]

        dirname = 'bif'

        DB.combine_dirname_chunks(
            dirname=dirname, subdir='chunks', 
            dest_name=f"{archive_rootname}_{dirname}.csv", 
            file_pat=fr"{archive_rootname}_{dirname}_chunk_\d+\.csv")
            
        logs.get_and_merge_s3_logs(dirname='bif', rootname='log', chunk_pat=fr'{archive_rootname}_{dirname}_chunk_\d+', subdir='chunks')
        logs.get_and_merge_s3_logs(dirname='bif', rootname='exc', chunk_pat=fr'{archive_rootname}_{dirname}_chunk_\d+', subdir='chunks')
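
# Example invocation (illustrative only; the archive path and the full set of
# required settings keys depend on the settings file processed upstream):
#
#   argsdict = {
#       'source': ['s3://my-ballot-archives/county_ballots.zip'],   # hypothetical archive
#       'use_s3_results': True,
#       'use_lambdas': True,
#       'one_lambda_first': True,
#       'incremental_genbif': False,
#       'genbif_ballots_per_chunk': 200,
#       'max_lambda_concurrency': 1000,
#   }
#   genbif_from_ballots(argsdict)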


def build_dirname_tasks(argsdict, dirname, subdir=None, ballots_per_chunk=200):
    """ with all bif chunks created, scan them and create tasks in dirname.
        each task contains records from bif for ballots to be included
        in the processing chunk. These are written to extraction_tasklists folder.
        For lambdas processing mode, these tasklists could launch an extraction lambda
    """

    utils.sts(f"Building tasklists to {dirname}/{subdir}...", 3)

    bifpaths = get_biflist(argsdict)     # returns either s3path list or pathlist, depending on argsdict['use_s3_results']
    max_concurrency = argsdict.get('max_lambda_concurrency', 1000)

    tasks_queued = 0
    total_ballots_queued = 0
    
    DB.delete_dirname_files_filtered(dirname=dirname, subdir=subdir)

    for bif_pathname in bifpaths:
        utils.sts(f"  Processing bif {bif_pathname}...", 3)
        BIF.load_bif(bif_pathname=bif_pathname)        # uses s3 based on DB.MODE
        bif_basename = os.path.basename(bif_pathname)
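        # bif files are written as '<archive_rootname>_bif.csv' (see the combine step in
        # genbif_from_ballots above), so stripping the '_bif.csv' suffix recovers the archive name.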
        archive_name = re.sub(r'_bif\.csv$', '', bif_basename)

        reduced_df = BIF.df_without_corrupted()
        
        # the following should be moved to bif generation phase (generally not done)
        reduced_df = set_style_from_party_if_enabled(argsdict, reduced_df)

        # the following reduces the ballots selected based on input
        # parameters and whether the ballots have been successfully mapped.
        filtered_df = filter_extraction_ballots(argsdict, reduced_df)
        
        sorted_df = filtered_df.sort_values(by=['cvr_file'])   # default ascending sort; returns a new DataFrame

        num_ballots_in_bif = len(BIF.df.index)
        num_to_be_extracted = len(sorted_df.index)
        num_excluded = num_ballots_in_bif - num_to_be_extracted

        utils.sts(f"Total of {num_ballots_in_bif} ballots, {num_to_be_extracted} to be extracted, {num_excluded} ballots excluded.", 3)
        if not num_to_be_extracted:
            continue
            
        chunks_lodf = utils.split_df_into_chunks_lodf(df=sorted_df, max_chunk_size=ballots_per_chunk, max_concurrency=max_concurrency)
        num_chunks = len(chunks_lodf)

        utils.sts(f"Split into {num_chunks} chunks, each with no more than {ballots_per_chunk} ballots.")

        for chunk_index, chunk_df in enumerate(chunks_lodf):
            chunk_name = f"{archive_name}_chunk_{chunk_index:04d}.csv"
            utils.sts(f"Creating {dirname} chunk: {chunk_name}...", 3)
            
            DB.save_data(
                data_item=chunk_df, 
                dirname=dirname,
                subdir=subdir,
                name=chunk_name, 
                )
            tasks_queued += 1
            total_ballots_queued += len(chunk_df.index)


    utils.sts(f"Total of {tasks_queued} {dirname} tasks queued with a total of {total_ballots_queued} ballots.", 3)
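
# Example invocation (illustrative only; the docstring above indicates tasklists land in
# the extraction_tasklists folder, but any dirname/subdir accepted by DB.save_data works):
#
#   build_dirname_tasks(argsdict, dirname='extraction_tasklists', subdir='chunks', ballots_per_chunk=200)
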
    def clear_requests(cls):
        cls.lambda_requests = {}
        # Remove the lambda_tracker folder so that history is clean
        DB.delete_dirname_files_filtered(dirname='lambda_tracker', s3flag=True)
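
    # Typically called as LambdaTracker.clear_requests() before queuing a new
    # batch of lambda requests (see genbif_from_ballots above).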