Python sts Beispiele

Programmiersprache: Python

Namespace / Paketname: utilities.logs

Methode / Funktion: sts

Beispiele auf hotexamples.com: 11

Python sts - 11 Beispiele gefunden. Dies sind die am besten bewerteten Python Beispiele für die utilities.logs.sts, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Beispiel #1

Datei anzeigen

Datei: votes_extractor.py Projekt: iero1997/audit-engine-s3-and-lambdas-dev

def extractvote_by_tasklists(argsdict: dict):
    """
    ACTIVE
    Run the 'extractvote' task over every extraction tasklist.

    This replaces the extractvotes function. Tasklists are the .csv files
    listed from the 'tasks' subdir of 'marks' (names starting with '~' are
    ignored). They are generated by reviewing the BIF tables, and each
    tasklist creates a separate f"marks_{tasklist_name}.csv" file in the
    results folder.

    Tasklists are grouped per BIF: a tasklist belongs to a BIF when its name
    starts with the BIF name with '_bif' removed. Every tasklist is handed to
    build_one_chunk() as its own chunk. After all chunks have been issued the
    function waits for the lambdas, combines the per-archive chunks and merges
    the 'log' and 'exc' chunk logs.

    argsdict keys read here: 'use_lambdas', 'one_lambda_first'.
    Exits the process with status 1 if the very first chunk fails while
    'one_lambda_first' is set.
    """
    logs.sts('Extracting marks from extraction tasklists', 3)

    all_tasklists = DB.list_files_in_dirname_filtered(
        dirname='marks',
        subdir='tasks',
        file_pat=r'^[^~].*\.csv$',
        fullpaths=False,
    )
    total_num = len(all_tasklists)
    utils.sts(f"Found {total_num} taskslists", 3)

    if argsdict['use_lambdas']:
        # start from a clean tracker state before delegating anything.
        LambdaTracker.clear_requests()

    bif_names = get_biflist(no_ext=True)

    for bif_idx, bifname in enumerate(bif_names):
        archive_name = re.sub(r'_bif', '', bifname)
        archive_tasklists = [name for name in all_tasklists if name.startswith(archive_name)]

        for chunk_idx, tasklist_name in enumerate(archive_tasklists):

            # build_one_chunk may delegate to lambdas and return immediately
            # when 'use_lambdas' is enabled; otherwise it blocks until the
            # chunk is completed.
            build_one_chunk(
                argsdict,
                dirname='marks',
                chunk_idx=chunk_idx,
                filelist=[tasklist_name],
                group_name=bifname,
                task_name='extractvote',
                incremental=False,
            )

            # Only the very first chunk of the very first BIF is used as the
            # canary that must finish before the rest are launched.
            if chunk_idx == 0 and bif_idx == 0 and argsdict['one_lambda_first']:
                first_chunk_ok = wait_for_lambdas(argsdict, task_name='extractvote')
                if not first_chunk_ok:
                    utils.exception_report("task 'extractvote' failed delegation to lambdas.")
                    sys.exit(1)

    wait_for_lambdas(argsdict, task_name='extractvote')

    utils.combine_dirname_chunks_each_archive(argsdict, dirname='marks')
    for rootname in ('log', 'exc'):
        logs.get_and_merge_s3_logs(dirname='marks', rootname=rootname, chunk_pat=r"_chunk_\d+", subdir="chunks")

Beispiel #2

Datei anzeigen

Datei: LambdaTracker.py Projekt: iero1997/audit-engine-s3-and-lambdas-dev

def lambda_report_status(task_args, request_id, status, error_info=None):
    """Write a tracker file recording the status of one lambda request.

    The tracker path is derived from task_args and status. The file content is
    a JSON object with the keys 'request_id', 'status', 'error_info' and
    'task_args'. When error_info is truthy the same JSON text is also printed
    so that it appears in the process output (intended for CloudWatch).
    """
    destination_s3path = create_lambda_tracker_s3path_by_task_args(task_args, status)

    report = {
        "request_id": request_id,
        "status": status,
        "error_info": error_info,
        'task_args': task_args,
    }
    serialized_report = json.dumps(report)
    s3utils.write_buff_to_s3path(destination_s3path, serialized_report)

    if error_info:
        # log to cloudwatch as well so that errors can be tracked there.
        print(serialized_report)

    logs.sts(f"Tracker file written with status='{status}'", 3)

Beispiel #3

Datei anzeigen

    def get_style_nums_with_templates_s3(argsdict):
        """Return list of style_nums based on styles found in S3 bucket.

        The S3 directory for 'styles' is listed with a pattern that matches
        files ending in '-template-1.png' located one folder level below the
        styles prefix; the folder name is the pattern's capture group.

        argsdict is accepted for interface compatibility but is not read here.
        """
        # `re` is imported locally so this method does not depend on the
        # module-level imports, which are not visible from here.
        import re

        logs.sts("Compiling style_nums with templates:", 3)
        from aws_lambda import s3utils

        s3dirpath = DB.dirpath_from_dirname('styles', s3flag=True)
        s3dict = s3utils.parse_s3path(s3dirpath)

        # The prefix is literal path text, so it must be escaped before it is
        # embedded in the regex; otherwise characters such as '.' or '+' in a
        # job or bucket path would be interpreted as regex operators.
        prefix_pat = re.escape(s3dict['prefix'])

        style_nums = s3utils.list_files_in_prefix_s3(
            s3dirpath, file_pat=fr".*{prefix_pat}([^/]+)/.*\-template\-1\.png$")

        logs.sts(f"Total of {len(style_nums)} style_nums found.", 3)
        return style_nums

Beispiel #4

Datei anzeigen

Datei: literal_fuzzy_matching_utils.py Projekt: iero1997/audit-engine-s3-and-lambdas-dev

def fuzzy_compare_str(correct_str,
                      ocr_str,
                      thres=80,
                      justify='full',
                      method='levdist') -> tuple:  #bool, metric
    """
    Compare a known-correct string with an OCR'd string that may have mistakes.

    method selects the comparison:
        'regex'   - build a fuzzy regex from correct_str and match ocr_str
                    against it; returns (match result, None).
        'table'   - score with compare_words(); returns (score > thres, None).
        'levdist' - score with lev.ratio(); returns (score >= thres, score).
    justify ('left', 'right' or anything else for 'full') only affects
    'levdist': the OCR string is trimmed on the left or right to the length
    of the shorter of the two strings before scoring.

    Any other method prints an error, prints the stack and exits with status 1.

    NOTE(review): if `lev.ratio` is the Levenshtein package's ratio (range
    0..1), the default thres=80 can never be reached with 'levdist' --
    confirm the intended threshold scale against the callers.
    """
    # Single-line previews for the log; only the correct string is truncated.
    shown_correct = correct_str.replace("\n", " ")[:50]
    shown_ocr = ocr_str.replace("\n", " ")
    logs.sts(f"fuzzy_compare_str justify: {justify}:\n"
             f"correct: '{shown_correct}'\n"
             f"ocr:     '{shown_ocr}'")

    if method == 'regex':
        # This algorithm assumes no special characters in the correct string
        # and it is relatively greedy: the correct string is scanned to create
        # a regex, then the OCR'd string is matched against that regex.
        return make_fuzzy_regex(correct_str).match(ocr_str), None

    if method == 'table':
        return compare_words(correct_str, ocr_str) > thres, None

    if method != 'levdist':
        print(f"Logic Error: Unrecognized method:{method}\n")
        traceback.print_stack()
        sys.exit(1)

    shorter_len = min(len(correct_str), len(ocr_str))
    if justify == 'left':
        candidate = ocr_str[:shorter_len]
    elif justify == 'right':
        candidate = ocr_str[-shorter_len:]
    else:
        candidate = ocr_str

    similarity = lev.ratio(correct_str, candidate)
    formatted_similarity = "%1.5f" % similarity
    logs.sts(f" levratio = {formatted_similarity}", 3)
    return similarity >= thres, similarity

Beispiel #5

Datei anzeigen

def post_gentemplate_cleanup(argsdict):
    """Merge the per-style outputs produced by template generation.

    Separated from gentemplates_by_tasklists to allow individual testing.
    Normally chunks are combined, but for styles generation that is only
    needed for the roismap.

    When argsdict['include_maprois'] is truthy, the per-style roismap files
    are combined into 'roismap.csv' and the good/failed map reports are
    merged and their counts logged. The 'log' and 'exc' style logs are always
    merged.
    """
    logs.sts("gentemplates_by_tasklists completed.\n", 3)

    # style log names look like exc_11010_styles_chunk_84.txt;
    # the merge helper downloads file_pat=fr"{rootname}_{chunk_pat}\.txt"
    styles_chunk_pat = r'\d+_styles_chunk_\d+'

    if argsdict['include_maprois']:
        logs.sts("Combining roismap for each style into a single .csv file.", 3)
        DB.combine_dirname_chunks(
            dirname='styles',
            subdir="roismap",
            dest_name='roismap.csv',
            file_pat=r'_roismap\.csv',
        )

        mapped_ok_count = logs.get_and_merge_s3_logs(
            dirname='styles', rootname='map_report', chunk_pat=styles_chunk_pat, subdir='logs_good_maps')
        mapped_failed_count = logs.get_and_merge_s3_logs(
            dirname='styles', rootname='map_report', chunk_pat=styles_chunk_pat, subdir='logs_failed_maps')

        logs.sts(f"{mapped_ok_count} styles successfully mapped; {mapped_failed_count} styles did not fully map.", 3)

    # style logs are placed in one folder in styles
    for rootname in ('log', 'exc'):
        logs.get_and_merge_s3_logs(dirname='styles', rootname=rootname, chunk_pat=styles_chunk_pat, subdir='logs')

Beispiel #6

Datei anzeigen

Datei: bif_utils.py Projekt: iero1997/audit-engine-s3-and-lambdas-dev

def filter_extraction_ballots(argsdict, reduced_df):
    """ given df including a reduced set of ballots from BIF,
        further reduce these ballots to those that have styles
        mapped and meet input parameter specifications.

        argsdict keys read:
            'include_style_num'          - optional; keep only these styles.
            'exclude_style_num'          - optional; drop these styles.
            'include_bmd_ballot_type'    - required; if falsy, drop rows with is_bmd == 1.
            'include_nonbmd_ballot_type' - required; if falsy, keep only rows with is_bmd == 1.

        reduced_df is used as a pandas-style DataFrame with 'style_num' and
        'is_bmd' columns. The style filter is only applied when an include or
        exclude list is given; without either, rows are not restricted to the
        mapped styles.

        Returns the filtered DataFrame.
    """

    logs.sts(f"Total number of ballots in BIF: {len(reduced_df.index)}", 3)

    all_mapped_styles = DB.get_style_nums_with_templates(argsdict)
    utils.sts(f"There are a total of {len(all_mapped_styles)} styles mapped.", 3)

    included_styles = argsdict.get('include_style_num')
    if included_styles:
        filtered_styles = [i for i in all_mapped_styles if i in included_styles]
        utils.sts(f"The settings file includes list of styles to include. Filtered to {len(filtered_styles)} styles.", 3)
    else:
        filtered_styles = all_mapped_styles

    excluded_styles = argsdict.get('exclude_style_num')
    if excluded_styles:
        filtered_styles = [i for i in filtered_styles if i not in excluded_styles]
        utils.sts(f"The settings file includes list of styles to exclude. Filtered to {len(filtered_styles)} styles.", 3)

    if excluded_styles or included_styles:
        reduced_df = reduced_df[reduced_df['style_num'].isin(filtered_styles)]

    # The two messages below report row counts of reduced_df, i.e. ballots,
    # so they are labelled "ballots" (they previously said "styles").
    if not argsdict['include_bmd_ballot_type']:
        reduced_df = reduced_df.loc[reduced_df['is_bmd'] != 1]
        utils.sts(f"The settings file excludes BMD ballots. Filtered to {len(reduced_df.index)} ballots.", 3)
    if not argsdict['include_nonbmd_ballot_type']:
        reduced_df = reduced_df.loc[reduced_df['is_bmd'] == 1]
        utils.sts(f"The settings file excludes nonBMD ballots. Filtered to {len(reduced_df.index)} ballots.", 3)

    logs.sts(f"Total number of ballots after filters applied for extraction: {len(reduced_df.index)}", 3)

    return reduced_df

Beispiel #7

Datei anzeigen

def gentemplates_by_tasklists(argsdict):
    """
    ACTIVE
    This replaces the gentemplates function.
    Given the template tasklists (either the single combined JSON file or one
    .csv file per style in the 'tasks' subdir of 'styles'), process each style
    in turn by handing it to build_one_chunk() with task_name='gentemplate'.
    The style is the name of the tasklist.

    Tasklists are generated by reviewing the BIF tables.

    Each delegation to lambdas (or performed locally) will include
    subprocesses according to the argsdict parameters:

        include_gentemplate_tasks       - include the generation of tasklists prior to delegation.
        use_single_template_task_file   - means a single JSON file will be created instead of separate task files on s3
                                            and a portion of that task list will be passed to each lambda
        include_gentemplate             - for each style, combine ballots to create a base template
        include_genrois                 - generate regions of interest (ROIs) and OCR
        include_maprois                 - map the official contest names to what is read on the ballot to create roismap

    Other argsdict keys read directly here:
        use_lambdas, one_lambda_first, incremental_gentemplate,
        include_style_num, exclude_style_num (the last two only in the
        single-task-file mode).

    When lambdas are used and 'one_lambda_first' is set, the first delegated
    style must finish successfully before the remaining ones are launched;
    otherwise the process exits with status 1.
    """
    utils.sts('Generating style templates from a combined set of ballot images', 3)

    # this loads and parses the EIF
    contests_dod = create_contests_dod(argsdict)
    DB.save_data(data_item=contests_dod, dirname='styles', name='contests_dod.json')

    # style_to_contests_dol
    # if the CVR is available, we can get a list of styles that are associated with a ballot_type_id.
    # this may be enough to know exactly what contests are on a given ballot, but only if the
    # style which keys this list is also directly coupled with the card_code read from the ballot.
    # In some cases, such as Dane County, WI, this is a 1:1 correspondence. But SF has a complex
    # style conversion which is nontrivial to figure out.
    # thus, this is still needed in style discovery.

    style_to_contests_dol = DB.load_data(dirname='styles', name='CVR_STYLE_TO_CONTESTS_DICT.json', silent_error=True)
    if not style_to_contests_dol:
        logs.sts("CVR_STYLE_TO_CONTESTS_DICT.json not available. Trying to convert CVR to styles", 3)
        style_to_contests_dol = convert_cvr_to_styles(argsdict, silent_error=True)
        if not style_to_contests_dol:
            logs.sts("Unable to convert CVR to style_to_contests_dol, trying manual_styles_to_contests", 3)
            style_to_contests_dol = get_manual_styles_to_contests(argsdict, silent_error=True)

        if style_to_contests_dol:
            # cache whatever was derived so the next run can load it directly.
            DB.save_data(data_item=style_to_contests_dol, dirname='styles', name='CVR_STYLE_TO_CONTESTS_DICT.json')

    if not style_to_contests_dol:
        logs.sts("style_to_contests_dol unavailable. full style search is required.", 3)

    # Read once with .get() so that a missing key is uniformly treated as
    # "do not use lambdas" everywhere in this function.
    use_lambdas = argsdict.get('use_lambdas')
    if use_lambdas:
        LambdaTracker.clear_requests()

    # True until the first delegated chunk has been waited for.
    first_pass = True

    if argsdict['use_single_template_task_file']:
        template_tasklists_dolod = DB.load_data(dirname='styles', name="template_tasklists_dolod.json")
        total_num = len(template_tasklists_dolod)
        utils.sts(f"Found {total_num} taskslists", 3)

        include_style_nums = argsdict.get('include_style_num')
        exclude_style_nums = argsdict.get('exclude_style_num')

        for chunk_idx, (style_num, style_lod) in enumerate(template_tasklists_dolod.items()):
            if not style_num:
                continue

            if (include_style_nums and style_num not in include_style_nums) or \
                    (exclude_style_nums and style_num in exclude_style_nums):
                continue

            if argsdict.get('incremental_gentemplate', False) and DB.template_exists(style_num):
                utils.sts(f"Style {style_num} already generated, skipping...", 3)
                continue

            utils.sts(f"Processing template for style {style_num} #{chunk_idx}: of {total_num} ({round(100 * (chunk_idx+1) / total_num, 2)}%)")

            # the function call below will delegate to lambdas if use_lambdas is True.
            build_one_chunk(argsdict,
                dirname='styles',
                subdir=style_num,
                chunk_idx=chunk_idx,
                filelist=[style_lod],            # only one style per lambda chunk, but can execute gentemplate, genrois, and maprois for same style.
                group_name=style_num,
                task_name='gentemplate',
                incremental=False,
                )

            if use_lambdas and first_pass and argsdict['one_lambda_first']:
                if not wait_for_lambdas(argsdict, task_name='gentemplate'):
                    utils.exception_report("task 'gentemplate' failed delegation to lambdas.")
                    sys.exit(1)
                first_pass = False

    else:
        tasklists = DB.list_files_in_dirname_filtered(dirname='styles', subdir="tasks", file_pat=r'.*\.csv', fullpaths=False)
        total_num = len(tasklists)
        utils.sts(f"Found {total_num} taskslists", 3)

        for chunk_idx, tasklist_name in enumerate(tasklists):
            # a bare '.csv' entry has no style name; skip it.
            if tasklist_name == '.csv':
                continue

            style_num = os.path.splitext(os.path.basename(tasklist_name))[0]

            # Use the argsdict passed to this function (not the module-level
            # args.argsdict) so both modes honour the same settings.
            if argsdict.get('incremental_gentemplate', False) and DB.template_exists(style_num):
                utils.sts(f"Style {style_num} already generated, skipping...", 3)
                continue

            utils.sts(f"Processing template for style {style_num} #{chunk_idx}: of {total_num} ({round(100 * (chunk_idx+1) / total_num, 2)}%)")

            # the function call below will delegate to lambdas if use_lambdas is True.
            build_one_chunk(argsdict,
                dirname='styles',
                chunk_idx=chunk_idx,
                filelist=[tasklist_name],
                group_name=style_num,
                task_name='gentemplate',
                incremental=False,
                )

            if use_lambdas and first_pass and argsdict['one_lambda_first']:
                if not wait_for_lambdas(argsdict, task_name='gentemplate'):
                    utils.exception_report("task 'gentemplate' failed delegation to lambdas.")
                    sys.exit(1)
                first_pass = False

    wait_for_lambdas(argsdict, task_name='gentemplate')
    post_gentemplate_cleanup(argsdict)

Beispiel #8

Datei anzeigen

def main():
    utils.show_logo()
    print(  f"\n\n{'=' * 50}")

    argsdict = args.get_args()          # parses input_file as specifed in CLI using arg_specs.csv
    args.argsdict = argsdict
    
    print("argsdict:")
    print(pprint.pformat(argsdict))

    print(  f"\n\n{'=' * 50}")

    if (argsdict.get('self_test')):
        self_test.self_test(argsdict)


    """ The paths of archives is normalized to allow the archives to be either local or on s3.
        'archives_folder_path' -- path to folder on local system.
        'archives_folder_s3path' -- s3path to folder on s3
        'source' list are basenames, without path, but including extension.
        
    """


    # if argsdict['archives_folder_path'] and not argsdict['source']:
        # # create a list of source archives in the source folder.
        # srcdict = {}
        # dirdict = utils.get_dirdict(argsdict['archives_folder_path'], '.zip')
        # for name, path in dirdict.items():

            # if (name in argsdict['exclude_archives'] or
                # argsdict['include_archives'] and not name in argsdict['include_archives']):
                # continue
            # srcdict[name] = path

        # argsdict['source'] = list(srcdict.values())
        # argsdict['srcdict'] = srcdict
        # utils.sts(f"input directive 'source' resolved to: {argsdict['source']}", 3)

    op = argsdict.get('op', 'all').lower()
    
    DB.set_DB_mode()
    
    """ =======================================================================
        PRIMARY API ENTRY POINTS
        
        Each one of the following relies on a job file which provides the settings
        as parameter,value in csv file, where comments are allowed preceded by #.
        Thus the api must provide 
            -i path             location of settings file -- could be file on s3.
            -op operation       string like 'genbif_from_cvr'
            
        Each function produces:
            log.txt                 appends extensive status reports.
            exception_report.txt    appends each exception encountered. 
                                        exceptions to processing and not python exceptions, per se.
                                        
            as well as other files, noted below.
            
        Initial implementation will include one major intry point with operation selection as follows:
            'genbif_from_cvr'           (Fast)
            'genbif_from_ballots'       (Slow)
            'create_bif_report'         (Fast)
            'gentemplates'              (Slow)
            'genmaprois'                (Somewhat slow)
            'extractvote'               (Very slow)
            'genreport'                 (fast)
            'cmpcvr_and_report'         (somewhat slow)
            'get_status'                (fast) - return status of slow functions.    
                op='get_status' ref='function'
                    where function = one of 'genbif_from_ballots', 'gentemplates', 'genmaprois', 'extractvote'
            
        In the functions below, argsdict is established from the settings file.
        
    """

    if op == 'copy_config_files_to_s3':
        """ This function will copy local config files in EIFs to s3, to simulate
            interaction with the frontend website, which will upload and place files
            s3://us-east-1-audit-engine-jobs/{job_name}/config/ 
            
            Files to be placed there:
                JOB settings file
                EIF file
                BOF file
                manual_styles_to_contests
                style_lookup_table
                
            In local mode running these are in either EIFs/ or input_files/ in repo folder.
                
        """
        DB.upload_file_dirname('config', argsdict['eif'])
        DB.upload_file_dirname('config', argsdict['bof'])
        DB.upload_file_dirname('config', argsdict['manual_styles_to_contests_filename'])
        DB.upload_file_dirname('config', argsdict['style_lookup_table_filename'])
        DB.upload_file_dirname('config', argsdict['input'], local_dirname='input_files')
            
        
        
        
    elif op == 'precheck_job_files':
        """ This function simply does a precheck of the job files that exist
            in the config folder for this job on s3.
        """
        pass
    
    
    
    
    
    
    elif op == 'genbif_from_cvr':
        """ 
        If CVR file(s) are provided with style information included, 
        this operation builds "ballot information file" BIF data by reviewing the CVR
        May also use path information of ballots in archives for precincts, groups, party.
        For Dominion, scan CVR JSON chunks and fill in info about ballots.
        Creates one .csv file for each archive in folder bif.
        This is a relatively fast operation that can be completed typically in a matter of seconds
        Result:
            BIF data file ready for BIF report.
            log
            exception report
        """
        genbif_from_cvr(argsdict)


    elif op == 'genbif_from_ballots':
        """ 
        If no CVR is available, we must scan the ballots to generate the bif.
        Each ballot is reviewed and style information is read from the ballots.
        May also use path information of ballots in archives for precincts, groups, party.
        This can be done by lambdas and should complete within minutes but
        typically will not complete during a single REST post/response.
        Result:
            BIF ready to produce BIF report.
            separate folder for each failing ballot to allow investigation.
            log
            exception report
        """
        genbif_from_ballots(argsdict)
        
    # elif op == 'get_status':
        # """ This function provides status operation in terms of % complete.
        # """
        # if ref == 'genbif_from_ballots':
            # return get_status_genbif_from_ballots(argsdict)
        # elif ref == 'gentemplates':
            # return get_status_gentemplates(argsdict)
        # elif ref == 'genmaprois':
            # return get_status_genmaprois(argsdict)
        # elif ref == 'extractvote':
            # return get_status_extractvote(argsdict)
        # else:
            # utils.sts(f"ref '{ref}' not supported by op=get_status", 3)

    elif op == 'create_bif_report':
        """ 
        as a result of validate_bifs or genbif_from_ballots, this report is 
        generated, or it can be generated once the BIF is built. Report provides:
            Number of Ballot Archives
            Total number of BIF records
            Unique ballot_ids
            Duplicate ballot_ids
            Number of CVR files
            Number of precincts
            Number of parties
            Number of style_nums
            Number of card_codes
            Number of ballots w/o card_codes
            Number of BMD ballots
            Number of corrupted ballots (could not be read)
            Number of different sheets
            Number of each sheet
        
        This operation completes quickly and currently produces a text report to console.
        Can provide alternative data output as JSON or HTML through command line switch.
            
        """
        create_bif_report(argsdict)
        
    elif op == 'build_template_tasklists':
        """ 
        Scan bifs and generate template tasklists, with one tasklist csv file per style.
        tasklist is the same format as bif but should not be updated with any information.
        This generally not used as REST entry point.
        """
        build_template_tasklists(argsdict)

    elif op == 'gentemplates':
        """ this function requires that BIF data is available. Used as REST entry point.
            1. generates template tasklists
            2. contructs templates by combining usually 50 ballots to improve resolution.
            Result is a set of raw templates (PNG files), one for each style,
            and possibly also checkpoint images including the components (up to 50).
            
            This function takes significant time, of more than a minute per style. 
            However, this can be delegated to lambdas and may be completed 
            in (# styles/1000) * time per style, but still too long for single REST POST.
            For Dane County, WI, with 191 styles, it still takes at least a minute.
            If all 10,000 styles are used in SF, time is 10 minutes.
            
            Log file updated.
            Report generated of result.
            PNG files for review of each style.
        """
        if argsdict['include_gentemplate_tasks']:    # sub tasks in gentemplate action - generate base templates
            build_template_tasklists(argsdict)
            
        gentemplates_by_tasklists(argsdict)

    elif op == 'gentemplates_only':
        """ This function used for debugging only when tasklists are already generated.
            Tasklists take only seconds to complete now.
            NOT USED IN REST API
        """
        gentemplates_by_tasklists(argsdict)

    elif op == 'genrois':
        """
        After templates are generated, each style is image-analyzed and then OCR'd.
        Result is set of PNG images providing regions of interest (ROIs) determined.
        Style templates must be generated at this point to allow further analysis and generation of rois
        The json list of rois and the image for each result.
        
        Result:
            Creates a report of rois generated
            PNG image files with graphic outlines of rois that can be reviewed by the user.
        """
        genrois(argsdict)

    elif op == 'maprois':
        """
        Once Rois are generated, they can be fairly quickly mapped to contests and options based on information
        in the EIF - Election Information File. This operates at the rate of several seconds per style.
        Result is 
            PNG "redlines" showing the mapping of contests and options to each style.
            Map report, providing detail of where mapping may have gotten off track.
            Log.
        """
        maprois(argsdict)

    elif op == 'genmaprois':
        """ 
        Major REST entry point.
        This the most typical operation once templates have been generated, which may take
        time and use compute resources. May need to be done repetitively while operator makes
        changes to settings file. Operator must review the map report and redlines.
        Once review is completed, then extraction can commence.
        Can break this up for processing by lambdas but it is so fast now that it may not be necessary.
        Result is:
            PNG images showing ROIS from genrois
            PNG redlines showing the correspondence of contests and options for each style.
            failures copied to assist folder
            Map Report
            Log
        """
    
        genrois(argsdict)
        maprois(argsdict)

    elif op == 'get_assist_requests':
        """ 
        After genmaprois is completed, some styles may need manual assistance by human operator.
        This is used in graphic-mode dominant rois generation rather than OCR dominant generation.
        Front end first requests assist requests, and the response is
            list of ballot_ids which needs assistance.
            path to each template file
            path to existing json file for that template.
            
        NOTE this is a new function which is not implemented yet.
        """
        pass
        
    elif op == 'write_new_assist_annotation':
        """ The front end will implement functionality like is implemented by 
            tools/template_edit.py, to allow the user to add rectangular regions,
            horizontal and vertical lines, to the image.
            Then, this writes a new JSON annodation file.
            Maybe this does not need to be provided if frontend can write to s3 directly.
        
        NOTE this is a new function which is not implemented yet, but is implemented
            for CLI operation as 'template_edit' using tools/template_edit.py
        """
        pass
        
    elif op == 'build_extraction_tasks':
        """ Scan bifs and generate extraction tasklists, with an appropriate number of ballots for each lambda.
            tasklist is the same format as bif and should not be updated with any information by lambda.
            This function completes rapidly and thus is combined with actual extraction.
        """
        build_extraction_tasks(argsdict)

    elif op == 'extractvote_only':
        """ with extraction tasklists already built, go through all the ballots in the 
            archives and extract the marks into single csv data table for each tasklist, 
            and then combine into a single csv file for each archive.
            Each tasklist is delegated to a separate lambda process.
            Each lambda can take up to 15 minutes to process one tasklist. Total time of this
            process is less than (# ballots / 200,000) * 15 minutes.
            So for a county like SF, with 500K ballots, upper limit is about 35 minutes.
            LA, the largest county in the US has about 6 million ballots, upper limit is 7.5 hours.
        """
        extractvote_by_tasklists(argsdict)
        #extractvote(argsdict)

    elif op == 'extractvote':
        """ Build extraction tasklists and then extract vote 
            Perform both the tasklist generation (fast) and extraction (slow) above.
            This is the normal REST entry point.
            Result is 
                marks_df.csv for each archive.
                Extraction Report
                Log
                Exception Report
        """
        # go through all the ballots in the archives and extract the marks into single json file for each archive
        build_extraction_tasks(argsdict)
        extractvote_by_tasklists(argsdict)

    elif op == 'genreport':
        """
        Once extraction is completed, a report of results can be produced independent of the voting 
        system results, or CVR. Can be compared with high-level election results.
        
        Result:
            summary of the election results per audit system.
            Includes total number of ballots:
                not processed by audit system due to misalignment or other corruption.
                not provided in archives.
            Compares with high-level election result.
            
        """
        genreport(argsdict)

    elif op == 'cmpcvr':
        """ If a CVR is available and the voting system evaluation of each ballot
            is provided, then this function compares the audit system result with
            the voting system cvr and provides a comprehensive result.
            This function processes each marks_df.csv that corresponds to each archive, and
            compares each record with CVR, which is fully combined into one data file by this
            function.
            Result:
                cmpresult_n.csv for each archive n processed.
                This file is not combined to a single report.
        """
        cmpcvr_by_tasklists(argsdict)

    elif op == 'gen_cmpcvr_report':
        """ 
        The result of cmpcvr is on an archive-by-archive basis and compares
        the combined CVR, which is generally not organized by archive, with the 
        marks_df.csv which are organized by archive. Creates a ballot-by-ballot
        comparison result on per-archive basis as csv file. Includes any 
        adjudications in the determination of discrepancies.
        Result:
            comprehensive report of the comparison, as JSON or text.
            JSON discrepancy list reduced to just the discrepancies.
            
        """
        generate_cmpcvr_report(argsdict)
        
    elif op == 'cmpcvr_and_report':
        """
        This is a major REST entry point.
        compares the CVR and creates a report by combining the above two functions.
        """
        cmpcvr_by_tasklists(argsdict)
        generate_cmpcvr_report(argsdict)
       
        
    elif op == 'get_discrepancy_list':
        """ new function for front end. After cmpcvr is completed, a full report is created. 
            This provides just the discrepancies to allow for adjudication in frontend UI,
            and the existing adjudication JSON file.
            This is a new function.
            Result:
                JSON list of discrepancies
                log updated.
            NOTE: THIS IS A NEW FUNCTION
        """
        pass
        
    elif op == 'submit_adjudications':
        """ front end will implement a review of all discrepancies and provides
            a DRE-like entry of votes as determined by review of ballot images
            This is a new function.
            Perhaps front end updates the adjudication file but this function 
            may be better so the action is properly logged.
            Results:
                status
                log updated.
            NOTE: THIS IS A NEW FUNCTION
        """
        pass

    # =============================================================================
    #    Updates the lambdas functions.
    # =============================================================================
    
    elif op == 'update_lambda' or op == 'update_lambdas':

        branch = argsdict.get('update_branch', 's3-and-lambdas-dev')

        """ to run this function, you must first delete the tree 'lambda_deploytment'
            including the folder.
        """
        
        function_name = argsdict.get('lambda_function', 'all')
        if function_name == 'all':
           update_lambda(update_all=True, branch=branch)
        else:
            update_lambda(function_name=function_name, branch=branch)

    # =============================================================================
    #    Additional operations only used for development and CLI operation.
    # =============================================================================
    
    elif op == 'post_gentemplate_cleanup':
        post_gentemplate_cleanup(argsdict)
    
    # elif op == 'combine_bif_chunks':
        # """ used for testing combining bif chunks
        # """
        # utils.combine_dirname_chunks_each_archive(argsdict, dirname='bif')
        
        
    elif op == 'get_manual_styles_to_contests':
    
        logs.sts("Processing manual_styles_to_contests", 3)
        style_to_contests_dol = get_manual_styles_to_contests(argsdict, silent_error=True)
        
        logs.sts(f"style_to_contests_dol:\n {pprint.pformat(style_to_contests_dol)}")

        if style_to_contests_dol:
            DB.save_data(data_item=style_to_contests_dol, dirname='styles', name='CVR_STYLE_TO_CONTESTS_DICT.json')


    elif op == 'web2eif':
        """
        This operation scrapes from a url provided a high-level report of results.
        It was thought at the time that this report would provide unique contest names
        and consistent option names, but even though they were shorter and a bit better
        than the CVR, they also were insufficient for our needs. Thus, althought this
        does provide a basic function, it is not up to date with the current EIF format
        and does not eliminate the need for the EIF and manual editing.
        RESEARCH ONLY.
        """
        web_scraper.run_scraper(url=argsdict['url'])
        sys.exit()

    #elif op == 'tidycvr':
    #    """ This operation converts and ES&S cvr to tidy format
    #    Although it is operational, it was found that the existing ES&S format was
    #    a reasonably consice and useful format and we would work with it.
    #    """
    #    tidy_ess_cvr(argsdict)
    #    sys.exit()

    elif op == 'cvr2styles':
        """
        DEPRECATED. Use validate_bifs or genbif_from_ballots
        This operation preprocesses an ES&S CVR file or multiple Dominion CVR files.
        creates two dicts:
        styles_dict, which provides contest list for each style_num
        ballotid_to_style dict, which provides style_num based on ballotid.
        This currently only works if the CVR has a column providding the style named 'Ballot Style'
        Would need a different approach if no Ballot Style column is provided, such as
            creating a logical style iD, perhaps bitstring of contests, and use that as a logcal style identifier.
            This would not match to any style designator on the ballot.
        Proceses multple CVR files one at a time. (scalable)

        convert_cvr_to_styles function is in styles_from_cvr_converter.py
        for dominion, get_styles_to_contests_dominion is in gentemplate.py
        """
        convert_cvr_to_styles(argsdict)

    elif op == 'gentrm':
        gentemplates_by_tasklists(argsdict)
        genrois(argsdict)
        maprois(argsdict)

    elif op == 'tltrm':
        build_template_tasklists(argsdict)
        gentemplates_by_tasklists(argsdict)
        genrois(argsdict)
        maprois(argsdict)

    elif op == 'alltemplates':
        """
        Perform all the steps to creation of templates
        """
        genbif_from_cvr(argsdict)
        build_template_tasklists(argsdict)
#        convert_cvr_to_styles(argsdict)
        gentemplates_by_tasklists(argsdict)
        genrois(argsdict)
        maprois(argsdict)

    # elif op == 'download_results':
        # # download all results from s3 bucket.
        # s3utils.download_entire_dirname(argsdict, dirname='marks')
        # s3utils.get_and_merge_lambda_logs(argsdict)

    elif op == 'download_gentemplates':
        # download all gentemplates from s3 bucket.
        # NOT UPDATED TO NEW FILE STRUCTURE
        DB.download_entire_dirname(dirname='styles')
        #DB.download_entire_dirname(dirname='styles')

    elif op == 'delete_s3_results':
        # delete all results on s3 bucket.
        DB.delete_s3_results(argsdict)

    elif op == 'merge_results':
        """ merge results into single csv file.
        """
        utils.merge_results()

    elif op == 'check_extraction':
        check_extraction(argsdict)

    elif op == 'extractcmp':
        build_extraction_tasks(argsdict)
        extractvote_by_tasklists(argsdict)
        cmpcvr_by_tasklists(argsdict)
  
    # elif op == 'getlogs':
        # DB.get_and_merge_s3_logs()

    elif op == 'plotmetrics':
        plotmetrics()

    elif op == 'evalmarks':
        evalmarks()

    elif op == 'save_failing_ballots':
        # given list of ballots in inputfile, copy the original ballot image files
        # to (jobname)/styles/(ballot_id) folders
        
        # this function
        #   1. builds single bif table.
        #   2. looks each ballot up.
        #   3. using entry, opens the indicated archive and extracts the original file.
        #   4. saves the file in folder of jobname and ballot_id in styles, see above.
        save_failing_ballots(argsdict)

    elif op == 'reprocess_failing_ballots':
    
        reprocess_failing_ballots(argsdict)


    else:
        print("op value not defined ", op)
        sys.exit()

Beispiel #9

Datei anzeigen

Datei: Ballot.py Projekt: iero1997/audit-engine-s3-and-lambdas-dev

    def read_style_num_from_barcode(self, argsdict):
        """
        Determine the style_num of this ballot from its barcode / card_code.

        Dominion:
            Uses the 'card_code' already stored in self.ballotdict. When
            argsdict['conv_card_code_to_style_num'] is set, the card_code is
            converted with dominion_build_effective_style_num(); otherwise the
            card_code is used directly. If no style_num results, an exception
            report is written and None is returned.
        ES&S:
            Reads the raw barcode from the first ballot image, then chooses
            style_num in this order of preference:
                1. pstyle_num (only if argsdict['use_pstyle_as_style_num'])
                2. ballot_type_id
                3. the raw card_code

        Side effects:
            updates self.ballotdict['style_num']; for ES&S also 'card_code',
            'pstyle_num' and (when converting) 'ballot_type_id'.

        Returns:
            the style_num, or -- when no style_num could be determined and the
            ballot style overrides contain this ballot_id -- the override value.
            May return None (or another falsy value) if nothing could be read.

        typical usage:
            style_num = ballot.read_style_num_from_barcode(argsdict)
        """

        logs.sts("Reading style_num from ballot barcode...", 3)
        ballot_id = self.ballotdict['ballot_id']

        ballot_style_overrides_dict = args.get_ballot_style_overrides(argsdict)

        if self.ballotdict['vendor'] == 'Dominion':
            if self.ballotdict['card_code'] is None:
                # This situation exists if there was a problem converting the barcode during alignment.
                self.ballotdict['style_num'] = None
            elif argsdict['conv_card_code_to_style_num']:
                # attempt to convert card_code to the official style_num which should match CVR style field.
                # if ballot_type_id or card_code cannot be read, then this may return None
                self.ballotdict['style_num'], _ = dominion_build_effective_style_num(
                    argsdict, self.ballotdict['card_code'])
            else:
                self.ballotdict['style_num'] = self.ballotdict['card_code']

            if self.ballotdict['style_num'] is None:
                # NOTE(review): this early return means the style overrides are
                # never consulted for Dominion ballots with an unreadable
                # card_code -- confirm that this is intended.
                utils.exception_report(
                    f"### EXCEPTION: card_code not read from ballot:{ballot_id}. "
                )
                return None

        elif self.ballotdict['vendor'] == 'ES&S':
            first_image = self.ballotimgdict['images'][0]
            card_code = read_raw_ess_barcode(first_image, ballot_id)
            self.ballotdict['card_code'] = card_code

            # imported here rather than at module level, as in the original code
            from utilities.bif_utils import read_pstyle_from_image_if_specd
            self.ballotdict['pstyle_num'] = read_pstyle_from_image_if_specd(
                argsdict, first_image)

            # style num must be a string
            if argsdict['conv_card_code_to_style_num']:
                # converting the card_code to the style number is important to link it to the
                # style number as used on CVR. If no CVR is used, or if we are not attempting to link them
                # then using the card_code directly occurs when 'conv_card_code_to_style_num' is False
                cc_style_num = str(
                    barcode_parser.get_parsed_barcode(
                        card_code, ballot_id, self.ballotdict['precinct']))
                self.ballotdict['ballot_type_id'] = cc_style_num

            # NOTE(review): assumes 'ballot_type_id' is always present in
            # ballotdict, even when no conversion was done above -- confirm.
            if argsdict['use_pstyle_as_style_num'] and self.ballotdict['pstyle_num']:
                self.ballotdict['style_num'] = self.ballotdict['pstyle_num']
            elif self.ballotdict['ballot_type_id']:
                self.ballotdict['style_num'] = self.ballotdict['ballot_type_id']
            else:
                self.ballotdict['style_num'] = card_code

        # Always bind the result from ballotdict. Previously the local name was
        # only assigned on some paths, so a missing style_num combined with an
        # overrides dict that lacks this ballot_id raised UnboundLocalError.
        style_num = self.ballotdict['style_num']

        if (not style_num
                and ballot_style_overrides_dict
                and ballot_id in ballot_style_overrides_dict):
            return ballot_style_overrides_dict[ballot_id]

        return style_num

Beispiel #10

Datei anzeigen

Datei: utils.py Projekt: iero1997/audit-engine-s3-and-lambdas-dev

def sts(string, verboselevel=0, end='\n'):
    """Forward a status message to logs.sts() and return its result.

    Kept so that callers can keep using utils.sts(); all three arguments are
    passed through positionally and unchanged.
    """
    result = logs.sts(string, verboselevel, end)
    return result

Beispiel #11

Datei anzeigen

Datei: LambdaTracker.py Projekt: iero1997/audit-engine-s3-and-lambdas-dev

def wait_for_lambdas(argsdict: dict, task_name=None):
    """ Waits for every lambda request added to LambdaTracker.

        Progress is measured by counting the files found in the 'Completed'
        and 'Failed' subdirs of the 'lambda_tracker' dirname and comparing the
        sum with the number of entries in LambdaTracker.lambda_requests.
        Polls every 10 seconds and gives up after 20 minutes.

        Note: not specific to task_name. Only one use of Lambdas at a time
                by a specific job_name.
        We may want to use task_name to create separate folders for any given task.
        So keep task_name for now even though we are not using it.

        Returns:
            None  if argsdict['use_lambdas'] is falsy or no requests are registered.
            True  if every request reported completion and none failed.
            False if any request failed, or if the wait timed out while
                  requests were still outstanding.
    """
    if not argsdict['use_lambdas']:
        return None

    total_requests = len(LambdaTracker.lambda_requests)
    if not total_requests:
        return None

    wait = 10
    timeout = 60 * 20
    time.sleep(10)  # Just to be sure that all lambdas tracker files are on the bucket
    s3dirpath_completed = DB.dirpath_from_dirname('lambda_tracker', subdir='Completed')
    s3dirpath_failed = DB.dirpath_from_dirname('lambda_tracker', subdir='Failed')

    # Bound up front so the summary below never depends on the loop body
    # having executed.
    completed_requests = 0
    failed_requests = 0
    running_requests = total_requests

    # '> 0' rather than plain truthiness: if the tracker dirs hold more files
    # than there are registered requests the difference goes negative, which
    # would otherwise keep us polling until the timeout expires.
    while timeout > 0 and running_requests > 0:
        time.sleep(wait)
        timeout -= wait

        completed_requests = len(s3utils.list_files_in_s3dirpath(s3dirpath_completed))
        failed_requests = len(s3utils.list_files_in_s3dirpath(s3dirpath_failed))
        running_requests = total_requests - completed_requests - failed_requests

        if timeout > 0 and running_requests > 0:
            logs.sts(f'Waiting for lambdas. Timeout (s): {timeout}. Running: {running_requests}')

    all_succeeded = True

    failed_requests_log_list = s3utils.list_files_in_s3dirpath(s3dirpath_failed)
    if failed_requests_log_list:
        for failed_request in failed_requests_log_list:
            print(f'Lambda request failed. please check cloudwatch logs for chunks: {failed_request} \n')
        all_succeeded = False

    # Requests that never produced a tracker file have not succeeded either.
    timed_out = running_requests > 0
    if timed_out:
        logs.sts(f"Timed out waiting for lambdas; {running_requests} request(s) did not report a result", 3)
        all_succeeded = False

    # Guard the percentages: on a timeout with no tracker files at all the
    # denominator is zero.
    finished_requests = completed_requests + failed_requests
    if finished_requests:
        pct_successful = round(100 * completed_requests / finished_requests, 2)
        pct_failed = round(100 * failed_requests / finished_requests, 2)
    else:
        pct_successful = pct_failed = 0.0

    headline = "Lambda wait timed out" if timed_out else "All lambdas finished"
    logs.sts(f"{headline}; {completed_requests} {pct_successful}% successful, "
             f"{failed_requests} {pct_failed}% failed", 3)

    return all_succeeded