    def get_ballot_images(self):
        """
        Processes files already read into a dict of {name: bytes_array}.
        Skips over the step of placing them in 'source'.
        """

        self.ballotimgdict['images'] = []
        extension = self.ballotdict['extension']
        utils.sts(f"Converting images from {extension} data...", 3, end='')

        for filedict in self.ballotimgdict['source_files']:
            if extension == '.pdf':
                images = get_images_from_pdf(filedict)
            elif extension == '.pbm':
                images = get_images_from_pbm(filedict)
            elif extension == '.tif':
                images = get_images_from_tif(filedict)
            elif extension == '.png':
                images = get_images_from_png(filedict)
            else:
                utils.exception_report(
                    f"get_ballot_images(): 'extension':{extension} not recognized."
                )
                sys.exit(1)
            self.ballotimgdict['images'].extend(images)
        utils.sts(f"{len(self.ballotimgdict['images'])} image(s) converted.",
                  3)
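# Hypothetical sketch of one of the per-format helpers used above; the real
# get_images_from_pdf is not shown in this file. It assumes filedict maps a
# file name to the raw PDF bytes and that pdf2image (backed by poppler) is
# available. Illustrative only -- the actual helper may return a different
# image type.
def get_images_from_pdf_sketch(filedict: dict) -> list:
    from pdf2image import convert_from_bytes

    images = []
    for _name, pdf_bytes in filedict.items():
        # convert_from_bytes returns one PIL.Image per PDF page
        images.extend(convert_from_bytes(pdf_bytes))
    return images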
    def set_cells_with_images_to_writeins(file_paths):
        """Reads CVR spreadsheet as a ZIP and extracts information from
        the .xml file about the cells that have images in them.
        Then sets null cells in CVR data frame to write-in, if the cell
        has an image within.
        :param file_paths: Path (or list of paths) to the CVR file(s).
        @TODO: Need to fix for s3 operation;
               probably first download the file and then perform the work.
        """
        dirpath = DB.dirpath_from_dirname('archives')
        if dirpath.startswith('s3'):
            utils.sts("Cannot convert images to writeins on s3")
            sys.exit(1)

        if isinstance(file_paths, str):
            file_paths = [file_paths]
        for file_path in file_paths:
            archive = ZipFile(file_path, 'r')
            xml_path = 'xl/drawings/drawing1.xml'
            try:
                xml_file = archive.read(xml_path)
            except KeyError:
                utils.sts(f"Couldn't find {xml_path} in {file_path}")
                continue
            doc = xml.dom.minidom.parseString(xml_file.decode())
            for cellAnchorElement in doc.getElementsByTagName(
                    'xdr:twoCellAnchor'):
                fromElement = cellAnchorElement.getElementsByTagName(
                    'xdr:from')[0]
                row = fromElement.getElementsByTagName(
                    'xdr:row')[0].firstChild.data
                col = fromElement.getElementsByTagName(
                    'xdr:col')[0].firstChild.data
                CVR.data_frame.iat[int(row) - 1, int(col)] = 'write-in:'
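    # For reference, the drawing part parsed above looks roughly like this
    # (illustrative snippet; namespaces and unrelated elements omitted):
    #
    #   <xdr:twoCellAnchor>
    #     <xdr:from>
    #       <xdr:col>5</xdr:col>
    #       <xdr:colOff>0</xdr:colOff>
    #       <xdr:row>42</xdr:row>
    #       <xdr:rowOff>0</xdr:rowOff>
    #     </xdr:from>
    #     ... picture element for the embedded image ...
    #   </xdr:twoCellAnchor>
    #
    # xdr:row and xdr:col are zero-based sheet coordinates; the code above
    # subtracts 1 from the row, presumably to account for the header row,
    # before indexing CVR.data_frame.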
def delete_s3paths(s3paths):
    """ delete a list of s3path in single bucket 
    
    Delete={
            'Objects': [
                {
                    'Key': 'string',
                    'VersionId': 'string'
                },
            ],
            'Quiet': True|False
        },
        MFA='string',
        RequestPayer='requester',
        BypassGovernanceRetention=True|False
    )
    this has been tested.
    
    """
    
    s3dict = parse_s3path(s3paths[0])
    s3 = boto3.resource('s3')
    bucket_obj = s3.Bucket(s3dict['bucket'])
    
    objects_list = []
    for s3path in s3paths:
        s3dict = parse_s3path(s3path)
        objects_list.append({'Key': s3dict['key']})
        
    bucket_obj.delete_objects(Delete={'Objects': objects_list})

    utils.sts(f"Deleted {s3paths}", 3)
def get_and_merge_s3_logs(dirname, rootname='log', chunk_pat=None, subdir=None):
    """
    Fetches all the lambda log files from a job folder on S3 that match rootname and chunk_pat,
    combines them into one file, and writes it to dirname/{rootname}_{dirname}.txt.
    :param dirname: the S3 job folder (dirname) to fetch lambda logs from.
    :param rootname: root of the log file names, e.g. 'log' or 'exc'.
    :param chunk_pat: regex fragment matching the chunk portion of the file names.
    :param subdir: optional subdirectory within dirname.
    :return: number of files merged.
    
    log file name: f"log_{group_root}_{dirname}_chunk_{str(chunk_idx)}.txt"
    """
    utils.sts(f"Getting the {rootname} files from s3 and combining")
    
    # download all the log files
    # make sure tmp is empty.
    tmp_dirpath = DB.dirpath_from_dirname('tmp')
    shutil.rmtree(tmp_dirpath, ignore_errors=True)
    
    sts(f"Downloading all {rootname} files, one per chunk", 3)
    # download according to matching pattern
    DB.download_entire_dirname(dirname=dirname, subdir=subdir, file_pat=fr"{rootname}_{chunk_pat}\.txt", local_dirname='tmp')
    
    sts(f"Combining {rootname} files", 3)
    dest_name = f"{rootname}_{dirname}.txt"
    dest_dirpath = DB.dirpath_from_dirname(dirname=dirname, s3flag=False)
    combined_log_filepath = dest_dirpath + dest_name

    num_files = merge_txt_dirname(dirname='tmp', subdir=subdir, destpath=combined_log_filepath, file_pat=f"{rootname}_*.txt")
    
    sts(f"Writing combined {rootname} file: {combined_log_filepath} to s3 in dirname:'{dirname}'", 3)
    if os.path.exists(combined_log_filepath):
        DB.upload_file_dirname(dirname, dest_name, local_dirname='tmp')
    return num_files
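# Typical usage (see extractvote_by_tasklists below): merge the per-chunk log
# and exception files for the 'marks' job folder back into single files on s3.
#
#   get_and_merge_s3_logs(dirname='marks', rootname='log',
#                         chunk_pat=r"_chunk_\d+", subdir="chunks")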
def convert_cvr_to_styles(argsdict: dict = None, silent_error: bool = False):
    """ ENTRY POINT FROM main
    
        --op cvr2styles
        
    Given a list of cvr files, generate two dicts:
    master_style_dict -- indexed by style, provides the list of contests.

    cvr_ballotids_to_style_dict -- indexed by ballotid, provides the style.
    THIS DICT IS NOW DEPRECATED AND WILL USE BIF INSTEAD.

    :param argsdict: Dict of arguments passed on script input.
    :param silent_error: Flag indicating whether error messages should be suppressed.
    """
    utils.sts('Generating styles dict from cvr files', 3)
    
    if argsdict['vendor'] == 'Dominion':
        get_styles_to_contests_dominion(argsdict, 
            ballot_type_contest_manifest='BallotTypeContestManifest.json',
            contest_manifest='ContestManifest.json', 
            just_ids=False,
            silent_error=silent_error
            )
    else:
        convert_cvr_to_styles_ess(argsdict, silent_error=silent_error)
def read_buff_from_s3path(s3path):
    if not does_s3path_exist(s3path):
        utils.sts(f"s3path {s3path} not found, cannot read_buff_from_s3path", 3)
        sys.exit(1)
    s3dict = parse_s3path(s3path)
    buff = get_s3_core(s3dict['bucket'], s3dict['key'])
    return buff
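# get_s3_core is not shown in this file. A minimal sketch of what it likely
# does, assuming plain boto3 (hypothetical -- the real helper may add retries
# or reuse a shared client):
def get_s3_core_sketch(bucket: str, key: str) -> bytes:
    s3_client = boto3.client('s3')
    response = s3_client.get_object(Bucket=bucket, Key=key)
    return response['Body'].read()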
    def combine_dirname_dfs(dirname, subdir=None, file_pat=None, s3flag=None):
        """
        Combine csv as dfs into a single df in memory.
        This function does not create any new files.
        """

        full_df = pd.DataFrame()
        names_list = DB.list_files_in_dirname_filtered(dirname=dirname,
                                                       subdir=subdir,
                                                       file_pat=file_pat,
                                                       fullpaths=False,
                                                       no_ext=False,
                                                       s3flag=s3flag)
        for name in names_list:
            utils.sts(f"...combining dirname:'{dirname}' name:'{name}'", 3)
            this_df = DB.load_data(dirname=dirname,
                                   name=name,
                                   subdir=subdir,
                                   s3flag=s3flag,
                                   silent_error=False)
            full_df = pd.concat([full_df, this_df], ignore_index=True)

        utils.sts(
            f"Total of {len(full_df.index)} records in combined {dirname} df",
            3)

        return full_df
def get_attribute_from_path(argsdict, ballot_image_path, attribute_name):
    """ given ballot_image_path from zip archive, extract attribute from path
        based on setting of level from argsdict for the attribute.
        attribute of -1 means not available.
        attribute_names are: 'precinct-folder', 'party-folder', 'group-folder'
        returns '' if attribute of -1 is specified.
    """

    attribute_str = ''
    path_segments = re.split(r'[/\\]', ballot_image_path)
    path_segments.pop()

    folder_level = int(argsdict.get(
        attribute_name, 0))  # -1 means the path does not provide this info.
    if folder_level >= 0:
        if not (folder_level < len(path_segments)):
            utils.sts(
                f"get_attribute_from_path: {attribute_name} input spec {folder_level} is out of range. Must be less than {len(path_segments)}\n"
                f"ballot_image_path provided is {ballot_image_path}")
            import pdb
            pdb.set_trace()
            sys.exit(1)
        attribute_str = path_segments[folder_level]
    #elif attribute_name == 'precinct-folder':
    #    utils.exception_report(f"{attribute_name} specified as -1, this attribute cannot be determined from ballot file path. "
    #              f"Apparently all image files are provided in one big heap. Consider using 'precinct_pattern' input parameter.")
    #    attribute_str = 'Unspecified Precinct'

    return attribute_str
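# Worked example: for ballot_image_path 'PrecinctA/Dem/ballot_0001.pdf' the
# path_segments (after dropping the file name) are ['PrecinctA', 'Dem'], so a
# 'precinct-folder' setting of 0 returns 'PrecinctA', a 'party-folder' setting
# of 1 returns 'Dem', and a setting of -1 returns '' (not derivable from path).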
def delegate_task_chunk(task_args):

    argsdict = task_args.get('argsdict')
    task_name = task_args.get('task_name')
    if argsdict.get('save_lambda_task_args'):
        # Just save the task arguments to a file and stop. This file will be used to test lambda locally.
        # Only the first chunk is saved when 'save_lambda_task_args' is set, so that we can debug lambda with the first chunk's arguments.
        with open(f'./input_files/{task_name}_lambda_task_args.json', 'w+') as f:
            f.write(json.dumps({"task_args": task_args}))
        print(f'task_args is written to file: ./input_files/{task_name}_lambda_task_args.json')
        sys.exit(0)

    utils.sts(f"Submitting chunk #{task_args['chunk_idx']} for task {task_args['dirname']}.", 3)
    if config_d.MOCK_LAMBDA:
        request_id = 'fake_lambda_id'
    else:        
        response = s3utils.invoke_lambda(
            function_name=f"arn:aws:lambda:us-east-1:174397498694:function:{argsdict['lambda_function']}",
            async_mode=True,
            custom_payload={'task_args': task_args},
            region='us-east-1'
            )
        request_id = response['ResponseMetadata']['RequestId']

    LambdaTracker.add_new_request(
        request_id=request_id,
        chunk_name=task_args['chunk_name'],
        task_args=task_args
        )
def fuzzy_compare_strlists(correct_strlist,
                           ocr_strlist,
                           thres,
                           justify='full') -> tuple:  # (match_bool, metric)
    """ return True if all strings match in the order given else False"""
    utils.sts(
        "fuzzy_compare_strlists Comparing:\n"
        f"correct: '{join_remove_nl(correct_strlist)}'\n"
        f"ocrlist: '{join_remove_nl(ocr_strlist)}'", 3)
    metric = 1.0

    if len(correct_strlist) != len(ocr_strlist):
        logs.exception_report(
            f"Mismatched strlist lengths: correct:{correct_strlist}({len(correct_strlist)}) ocr_strlist:{ocr_strlist}({len(ocr_strlist)})"
        )
        return False, 0.0

    for correct_str, ocr_str in zip(correct_strlist, ocr_strlist):
        # zip stops at the shorter list, but that is safe here because the
        # length check above guarantees both lists are the same length.
        flag, metric = fuzzy_compare_str(correct_str,
                                         ocr_str,
                                         thres=thres,
                                         justify=justify,
                                         method='levdist')
        if not flag:
            # can stop early if they don't match
            return flag, metric
    return True, metric
def fuzzy_compare_str_to_list(correct_strlist: list,
                              ocr_str: str,
                              thres: float,
                              fuzzy_compare_mode='best_of_all') -> tuple:
    """ return True if ocr_str is found in correct_strlist
        with index offset where it is found, and metric.
    """
    utils.sts(
        f"Comparing strlists\ncorrect '{join_remove_nl(correct_strlist)}'\n"
        f"ocr_str '{ocr_str}'", 3)
    metrics = fuzzy_metrics_str_to_list(correct_strlist, ocr_str,
                                        fuzzy_compare_mode)

    if not metrics:
        return False, 0, 0

    # find the max metric before sorting; sorting would destroy the positional order needed for max_idx.
    max_metric = max(metrics)
    max_idx = metrics.index(max_metric)

    if len(metrics) > 1:
        metrics.sort(reverse=True)
        if (metrics[0] > 0.7) and (metrics[0] - metrics[1] < 0.3):
            string = f"Close fuzzy discrimination: max_metric:{metrics[0]} next_metric:{metrics[1]}\n" \
                     f"ocr_str:{ocr_str} correct_strlist:{', '.join(correct_strlist)}"
            utils.exception_report(string)

    return bool(max_metric > thres), max_idx, max_metric
def load_bof_df(argsdict):
    """returns conversions for ballot options.
    This function implements the Ballot Options File (BOF)
    """
    bof_columns = [
        'official_contest_name',
        # official contest name used as a means to look up the ballot option.
        'official_option',
        # one option per record used as a second index to look up the ballot option
        'ballot_option',
        # ballot options as shown on the ballot, and only provided if the ballot
        # option differs from the official option.
    ]
    bof_filename = argsdict.get('bof')
    if not bof_filename:
        return None

    bof_df = DB.load_data(dirname='EIFs',
                          name=bof_filename,
                          silent_error=False,
                          user_format=True)

    bof_df = check_table(bof_df,
                         table_name=bof_filename,
                         required_columns_list=bof_columns,
                         strip_cols=bof_columns)

    utils.sts(f"BOF {bof_filename} loaded.")
    return bof_df
def get_replacement_cvr_header(argsdict: dict) -> list:
    """
    :param argsdict: Dict of arguments passed on script input.
    :return: list of replacement CVR header names taken from the EIF.
    """
    utils.sts("Loading EIF...", 3)

    eif_filename = argsdict.get('eif')

    eif_df = DB.load_data(dirname='EIFs', name=eif_filename, user_format=True)

    eif_df = check_table(eif_df,
                         table_name=eif_filename,
                         required_columns_list=EIF_REQUIRED_COLS,
                         strip_cols=EIF_STRIP_COLS)

    cvr_replacement_header_list = list(eif_df['official_contest_name'])
    expected_initial_cvr_cols = argsdict.get(
        'initial_cvr_cols', ['Cast Vote Record', 'Precinct', 'Ballot Style'])
    if not all(item in cvr_replacement_header_list
               for item in expected_initial_cvr_cols):
        expected_cols = ','.join(expected_initial_cvr_cols)
        utils.sts(
            f"ERROR: CVR does not have the expected fields in the header {expected_cols}",
            0)
        sys.exit(1)
    return cvr_replacement_header_list
def plotmetrics():
    combined_marks_df = load_all_marks_df()
    utils.sts(f"Total of {len(combined_marks_df.index)} records in combined marks_df")
    only_marks_df = combined_marks_df[~combined_marks_df['option'].str.match('#contest')]

    # TODO: Where's this magic number(840) coming from?
    bins = list(range(0, 840, 10))
    only_marks_df[['pixel_metric_value']].plot(kind='hist', bins=bins, rwidth=0.8)
    plt.show()
def parse_dominion_cvr_chunks_to_dict(argsdict: dict, cvr_path: str) -> dict:
    """
        Using Lambdas for this operation was found not to be necessary once
        we optimized the operation of creating the pandas tables.

        read json CVR file in Dominion format.
        create dict keyed by ballot_id with dict of attributes.
        'ballot_type_id'    - 1-180 code BallotTypeId from CVR
        'is_bmd'               - 1 if ballot is bmd
        'card_code'         - style code found on the ballot
        'cvr_name'          - name of the cvr chunk (filename without path)
        'style_num'         - style indicator (str)
        'sheet0'            - sheet value decoded from card_code
        
        This does not currently extract the list of contests included in the CVR.
    """
    cvr_dict = {}
    cvr_reg = r'CvrExport_\d+\.json'
    archive = open_zip_archive(cvr_path)
    cvrlist = [n for n in archive.namelist() if re.match(cvr_reg, n)]
    total_num = len(cvrlist)
    for index, name in enumerate(cvrlist):
        cvr_name = os.path.basename(name)
        if (index+1) % 100 == 0:
            utils.sts(f"Parsing CVR JSON file {cvr_name} #{index}: of {total_num} ({round(100 * (index+1) / total_num, 2)}%)")

        data = json.loads(archive.read(name))
        for session in data['Sessions']:
            tabulator_id = session.get('TabulatorId')
            batch_id = session.get('BatchId')
            record_id = session.get('RecordId')
            is_bmd = (session.get('SessionType') == 'QRVote')
            try:
                card_code = session['Original']['Cards'][0]['Id']
            except KeyError:
                card_code = 0
            try:
                ballot_type_id = session['Original']['BallotTypeId']
            except KeyError:
                ballot_type_id = 0

            style_num, sheet0 = dominion_build_effective_style_num(argsdict, card_code, ballot_type_id)

            ballot_id = f'{tabulator_id:05d}_{batch_id:05d}_{record_id:06d}'
            cvr_dict[ballot_id] = {
                'is_bmd': is_bmd,
                'cvr_name': cvr_name,
                'style_num': style_num,             # internal style number LPSETTT Lang, Party, Sheet, ETTT ExternalId , ballot_type_id
                'card_code': str(card_code),
                'ballot_type_id': int(ballot_type_id),
                'sheet0': sheet0,
            }

    return cvr_dict
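# Shape of one CvrExport_<n>.json session as implied by the parsing above
# (illustrative; fields that are not read here are omitted):
#
#   {"Sessions": [
#       {"TabulatorId": 10, "BatchId": 3, "RecordId": 57,
#        "SessionType": "QRVote",                 # anything else => not BMD
#        "Original": {"BallotTypeId": 12,
#                     "Cards": [{"Id": 4301}]}}
#   ]}
#
# which yields ballot_id '00010_00003_000057' in the returned cvr_dict.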
def save_style_ballot_images(ballots: list, style_num):
    for ballot in ballots:
        utils.sts(f"Saving images for ballot {ballot.ballotdict['ballot_id']}",
                  3)

        DB.save_data_list(data_list=ballot.ballotimgdict['images'],
                          dirname='styles',
                          name=ballot.ballotdict['ballot_id'],
                          format='.png',
                          subdir=style_num)
def extractvote_by_tasklists(argsdict: dict):
    """
    ACTIVE
    This replaces the extractvotes function.
    Processes the tasklists which exist in the extraction_tasks folder.

    Tasklists are generated by reviewing the BIF tables.
    Each tasklist creates a separate f"marks_{tasklist_name}.csv" file in the results folder.

    """
    logs.sts('Extracting marks from extraction tasklists', 3)

    tasklists = DB.list_files_in_dirname_filtered(dirname='marks', subdir='tasks', file_pat=r'^[^~].*\.csv$', fullpaths=False)
    total_num = len(tasklists)
    utils.sts(f"Found {total_num} taskslists", 3)

    use_lambdas = argsdict['use_lambdas']

    if use_lambdas:
        LambdaTracker.clear_requests()
        #clear_instructions(config_d.TASKS_BUCKET, Job.get_path_name())

    biflist = get_biflist(no_ext=True)

    for bif_idx, bifname in enumerate(biflist):
        archive_name = re.sub(r'_bif', '', bifname)
        genmarks_tasks = [t for t in tasklists if t.startswith(archive_name)]
    
        for chunk_idx, tasklist_name in enumerate(genmarks_tasks):
        
            #----------------------------------
            # this call may delegate to lambdas and return immediately
            # if 'use_lambdas' is enabled.
            # otherwise, it blocks until the chunk is completed.
            
            build_one_chunk(argsdict, 
                dirname='marks', 
                chunk_idx=chunk_idx, 
                filelist=[tasklist_name], 
                group_name=bifname,
                task_name='extractvote', 
                incremental=False)

            #----------------------------------

            if not chunk_idx and not bif_idx and argsdict['one_lambda_first']:
                if not wait_for_lambdas(argsdict, task_name='extractvote'):
                    utils.exception_report("task 'extractvote' failed delegation to lambdas.")
                    sys.exit(1)           

    wait_for_lambdas(argsdict, task_name='extractvote')

    utils.combine_dirname_chunks_each_archive(argsdict, dirname='marks')
    logs.get_and_merge_s3_logs(dirname='marks', rootname='log', chunk_pat=r"_chunk_\d+", subdir="chunks")
    logs.get_and_merge_s3_logs(dirname='marks', rootname='exc', chunk_pat=r"_chunk_\d+", subdir="chunks")
def combine_archive_bifs():
    """
    BIF tables are constructed for each archive. Combine these into a single BIF table.
    Returns full_bif_df. 
    
    NOTE! This function does not create any new files.
    
    """
    utils.sts("Combining archive bifs", 3)
    
    return DB.combine_dirname_dfs(dirname='bif', file_pat=r'_bif\.csv')
    def read_style_dict_from_json(tag) -> dict:
        """
        Reads the dictionary of styles from a JSON file.
        :param tag: filename decoration to distinguish the files
        """
        CVR.make_dir()
        file_path = f"{config_dict['STYLE_DICT']}{tag}.json"
        try:
            with open(file_path, 'r') as jf:
                return json.load(jf)
        except FileNotFoundError:
            utils.sts(f"Style Dict JSON file '{tag}' not found", 3)
            return None
def fetch_s3key_to_dirpath(s3key: str, local_dirpath: str, bucket_obj, silent=True):
    """ download object with key s3key from bucket_obj
        extract basename from key
        write to local_dirpath
    """

    s3basename = s3key.rpartition('/')[-1]
    local_path = os.path.join(local_dirpath, s3basename)
        
    if not silent:
        utils.sts(f"Fetching: {s3key} to {local_path}", 3)
        
    bucket_obj.download_file(s3key, local_path, Config=CUSTOM_CONFIG)
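# CUSTOM_CONFIG is defined elsewhere; it is presumably a boto3 TransferConfig.
# A representative definition (illustrative values only):
#
#   from boto3.s3.transfer import TransferConfig
#   CUSTOM_CONFIG = TransferConfig(multipart_threshold=64 * 1024 * 1024,
#                                  max_concurrency=10)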
def get_pmv_from_df(audit_df: pd.DataFrame, ballot_id: str, contest: str,
                    option: str) -> int:
    option_df = audit_df.loc[(audit_df['ballot_id'] == int(ballot_id))
                             & (audit_df['contest'] == contest) &
                             (audit_df['option'] == option)]
    pmvs_list = list(option_df['pixel_metric_value'])
    if len(pmvs_list) > 1:
        utils.sts(
            f"Unexpected Condition: pixel_metric_values is multivalued for ballot_id {ballot_id}, "
            f"contest {contest}, option {option}", 3)
    elif not pmvs_list:
        return 999

    return pmvs_list[0]
def load_all_marks_df():
    """
    Loads the marks data frame chunks from results and combines them.
    Returns the combined dataframe.
    @TODO -- this can use the general combine chunks function.
    """
    combined_marks_df = pd.DataFrame()
    df_filename_list = get_marks_df_list()
    utils.sts(f"Total of {len(df_filename_list)} marks_df chunks detected.", 3)
    for df_file in df_filename_list:
        marks_df = load_one_marks_df(df_file)
        combined_marks_df = pd.concat([combined_marks_df, marks_df], sort=False, ignore_index=True)
        utils.sts(f"appended {df_file} chunk, {len(marks_df.index)} records, total of {len(combined_marks_df.index)} records.", 3)
    return combined_marks_df
def cmpcvr_by_one_tasklist(argsdict, tasklist_name):
    """ This is the primary function to be run inside lambda for cmpcvr.
    
        tasklist_name is like "{archive_root}_chunk_{chunk_idx}"
    """
    # set s3 vs local mode -- this would probably be better done long before this point.
    DB.set_DB_mode()

    contests_dod = DB.load_data('styles', 'contests_dod.json')
    if CVR.data_frame.empty:
        CVR.load_cvrs_to_df(argsdict)
    
    #        marks/chunks/{archive_root}_chunk_{chunk_idx}.csv           # individual marks chunks. These are kept for cmpcvr


    if not DB.file_exists(file_name=tasklist_name+'.csv', dirname='marks', subdir="chunks"):
        utils.sts(f"Logic Error: no marks df missing: {tasklist_name}")
        traceback.print_stack()
        sys.exit(1)

    audit_df = DB.load_data(dirname='marks', subdir="chunks", name=tasklist_name, format='.csv')
    
    #---------------------------------------
    # primary call of this function performs chunk comparison
    
    overvotes_results, disagreed_results, blank_results = compare_chunk_with_cvr(
        argsdict=argsdict,
        contests_dod=contests_dod,
        cvr_df=CVR.data_frame,
        audit_df=audit_df,
        chunk_name=tasklist_name,
        )
    #---------------------------------------
    """
        cmpcvr/chunks/disagreed_{archive_root}_chunk_{chunk_idx}.csv    # individual cmpcvr disagreed chunks
        cmpcvr/chunks/overvotes_{archive_root}_chunk_{chunk_idx}.csv    # individual cmpcvr overvote chunks
    """
        
       
    DB.save_data(data_item=disagreed_results, 
        dirname='cmpcvr', subdir='chunks', 
        name=f"disagreed-{tasklist_name}.csv")

    DB.save_data(data_item=overvotes_results,
        dirname='cmpcvr', subdir='chunks', 
        name=f"overvotes-{tasklist_name}.csv")

    DB.save_data(data_item=blank_results, 
        dirname='cmpcvr', subdir='chunks', 
        name=f"blanks-{tasklist_name}.csv")
def dict_of_df_record(df, key, val, dfname='Unnamed', silent_error=False):
    """ I don't think this is actually necessary because just reading the record creates a dict.
    """

    list_of_dict = df.to_dict(orient='records')
    for record in list_of_dict:
        record = strip_dict(record)
        if record[key] == val:
            return record
    if silent_error:
        return None
    utils.sts(
        f"Key error. Can't find record with {key} == '{val}' in dataframe {dfname}\n{df}",
        0)
    sys.exit(1)
    def delete_s3_results(argsdict: dict, mode='all', subdir=None):
        job_folder_s3path = argsdict.get('job_folder_s3path')

        utils.sts(
            f"Deleting results from job_folder_s3path: '{job_folder_s3path}'",
            3)
        if mode == 'all':
            for dirname in [
                    'cmpcvr', 'marks', 'marks_chunks', 'cvr_chunks',
                    'lambda_tracker'
            ]:
                DB.delete_dirname_files_filtered(dirname=dirname, s3flag=True)
        else:
            DB.delete_dirname_files_filtered(dirname=subdir, s3flag=True)
        utils.sts("Finished")
def open_local_archive(source_path, testzip=False, silent_error=False):
    """ Deals with the error conditions raised in open_zip_archive
        Q: why is it a good idea to keep these separate?
        It seems that only one archive can be open at a time.
        
    """
    # we've had trouble with spurious "file does not exist" detections when it does.
    source_path = os.path.normcase(os.path.normpath(source_path)).strip()

    if os.path.isfile(source_path):
        utils.sts(f"Verified that {source_path} exists.")
    else:
        utils.sts(
            f"Archive {source_path} does not exist according to os.path.exists()."
        )

        # this may be a spurious problem related to using a file server.

        tot_time = 0
        for i in range(1, 20):
            utils.sts(f"Waiting {i} seconds", 3)
            time.sleep(i)
            tot_time += i
            if os.path.isfile(source_path):
                utils.sts(
                    f"After wait of {tot_time} secs, {source_path} now exists according to os.path.exists().",
                    3)
                #import pdb; pdb.set_trace()
                break
        else:
            utils.sts(
                f"After wait of {tot_time} secs, {source_path} still not found according to os.path.exists().",
                3)
            import pdb
            pdb.set_trace()
            sys.exit(1)

    try:
        archive = open_zip_archive(source_path, testzip)
    except (FileNotFoundError, ValueError) as error:
        if not silent_error:
            logging.error(
                f"Failed to open archive {source_path} Program failed due to %s",
                error)
            sys.exit(1)
        else:
            return None
    return archive
def get_style_lookup_table(argsdict: dict, s3=False):
    """
    :param style_lookup_table_filename: (from argsdict) filename of a CSV file with the contests and styles table.
    :return: pandas df suitable for looking up precinct and party to provide style_num.
    """
    
    style_lookup_table_filename = argsdict.get('style_lookup_table_filename')
    if not style_lookup_table_filename:
        return None
    
    utils.sts("style lookup table specified. Loading...", 3, end='')

    style_lookup_table_df = DB.load_data('config', style_lookup_table_filename, user_format=True)
        
    utils.sts("completed", 3)
    
    return style_lookup_table_df
def copy_ballot_pdfs_from_archive_to_report_folder(archive, filepaths,
                                                   ballot_id, dirname):
    target_filename = f"{ballot_id}i.pdf"
    target_folder = DB.dirpath_from_dirname(dirname)
    ballot_paths = [
        x for x in filepaths if re.search(r'[\\/]' + target_filename, x)
    ]
    if len(ballot_paths):
        utils.sts(f"Extracting {ballot_paths[0]} from archive", 3)
        archive.extract(ballot_paths[0], path=target_folder)
        return

    utils.sts(
        f"Logic error: Failed to find ballot_id {ballot_id} in ballot archive.",
        0)
    traceback.print_stack()
    sys.exit(1)
def dict_of_df_record2(df,
                       key1,
                       val1,
                       key2,
                       val2,
                       dfname='Unnamed',
                       silent_error=False):
    list_of_dict = df.to_dict(orient='records')
    for record in list_of_dict:
        record = strip_dict(record)
        if record[key1] == val1 and record[key2] == val2:
            return record
    if silent_error:
        return {}
    utils.sts(
        f"Key error. Can't find record with {key1} == {val1} and {key2} == {val2} in dataframe {dfname}",
        0)
    sys.exit(1)
def save_failing_ballots(argsdict):
    """ given list of ballots in inputfile, copy the original ballot image files
        to (job_folder_path)/styles/(ballot_id) folders
        
        this function
            1. builds single bif table.
            2. looks each ballot up.
            3. using entry, opens the indicated archive and extracts the original file.
            4. saves the file in folder of jobname and ballot_id in styles, see above.
    """
    
    full_bif_df = combine_archive_bifs()
    
    ballot_list = argsdict['ballotid']
    
    #archives_folder_path = argsdict['archives_folder_path']
    opened_archive_basename = ''
    archive = None
    
    for ballot_id in ballot_list:
        utils.sts(f"processing ballot_id:{ballot_id}", 3)
        rows = full_bif_df.loc[full_bif_df['ballot_id'] == ballot_id]       # select set of rows with value in column_name equal to some_value.
        
        archive_basename = rows['archive_basename'].values.item()     # return one item from a row
        file_paths_str = rows['file_paths'].values.item()
        file_paths = file_paths_str.split(';')
        
        dest_dirpath = DB.dirpath_from_dirname('styles')
        
        if archive_basename != opened_archive_basename:
            if opened_archive_basename:
                archive.close()
            archive = open_archive(argsdict, archive_basename)
            opened_archive_basename = archive_basename
            
        for file_path in file_paths:
            basename = os.path.basename(file_path)
            dest_filepath = os.path.join(dest_dirpath, ballot_id, basename)
            extract_file(archive, file_path, dest_filepath)
            utils.sts(f"...extracted:{file_path} to {dest_filepath}", 3)
        
    if opened_archive_basename:
        archive.close()