def post_gentemplate_cleanup(argsdict):
    # This portion of the function above has been separated out to allow for individual testing.
    # Normally we combine chunks, but for styles generation that is only needed for the roismap.
    logs.sts("gentemplates_by_tasklists completed.\n", 3)
    #import pdb; pdb.set_trace()

    if argsdict['include_maprois']:
        #styles_completed = DB.list_subdirs_with_filepat('styles', file_pat=r'\.json$', s3flag=None)
        #attempted_but_failed_styles = [s for s in styles_on_input if s not in styles_completed]

        logs.sts("Combining roismap for each style into a single .csv file.", 3)
        DB.combine_dirname_chunks(dirname='styles', subdir="roismap", dest_name='roismap.csv',
                                  file_pat=r'_roismap\.csv')

        good_map_num = logs.get_and_merge_s3_logs(dirname='styles', rootname='map_report',
                                                  chunk_pat=r'\d+_styles_chunk_\d+', subdir='logs_good_maps')
        fail_map_num = logs.get_and_merge_s3_logs(dirname='styles', rootname='map_report',
                                                  chunk_pat=r'\d+_styles_chunk_\d+', subdir='logs_failed_maps')
        logs.sts(f"{good_map_num} styles successfully mapped; {fail_map_num} styles did not fully map.", 3)

    # Style logs are placed in one folder in styles.
    # Logs are named like exc_11010_styles_chunk_84.txt;
    # get_and_merge_s3_logs downloads file_pat=fr"{rootname}_{chunk_pat}\.txt"
    logs.get_and_merge_s3_logs(dirname='styles', rootname='log', chunk_pat=r'\d+_styles_chunk_\d+', subdir='logs')
    logs.get_and_merge_s3_logs(dirname='styles', rootname='exc', chunk_pat=r'\d+_styles_chunk_\d+', subdir='logs')
def save_style_ballot_images(ballots: list, style_num):
    for ballot in ballots:
        utils.sts(f"Saving images for ballot {ballot.ballotdict['ballot_id']}", 3)
        DB.save_data_list(data_list=ballot.ballotimgdict['images'],
                          dirname='styles',
                          name=ballot.ballotdict['ballot_id'],
                          format='.png',
                          subdir=style_num)
def save_ballot_images(self):
    """Method to save the ballot images as PNG files.
    This saves them by precinct rather than by style.
    """
    DB.save_data_list(data_list=self.ballotimgdict['images'],
                      dirname='styles',
                      name=self.ballotdict['ballot_id'],
                      format='.png',
                      subdir=self.ballotdict['precinct'])
def save_ballot(self):
    """Saves ballot data to a JSON file.
    Converts ballot attributes to a dictionary with the 'get_ballot_data' helper,
    or uses the passed 'data' dictionary.
    """
    DB.save_data(data_item=self.ballotdict,
                 dirname='results',
                 name=self.ballotdict['ballot_id'] + '.json',
                 subdir=self.ballotdict['precinct'])
def load_excel_to_df(argsdict: dict, filename_list: list, column_names_list: list):
    """Reads the CVR excel files and saves them as a single pandas data frame.
    Combines multiple CVR files and assumes their columns are identical.
    Renames unnamed columns by duplicating the last named column name.
    This is specific to ES&S CVR files.
    """
    for idx, file_name in enumerate(filename_list):
        utils.sts(f"Reading cvr file {file_name}...")
        if not idx:
            #CVR.data_frame = pd.read_excel(file, engine='xlrd')
            CVR.data_frame = DB.load_data(dirname='archives', name=file_name, user_format=True)
        else:
            #df = pd.read_excel(file, engine='xlrd')
            df = DB.load_data(dirname='archives', name=file_name, user_format=True)
            CVR.data_frame = CVR.data_frame.append(df, ignore_index=True)

    if argsdict.get('convert_cvr_image_cells_to_writein', False):
        CVR.set_cells_with_images_to_writeins(argsdict['cvr'])

    if column_names_list:
        utils.sts("Replacing column names with replacement column names provided.")
        # Use the replacement column headers instead of those provided in the CVR.
        orig_col_names = CVR.data_frame.columns
        if not len(orig_col_names) == len(column_names_list):
            utils.sts("Replacement column headers are not the right length to replace header names in CVR")
            sys.exit(1)
        # Keep any "blank" column names as "Unnamed: XXX" so we can rename them later.
        for i, orig_col_name in enumerate(orig_col_names):
            if re.match(r'Unnamed:', orig_col_name):
                column_names_list[i] = orig_col_name
        CVR.data_frame.columns = column_names_list

    utils.sts("Checking for duplicate column names.")
    # At this point, there should be no duplicate column names.
    num_unique_col_names = len(set(CVR.data_frame.columns))
    num_col_names = len(list(CVR.data_frame.columns))
    if not num_unique_col_names == num_col_names:
        utils.sts("Column names are duplicated")
        sys.exit(1)

    utils.sts("Replacing columns with 'Unnamed' with the prior named column name.")
    CVR.data_frame.columns = CVR.rename_unnamed(list(CVR.data_frame.columns))
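
# A minimal sketch of the "rename unnamed columns" rule described in the docstring above:
# any column pandas labels "Unnamed: N" inherits the name of the nearest named column to
# its left. CVR.rename_unnamed is not shown here, so this standalone helper is an
# illustrative assumption of that behavior, not the actual implementation.
import re

def rename_unnamed_sketch(columns: list) -> list:
    renamed = []
    last_named = None
    for col in columns:
        if re.match(r'Unnamed:', str(col)) and last_named is not None:
            renamed.append(last_named)      # duplicate the prior named column
        else:
            renamed.append(col)
            last_named = col
    return renamed

# Example: a multi-candidate contest spread across extra unnamed columns.
print(rename_unnamed_sketch(['Precinct', 'Mayor', 'Unnamed: 2', 'Unnamed: 3']))
# -> ['Precinct', 'Mayor', 'Mayor', 'Mayor']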
def combine_dirname_chunks_each_archive(argsdict, dirname):
    """ Combine all the chunks in a specific dirname into {archive_rootname}_{dirname}.csv files,
        one per archive. Do this in the dirname folder.
    """
    for archive_idx, source in enumerate(argsdict['source']):
        archive_rootname = os.path.splitext(os.path.basename(source))[0]
        DB.combine_dirname_chunks(
            dirname=dirname,
            subdir='chunks',
            dest_name=f"{archive_rootname}_{dirname}.csv",
            file_pat=fr"{archive_rootname}_{dirname}_chunk_\d+\.csv")
def accept_delegation_task_chunk(request_id, task_args):
    """ This is a locally callable function to allow debugging
        right after the args are unpacked.
    """
    args.argsdict = argsdict = task_args['argsdict']
    argsdict['on_lambda'] = True
    DB.set_DB_mode()

    chunk_name = task_args['chunk_name']
    #dirname = task_args['dirname']
    #subdir = task_args['subdir']
    job_name = argsdict['job_name']

    # We must be aware that lambdas are not fully initialized prior to use. If one lambda finishes
    # its work of the same kind and another is started, the state in that lambda is indeterminate,
    # but we have found that files and data structures may still exist.

    # No need to report that the Lambda is 'Running' -- we already know that.
    # LambdaTracker.lambda_report_status(task_args, request_id, status='Running')

    try:
        launch_task(task_args, s3flag=True)
    # pylint: disable=broad-except
    # We need to catch a broad exception here.
    except Exception as err:
        error_info = {
            'error_type': err.__class__.__name__,
            'error_message': repr(err),
            'error_stack': traceback.format_tb(err.__traceback__),
            'task_args': task_args,
        }
        LambdaTracker.lambda_report_status(task_args, request_id, status="Failed", error_info=error_info)
        msg = f"{job_name} Failed"
    else:
        LambdaTracker.lambda_report_status(task_args, request_id, status='Completed')
        error_info = None
        msg = f"{job_name} Completed"

    return {
        'body': json.dumps({
            'msg': msg,
            'error_info': error_info,
            'chunk_name': chunk_name,
        })
    }
def save_ballot_pdf(self):
    """Extracts the ballot pdf file so it can be viewed in the web browser.
    This appears to be unused.
    """
    precinct = self.ballotdict['precinct']
    ballot_id = self.ballotdict['ballot_id']
    pdf_file = self.ballotimgdict['pdf_file']
    DB.save_data(data_item=pdf_file.get('bytes_array'),
                 dirname='disagreements',
                 name=f'{ballot_id}.pdf',
                 format='.pdf',
                 subdir=precinct)
def getByLogin(self, login):
    banco = DB()
    try:
        c = banco.conexao.cursor()
        # Pass the parameter as a one-element tuple so it is properly escaped.
        c.execute('SELECT * FROM clientes WHERE login = %s', (login,))
        for linha in c:
            self.id = linha[0]
            self.login = linha[1]
            self.senha = linha[2]
            self.grupo = linha[3]
            self.nome = linha[4]
            self.endereco = linha[5]
            self.numero = linha[6]
            self.observacao = linha[7]
            self.cep = linha[8]
            self.bairro = linha[9]
            self.cidade = linha[10]
            self.estado = linha[11]
            self.telefone = linha[12]
            self.email = linha[13]  # assumes the email column follows telefone in the clientes table
        c.close()
        if not self.id:
            return 'Usuário não encontrado!'  # "User not found!"
        return 'Busca feita com sucesso!'  # "Search completed successfully!"
    except:
        return 'Ocorreu um erro na busca do usuário'  # "An error occurred while looking up the user"
def set_cells_with_images_to_writeins(file_paths):
    """Reads the CVR spreadsheet as a ZIP and extracts information from the .xml file
    about the cells that have images in them. Then sets null cells in the CVR data frame
    to write-in if the cell has an image within.

    :param file_paths: Path(s) to the CVR file(s).
    @TODO: Need to fix for s3 operation. Probably first download the file and then perform the work.
    """
    dirpath = DB.dirpath_from_dirname('archives')
    if dirpath.startswith('s3'):
        utils.sts("Cannot convert images to writeins on s3")
        sys.exit(1)

    if isinstance(file_paths, str):
        file_paths = [file_paths]

    for file_path in file_paths:
        archive = ZipFile(file_path, 'r')
        xml_path = 'xl/drawings/drawing1.xml'
        try:
            xml_file = archive.read(xml_path)
        except KeyError:
            utils.sts(f"Couldn't find {xml_path} in {file_path}")
            break
        doc = xml.dom.minidom.parseString(xml_file.decode())
        for cellAnchorElement in doc.getElementsByTagName('xdr:twoCellAnchor'):
            fromElement = cellAnchorElement.getElementsByTagName('xdr:from')[0]
            row = fromElement.getElementsByTagName('xdr:row')[0].firstChild.data
            col = fromElement.getElementsByTagName('xdr:col')[0].firstChild.data
            CVR.data_frame.iat[int(row) - 1, int(col)] = 'write-in:'
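
# A minimal, self-contained sketch of the XML structure the function above walks.
# The fragment below is a hand-written example of xl/drawings/drawing1.xml (not taken
# from a real CVR workbook); it shows where the 0-based row and column of each image
# anchor live inside xdr:twoCellAnchor / xdr:from.
import xml.dom.minidom

example_drawing_xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<xdr:wsDr xmlns:xdr="http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing">
  <xdr:twoCellAnchor>
    <xdr:from>
      <xdr:col>7</xdr:col>
      <xdr:colOff>0</xdr:colOff>
      <xdr:row>42</xdr:row>
      <xdr:rowOff>0</xdr:rowOff>
    </xdr:from>
  </xdr:twoCellAnchor>
</xdr:wsDr>"""

doc = xml.dom.minidom.parseString(example_drawing_xml)
for anchor in doc.getElementsByTagName('xdr:twoCellAnchor'):
    from_el = anchor.getElementsByTagName('xdr:from')[0]
    row = from_el.getElementsByTagName('xdr:row')[0].firstChild.data
    col = from_el.getElementsByTagName('xdr:col')[0].firstChild.data
    print(row, col)   # -> 42 7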
def load_bof_df(argsdict):
    """Returns conversions for ballot options.
    This function implements the Ballot Options File (BOF).
    """
    bof_columns = [
        'official_contest_name',    # official contest name, used as a means to look up the ballot option.
        'official_option',          # one option per record, used as a second index to look up the ballot option.
        'ballot_option',            # ballot option as shown on the ballot, only provided if the ballot
                                    # option differs from the official option.
    ]
    bof_filename = argsdict.get('bof')
    if not bof_filename:
        return None
    bof_df = DB.load_data(dirname='EIFs', name=bof_filename, silent_error=False, user_format=True)
    bof_df = check_table(bof_df,
                         table_name=bof_filename,
                         required_columns_list=bof_columns,
                         strip_cols=bof_columns)
    utils.sts(f"BOF {bof_filename} loaded.")
    return bof_df
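
# An illustrative Ballot Options File (BOF) matching the three required columns above.
# The contest and option names here are invented for the example; a 'ballot_option'
# value is only needed where the wording on the ballot differs from the official option.
import io
import pandas as pd

example_bof_csv = io.StringIO(
    "official_contest_name,official_option,ballot_option\n"
    "US Senator,Jane Q Candidate,JANE Q. CANDIDATE\n"
    "US Senator,Write-in,\n"
)
example_bof_df = pd.read_csv(example_bof_csv)
print(example_bof_df)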
def read_settings_csv_file(dirname, name, argspecs_dod, name_field='name', value_field='value'):
    """ Reads settings with columns name_field and value_field into dict[name] = value.
    """
    inputdict = {}
    error_flag = False
    if not name:
        return {}
    print(f"Input file specified. Reading input from file '{name}'...")

    # Need to be able to load from s3 or local.
    settings_df = DB.load_data(dirname='input_files', name=name, format='.csv', user_format=True, s3flag=False)
    settings_lod = settings_df.to_dict(orient='records')

    for setting_dict in settings_lod:
        name = setting_dict[name_field].strip(' ')
        if name not in argspecs_dod:
            print(f"{name_field} '{name}' not supported.")
            error_flag = True
            continue
        add_value_of_type(
            inputdict,
            name=name,
            spec_type=argspecs_dod[name]['type'],
            valstr=setting_dict[value_field])

    if error_flag:
        sys.exit(1)
    return inputdict
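
# A hedged example of the inputs read_settings_csv_file expects. Both the settings file
# contents and the argspecs_dod entries below are invented for illustration; the real
# argspecs_dod and the add_value_of_type() conversion live elsewhere in this codebase.
import io
import pandas as pd

example_settings_csv = io.StringIO(
    "name,value\n"
    "job_name,test_run_01\n"
    "use_lambdas,TRUE\n"
)
example_settings_lod = pd.read_csv(example_settings_csv).to_dict(orient='records')
# -> [{'name': 'job_name', 'value': 'test_run_01'},
#     {'name': 'use_lambdas', 'value': 'TRUE'}]

# Each setting name must appear in argspecs_dod, which declares the type that
# add_value_of_type() uses to convert the value string.
example_argspecs_dod = {
    'job_name':    {'type': 'str'},
    'use_lambdas': {'type': 'bool'},
}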
def get_dirname_results(dirname, s3flag=None):
    """ Return a list of s3paths or file_paths to result files, one per archive.
    """
    file_pat = f".*_{dirname}\\.csv"
    file_paths = DB.list_filepaths_in_dirname_filtered(dirname, file_pat=file_pat, s3flag=s3flag)
    return file_paths
def get_replacement_cvr_header(argsdict: dict) -> list:
    """
    :param argsdict: Dict of arguments passed on script input.
    """
    utils.sts("Loading EIF...", 3)
    eif_filename = argsdict.get('eif')
    eif_df = DB.load_data(dirname='EIFs', name=eif_filename, user_format=True)
    eif_df = check_table(eif_df,
                         table_name=eif_filename,
                         required_columns_list=EIF_REQUIRED_COLS,
                         strip_cols=EIF_STRIP_COLS)
    cvr_replacement_header_list = list(eif_df['official_contest_name'])

    expected_initial_cvr_cols = argsdict.get(
        'initial_cvr_cols', ['Cast Vote Record', 'Precinct', 'Ballot Style'])
    if not all(item in cvr_replacement_header_list for item in expected_initial_cvr_cols):
        expected_cols = ','.join(expected_initial_cvr_cols)
        utils.sts(f"ERROR: CVR does not have the expected fields in the header {expected_cols}", 0)
        sys.exit(1)
    return cvr_replacement_header_list
def merge_csv_dirname_local(dirname, subdir, dest_name, dest_dirname=None, file_pat=None):
    """ Merge all csv files in local dirname meeting file_pat into one file, dest_name.
        Uses the header line from the first file and discards the header in subsequent files.
        All csv files must have the same format.
    """
    if dest_dirname is None:
        dest_dirname = dirname
    sts(f"Merging csv from {dirname} to {dest_dirname}/{dest_name}", 3)

    src_dirpath = DB.dirpath_from_dirname(dirname, subdir=subdir, s3flag=False)
    dest_dirpath = DB.dirpath_from_dirname(dest_dirname, s3flag=False)
    destpath = os.path.join(dest_dirpath, dest_name)
    first_pass = True
    infilepath_list = glob.glob(f"{src_dirpath}*.csv")

    for idx, infilepath in enumerate(infilepath_list):
        basename = os.path.basename(infilepath)
        if file_pat is not None and not re.search(file_pat, basename):
            # Skip any files that are not the lambda download format, including the one being built.
            continue
        if infilepath == destpath:
            # Make sure we are not appending dest to itself.
            continue
        #sts(f"Appending result #{idx} from {infilepath}", 3)
        if first_pass:
            shutil.copyfile(infilepath, destpath)       # the first file is just copied to the new name
            fa = open(destpath, 'a+', encoding="utf8")
            first_pass = False
            continue
        # For the rest of the chunks, strip the header line and append.
        with open(infilepath, encoding="utf8") as fi:
            buff = fi.read()
        lines = re.split(r'\n', buff)
        non_header_lines = '\n'.join(lines[1:])         # skip the header line
        fa.write(non_header_lines)

    try:
        fa.close()
    except UnboundLocalError:
        pass
def delegated_build_bif_chunk(dirname, task_args, s3flag=None):
    """ This function is suitable for execution in a lambda after delegation.
        It can also be used by the local machine even if s3 is used for output.
    """
    # task_args: argsdict, archive_basename, chunk_idx, filelist
    args.argsdict = argsdict = task_args['argsdict']
    chunk_idx = task_args['chunk_idx']
    filelist = task_args['filelist']        # the list of files to be processed in this chunk.
    subdir = task_args['subdir']
    chunk_name = task_args['chunk_name']
    archive_basename = task_args['group_name']

    archive = open_archive(argsdict, archive_basename)   # if using s3, this will open the archive on s3.
    full_file_list = get_file_paths(archive)
    if not full_file_list:
        raise LookupError(f"archive {archive_basename} appears empty")

    pstyle_region_dict = argsdict.get('pstyle_region')
    pstyle_pattern = argsdict.get('pstyle_pattern', '')

    df_dict = {}
    # To save time, we build the dataframe as a dict of dicts, then create the dataframe in one swoop.
    # The format is {0: {col: value, ...}, 1: {...}, ...}
    #filelist = filelist[0:5]
    for index, file_paths in enumerate(filelist):
        ballot_file_paths = re.split(r';', file_paths)
        _, _, ballot_id = analyze_ballot_filepath(ballot_file_paths[0])
        df_dict[index] = create_bif_dict_by_reading_ballot(
            argsdict, ballot_id, index, archive_basename, archive,
            ballot_file_paths, pstyle_region_dict, pstyle_pattern, chunk_idx)

    # Create the dataframe all at once.
    #print(df_dict)
    chunk_df = pd.DataFrame.from_dict(df_dict, "index")

    DB.save_data(data_item=chunk_df, dirname=dirname, subdir=subdir, name=chunk_name, format='.csv', s3flag=s3flag)
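
# Illustrative only: how the dict-of-dicts accumulated above becomes a DataFrame in a
# single call, which is much faster than appending one row at a time. The column names
# are hypothetical stand-ins for whatever create_bif_dict_by_reading_ballot returns.
import pandas as pd

example_df_dict = {
    0: {'ballot_id': '10001', 'archive_basename': 'batch_01', 'chunk_idx': 0},
    1: {'ballot_id': '10002', 'archive_basename': 'batch_01', 'chunk_idx': 0},
}
example_chunk_df = pd.DataFrame.from_dict(example_df_dict, "index")
print(example_chunk_df)
#   ballot_id archive_basename  chunk_idx
# 0     10001         batch_01          0
# 1     10002         batch_01          0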
def remove_dirname_files_by_pattern(dirname, file_pat=None):
    """ Remove files from dirpath that do not match regex file_pat.
        If file_pat is None, all files in the dirname are removed.
    """
    dirpath = DB.dirpath_from_dirname(dirname)
    for filename in os.listdir(dirpath):
        full_path = os.path.join(dirpath, filename)
        if os.path.isfile(full_path) and not (file_pat and bool(re.search(file_pat, filename))):
            os.remove(full_path)
def write_html_summary(html_doc, filename='summary'):
    summary_path = DB.dirpath_from_dirname(filename)
    if not os.path.exists(summary_path):
        os.makedirs(summary_path)
    html_file_path = f"{summary_path}{filename}.html"
    with open(html_file_path, 'w') as html_file:
        html_file.write(html_doc.render())
    return os.path.abspath(html_file_path)
def get_logfile_pathname(rootname='log'):
    """ Lambdas can only open files in /tmp.
        Used only within this module.
    """
    if utils.on_lambda():
        return f"/tmp/{rootname}.txt"
    else:
        dirpath = DB.dirpath_from_dirname('logs', s3flag=False)   # this also creates the dir
        return f"{dirpath}{rootname}.txt"
def delete(self):
    banco = DB()
    try:
        c = banco.conexao.cursor()
        # Pass the parameter as a one-element tuple so it is properly escaped.
        c.execute('DELETE FROM produtos WHERE id = %s', (self.id,))
        banco.conexao.commit()
        c.close()
        return 'Produto excluído com sucesso!'  # "Product deleted successfully!"
    except:
        return 'Ocorreu um erro na exclusão do produto'  # "An error occurred while deleting the product"
def load_one_marks_df(df_file):
    """ A prior operation creates a separate NNNNN_marks_df.csv file for each ballot.
        This supports incremental operation.
    """
    #utils.sts(f"Loading df chunk {df_file}")
    #marks_df = DB.load_df(name=df_file, dirname='results')
    marks_df = DB.load_data(dirname='marks', name=df_file, format='.csv')
    return marks_df
def getAll(self):
    banco = DB()
    try:
        c = banco.conexao.cursor()
        c.execute('SELECT * FROM produtos')
        result = c.fetchall()
        c.close()
        return result
    except:
        return None
def extractvote_by_tasklists(argsdict: dict):
    """ ACTIVE
    This replaces the extractvotes function.
    Given tasklists which exist in the extraction_tasks folder
    (tasklists are generated by reviewing the BIF tables),
    each tasklist creates a separate f"marks_{tasklist_name}.csv" file in the results folder.
    """
    logs.sts('Extracting marks from extraction tasklists', 3)

    tasklists = DB.list_files_in_dirname_filtered(dirname='marks', subdir='tasks',
                                                  file_pat=r'^[^~].*\.csv$', fullpaths=False)
    total_num = len(tasklists)
    utils.sts(f"Found {total_num} tasklists", 3)

    use_lambdas = argsdict['use_lambdas']
    if use_lambdas:
        LambdaTracker.clear_requests()
        #clear_instructions(config_d.TASKS_BUCKET, Job.get_path_name())

    biflist = get_biflist(no_ext=True)
    for bif_idx, bifname in enumerate(biflist):
        archive_name = re.sub(r'_bif', '', bifname)
        genmarks_tasks = [t for t in tasklists if t.startswith(archive_name)]

        for chunk_idx, tasklist_name in enumerate(genmarks_tasks):
            #----------------------------------
            # This call may delegate to lambdas and return immediately if 'use_lambdas' is enabled.
            # Otherwise, it blocks until the chunk is completed.
            build_one_chunk(argsdict,
                            dirname='marks',
                            chunk_idx=chunk_idx,
                            filelist=[tasklist_name],
                            group_name=bifname,
                            task_name='extractvote',
                            incremental=False)
            #----------------------------------
            if not chunk_idx and not bif_idx and argsdict['one_lambda_first']:
                if not wait_for_lambdas(argsdict, task_name='extractvote'):
                    utils.exception_report("task 'extractvote' failed delegation to lambdas.")
                    sys.exit(1)

    wait_for_lambdas(argsdict, task_name='extractvote')

    utils.combine_dirname_chunks_each_archive(argsdict, dirname='marks')

    logs.get_and_merge_s3_logs(dirname='marks', rootname='log', chunk_pat=r"_chunk_\d+", subdir="chunks")
    logs.get_and_merge_s3_logs(dirname='marks', rootname='exc', chunk_pat=r"_chunk_\d+", subdir="chunks")
def getByName(self, nome):
    banco = DB()
    try:
        c = banco.conexao.cursor()
        # Pass the parameter as a one-element tuple so it is properly escaped.
        c.execute('SELECT * FROM clientes WHERE nome LIKE %s', ('%' + nome + '%',))
        result = c.fetchall()
        c.close()
        return result
    except:
        return None
def combine_archive_bifs():
    """ BIF tables are constructed for each archive. Combine these into a single BIF table.
        Returns full_bif_df.
        NOTE: This function does not create any new files.
    """
    utils.sts("Combining archive bifs", 3)
    return DB.combine_dirname_dfs(dirname='bif', file_pat=r'_bif\.csv')
def deleteByPedido(self, order_id):
    banco = DB()
    try:
        c = banco.conexao.cursor()
        # Pass the parameter as a one-element tuple so it is properly escaped.
        c.execute('DELETE FROM pedidos_produtos WHERE pedidos_id = %s', (order_id,))
        banco.conexao.commit()
        c.close()
        return True
    except:
        return False
def gen_style_filepaths(style_num):
    #style_dict = DB.load_style(**{'name': style_num})
    style_dict = DB.load_data(dirname='styles',
                              subdir=style_num,
                              name=f'{style_num}_style',
                              silent_error=True)
    try:
        return style_dict['filepaths']
    except TypeError:
        return None
def filterByName(self, name):
    banco = DB()
    try:
        c = banco.conexao.cursor()
        # Pass the parameter as a one-element tuple so it is properly escaped.
        c.execute('SELECT * FROM produtos WHERE descricao LIKE %s', ('%' + name + '%',))
        result = c.fetchall()
        c.close()
        return result
    except:
        return None
def getByDesc(self, desc):
    banco = DB()
    try:
        c = banco.conexao.cursor()
        # Pass the parameter as a one-element tuple so it is properly escaped.
        c.execute('SELECT * FROM produtos WHERE descricao LIKE %s', ('%' + desc + '%',))
        result = c.fetchall()
        c.close()
        return result
    except:
        return None
def update(self):
    banco = DB()
    try:
        c = banco.conexao.cursor()
        c.execute(
            'UPDATE produtos SET descricao = %s , valor = %s , imagem = %s WHERE id = %s',
            (self.descricao, self.valor, self.imagem, self.id))
        banco.conexao.commit()
        c.close()
        return 'Produto atualizado com sucesso!'  # "Product updated successfully!"
    except:
        return 'Ocorreu um erro na alteração do produto'  # "An error occurred while updating the product"