def __init__(self, file_path):
    """
    Setup the pdf parser to be able to read the passed in file.

    The pdf is read fully into an in-memory buffer before it is handed to
    the parser. The parser/document built here are used again after this
    constructor returns (extract_pdf_text iterates self.doc.get_pages()),
    so the stream given to PDFParser must outlive the `with` block that
    opened the file. An in-memory copy guarantees that, while the OS file
    handle is still closed promptly.

    Any failure is logged with its traceback and converted to a 500 abort.

    Args:
        file_path (string): Path to the pdf that will be worked on.
    """
    # Local import so this block does not depend on the file's import list.
    import io

    try:
        self.file_path = file_path

        # Copy the file content so the parser never reads a closed handle.
        with open(self.file_path, 'rb') as f:
            pdf_stream = io.BytesIO(f.read())

        # Wire the parser and the document to each other, then initialize
        # the document with an empty password.
        self.parser = PDFParser(pdf_stream)
        self.doc = PDFDocument()
        self.parser.set_document(self.doc)
        self.doc.set_parser(self.parser)
        self.doc.initialize('')

        # Layout analysis setup used later by the page interpreter.
        self.rsrcmgr = PDFResourceManager()
        self.laparams = LAParams()
        self.laparams.char_margin = 1.0
        self.laparams.word_margin = 1.0
        self.device = PDFPageAggregator(self.rsrcmgr, laparams=self.laparams)
        self.interpreter = PDFPageInterpreter(self.rsrcmgr, self.device)
    except Exception as e:
        logger.logTraceback('---' + str(e) + '---', e)
        abort(500, description=e)
def keep_alive():
    """
    Mark the keep-alive job as started and reset its wait time.

    Returns:
        Response: JSON body {'success': True} with status 200.
        Any failure is logged with its traceback and turned into a 500 abort.
    """
    try:
        keep_alive_scheduler.job_started = True
        keep_alive_scheduler.resetWaitTime()
        payload = jsonify({'success': True})
        return make_response(payload, 200)
    except Exception as err:
        log_message = "Endpoint: '/keepAlive'\n---" + str(err) + "---"
        logger.logTraceback(log_message, err)
        abort(500, description=err)
def generate_employee_info(self, pdf_string):
    """
    Parses the pdf text and collects, for every configured employee name
    found in it, the employee's full name, id and number of hours worked.

    Nothing is returned: each employee found is appended to this object's
    employees attribute as
    [employee_name, employee_id, employee_hours, "0%", "$0"].
    A failure while processing one employee is logged and does not stop
    the remaining employees from being processed.

    Args:
        pdf_string (string): The string generated from a pdf that is used
            to collect all employee information.
    """
    logger.logEvent(
        'Starting PDF parsing to generate list of employees and their info...'
    )
    pdf_list = pdf_string.split(' ')

    for name in settings.user_settings['EMPLOYEE_NAMES']:
        # Initialize current employee's stats (also used in the error log
        # below if a lookup fails part way through)
        name_index = 0
        employee_id = 0
        employee_hours = 0.0

        # Get first and last name(s) of current employee
        name_parts = name.split(' ')
        first_name = name_parts[0]
        last_names = name_parts[1:]

        try:
            name_index = self.get_name_index(first_name, last_names, pdf_list)

            # If index is 0, employee wasn't found/not a valid employee name,
            # so skip finding the rest of the employee data
            if name_index == 0:
                continue

            employee_id = self.get_employee_id(name_index, pdf_list)
            employee_hours = self.get_employee_hours(name_index, pdf_list)

            # Join all parts so a single-word name does not get a trailing
            # space appended to it.
            employee_name = ' '.join(name_parts)

            # "0%" and "$0" are placeholder values;
            # NOTE(review): presumably filled in by the client - confirm.
            self.employees.append(
                [employee_name, employee_id, employee_hours, "0%", "$0"])
        except Exception as e:
            data = "Index: {} ID: {} Hours: {} Name: {}".format(
                name_index, employee_id, employee_hours, name)
            logger.logTraceback('Data: ' + data + '\n---' + str(e) + '---', e)
            logger.logEvent('Failed to add employee: (' + data + ')',
                            log_type=logger.logging.ERROR)
def startJob():
    """
    Register the keep-alive job with the scheduler and launch the
    background thread that services it, so the server stays alive as long
    as the client is active.

    The job (runJob) is scheduled every KEEP_ALIVE_TIMEOUT seconds and the
    thread runs scheduleThread as a daemon. Any failure is logged with its
    traceback and turned into a 500 abort.
    """
    try:
        interval_seconds = settings.app_settings['KEEP_ALIVE_TIMEOUT']
        schedule.every(interval_seconds).seconds.do(runJob)

        worker = threading.Thread(target=scheduleThread, daemon=True)
        worker.start()
    except Exception as err:
        logger.logTraceback('---' + str(err) + '---', err)
        abort(500, description=err)
def creator():
    """
    Build the small HTML page with author, release dates and the server
    and client version numbers taken from the app settings.

    Returns:
        string: The HTML markup for the page.
        Any failure is logged with its traceback and turned into a 500 abort.
    """
    try:
        page_template = """
        <link rel="shortcut icon" href="/static/favicon.ico">
        <p>Created By: Nicholas Evans</p>
        <p>Date Started: May 19th 2020</p>
        <p>Date of Initial Release: June 27th 2020</p>
        <p>Server Version: {server}</p>
        <p>Client Version: {client}</p>
        """
        return page_template.format(
            server=str(settings.app_settings['SERVER_VERSION']),
            client=str(settings.app_settings['CLIENT_VERSION']),
        )
    except Exception as err:
        # NOTE(review): the logged endpoint label says '/settings'; it
        # looks copy-pasted from another handler - confirm the real route.
        logger.logTraceback("Endpoint: '/settings'\n---" + str(err) + "---", err)
        abort(500, description=err)
def changelog():
    """
    Render the changelog file as HTML, wrapping every line of the file in
    its own <pre> element.

    Returns:
        string: The HTML markup for the page.
        Any failure is logged with its traceback and turned into a 500 abort.
    """
    try:
        changelog_path = settings.app_settings["CHANGELOG_PATH"]
        with open(changelog_path, 'r') as changelog_file:
            wrapped_lines = [
                "<pre>" + line + "</pre>" for line in changelog_file
            ]
        formatted_data = ''.join(wrapped_lines)

        return """
        <link rel="shortcut icon" href="/static/favicon.ico">
        """ + formatted_data + """
        """
    except Exception as err:
        # NOTE(review): the logged endpoint label says '/settings'; it
        # looks copy-pasted from another handler - confirm the real route.
        logger.logTraceback("Endpoint: '/settings'\n---" + str(err) + "---", err)
        abort(500, description=err)
def generate_data():
    """
    Handle an uploaded payroll pdf and respond with the employee list.

    The upload is validated, saved into the configured UPLOAD_FOLDER, its
    text is extracted with PDFToText and then parsed by Employees.

    Returns:
        Response: On success, JSON
            {'success': True, 'response': {'employees': [...]}} with
            status 200. When the request has no file, or the file type is
            not allowed, JSON {'success': False, 'status': ...} with
            status 400.
        Any failure while saving or processing the file is logged with
        its traceback and turned into a 500 abort.
    """
    logger.logEvent('Employee list requested...')
    logger.logEvent('Validating request...')

    # Check if file exists in request
    if 'file' not in request.files:
        return make_response(
            jsonify({'success': False, 'status': 'No File'}), 400)

    file = request.files['file']

    # Check if file is an empty string (invalid)
    if file.filename == '':
        return make_response(
            jsonify({'success': False, 'status': 'No File'}), 400)

    # Reject disallowed uploads explicitly. Without this guard the view
    # fell through and returned None, which is not a valid response.
    if not (file and utils.is_allowed_file(file.filename)):
        return make_response(
            jsonify({'success': False, 'status': 'Invalid File'}), 400)

    traceback_data = ""
    filename = secure_filename(file.filename)
    try:
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        traceback_data = "File Path: {}".format(file_path)

        # Save file to uploads folder
        file.save(file_path)

        # Generate report
        reader = PDFToText(file_path)
        employees = Employees()

        # Extract Data from pdf report
        extracting_start_time = time.time()
        pdf_string = reader.extract_pdf_text()
        logger.logEvent('Completed text extraction ({}s)'.format(
            round(time.time() - extracting_start_time, 4)))

        parsing_start_time = time.time()
        employees.generate_employee_info(pdf_string)
        logger.logEvent('Completed parsing ({}s)'.format(
            round(time.time() - parsing_start_time, 4)))

        logger.logEvent('Employee list response sent!')
        return make_response(
            jsonify({'success': True,
                     'response': {'employees': employees.employees}}),
            200)
    except Exception as e:
        logger.logTraceback(
            'Endpoint: \'/generate\'\n' + traceback_data
            + '\n---' + str(e) + '---', e)
        abort(500, description=e)
def update_settings():
    """
    Replace the EMPLOYEE_NAMES user setting with the list of names sent in
    the request's JSON body, persist the settings to file and log which
    names were added or removed.

    Returns:
        Response: JSON {'success': True} on success. When the body is not
            a JSON list of strings, JSON
            {'success': False, 'status': 'Invalid Names'} with status 400.
        Any other failure is logged with its traceback and turned into a
        500 abort.
    """
    try:
        names = request.json

        # Validate before touching the settings: a missing body (None) or
        # a non-list value would otherwise crash in set() or be written
        # into the settings in a shape the rest of the app cannot use.
        is_name_list = isinstance(names, list) and all(
            isinstance(n, str) for n in names)
        if not is_name_list:
            return make_response(
                jsonify({'success': False, 'status': 'Invalid Names'}), 400)

        # Work out the difference before the setting is overwritten.
        new_names = set(names)
        old_names = set(settings.user_settings['EMPLOYEE_NAMES'])
        names_added = list(new_names - old_names)
        names_removed = list(old_names - new_names)

        settings.update_user_setting('EMPLOYEE_NAMES', names)
        settings.write_updated_settings_to_file()

        if names_added:
            logger.logEvent(
                'Employee names added to EMPLOYEE_NAMES setting: '
                + ', '.join('"' + n + '"' for n in names_added))
        if names_removed:
            logger.logEvent(
                'Employee names removed from EMPLOYEE_NAMES setting: '
                + ', '.join('"' + n + '"' for n in names_removed))

        return make_response(jsonify({'success': True}))
    except Exception as e:
        logger.logTraceback(
            'Endpoint: \'/settings_update\'\nUpdating Employee Names: \n---'
            + str(e) + '---', e)
        abort(500, description=e)
def extract_pdf_text(self):
    """
    Walk every page of the parsed document and collect the text of each
    text box / text line layout object, with newlines turned into spaces.

    Returns:
        string: The extracted text from the pdf.
        Any failure is logged with its traceback and turned into a 500 abort.
    """
    pieces = []
    logger.logEvent('Beginning extraction of PDF to Text...')
    try:
        text_types = (LTTextBox, LTTextLine)
        for page in self.doc.get_pages():
            self.interpreter.process_page(page)
            # The aggregator device holds the layout of the page that was
            # just processed.
            for layout_item in self.device.get_result():
                if not isinstance(layout_item, text_types):
                    continue
                pieces.append(layout_item.get_text().replace('\n', ' '))
        return ''.join(pieces)
    except Exception as err:
        logger.logTraceback('---' + str(err) + '---', err)
        abort(500, description=err)
def get_settings():
    """
    Return the current EMPLOYEE_NAMES user setting.

    Returns:
        Response: JSON {'success': True, 'settings': <employee names>}
            with status 200.
        Any failure is logged with its traceback and turned into a 500 abort.
    """
    try:
        return make_response(
            jsonify({'success': True,
                     'settings': settings.user_settings['EMPLOYEE_NAMES']}),
            200)
    except Exception as e:
        logger.logTraceback(
            'Endpoint: \'/settings_get\'\n---' + str(e) + '---', e)
        # The exception must be converted to text before concatenation;
        # adding it to a str directly raises TypeError inside this handler
        # and hides the original error.
        abort(500, description="Request Settings: \n" + str(e))
def settings_controller():
    """
    Serve the static settings page (html/settings.html).

    Returns:
        Response: The static file response produced by the app.
        Any failure is logged with its traceback and turned into a 500 abort.
    """
    try:
        page = app.send_static_file('html/settings.html')
    except Exception as err:
        logger.logTraceback("Endpoint: '/settings'\n---" + str(err) + "---", err)
        abort(500, description=err)
    else:
        return page
def index():
    """
    Serve the static landing page (html/index.html).

    Returns:
        Response: The static file response produced by the app.
        Any failure is logged with its traceback and turned into a 500 abort.
    """
    try:
        page = app.send_static_file('html/index.html')
    except Exception as err:
        logger.logTraceback("Endpoint: '/'\n---" + str(err) + "---", err)
        abort(500, description=err)
    else:
        return page