def records(self, gs_id, tab=0):
    '''Returns a list of GoogleLostRecord objects.

    Reads the spreadsheet identified by "gs_id" (tab number "tab"), skips
    the title row, skips empty rows and rows without an item barcode
    (barcodes are how items are tracked), and converts the rest.
    '''
    if __debug__: log('getting entries from Google spreadsheet')
    sheet_rows = self._content(gs_id, tab)
    if not sheet_rows:
        if __debug__: log('Google returned empty spreadsheet')
        return []
    # We have data from the spreadsheet.  Convert it to a list of records.
    if __debug__: log('building records from {} rows', len(sheet_rows) - 1)
    results = []
    # Fix: this value was previously computed but never used; the padded()
    # call below recomputed len(_COL_INDEX) instead.
    num_columns_expected = len(_COL_INDEX)
    # First row is the title row, so we skip it.
    for row in sheet_rows[1:]:
        if not row:
            if __debug__: log('skipping null row')
            continue
        # When a row from a Google spreadsheet has empty cells at the tail
        # end, the list of values we get back is not the full length; it's
        # only as long as the last column that has a value.  We still want
        # to process the row as much as we can, so we try to cope.
        row = padded(row, num_columns_expected)
        if row[_COL_INDEX['item_barcode']] == '':
            # We use barcodes to track items, so we can't go on without it.
            if __debug__: log('skipping row with missing barcode: {}', row)
            continue
        results.append(GoogleLostRecord(row=row))
    return results
def on_escape(self, event):
    '''Key handler: treat Escape as cancel/quit; pass other keys along.'''
    if event.GetKeyCode() == wx.WXK_ESCAPE:
        if __debug__: log('got Escape')
        self.on_cancel_or_quit(event)
    else:
        event.Skip()
def email_body(record, sheet_url):
    '''Return the text of the notification email for a newly lost item.'''
    # Helper function: show a placeholder for empty field values.
    def value(field):
        return '-- none --' if field == '' else field

    if __debug__: log('formatting email body')
    fields = (record.item_title, record.item_author, record.item_call_number,
              record.item_barcode, record.item_location_code,
              record.item_location_name, record.requester_name,
              record.requester_email, record.requester_type)
    summary = '''
Title: {}
Author: {}
Call #: {}
Barcode: {}
Location code: {}
Location name: {}
Requester name: {}
Requester email: {}
Patron type: {}
'''.format(*(value(f) for f in fields))
    joke = random_pun()
    pun_section = "---\nAnd here is a random pun:\n" + joke if joke else ''
    return '''
Lost It! was just run and it discovered a new lost item recorded in TIND:
{}
Here is the URL for the spreadsheet of lost items:

  {}

{}
'''.format(summary, sheet_url, pun_section)
def init_credentials(self):
    '''Initializes the credentials to use for accessing this service.

    Raises AuthFailure (chained to the underlying exception) on failure.
    '''
    try:
        if __debug__: log('initializing credentials')
        self._credentials = MicrosoftCredentials().creds()
    except Exception as ex:
        # Chain the cause so the original traceback is preserved.
        raise AuthFailure(str(ex)) from ex
def _resized_image(self, file):
    '''Return the path to a dimension-reduced variant of the image "file".

    Reuses an existing ".handprint" variant if its dimensions fit within
    the current limits; otherwise creates one.  Returns None on failure.
    '''
    (max_width, max_height) = self._max_dimensions
    suffix = '.handprint' + filename_extension(file)
    if suffix in file:
        new_file = file
    else:
        new_file = filename_basename(file) + suffix
    if path.exists(new_file) and readable(new_file):
        from handprint.images import image_dimensions
        (image_width, image_height) = image_dimensions(new_file)
        if image_width < max_width and image_height < max_height:
            inform(f'Using reduced image found in {relative(new_file)}')
            return new_file
        # We found a "-reduced" file, perhaps from a previous run, but
        # for the current set of services, dimension are too large.
        if __debug__: log('existing resized file larger than'
                          + f' {max_width}x{max_height}: {new_file}')
    inform(f'Dimensions too large; reducing dimensions: {relative(file)}')
    from handprint.images import reduced_image_dimensions
    (resized, error) = reduced_image_dimensions(file, new_file,
                                                max_width, max_height)
    if error:
        alert(f'Failed to re-dimension {relative(file)}: {error}')
        return None
    return resized
def _save(self, result, file):
    '''Write "result" (str, io.BytesIO, or (data, error) tuple) to "file".'''
    # First perform some sanity checks.
    if result is None:
        warn(f'No data for {file}')
        return
    if isinstance(result, tuple):
        # Assumes 2 elements: data, and error
        (data, error) = result
        if error:
            alert(f'Error: {error}')
            warn(f'Unable to write {file}')
            return
        result = data
    if __debug__: log(f'writing output to file {relative(file)}')
    if isinstance(result, str):
        with open(file, 'w', encoding='utf-8') as output:
            output.write(result)
    elif isinstance(result, io.BytesIO):
        with open(file, 'wb') as output:
            shutil.copyfileobj(result, output)
    else:
        # There's no other type in the code, so if we get here ...
        raise InternalError('Unexpected data in save_output() -- please report this.')
def __init__(self):
    '''Load the Microsoft credentials file and normalize its endpoint.

    Raises AuthFailure if the file is missing, unreadable, or cannot be
    parsed as JSON.
    '''
    cfile = path.join(self.credentials_dir(), credentials_filename('microsoft'))
    if __debug__: log(f'credentials file for microsoft is {cfile}')
    if not path.exists(cfile):
        raise AuthFailure('Credentials for Microsoft have not been installed')
    elif not readable(cfile):
        raise AuthFailure(f'Microsoft credentials file unreadable: {cfile}')
    try:
        with open(cfile, 'r') as file:
            creds = json.load(file)
            if 'endpoint' in creds:
                # Normalize: strip trailing slash, ensure a scheme.
                endpoint = creds['endpoint'].rstrip('/')
                if not endpoint.startswith('http'):
                    endpoint = 'https://' + endpoint
            else:
                if __debug__: log('endpoint not found; using default')
                endpoint = _DEFAULT_ENDPOINT
            creds['endpoint'] = endpoint
            self.credentials = creds
    except Exception as ex:
        # Bug fix: the message previously said "exceptions file" instead
        # of "credentials file".
        raise AuthFailure(f'Unable to parse Microsoft credentials file: {str(ex)}')
def _smaller_file(self, file):
    '''Return the path to a size-reduced variant of the image "file".

    Reuses an existing ".handprint" variant if it is already under the
    size limit; otherwise creates one.  Returns None on failure.
    '''
    if not file:
        return None
    suffix = '.handprint' + filename_extension(file)
    new_file = file if suffix in file else filename_basename(file) + suffix
    if path.exists(new_file):
        from handprint.images import image_size
        if image_size(new_file) < self._max_size:
            inform(f'Reusing resized image found in {relative(new_file)}')
            return new_file
        # We found a ".handprint.ext" file, perhaps from a previous run,
        # but for the current set of services, it's larger than allowed.
        if __debug__: log('existing resized file larger than'
                          + f' {self._max_size}b: {new_file}')
    inform(f'Size too large; reducing size: {relative(file)}')
    from handprint.images import reduced_image_size
    (resized, error) = reduced_image_size(file, new_file, self._max_size)
    if error:
        alert(f'Failed to resize {relative(file)}: {error}')
        return None
    return resized
def locked_delete(self):
    '''Delete the stored encryption key from the system keyring.

    Removes the password entry for this service/user combination.  (The
    previous docstring mentioned a "credentials" argument, but this
    method takes no arguments.)
    '''
    if __debug__: log('deleting encryption key')
    keyring.delete_password(self._service_name, self._user_name)
def return_iiif_content(barcode, rest, person):
    '''Return the manifest file for a given item.

    Serves IIIF content for "barcode" only if "person" currently has an
    active loan on the item; otherwise redirects to the "not allowed"
    page.  Responses are cached in _IIIF_CACHE; JSON responses have their
    URLs rewritten before being sent to the client.
    '''
    item = Item.get(Item.barcode == barcode)
    loan = Loan.get_or_none(Loan.item == item, Loan.user == person.uname)
    if loan and loan.state == 'active':
        record_request(barcode)
        url = _IIIF_BASE_URL + '/' + urls_restored(rest, barcode)
        if url in _IIIF_CACHE:
            # Cache hit: serve the previously-fetched bytes directly.
            content, ctype = _IIIF_CACHE[url]
            data = BytesIO(content)
            log(f'returning cached /iiif/{barcode}/{rest} for {person.uname}')
            return send_file(data, ctype = ctype, size = len(content))
        # Read the data from our IIIF server instance & send it to the client.
        log(f'getting /iiif/{barcode}/{rest} from server')
        response, error = net('get', url)
        if not error:
            if url.endswith('json'):
                # Always rewrite URLs in any JSON files we send to the client.
                content = urls_rerouted(response.text, barcode).encode()
                ctype = 'application/json'
            else:
                # NOTE(review): non-JSON content is assumed to be JPEG --
                # confirm the IIIF server never returns other image types.
                content = response.content
                ctype = 'image/jpeg'
            _IIIF_CACHE[url] = (content, ctype)
            data = BytesIO(content)
            log(f'returning content of /iiif/{barcode}/{rest} for {person.uname}')
            return send_file(data, ctype = ctype, size = len(content))
        else:
            # Fetch failed; return None so the framework produces an
            # empty response.
            log(f'error {str(error)} accessing {url}')
            return
    else:
        log(f'{person.uname} does not have {barcode} loaned out')
        redirect(f'{dibs.base_url}/notallowed')
def article_metadata(self, article):
    '''Return a metadata dict for "article" built from its DataCite record.

    Fetches the DataCite entry for the article's DOI, decodes the
    embedded XML, and adjusts/augments several fields (dates, volume,
    file name, journal, e-issn, rights).  Returns None on failure.
    '''
    (response, error) = net('get', _DATACITE_API_URL + article.doi)
    if error:
        if __debug__: log(f'error from datacite for {article.doi}: {str(error)}')
        return None
    elif not response:
        warn(f'Empty response from DataCite for {article.doi}')
        return None
    # Fix: renamed local from "json" so it no longer shadows the json module.
    payload = response.json()
    xmldict = xmltodict.parse(
        base64.b64decode(payload['data']['attributes']['xml']))
    date = payload['data']['attributes']['registered']
    if 'dates' in xmldict['resource']:
        xmldict['resource']['dates']['date']['#text'] = date
    else:
        # No dates element in the XML; fall back to the article's own date.
        xmldict['resource']['dates'] = {'date': article.date}
    xmldict['resource']['volume'] = volume_for_year(
        xmldict['resource']['publicationYear'])
    xmldict['resource']['file'] = article.basename + '.pdf'
    xmldict['resource']['journal'] = xmldict['resource'].pop('publisher')
    xmldict['resource']['e-issn'] = self.issn
    xmldict['resource']["rightsList"] = [{
        "rights": "Creative Commons Attribution 4.0",
        "rightsURI": "https://creativecommons.org/licenses/by/4.0/legalcode"
    }]
    # Drop XML namespace attributes that don't belong in the output.
    xmldict['resource'].pop('@xmlns')
    xmldict['resource'].pop('@xsi:schemaLocation')
    return xmldict
def _do_main_work(self):
    '''Gather the target images, report the plan, and run the services.'''
    # Gather up some things and get prepared.
    targets = self.targets_from_arguments()
    if not targets:
        alert_fatal('No images to process; quitting.')
        raise CannotProceed(ExitCode.bad_arg)
    num_targets = len(targets)
    inform(f'Given {pluralized("image", num_targets, True)} to work on.')
    # Bug fix: a stray 3rd argument (num_targets) was previously passed to
    # this 2-placeholder format string and silently ignored.
    inform('Will apply results of {}: {}'.format(
        pluralized('service', len(self.services), True),
        ', '.join(self.services)))
    inform(f'Will use credentials stored in {Credentials.credentials_dir()}/.')
    if self.extended:
        inform('Will save extended results.')
    num_threads = min(self.threads, len(self.services))
    inform(f'Will use up to {num_threads} process threads.')
    # Get to work.
    if __debug__: log('initializing manager and starting processes')
    import shutil
    print_separators = num_targets > 1
    rule = '─' * (shutil.get_terminal_size().columns or 80)
    for index, item in enumerate(targets, start=1):
        # Check whether we've been interrupted before doing another item.
        raise_for_interrupts()
        # Process next item, bracketed by separator rules when there are
        # multiple targets.
        if print_separators:
            inform(rule)
        self._manager.run_services(item, index, self.base_name)
        if print_separators:
            inform(rule)
def _article_tuples(self, xml):
    '''Parse the XML input, assumed to be from micropublication.org, and
    create a list of `Article` records.

    Articles missing any of pdf/jats/doi/title/date are marked
    'incomplete'.  Returns an empty list if the XML cannot be parsed.
    '''
    if __debug__: log('parsing XML data')
    articles = []
    if isinstance(xml, str):
        # The micropublication xml declaration explicitly uses ascii encoding.
        xml = xml.encode('ascii')
    try:
        for element in etree.fromstring(xml).findall('article'):
            doi   = (element.find('doi').text or '').strip()
            pdf   = (element.find('pdf-url').text or '').strip()
            jats  = (element.find('jats-url').text or '').strip()
            image = (element.find('image-url').text or '').strip()
            title = (element.find('article-title').text or '').strip()
            date  = element.find('date-published')
            # "is not None" (not truthiness or !=) is the correct test for
            # an etree element's presence.
            if date is not None:
                year  = (date.find('year').text or '').strip()
                month = (date.find('month').text or '').strip()
                day   = (date.find('day').text or '').strip()
                date  = year + '-' + month + '-' + day
            else:
                date = ''
            basename = tail_of_doi(doi)
            status = 'complete' if all([pdf, jats, doi, title, date]) else 'incomplete'
            articles.append(Article(self.issn, doi, date, title, basename,
                                    pdf, jats, image, status))
    except Exception:
        if __debug__: log('could not parse XML from server')
        alert('Unexpected or badly formed XML returned by server')
    return articles
def help_pages(filename = 'index.html'):
    '''Return a static help file from the server's htdocs/help directory.

    Requires an authenticated person with the needed role.
    '''
    person = person_from_environ(request.environ)
    required_roles(person)
    p = os.path.join(_SERVER_ROOT, 'htdocs', 'help')
    # Bug fix: the log message previously printed the literal text
    # "(unknown)" instead of the requested file name.
    log(f'returning help file {filename} from {p}')
    return static_file(filename, root = p)
def _do_preflight(self):
    '''Check the option values given by the user, and do other prep.

    Verifies network availability, the readability of the input file,
    and the writability (creating it if needed) of the output directory.
    Raises CannotProceed with an appropriate exit code on failure.
    '''
    from handprint.network import network_available
    if not network_available():
        alert_fatal('No network connection.')
        raise CannotProceed(ExitCode.no_network)
    if self.from_file:
        if not exists(self.from_file):
            alert_fatal(f'File not found: {self.from_file}')
            raise CannotProceed(ExitCode.bad_arg)
        if not readable(self.from_file):
            alert_fatal(f'File not readable: {self.from_file}')
            raise CannotProceed(ExitCode.file_error)
    if self.output_dir:
        if isdir(self.output_dir):
            if not writable(self.output_dir):
                alert_fatal(f'Directory not writable: {self.output_dir}')
                raise CannotProceed(ExitCode.file_error)
        else:
            # Robustness fix: makedirs (not mkdir) so a nested output
            # path with missing intermediate directories also works.
            os.makedirs(self.output_dir)
            if __debug__: log(f'created output_dir directory {self.output_dir}')
def _image_from_file(self, file_path):
    '''Helper function for subclasses to read image files.

    Returns a tuple, (image, error), where "error" is a TRResult with a
    non-empty error field value if an error occurred, and "image" is the
    bytes of the image if it was successfully read.
    '''
    def error_result(error_text):
        return (None, TRResult(path=file_path, data={}, text='',
                               error=error_text, boxes=[]))

    rel_path = relative(file_path)
    if not readable(file_path):
        return error_result(f'Unable to read file: {rel_path}')
    if __debug__: log(f'reading {rel_path} for {self.name()}')
    with open(file_path, 'rb') as image_file:
        image = image_file.read()
    if len(image) == 0:
        return error_result(f'Empty file: {rel_path}')
    if len(image) > self.max_size():
        text = f'Exceeds {self.max_size()} byte limit for service: {rel_path}'
        return error_result(text)
    width, height = imagesize.get(file_path)
    if __debug__: log(f'image size is width = {width}, height = {height}')
    limits = self.max_dimensions()
    if limits:
        max_width, max_height = limits
        if width > max_width or height > max_height:
            text = f'Dimensions {width}x{height} exceed {self.name()} limits: {rel_path}'
            return error_result(text)
    return (image, None)
def articles_from(self, doi_file):
    '''Returns a list of `Article` tuples from a file of DOIs.

    Reads one DOI per line from "doi_file", warns about (and skips) DOIs
    not known to this journal, and returns the matching Article objects.
    '''
    if __debug__: log(f'reading {doi_file}')
    with open(doi_file, 'r') as file:
        requested_dois = [line.strip() for line in file]
    num = len(requested_dois)
    # I'd use pluralized() here, but it matches case when it adds a 's',
    # and is confused by DOI which is an acronym.  Must add 's' ourselves.
    inform(f'Found {num} DOI{"s" if num > 1 else ""} in {doi_file}.')
    if not requested_dois:
        if __debug__: log(f'could not read any lines from {doi_file}')
        return []
    all_articles = self.all_articles()
    # Perf fix: use sets for O(1) membership tests instead of the previous
    # O(n^2) list scans inside the loops.
    known_dois = {article.doi for article in all_articles}
    requested = set(requested_dois)
    skipped = 0
    for doi in requested_dois:
        if doi not in known_dois:
            warn(f'Skipping "{doi}" because it is unknown for this journal.')
            skipped += 1
    if skipped:
        kept = num - skipped
        inform(f'Using {kept} DOI{"s" if kept > 1 else ""} from {doi_file}.')
    return [article for article in all_articles if article.doi in requested]
def _api(self, get_or_post, url, headers, data = None, polling = False):
    '''Issue a GET or POST to the Microsoft service and return the response.

    Sleeps and retries on rate-limit errors; on service failures, tries
    to extract the error code/message from the response body and raises
    a more informative ServiceFailure.
    '''
    from handprint.network import net
    response, error = net(get_or_post, url, headers = headers, data = data,
                          polling = polling)
    if isinstance(error, NetworkFailure):
        if __debug__: log(f'network exception: {str(error)}')
        # NOTE(review): "path" is not defined in this function; it appears
        # to reference a module-level name.  Confirm the intended value
        # (possibly the file path being processed).
        return TRResult(path = path, data = {}, text = '', error = str(error))
    elif isinstance(error, RateLimitExceeded):
        # https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits
        # The headers have a Retry-After number in seconds in some cases
        # but not others, so we default to something just in case.
        sleep_time = 20
        if 'Retry-After' in response.headers:
            sleep_time = int(response.headers['Retry-After'])
        if __debug__: log(f'sleeping for {sleep_time} s and retrying')
        wait(sleep_time)
        return self._api(get_or_post, url, headers, data, polling)  # Recurse
    elif error:
        if isinstance(error, ServiceFailure):
            # If it was an error generated by the Microsoft service, there
            # will be additional details in the response.  Check for it.
            # Bug fix: the original raised the detailed ServiceFailure
            # *inside* a bare try/except that swallowed it, and rebound
            # "error" to a dict that "raise error" could not raise.  Parse
            # first, then raise outside the try.
            code = message = None
            try:
                json_response = response.json()
                if json_response and json_response.get('error', None):
                    details = json_response['error']
                    if 'code' in details:
                        code = details['code']
                        message = details['message']
            except Exception:
                # Response body wasn't JSON or had an unexpected shape.
                pass
            if code is not None:
                raise ServiceFailure('Microsoft returned error code '
                                     + code + ' -- ' + message)
        raise error
    else:
        return response
def on_cancel_or_quit(self, event):
    '''Handle a cancel/quit request: record it and close the dialog.'''
    if __debug__: log('got Cancel')
    self._cancel = True
    # Bug fix: return_values() was previously called twice (editing
    # residue around the commented-out Destroy() call); once suffices.
    self.return_values()
    # self.Destroy()
    self.EndModal(event.EventObject.Id)
def random_pun():
    '''Fetch a random joke from icanhazdadjoke.com; return None on error.'''
    if __debug__: log('getting a random joke from https://icanhazdadjoke.com')
    (resp, error) = net('get', 'https://icanhazdadjoke.com/',
                        timeout=10, headers={'Accept': 'text/plain'})
    if error:
        return None
    # Strip any non-ascii characters from the joke text.
    return resp.text.encode('ascii', 'ignore').decode()
def on_help(self, event):
    '''Open the bundled help file in the user's default web browser.'''
    if __debug__: log('opening Help window')
    wx.BeginBusyCursor()
    try:
        help_file = path.join(datadir_path(), "help.html")
        if readable(help_file):
            webbrowser.open_new("file://" + help_file)
    finally:
        # Robustness fix: always restore the cursor, even if opening the
        # browser raises.
        wx.EndBusyCursor()
    return True
def save_credentials(self, service, supplied_file):
    '''Copy the supplied credentials file into the credentials directory.'''
    creds_dir = Credentials.creds_dir
    if not path.isdir(creds_dir):
        if __debug__: log('creating credentials dir: {}.', creds_dir)
        makedirs(creds_dir)
    dest_file = path.join(creds_dir, credentials_filename(service))
    copy_file(supplied_file, dest_file)
def save_keyring_credentials(api_key, user_id, ring=_KEYRING):
    '''Saves the user's credentials to the system keyring "ring".'''
    if sys.platform.startswith('win'):
        keyring.set_keyring(WinVaultKeyring())
    if sys.platform.startswith('darwin'):
        keyring.set_keyring(Keyring())
    value = _encoded(api_key, user_id)
    # Security fix: do not write the credential value itself to the debug
    # log; also report the ring actually used (not the _KEYRING default).
    if __debug__: log(f'storing credentials to keyring {ring}')
    keyring.set_password(ring, getpass.getuser(), value)
def main(user='U', pswd='P', no_color=False, no_gui=False, no_keyring=False,
         no_mail=False, no_sheet=False, reset_keys=False, version=False,
         debug=False):
    '''Lost It!'''
    # Our defaults are to do things like color the output, which means the
    # command line flags make more sense as negated values (e.g., "no-color").
    # However, dealing with negated variables in our code is confusing, so:
    use_color   = not no_color
    use_keyring = not no_keyring
    use_gui     = not no_gui
    view_sheet  = not no_sheet
    send_mail   = not no_mail
    # We use default values that provide more intuitive help text printed by
    # plac.  Rewrite the values to things we actually use.
    # Bug fix: the default for "user" did not match the 'U' sentinel tested
    # below, so the placeholder was used as a literal username.
    if user == 'U':
        user = None
    if pswd == 'P':
        pswd = None
    # Process the version argument first, because it causes an early exit.
    if version:
        print('{} version {}'.format(lostit.__title__, lostit.__version__))
        print('Author: {}'.format(lostit.__author__))
        print('URL: {}'.format(lostit.__url__))
        print('License: {}'.format(lostit.__license__))
        sys.exit()
    # Configure debug logging if it's turned on.
    if debug:
        if __debug__: set_debug(True)
    # Switch between different ways of getting information from/to the user.
    if use_gui:
        controller = LostItControlGUI()
        accesser   = AccessHandlerGUI(user, pswd, use_keyring, reset_keys)
        notifier   = MessageHandlerGUI()
        tracer     = ProgressIndicatorGUI()
    else:
        controller = LostItControlCLI()
        accesser   = AccessHandlerCLI(user, pswd, use_keyring, reset_keys)
        notifier   = MessageHandlerCLI(use_color)
        tracer     = ProgressIndicatorCLI(use_color)
    # Start the worker thread.
    if __debug__: log('starting main body thread')
    controller.start(MainBody(view_sheet, send_mail, debug,
                              controller, accesser, notifier, tracer))
def keyring_credentials(ring=_KEYRING):
    '''Looks up the user's credentials in the system keyring "ring".

    Returns a decoded (api_key, user_id) tuple, or (None, None) if no
    stored value is found.
    '''
    if sys.platform.startswith('win'):
        keyring.set_keyring(WinVaultKeyring())
    if sys.platform.startswith('darwin'):
        keyring.set_keyring(Keyring())
    value = keyring.get_password(ring, getpass.getuser())
    # Security fix: do not write the credential value itself to the debug
    # log; also report the ring actually used (not the _KEYRING default).
    if __debug__: log(f'got {"a value" if value else "nothing"} from keyring {ring}')
    return _decoded(value) if value else (None, None)
def _process_arguments(self):
    '''Validate and normalize the user-supplied options.

    Checks the DOI file, resolves the output directory to an absolute
    path (creating nothing here, only validating), parses the "after"
    date, and locates the internal JATS DTD when validation is requested,
    turning validation off if the DTD files cannot be found.  Raises
    RuntimeError or ValueError on invalid option values.
    '''
    if self.doi_file:
        if not readable(self.doi_file):
            raise RuntimeError(f'File not readable: {self.doi_file}')
        if not nonempty(self.doi_file):
            warn(f'File is empty: {self.doi_file}')
    # Make the output directory an absolute path rooted at the cwd.
    if not path.isabs(self.output_dir):
        self.output_dir = path.realpath(
            path.join(os.getcwd(), self.output_dir))
    if path.isdir(self.output_dir):
        if not writable(self.output_dir):
            raise RuntimeError(
                f'Directory not writable: {self.output_dir}')
    else:
        # A non-directory file at this path is an error.
        if path.exists(self.output_dir):
            raise ValueError(f'Not a directory: {self.output_dir}')
    # The journal's archive name becomes a subdirectory of the output dir.
    self.output_dir = path.join(self.output_dir,
                                self.journal.archive_basename)
    if self.after:
        parsed_date = None
        try:
            parsed_date = parsed_datetime(self.after)
        except Exception as ex:
            raise RuntimeError(f'Unable to parse date: {str(ex)}')
        if parsed_date:
            if __debug__: log(f'parsed after_date as {parsed_date}')
            self.after = parsed_date
        else:
            # parsed_datetime(...) returned None, which means it failed.
            raise RuntimeError(f'Invalid date: {self.after}')
    if self.do_validate:
        data_dir = path.join(module_path('pubarchiver'), 'data')
        dtd_dir = path.join(data_dir, _INTERNAL_DTD_DIR)
        dtd_file = path.join(dtd_dir, _JATS_DTD_FILENAME)
        if not path.exists(data_dir) or not path.isdir(data_dir):
            raise RuntimeError(f'Data directory is missing: {data_dir}')
        elif not path.exists(dtd_dir) or not path.isdir(dtd_dir):
            warn(
                'Cannot find internal DTD directory -- validation turned off'
            )
            self.do_validate = False
        elif not path.exists(dtd_file) or not readable(dtd_file):
            warn(
                'Cannot find internal copy of JATS DTD -- validation turned off'
            )
            self.do_validate = False
        else:
            # Compile the DTD from inside its directory -- presumably
            # because it references companion files by relative path.
            # TODO(review): confirm the chdir is required by etree.DTD.
            current_dir = os.getcwd()
            try:
                os.chdir(dtd_dir)
                if __debug__: log(f'using JATS DTD at {dtd_file}')
                self._dtd = etree.DTD(dtd_file)
            finally:
                os.chdir(current_dir)
def __init__(self, cfg_file):
    '''Read configuration values from the file at "cfg_file".

    Issues a warning (rather than raising) if the file cannot be opened.
    '''
    self._cfg = ConfigParser()
    try:
        with open(cfg_file) as f:
            if __debug__: log('reading "{}"', cfg_file)
            # Fix: read_file() replaces readfp(), which was deprecated
            # since Python 3.2 and removed in 3.12.
            self._cfg.read_file(f)
    except IOError:
        if __debug__: log('"{}" not found', cfg_file)
        warnings.warn('file "{}" not found'.format(cfg_file))
def person_plugin_wrapper(*args, **kwargs):
    '''Route wrapper that resolves the authenticated person.

    Returns an error page if no person can be determined; otherwise
    injects the person into the callback's kwargs when the callback
    declares a "person" parameter.
    '''
    person = person_from_environ(request.environ)
    if person is None:
        # Idiom fix: these were f-strings with no placeholders.
        log('person is None')
        return page('error', summary = 'authentication failure',
                    message = 'Unrecognized user identity.')
    # Only pass "person" if the route's callback accepts that parameter.
    # (.args is clearer than indexing the getfullargspec tuple with [0].)
    if 'person' in inspect.getfullargspec(route.callback).args:
        kwargs['person'] = person
    return callback(*args, **kwargs)
def open_file(file):
    '''Open document with default application in Python.'''
    # Code originally from https://stackoverflow.com/a/435669/743730
    if __debug__: log('opening file {}', file)
    if sys.platform.startswith('darwin'):
        # macOS: use the "open" command.
        subprocess.call(('open', file))
    elif os.name == 'nt':
        # Windows: hand the file to the shell.
        os.startfile(file)
    elif os.name == 'posix':
        # Other Unix: rely on xdg-open from the desktop environment.
        subprocess.call(('xdg-open', file))
def parent_key(self, record, file):
    '''Safely returns the parent key of the record item, or None.'''
    f = antiformat(file)
    if 'data' not in record:
        if __debug__: log(f'no "data" in record for {f}')
        return None
    data = record['data']
    if 'parentItem' not in data:
        if __debug__: log(f'unexpected record for {f}: ' + str(data))
        return None
    return data['parentItem']