def write_hidden_pdf_if_a_binary(self, binary, filepath, pointer, filetype):
    """Write ``binary`` to disk unless it turns out to be text.

    ContentDM sometimes hands back an XML document where a binary was
    expected. Recognising text is far simpler than recognising every
    valid binary format, so the payload is test-decoded as UTF-8:

    * it decodes cleanly -> treated as XML and discarded;
    * it fails to decode -> treated as a real binary and passed to
      ``CdmAPI.write_binary_to_file``.

    Args:
        binary: bytes-like payload to inspect (must offer ``decode``).
        filepath: forwarded unchanged to ``CdmAPI.write_binary_to_file``.
        pointer: forwarded unchanged to ``CdmAPI.write_binary_to_file``.
        filetype: forwarded unchanged to ``CdmAPI.write_binary_to_file``.

    Returns:
        True when the payload was written, False when it was discarded.
    """
    try:
        binary.decode('utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8, so this is the genuine binary we want to keep.
        CdmAPI.write_binary_to_file(binary, filepath, pointer, filetype)
        print(filepath, pointer, 'wrote root hidden_pdf')
        return True
    else:
        # Decoded without complaint: an XML answer, nothing to write.
        return False
def process_binary(self, target_dir, pointer, filetype):
    """Fetch and store the binary for ``pointer`` unless it already exists.

    The file names recorded in ``self.tree_snapshot`` for ``target_dir``
    are checked for ``<pointer>.<filetype>``, using the extension as
    given and then lower-cased. Only when neither name is present is the
    binary retrieved through ``CdmAPI`` and written into ``target_dir``.

    An ``urllib.error.HTTPError`` raised during the retrieve/write step
    is reported and the ``(pointer, filetype)`` pair is added to
    ``self.unavailable_binaries``; it is not re-raised.

    Args:
        target_dir: directory to look in and to write into; compared for
            equality against the first item of each snapshot entry.
        pointer: item identifier used for the file name and the request.
        filetype: file extension (without the dot).
    """
    # Gather the file names the snapshot lists under target_dir.
    names_in_target = []
    for root, _dirs, names in self.tree_snapshot:
        if target_dir == root:
            names_in_target.extend(names)

    # Guard clauses: nothing to do when either spelling is already there.
    if '{}.{}'.format(pointer, filetype) in names_in_target:
        return
    if '{}.{}'.format(pointer, filetype.lower()) in names_in_target:
        return

    try:
        payload = CdmAPI.retrieve_binary(self.alias, pointer)
        CdmAPI.write_binary_to_file(payload, target_dir, pointer, filetype)
        print('wrote', self.alias, pointer, filetype)
    except urllib.error.HTTPError:
        print(self.alias, pointer, 'HTTP error caught on binary')
        # Remember the failure so it can be examined later.
        self.unavailable_binaries.add((pointer, filetype))
def write_hidden_pdf_if_a_binary(self, binary, filepath, pointer, filetype):
    """Persist ``binary`` only if it is not decodable text.

    ContentDM occasionally returns an XML document in place of the
    requested binary. Telling text apart is much easier than validating
    one of countless binary formats, so a UTF-8 decode is attempted:
    success means XML (discarded), failure means a real binary, which is
    handed to ``CdmAPI.write_binary_to_file`` and logged at INFO level.

    Args:
        binary: bytes-like payload to inspect (must offer ``decode``).
        filepath: forwarded unchanged to ``CdmAPI.write_binary_to_file``.
        pointer: forwarded unchanged to ``CdmAPI.write_binary_to_file``.
        filetype: forwarded unchanged to ``CdmAPI.write_binary_to_file``.

    Returns:
        True if the payload was written to file, otherwise False.
    """
    try:
        binary.decode('utf-8')
    except UnicodeDecodeError:
        # The decode failure is the signal that this is a true binary.
        CdmAPI.write_binary_to_file(binary, filepath, pointer, filetype)
        message = '{} {} root hidden_pdf written'.format(filepath, pointer)
        logging.info(message)
        return True
    else:
        # Clean decode: this was an XML reply, so there is nothing to save.
        return False
def process_binary(self, target_dir, pointer, filetype):
    """Download the binary for ``pointer`` into ``target_dir`` when missing.

    ``self.tree_snapshot`` is searched for the entries whose root equals
    ``target_dir``. If neither ``<pointer>.<filetype>`` nor its
    lower-cased-extension form is listed there, the binary is retrieved
    via ``CdmAPI.retrieve_binary`` and stored with
    ``CdmAPI.write_binary_to_file``.

    An ``urllib.error.HTTPError`` raised during that step is logged as a
    warning and swallowed.

    Args:
        target_dir: directory to look in and to write into; compared for
            equality against the first item of each snapshot entry.
        pointer: item identifier used for the file name and the request.
        filetype: file extension (without the dot).
    """
    present = [
        name
        for root, _dirs, names in self.tree_snapshot
        if target_dir == root
        for name in names
    ]

    expected_name = '{}.{}'.format(pointer, filetype)
    # The lower-cased variant is only built when the exact name is absent.
    if (expected_name in present
            or '{}.{}'.format(pointer, filetype.lower()) in present):
        return

    try:
        payload = CdmAPI.retrieve_binary(self.alias, pointer)
        CdmAPI.write_binary_to_file(payload, target_dir, pointer, filetype)
        success_note = '{} {}.{} written'.format(self.alias, pointer, filetype)
        logging.info(success_note)
    except urllib.error.HTTPError:
        failure_note = '{} {} HTTP error caught on binary'.format(
            self.alias, pointer)
        logging.warning(failure_note)