def get_fake_metadata_object(fn):
    '''Build a PieObject from file-system information only.

    fn must be a full path.  The title and dates come from
    get_fake_metadata(fn); the file type comes from determine_file_type(fn)
    and the file name is the base name of fn.  Nothing stored inside the
    file itself is consulted by this function.
    '''
    fs_info = get_fake_metadata(fn)
    title = fs_info['title']
    created = fs_info['creation_date']
    pie_obj = PieObject(title=title, date=created)
    pie_obj.FileData_DateCreated = created
    pie_obj.FileData_DateModified = fs_info['modification_date']
    pie_obj.FileData_FileType = determine_file_type(fn)
    pie_obj.FileData_FileName = os.path.basename(fn)
    return pie_obj
def get_metadata_object(fn, fakeonly=False):
    '''Return an object carrying the metadata gleaned from the file fn.

    The file type reported by determine_file_type(fn) selects the reader:
    'pdf', 'oxml_doc' and 'odf_doc' have dedicated readers, 'word_doc' and
    'hachoir_other' use get_real_metadata_object, and anything else falls
    back to get_fake_metadata_object.

    Returns None when the file type is 'other' (not handleable); this check
    happens before fakeonly is considered.  When fakeonly is true, the
    file-system-only reader is used regardless of the file type.
    '''
    file_type = determine_file_type(fn)
    if file_type == 'other':
        return None
    if fakeonly:
        return get_fake_metadata_object(fn)

    # Pick the reader first, then call it once at the end.
    if file_type == 'pdf':
        reader = get_pdf_metadata_object
    elif file_type == 'oxml_doc':
        reader = get_oxml_metadata_object
    elif file_type == 'odf_doc':
        reader = get_odf_metadata_object
    elif file_type in ('word_doc', 'hachoir_other'):
        reader = get_real_metadata_object
    else:
        reader = get_fake_metadata_object
    return reader(fn)
def get_metadata_for_aspect(obj):
    '''Update an already existing object with metadata from its file.

    obj must have the 'stored' or the 'cached' aspect (checked with an
    assert).  The file type is taken from obj.FileData_FileType, or worked
    out from obj.FileData_FullPath when that attribute is missing.

    Returns None for file type 'other'; otherwise returns whatever the
    type-specific *_metadata_for_aspect function returns.
    '''
    assert obj.has_aspect('stored') or obj.has_aspect('cached')
    try:
        file_type = obj.FileData_FileType
    except AttributeError:
        file_type = determine_file_type(obj.FileData_FullPath)

    if file_type == 'other':
        return None

    # Pick the updater first, then call it once at the end.
    if file_type == 'pdf':
        updater = get_pdf_metadata_for_aspect
    elif file_type == 'oxml_doc':
        updater = get_oxml_metadata_for_aspect
    elif file_type == 'odf_doc':
        updater = get_odf_metadata_for_aspect
    elif file_type in ('word_doc', 'hachoir_other'):
        updater = get_real_metadata_for_aspect
    else:
        updater = get_fake_metadata_for_aspect
    return updater(obj)
def write_metadata_to_object(obj, **metadata):
    '''Write an object's metadata into the file it describes.

    The file type is read from obj.FileData_FileType, falling back to
    determine_file_type(obj.FileData_FullPath) when that attribute is
    missing.  Only 'pdf' and 'odf_doc' files are written; OXML writing is
    currently disabled.

    The **metadata keyword arguments are accepted but not used here: the
    type-specific writers are called with obj alone.

    Returns whatever the type-specific writer returns, or False when the
    file type is not writable or the writer raised an exception (the
    traceback is printed in that case).
    '''
    try:
        file_type = obj.FileData_FileType
    except AttributeError:
        file_type = determine_file_type(obj.FileData_FullPath)

    # 'oxml_doc' is deliberately left out: OXML writing is disabled because
    # BeautifulSoup changes the case of tags.
    if file_type not in ('pdf', 'odf_doc'):
        return False

    try:
        if file_type == 'pdf':
            return write_pdf_metadata(obj)
        return write_odf_metadata(obj)
    except Exception:
        # Best effort: report the failure and signal it with False.
        # KeyboardInterrupt and SystemExit are not caught here, so they
        # still propagate to the caller.
        traceback.print_exc()
        return False
def get_real_metadata_object(fn):
    '''Build an object from the metadata stored inside the file fn.

    Starts from the file-system-only object and, when get_real_metadata(fn)
    yields something truthy, overwrites author, title and the dates with
    the internal values.  If no internal metadata is available, the
    file-system-only object is returned unchanged.
    '''
    # TODO: workaround for file locking bug - on Windows the file is copied
    # into CACHEDIR/Workaround and the copy is read instead.
    # NOTE(review): from here on fn refers to the copy, so the recorded
    # FileData_FileName (and the file-system dates used when no internal
    # metadata is found) appear to describe the copy - confirm intended.
    if sys.platform == 'win32':
        workaround_path = os.path.join(
            CACHEDIR, 'Workaround', os.path.basename(fn))
        if os.path.isfile(workaround_path):
            workaround_path = auto_increment_fn(workaround_path)
        shutil.copyfile(fn, workaround_path)
        fn = workaround_path

    pie_obj = get_fake_metadata_object(fn)
    internal = get_real_metadata(fn)
    if internal:
        pie_obj.author = internal['author']
        pie_obj.title = internal['title']
        pie_obj.date = internal['creation_date']
        pie_obj.FileData_DateCreated = internal['creation_date']
        pie_obj.FileData_DateModified = internal['modification_date']
        pie_obj.FileData_FileType = determine_file_type(fn)
        pie_obj.FileData_FileName = os.path.basename(fn)
    return pie_obj