def pdfrw_object(fn):
    '''hachoir doesn't do pdf, so try to open ``fn`` with PdfReader instead.

    On win32 the file is first copied into CACHEDIR/Workaround and the copy
    is used from then on.

    Returns None when PdfReader raises; in that case the traceback is
    written to the file 'dbgop' under SYSDIR.
    '''
    if sys.platform == 'win32':
        # Work on a copy in the cache directory; if a file with that name is
        # already there, auto_increment_fn supplies a different name
        # (presumably an unused one -- confirm against its definition).
        newfn = os.path.join(CACHEDIR, 'Workaround', os.path.basename(fn))
        if os.path.isfile(newfn):
            newfn = auto_increment_fn(newfn)
        shutil.copyfile(fn, newfn)
        fn = newfn

    # NOTE(review): within the code visible here, ``fakeobj`` and ``reader``
    # are never used afterwards and the success path falls through (returning
    # None implicitly). Both calls are kept in case they have side effects or
    # the rest of the function lives elsewhere -- confirm.
    fakeobj = get_fake_metadata_object(fn)
    try:
        reader = PdfReader(fn)
    except Exception:
        # Use a with-block so the debug file handle is closed (and flushed)
        # deterministically instead of being leaked.
        with open(os.path.join(SYSDIR, 'dbgop'), 'w') as dbg_file:
            traceback.print_exc(file=dbg_file)
        return None
def get_real_metadata_object(fn):
    '''Return a metadata object for ``fn`` filled from the file's own metadata.

    The object comes from get_fake_metadata_object(fn). When
    get_real_metadata(fn) returns something truthy, its author, title and
    date fields are copied onto the object together with the file type and
    base name; otherwise the object is returned untouched.
    '''
    #TODO: workaround for file locking bug
    if sys.platform == 'win32':
        cache_copy = os.path.join(
            CACHEDIR, 'Workaround', os.path.basename(fn))
        if os.path.isfile(cache_copy):
            cache_copy = auto_increment_fn(cache_copy)
        shutil.copyfile(fn, cache_copy)
        # NOTE(review): from here on ``fn`` is the cache copy, so the file
        # name recorded below is the copy's name -- confirm that is intended.
        fn = cache_copy

    meta_obj = get_fake_metadata_object(fn)
    info = get_real_metadata(fn)
    if info:
        # (attribute on the object, key in the metadata mapping), applied in
        # this order.
        field_map = (
            ('author', 'author'),
            ('title', 'title'),
            ('date', 'creation_date'),
            ('FileData_DateCreated', 'creation_date'),
            ('FileData_DateModified', 'modification_date'),
        )
        for attr_name, key in field_map:
            setattr(meta_obj, attr_name, info[key])
        meta_obj.FileData_FileType = determine_file_type(fn)
        meta_obj.FileData_FileName = os.path.basename(fn)
    return meta_obj
def get_odf_metadata_object(fn):
    '''Return a metadata object for the ODF document ``fn``.

    Starts from get_fake_metadata_object(fn) and overlays the values returned
    by get_odf_metadata(fn): author, title, creation/modification dates and,
    when non-empty, the description (stored as BibData_Annote).

    If get_odf_metadata raises, the traceback and a failure message are
    printed and the object is returned without the overlay.
    '''
    # NOTE: the win32 copy-into-CACHEDIR workaround is disabled for this
    # function; ``fn`` is read in place.
    obj = get_fake_metadata_object(fn)
    try:
        d = get_odf_metadata(fn)
    except Exception:
        # Best effort: report the failure and fall back to the bare object.
        # ``except Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        traceback.print_exc()
        print('Parsing odf document %s failed' % fn)
        return obj

    obj.author = d['author']
    obj.title = d['title']
    obj.date = d['creation_date']
    obj.FileData_DateCreated = d['creation_date']
    obj.FileData_DateModified = d['modification_date']
    if d['description']:
        obj.BibData_Annote = d['description']
    obj.FileData_FileType = 'odf_doc'
    obj.FileData_FileName = os.path.basename(fn)
    return obj
def get_oxml_metadata_object(fn):
    '''Return a metadata object for the OXML document ``fn``.

    Starts from get_fake_metadata_object(fn) and overlays the values returned
    by get_oxml_metadata(fn): author, title, creation/modification dates and,
    when non-empty, the description (stored as BibData_Annote).

    If get_oxml_metadata raises, the traceback and a failure message are
    printed and the object is returned without the overlay.
    '''
    # NOTE: the win32 copy-into-CACHEDIR workaround is disabled for this
    # function; ``fn`` is read in place.
    obj = get_fake_metadata_object(fn)
    try:
        d = get_oxml_metadata(fn)
    except Exception:
        # Best effort: report the failure and fall back to the bare object.
        # ``except Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        traceback.print_exc()
        print('Parsing oxml document %s failed' % fn)
        return obj

    obj.author = d['author']
    obj.title = d['title']
    obj.date = d['creation_date']
    obj.FileData_DateCreated = d['creation_date']
    obj.FileData_DateModified = d['modification_date']
    if d['description']:
        obj.BibData_Annote = d['description']
    obj.FileData_FileType = 'oxml_doc'
    obj.FileData_FileName = os.path.basename(fn)
    return obj