def spoof_pieobject(objtype="normal"):
    '''Generate a spoof PieObject filled with randomly chosen test data.

    objtype selects which kind of object is built:

    'normal'
        PieObject with a random title and author and today's date, plus
        a random FileData_FileName and FileData_Root.
    'web'
        PieObject created with no arguments, then given a random
        WebData_Url (also used as WebData_PageUrl) and random link text
        that doubles as the title; aspects['onweb'] is set.
    'webfull', 'desktop', 'pdffull'
        PieObject with a random title and author and today's date, plus
        web data, FileData_Root set to 'cachedir' and aspects['onweb']
        set.

    Returns the PieObject after MakeBibData() has been called on it and
    the tags 'Test', 'Foo' and 'Bar' have been added.

    Raises ValueError if objtype is not one of the values listed above.
    '''
    if objtype == 'normal':
        t = random.choice(ipsum)
        a = random.choice(namelist)
        d = datetime.datetime.today()
        ro = PieObject(t, a, d)
        ro.FileData_FileName = random.choice(filenamelist)
        ro.FileData_Root = random.choice(rootlist)
    elif objtype == 'web':
        ro = PieObject()
        ro.WebData_Url = random.choice(urllist)
        ro.WebData_PageUrl = ro.WebData_Url
        ro.WebData_LinkText = random.choice(ipsum)
        # The link text is the only descriptive text a bare web object has,
        # so it is reused as the title.
        ro.title = ro.WebData_LinkText
        ro.aspects['onweb'] = True
    elif objtype in ('webfull', 'desktop', 'pdffull'):
        t = random.choice(ipsum)
        a = random.choice(namelist)
        d = datetime.datetime.today()
        ro = PieObject(t, a, d)
        ro.WebData_Url = random.choice(urllist)
        ro.WebData_PageUrl = ro.WebData_Url
        ro.WebData_LinkText = t + ' [link]'
        ro.FileData_Root = 'cachedir'
        ro.aspects['onweb'] = True
    else:
        # Without this branch an unknown objtype left `ro` unbound and the
        # function died with an UnboundLocalError further down.
        raise ValueError('unknown spoof objtype: %r' % (objtype,))
    # NOTE(review): the finishing steps below are assumed to apply to every
    # objtype -- confirm against the intended layout of this function.
    ro.MakeBibData()
    ro.add_tag('Test')
    ro.add_tags(('Foo', 'Bar'))
    return ro
def get_fake_metadata_object(fn):
    '''Build a PieObject from file-system-derived metadata only.

    fn is the full path of the file. The metadata comes from
    get_fake_metadata(fn): its 'title' and 'creation_date' entries seed
    the PieObject, and its creation/modification dates are copied onto
    the FileData_* attributes. The file type is taken from
    determine_file_type(fn) and the file name is the basename of fn.

    Returns the populated PieObject.
    '''
    meta = get_fake_metadata(fn)
    created = meta['creation_date']
    pie_obj = PieObject(title=meta['title'], date=created)
    pie_obj.FileData_DateCreated = created
    pie_obj.FileData_DateModified = meta['modification_date']
    pie_obj.FileData_FileType = determine_file_type(fn)
    pie_obj.FileData_FileName = os.path.basename(fn)
    return pie_obj
def pypdf_object(fn):
    '''Build a PieObject from the metadata pypdf_metadata() reads from fn.

    On win32 the file is first copied to CACHEDIR/Workaround under its
    own basename (passed through auto_increment_fn when a file of that
    name already exists there), and the copy is what gets read and what
    FileData_FileName is derived from.

    Returns a PieObject with title, author and date taken from the
    metadata, FileData_DateCreated set to the creation date and
    FileData_FileType set to 'pdf'.
    '''
    # NOTE(review): another definition of pypdf_object with the same logic
    # appears later in this file and looks like it rebinds this name.
    pdf_path = fn
    if sys.platform == 'win32':
        copy_target = os.path.join(
            CACHEDIR, 'Workaround', os.path.basename(pdf_path))
        if os.path.isfile(copy_target):
            copy_target = auto_increment_fn(copy_target)
        shutil.copyfile(pdf_path, copy_target)
        pdf_path = copy_target
    meta = pypdf_metadata(pdf_path)
    created = meta['creation_date']
    pdf_obj = PieObject(
        title=meta['title'],
        author=meta['author'],
        date=created)
    pdf_obj.FileData_DateCreated = created
    pdf_obj.FileData_FileType = 'pdf'
    pdf_obj.FileData_FileName = os.path.basename(pdf_path)
    return pdf_obj
def pypdf_object(fn):
    '''Return a PieObject describing the pdf at fn, using pypdf_metadata().

    When running on win32 the pdf is copied into the 'Workaround' folder
    under CACHEDIR before it is read; if a file with the same basename
    is already there, auto_increment_fn() supplies the destination name.
    From then on the copy stands in for fn, including for the recorded
    FileData_FileName.

    The returned object carries the metadata's title, author and creation
    date, with FileData_FileType fixed to 'pdf'.
    '''
    # NOTE(review): an earlier definition of pypdf_object with the same
    # logic exists in this file; this later one appears to replace it.
    needs_copy = sys.platform == 'win32'
    if needs_copy:
        base_name = os.path.basename(fn)
        dest = os.path.join(CACHEDIR, 'Workaround', base_name)
        if os.path.isfile(dest):
            dest = auto_increment_fn(dest)
        shutil.copyfile(fn, dest)
        fn = dest
    info = pypdf_metadata(fn)
    result = PieObject(
        title=info['title'],
        author=info['author'],
        date=info['creation_date'],
    )
    result.FileData_DateCreated = info['creation_date']
    result.FileData_FileType = 'pdf'
    result.FileData_FileName = os.path.basename(fn)
    return result
creation_date = fakeobj.FileData_DateCreated if reader.Info.Author: author = unicode( reader.Info.Author, 'utf8', errors=U_ERROR_BEHAV).strip('()') else: author = u'' if reader.Info.Title: title = unicode( reader.Info.Title, 'utf8', errors=U_ERROR_BEHAV).strip('()') else: title = u'' obj = PieObject( title = title, author = author, date = creation_date) obj.FileData_DateCreated = creation_date obj.FileData_FileType = 'pdf' obj.FileData_FileName = os.path.basename(fn) return obj def pdfrw_metadata(fn): reader = PdfReader(fn) # assert len(reader.Info.CreationDate) > 0 cd = reader.Info.CreationDate.split(':')[1] #get the 'good' bit # md = reader.Info.ModDate.split(':')[1] creation_date = datetime.datetime.strptime("%s %s %s %s %s" % ( cd[0:4], cd[4:6], cd[6:8], cd[8:10], cd[10:12] ), "%Y %m %d %H %M") return { 'author': unicode(reader.Info.Author, 'utf8', errors=U_ERROR_BEHAV), 'title': unicode(reader.Info.Title, 'utf8', errors=U_ERROR_BEHAV), 'creation_date': creation_date, 'creation_date_guessed': False,