def import_dump(self, key):
    """Import image and blog records found in the ``exports`` folder.

    Creates ``exports`` if it is missing. If ``exports/images.xml``
    exists, its records are turned into pictures via
    ``picture.opendump``; every picture whose file is not yet present
    under ``images/`` has its file moved there from ``exports/`` and
    counts as a *new* image. If ``exports/blogs.xml`` exists, its
    records are turned into blogs via ``tumblr.opendump``. Each XML
    file is deleted once it has been processed.

    Afterwards every new image is compared with all other pictures
    returned by ``picture.pictures()``; matches in the upper third of
    the observed similarity range are stored in ``p.relates``. If any
    new images were imported they become ``self.pool`` and
    ``self.redraw`` is set. Progress and a final summary are reported
    through ``self.message``.

    :param key: not read by this method.
    """
    if not os.path.exists('exports'):
        os.mkdir('exports')

    imgs = []      # pictures whose image file was newly moved into images/
    srcimgs = []   # every picture built from an image record
    blgs = []      # every blog built from a blog record

    # TODO: find all xml files in folder
    if os.path.exists('exports/images.xml'):
        # TODO: download image from original location
        imgrec = util.inout.loadImages('exports/images.xml')
        srcimgs = [picture.opendump(rec) for rec in imgrec]
        # FIXME: workaround for working offline: the image files are
        # shipped alongside the dump and moved over wholesale.
        for p in srcimgs:
            target = 'images/{}'.format(p.filename)
            if not os.path.exists(target):
                os.rename('exports/{}'.format(p.filename), target)
                imgs.append(p)
        # remove images xml records so that missing images cannot be
        # imported again
        os.remove('exports/images.xml')

    if os.path.exists('exports/blogs.xml'):
        blgrec = util.inout.loadBlogs('exports/blogs.xml')
        blgs = [tumblr.opendump(rec) for rec in blgrec]
        # reify image references made by blogs
        for t in blgs:
            index.clean_img_refs(t)
        # remove xml file because import is successful
        os.remove('exports/blogs.xml')

    # Now that the blogs are imported, the blog/img references held by
    # the image instances can be reified.
    # TODO: reification of source blogs, interblog references,
    # interimg links!!
    for p in srcimgs:
        index.clean_sources(p)
        p.clean_links()
    self.message('imported {} images and {} blogs.'.format(
        len(srcimgs), len(blgs)))

    # compute similarities with present images
    self.message('compute similarities with present images..')
    bestsim = 0  # best similarity seen, reported in the summary
    for p in imgs:
        sims = {q: p.similarity(q) for q in picture.pictures() if q != p}
        if not sims:
            # Nothing to compare against (e.g. a single image imported
            # into an empty collection); min()/max() would raise
            # ValueError on the empty sequence.
            continue
        minsim, maxsim = min(sims.values()), max(sims.values())
        # Keep only matches in the top third of the similarity range.
        threshold = maxsim - (maxsim - minsim) / 3
        p.relates.update({q: s for q, s in sims.items() if s > threshold})
        # keep track of best match
        bestsim = max(bestsim, maxsim)

    # Show the freshly imported images (repool) if there are any.
    if imgs:
        self.pool = imgs
        self.redraw = True

    # NOTE(review): the summary is assumed to be shown unconditionally
    # (it also reports blogs, which may be imported without any new
    # image) -- confirm against the intended behaviour.
    self.message('\n'.join(['Imported {} image records with {} new images'.format(
        len(srcimgs), len(imgs)),
        'featured by {} blogs.'.format(len(blgs)),
        'Highest similarity between old and new image was {:.2f}'.format(bestsim)]),
        confirm=True)
def loadBlogs(filename):
    """Load blog records from an XML file and instantiate blog objects.

    The records are read with ``inout.loadBlogs`` and each one is passed
    to ``tumblr.opendump``. Progress is printed to standard output.

    :param filename: path of the XML file, handed to ``inout.loadBlogs``.
    :returns: list with one ``tumblr.opendump`` result per record.
    """
    # Single-argument, parenthesised print behaves the same as the
    # Python 2 print statement and is also valid Python 3.
    print('read in xml...')
    records = inout.loadBlogs(filename)
    print('instantiate blog objects from imported records...')
    blgs = [tumblr.opendump(rec) for rec in records]
    # TODO: replace the string identifiers in the images' source lists
    # with the newly created Blog instances (not done here yet).
    return blgs