def extract_shelfmarks(args):
    """Rebuild the ``ShelfMark`` rows from the shelf-marks listed on each book.

    The existing shelf-marks are detached from their books and deleted, one
    ``ShelfMark`` is created per distinct title found in
    ``book.attrs['shelfmarks']`` and linked to the books that list it, and
    finally the prefix shared by all shelf-mark titles is stripped.

    :param args: object whose ``configuration`` attribute is handed to
                 ``get_appsettings`` and ``setup_logging`` (presumably the
                 parsed command-line arguments -- confirm against the caller)
    """
    settings = get_appsettings(args.configuration)
    setup_logging(args.configuration)
    logger = logging.getLogger('explorethestacks')
    logger.info('Extracting shelf-marks')
    engine = engine_from_config(settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    dbsession = DBSession()

    # Empty every shelf-mark's book list before the bulk delete below
    # (presumably so no book association outlives the delete -- confirm
    # against the model definition).
    with transaction.manager:
        for shelfmark in dbsession.query(ShelfMark):
            shelfmark.books = []
    with transaction.manager:
        dbsession.query(ShelfMark).delete()

    count = 0
    with transaction.manager:
        for book in dbsession.query(Book):
            for title in book.attrs['shelfmarks']:
                shelfmark = dbsession.query(ShelfMark).filter(
                    ShelfMark.title == title).first()
                if shelfmark is None:
                    shelfmark = ShelfMark(title=title)
                    dbsession.add(shelfmark)
                shelfmark.books.append(book)
            count += 1
            # Flush periodically so pending changes are written out in
            # batches rather than all at commit time.
            if count % 10000 == 0:
                dbsession.flush()
                logger.debug('%i books processed', count)
    logger.debug('%i books processed', count)

    titles = [sm.title for sm in dbsession.query(ShelfMark)]
    # The common prefix of a single title is the title itself, so stripping
    # it would leave the only shelf-mark with an empty title. Only strip when
    # there are at least two titles to compare.
    prefix_len = len(os.path.commonprefix(titles)) if len(titles) > 1 else 0
    with transaction.manager:
        for shelfmark in dbsession.query(ShelfMark):
            shelfmark.title = shelfmark.title[prefix_len:]
    logger.info('Shelf-marks extracted')
def create_shelves(args):
    """Rebuild the ``Shelf`` tree from the existing shelf-marks.

    Steps, each run in its own transaction:

    1. delete all existing shelves;
    2. distribute the shelf-marks (ordered by title) over new leaf shelves;
    3. repeatedly group the parentless shelves under new parent shelves
       until at most 50 parentless shelves remain;
    4. create a root shelf and make it the parent of the remaining
       parentless shelves;
    5. set ``start`` / ``end`` on every shelf in the tree, with the root
       labelled ``'Explore the Stacks'``.

    :param args: object whose ``configuration`` attribute is handed to
                 ``get_appsettings`` and ``setup_logging`` (presumably the
                 parsed command-line arguments -- confirm against the caller)
    """
    settings = get_appsettings(args.configuration)
    setup_logging(args.configuration)
    logger = logging.getLogger('explorethestacks')
    logger.info('Creating shelves')
    engine = engine_from_config(settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    dbsession = DBSession()

    with transaction.manager:
        dbsession.query(Shelf).delete()

    with transaction.manager:
        count = _fill_leaf_shelves(dbsession, logger)
    logger.debug('%s shelfmarks processed', count)

    logger.debug('Creating shelf hierarchy')
    with transaction.manager:
        _build_shelf_hierarchy(dbsession)

    with transaction.manager:
        root_shelf = Shelf()
        dbsession.add(root_shelf)
        # "== None" is intentional: SQLAlchemy turns it into "IS NULL".
        for shelf in dbsession.query(Shelf).filter(
                Shelf.parent_id == None):  # noqa: E711
            if shelf != root_shelf:
                shelf.parent = root_shelf

    logger.debug('Creating shelf titles')
    with transaction.manager:
        root_shelf = dbsession.query(Shelf).filter(
            Shelf.parent_id == None).first()  # noqa: E711
        _assign_shelf_titles(root_shelf)
        # The root gets a fixed label instead of a shelf-mark range.
        root_shelf.start = 'Explore the Stacks'
        root_shelf.end = 'Explore the Stacks'
    logger.info('Shelves created')


def _fill_leaf_shelves(dbsession, logger, max_books=200):
    """Distribute all shelf-marks, ordered by title, over new leaf shelves.

    A new shelf is started whenever adding the next shelf-mark would push
    the current shelf above ``max_books`` books; a shelf-mark that alone
    exceeds the limit still gets a shelf to itself.

    :return: the number of shelf-marks processed
    """
    shelf = None
    book_count = 0
    order = 0
    count = 0
    for shelf_mark in dbsession.query(ShelfMark).order_by(ShelfMark.title):
        mark_books = len(shelf_mark.books)
        if shelf is None or book_count + mark_books > max_books:
            order += 1
            shelf = Shelf(order=order)
            dbsession.add(shelf)
            book_count = 0
        shelf.shelf_marks.append(shelf_mark)
        book_count += mark_books
        count += 1
        # Flush periodically so pending changes are written out in batches.
        if count % 10000 == 0:
            dbsession.flush()
            logger.debug('%s shelfmarks processed', count)
    return count


def _build_shelf_hierarchy(dbsession, max_children=50):
    """Add parent levels until at most ``max_children`` shelves lack a parent.

    Each pass walks the parentless shelves in ``order`` and groups them
    under newly created parents holding at most ``max_children`` children
    each. The limit is checked before a child is attached, so a parent
    never ends up with ``max_children + 1`` children.
    """
    # "== None" is intentional: SQLAlchemy turns it into "IS NULL".
    while dbsession.query(Shelf).filter(
            Shelf.parent_id == None).count() > max_children:  # noqa: E711
        parent_shelf = None
        child_count = 0
        order = 0
        for shelf in dbsession.query(Shelf).filter(
                Shelf.parent_id == None).order_by(Shelf.order):  # noqa: E711
            if parent_shelf is None or child_count >= max_children:
                order += 1
                parent_shelf = Shelf(order=order)
                dbsession.add(parent_shelf)
                child_count = 0
            shelf.parent = parent_shelf
            child_count += 1


def _assign_shelf_titles(shelf):
    """Recursively set ``start`` / ``end`` on ``shelf`` and its descendants.

    A shelf with children spans from its first child's ``start`` to its last
    child's ``end``; a shelf without children spans from its first to its
    last shelf-mark title. A shelf with neither is left untouched.

    NOTE(review): "first" and "last" rely on the order in which the
    ``children`` and ``shelf_marks`` relationships return their items --
    confirm the ordering configured on the model.
    """
    if shelf.children:
        for child in shelf.children:
            _assign_shelf_titles(child)
        shelf.start = shelf.children[0].start
        shelf.end = shelf.children[-1].end
    elif shelf.shelf_marks:
        shelf.start = shelf.shelf_marks[0].title
        shelf.end = shelf.shelf_marks[-1].title