def prep_connection(self, uri, db_name):
    """Open the server connection and bind the document model to a database.

    Builds a ``Server`` for ``uri``, asks it for the database called
    ``db_name`` through ``get_or_create_db`` and hands that database to
    ``Book.set_db`` so ``Book`` instances are tied to it.

    The server and database handles are kept on the instance as
    ``self.server`` and ``self.db``.
    """
    # server handle
    server = Server(uri=uri)
    self.server = server

    # database handle, obtained from the server
    database = server.get_or_create_db(db_name)
    self.db = database

    # point the local document class at that database
    Book.set_db(database)
def run(self):
    """Parse the XML store and save every book it contains as a ``Book``.

    Steps:
      1. ``self._parse_xml()`` is called; ``self.xml_store`` is read
         afterwards.
      2. The status groups under ``statuses/status`` are collected into a
         lookup table, on top of a built-in ``'Search'`` group.
      3. Each ``books/book`` element is converted to a ``Book`` and saved.
         A book that raises during conversion or saving has its title
         appended to ``self.bad_books``; a successful one increments
         ``self.good_books``.
      4. A summary is printed.

    Returns nothing. ``KeyboardInterrupt`` and ``SystemExit`` are not
    swallowed, so a long import can still be aborted.

    NOTE(review): this file appears to hold two definitions of ``run``; if
    both live in the same class the later one wins -- confirm and drop the
    duplicate.
    """
    # parse the original file
    self._parse_xml()

    # show xml store's version
    print('xml store version: %s'
          % self.xml_store.getroot().attrib['version'])

    # Known status groups: the built-in default first, then whatever the
    # XML declares (an XML group with the same name replaces the default).
    status_objects = {
        'Search': {
            '1': 'do not search',
            '10': 'to search',
            '20': 'searched, has results',
            '30': 'searched, no results',
        },
    }
    for status_xml in self.xml_store.findall('statuses/status'):
        status_objects[status_xml.attrib['name']] = {
            item_xml.attrib['id']: item_xml.text
            for item_xml in status_xml.iter('item')
        }

    missing_status_groups = []

    # for each book in library/books
    for book_xml in self.xml_store.findall('books/book'):
        try:
            # make the book
            book = Book()
            book.type = 'book'
            book._id = uuid_fixer(book_xml.attrib['uid'])
            book.title = book_xml.attrib['title']
            book.public = book_xml.attrib['public'] == '1'
            book.isbn = (book_xml.attrib['isbn']
                         or book_xml.attrib['isbnasinput'])

            # add notes; a book without a <notes> element is still valid
            notes_xml = book_xml.find('notes')
            if notes_xml is not None and notes_xml.text:
                book.notesPublic = notes_xml.text

            # add the publisher
            publisher_xml = book_xml.find('publisher')
            if publisher_xml is not None:
                book.publisher = publisher_xml.text

            # add the authors, primary author(s) first
            authors = []
            for author_xml in book_xml.iter('author'):
                if author_xml.attrib['primary'] == '1':
                    authors.insert(0, author_xml.text)
                else:
                    authors.append(author_xml.text)

            # Remove duplicates while keeping the order built above;
            # going through set() would lose the primary-first ordering.
            seen_authors = set()
            unique_authors = []
            for author in authors:
                if author not in seen_authors:
                    seen_authors.add(author)
                    unique_authors.append(author)
            book.authors = unique_authors

            # add statuses
            for status_on_book in book_xml.iter('status'):
                status_group = status_on_book.attrib['name']
                if status_group in status_objects:
                    converted_status = status_mapper(
                        status_group, status_on_book.attrib['value'])
                    if converted_status:
                        book.status.update(converted_status)
                elif status_group not in missing_status_groups:
                    # NOTE(review): the original layout was lost; this
                    # branch is paired with the group-membership test
                    # (unknown group) -- confirm.
                    missing_status_groups.append(status_group)

            # store date finished
            date_finished = book_xml.attrib['datefinished']
            if date_finished:
                book.activities.append(
                    dict(date=date_finished, action="book.read.finished"))

            # add tag to identify imported books
            book.tags.append("import.v1")
            print(book.title)

            # save to couchdb
            book.save()
            self.good_books += 1
        except Exception as exc:
            # Keep importing the remaining books, but say why this one
            # failed. The title is looked up defensively because a missing
            # 'title' attribute may be the very reason we are here.
            bad_title = book_xml.attrib.get('title', '<unknown title>')
            print('import failed for %r: %r' % (bad_title, exc))
            self.bad_books.append(bad_title)

    if self.bad_books:
        print('THERE WERE %s IMPORT ERRORS!' % len(self.bad_books))
        for bad_book in self.bad_books:
            print(bad_book)

    # done adding, let's see what we have
    print('Book count: {0}'.format(self.good_books))
    print('Bad books: {0}'.format(len(self.bad_books)))
    print('Missing status groups: {0}'.format(len(missing_status_groups)))
    if missing_status_groups:
        print('\t{0}'.format(missing_status_groups))
def run(self):
    """Parse the XML store and save every book it contains as a ``Book``.

    Steps:
      1. ``self._parse_xml()`` is called; ``self.xml_store`` is read
         afterwards.
      2. The status groups under ``statuses/status`` are collected into a
         lookup table, on top of a built-in ``'Search'`` group.
      3. Each ``books/book`` element is converted to a ``Book`` and saved.
         A book that raises during conversion or saving has its title
         appended to ``self.bad_books``; a successful one increments
         ``self.good_books``.
      4. A summary is printed.

    Returns nothing. ``KeyboardInterrupt`` and ``SystemExit`` are not
    swallowed, so a long import can still be aborted.

    NOTE(review): this file appears to hold two definitions of ``run``; if
    both live in the same class the later one wins -- confirm and drop the
    duplicate.
    """
    # parse the original file
    self._parse_xml()

    # show xml store's version
    print('xml store version: %s'
          % self.xml_store.getroot().attrib['version'])

    # Known status groups: the built-in default first, then whatever the
    # XML declares (an XML group with the same name replaces the default).
    status_objects = {
        'Search': {
            '1': 'do not search',
            '10': 'to search',
            '20': 'searched, has results',
            '30': 'searched, no results',
        },
    }
    for status_xml in self.xml_store.findall('statuses/status'):
        status_objects[status_xml.attrib['name']] = {
            item_xml.attrib['id']: item_xml.text
            for item_xml in status_xml.iter('item')
        }

    missing_status_groups = []

    # for each book in library/books
    for book_xml in self.xml_store.findall('books/book'):
        try:
            # make the book
            book = Book()
            book.type = 'book'
            book._id = uuid_fixer(book_xml.attrib['uid'])
            book.title = book_xml.attrib['title']
            book.public = book_xml.attrib['public'] == '1'
            book.isbn = (book_xml.attrib['isbn']
                         or book_xml.attrib['isbnasinput'])

            # add notes; a book without a <notes> element is still valid
            notes_xml = book_xml.find('notes')
            if notes_xml is not None and notes_xml.text:
                book.notesPublic = notes_xml.text

            # add the publisher
            publisher_xml = book_xml.find('publisher')
            if publisher_xml is not None:
                book.publisher = publisher_xml.text

            # add the authors, primary author(s) first
            authors = []
            for author_xml in book_xml.iter('author'):
                if author_xml.attrib['primary'] == '1':
                    authors.insert(0, author_xml.text)
                else:
                    authors.append(author_xml.text)

            # Remove duplicates while keeping the order built above;
            # going through set() would lose the primary-first ordering.
            seen_authors = set()
            unique_authors = []
            for author in authors:
                if author not in seen_authors:
                    seen_authors.add(author)
                    unique_authors.append(author)
            book.authors = unique_authors

            # add statuses
            for status_on_book in book_xml.iter('status'):
                status_group = status_on_book.attrib['name']
                if status_group in status_objects:
                    converted_status = status_mapper(
                        status_group, status_on_book.attrib['value'])
                    if converted_status:
                        book.status.update(converted_status)
                elif status_group not in missing_status_groups:
                    # NOTE(review): the original layout was lost; this
                    # branch is paired with the group-membership test
                    # (unknown group) -- confirm.
                    missing_status_groups.append(status_group)

            # store date finished
            date_finished = book_xml.attrib['datefinished']
            if date_finished:
                book.activities.append(
                    dict(date=date_finished, action="book.read.finished"))

            # add tag to identify imported books
            book.tags.append("import.v1")
            print(book.title)

            # save to couchdb
            book.save()
            self.good_books += 1
        except Exception as exc:
            # Keep importing the remaining books, but say why this one
            # failed. The title is looked up defensively because a missing
            # 'title' attribute may be the very reason we are here.
            bad_title = book_xml.attrib.get('title', '<unknown title>')
            print('import failed for %r: %r' % (bad_title, exc))
            self.bad_books.append(bad_title)

    if self.bad_books:
        print('THERE WERE %s IMPORT ERRORS!' % len(self.bad_books))
        for bad_book in self.bad_books:
            print(bad_book)

    # done adding, let's see what we have
    print('Book count: {0}'.format(self.good_books))
    print('Bad books: {0}'.format(len(self.bad_books)))
    print('Missing status groups: {0}'.format(len(missing_status_groups)))
    if missing_status_groups:
        print('\t{0}'.format(missing_status_groups))