def import_CSV(search_dir=None):
    """Find a csv file in search_dir and process all the books in it.

    Authors are added to the database if not already present; each book found
    in the database is marked "Wanted", and unmatched books are looked up via
    a fuzzy search and imported when the match beats MATCH_RATIO.

    Returns a human-readable status message (also logged), even on error.
    """
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir)

        headers = None
        content = {}

        if not csvFile:
            msg = "No CSV file found in %s" % search_dir
            logger.warn(msg)
            return msg

        logger.debug(u'Reading file %s' % csvFile)
        # context manager so the csv file is always closed
        # (the original left the handle open until garbage collection)
        with open(csvFile) as csvdata:
            reader = csv.reader(csvdata)
            for row in reader:
                if reader.line_num == 1:
                    # First line supplies the headers list
                    headers = row
                else:
                    # Key the content dict on the first column; our exported
                    # csv files use bookid as the key. The key column is kept
                    # in the sub-dictionary too.
                    content[row[0]] = dict(zip(headers, row))

        # Guard against an empty csv file: headers would still be None and
        # the membership tests below would raise TypeError
        if not headers or 'Author' not in headers or 'Title' not in headers:
            msg = 'Invalid CSV file found %s' % csvFile
            logger.warn(msg)
            return msg

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        logger.debug(u"CSV: Found %s book%s in csv file" %
                     (len(content.keys()), plural(len(content.keys()))))
        for item in content.keys():
            authorname = content[item]['Author']
            # py2: decode byte strings using the configured system encoding
            if isinstance(authorname, str) and hasattr(authorname, "decode"):
                authorname = authorname.decode(lazylibrarian.SYS_ENCODING)
            authorname = formatAuthorName(authorname)
            title = content[item]['Title']
            if isinstance(title, str) and hasattr(title, "decode"):
                title = title.decode(lazylibrarian.SYS_ENCODING)

            authmatch = myDB.match('SELECT * FROM authors where AuthorName=?', (authorname,))
            if authmatch:
                logger.debug(u"CSV: Author %s found in database" % authorname)
            else:
                logger.debug(u"CSV: Author %s not found" % authorname)
                newauthor, authorid, new = addAuthorNameToDB(
                    author=authorname,
                    addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                # addAuthorNameToDB may return a preferred spelling
                if len(newauthor) and newauthor != authorname:
                    logger.debug("Preferred authorname changed from [%s] to [%s]" %
                                 (authorname, newauthor))
                    authorname = newauthor
                if new:
                    authcount += 1

            bookmatch = finditem(content[item], authorname, headers)
            result = ''
            if bookmatch:
                authorname = bookmatch['AuthorName']
                bookname = bookmatch['BookName']
                bookid = bookmatch['BookID']
                bookstatus = bookmatch['Status']
                if bookstatus in ['Open', 'Wanted', 'Have']:
                    logger.info(u'Found book %s by %s, already marked as "%s"' %
                                (bookname, authorname, bookstatus))
                else:  # skipped/ignored
                    logger.info(u'Found book %s by %s, marking as "Wanted"' %
                                (bookname, authorname))
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"Status": "Wanted"}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    bookcount += 1
            else:
                # Not in the database: fuzzy search and import the top result
                # when both author and book scores beat the configured ratio
                searchterm = "%s <ll> %s" % (title, authorname)
                results = search_for(unaccented(searchterm))
                if results:
                    result = results[0]
                    if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                            and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                        logger.info("Found (%s%% %s%%) %s: %s" %
                                    (result['author_fuzz'], result['book_fuzz'],
                                     result['authorname'], result['bookname']))
                        import_book(result['bookid'])
                        bookcount += 1
                        bookmatch = True
            if not bookmatch:
                msg = "Skipping book %s by %s" % (title, authorname)
                if not result:
                    msg += ', No results returned'
                    logger.warn(msg)
                else:
                    msg += ', No match found'
                    logger.warn(msg)
                    msg = "Closest match (%s%% %s%%) %s: %s" % \
                          (result['author_fuzz'], result['book_fuzz'],
                           result['authorname'], result['bookname'])
                    logger.warn(msg)
                skipcount += 1
        msg = "Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" % \
              (authcount, plural(authcount), bookcount, plural(bookcount),
               skipcount, plural(skipcount))
        logger.info(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
def import_CSV(search_dir=None):
    """Find a csv file in search_dir and process all the books in it.

    Authors are added to the database if not already present and each book
    found in the database is marked as "Wanted".
    Returns False on a bad directory or missing csv file.
    """
    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn(u"Please check Alternate Directory setting")
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn(u"No CSV file found in %s" % search_dir)
        # Bail out here: continuing with headers == None would raise
        # TypeError in the membership test below
        return False

    logger.debug(u'Reading file %s' % csvFile)
    # context manager so the csv file handle is always closed
    with open(csvFile) as csvdata:
        reader = csv.reader(csvdata)
        for row in reader:
            if reader.line_num == 1:
                # First line supplies the headers list
                headers = row
            else:
                # Key the content dict on the first column; our exported csv
                # files use bookid as the key. The key column is kept in the
                # sub-dictionary too.
                content[row[0]] = dict(zip(headers, row))

    # Guard against an empty file (headers still None) as well as a file
    # with the wrong columns
    if not headers or 'Author' not in headers or 'Title' not in headers:
        logger.warn(u'Invalid CSV file found %s' % csvFile)
        return

    myDB = database.DBConnection()
    bookcount = 0
    authcount = 0
    skipcount = 0
    logger.debug(u"CSV: Found %s book%s in csv file" %
                 (len(content.keys()), plural(len(content.keys()))))
    for item in content.keys():
        authorname = content[item]['Author']
        # py2: decode byte strings using the configured system encoding
        if hasattr(authorname, 'decode'):
            authorname = authorname.decode(lazylibrarian.SYS_ENCODING)
        # Parameterised query instead of string interpolation: author names
        # containing quotes would otherwise break (or inject into) the SQL
        authmatch = myDB.action('SELECT * FROM authors where AuthorName=?',
                                (authorname,)).fetchone()
        if authmatch:
            newauthor = False
            logger.debug(u"CSV: Author %s found in database" % (authorname))
        else:
            newauthor = True
            logger.debug(u"CSV: Author %s not found, adding to database" % (authorname))
            addAuthorToDB(authorname)
            authcount = authcount + 1

        bookmatch = finditem(content[item], headers)
        # if we didn't find it, maybe author info is stale
        if not bookmatch and not newauthor:
            addAuthorToDB(authorname, refresh=True)
            bookmatch = finditem(content[item], headers)

        if bookmatch:
            authorname = bookmatch['AuthorName']
            bookname = bookmatch['BookName']
            bookid = bookmatch['BookID']
            bookstatus = bookmatch['Status']
            if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                logger.info(u'Found book %s by %s, already marked as "%s"' %
                            (bookname, authorname, bookstatus))
            else:  # skipped/ignored
                logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"Status": "Wanted"}
                myDB.upsert("books", newValueDict, controlValueDict)
                bookcount = bookcount + 1
        else:
            # Use the csv title here: bookname is only assigned when a match
            # was found, so referencing it in this branch was undefined (first
            # iteration) or stale (later iterations)
            logger.warn(u"Skipping book %s by %s, not found in database" %
                        (content[item]['Title'], authorname))
            skipcount = skipcount + 1
    logger.info(u"Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" %
                (authcount, plural(authcount), bookcount, plural(bookcount),
                 skipcount, plural(skipcount)))
def import_CSV(search_dir=None):
    """Find a csv file in search_dir and process all the books in it.

    Authors are added to the database if not already present and each book
    found in the database is marked as "Wanted".
    Returns False on a bad directory or missing csv file.
    """
    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn(u"Please check Alternate Directory setting")
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn(u"No CSV file found in %s" % search_dir)
        # Bail out here: continuing with headers == None would raise
        # TypeError in the membership test below
        return False

    logger.debug(u'Reading file %s' % csvFile)
    # context manager so the csv file handle is always closed
    with open(csvFile) as csvdata:
        reader = csv.reader(csvdata)
        for row in reader:
            if reader.line_num == 1:
                # First line supplies the headers list
                headers = row
            else:
                # Key the content dict on the first column; our exported csv
                # files use bookid as the key. The key column is kept in the
                # sub-dictionary too.
                content[row[0]] = dict(zip(headers, row))

    # Guard against an empty file (headers still None) as well as a file
    # with the wrong columns
    if not headers or 'Author' not in headers or 'Title' not in headers:
        logger.warn(u'Invalid CSV file found %s' % csvFile)
        return

    myDB = database.DBConnection()
    bookcount = 0
    authcount = 0
    skipcount = 0
    logger.debug(u"CSV: Found %s book%s in csv file" %
                 (len(content.keys()), plural(len(content.keys()))))
    for item in content.keys():
        authorname = content[item]['Author']
        # py2: decode byte strings using the configured system encoding
        if hasattr(authorname, 'decode'):
            authorname = authorname.decode(lazylibrarian.SYS_ENCODING)
        # Parameterised query instead of string interpolation: author names
        # containing quotes would otherwise break (or inject into) the SQL
        authmatch = myDB.action(
            'SELECT * FROM authors where AuthorName=?', (authorname,)).fetchone()
        if authmatch:
            newauthor = False
            logger.debug(u"CSV: Author %s found in database" % (authorname))
        else:
            newauthor = True
            logger.debug(u"CSV: Author %s not found, adding to database" % (authorname))
            addAuthorToDB(authorname)
            authcount = authcount + 1

        bookmatch = finditem(content[item], headers)
        # if we didn't find it, maybe author info is stale
        if not bookmatch and not newauthor:
            addAuthorToDB(authorname, refresh=True)
            bookmatch = finditem(content[item], headers)

        if bookmatch:
            authorname = bookmatch['AuthorName']
            bookname = bookmatch['BookName']
            bookid = bookmatch['BookID']
            bookstatus = bookmatch['Status']
            if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                logger.info(
                    u'Found book %s by %s, already marked as "%s"' %
                    (bookname, authorname, bookstatus))
            else:  # skipped/ignored
                logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"Status": "Wanted"}
                myDB.upsert("books", newValueDict, controlValueDict)
                bookcount = bookcount + 1
        else:
            # Use the csv title here: bookname is only assigned when a match
            # was found, so referencing it in this branch was undefined (first
            # iteration) or stale (later iterations)
            logger.warn(u"Skipping book %s by %s, not found in database" %
                        (content[item]['Title'], authorname))
            skipcount = skipcount + 1
    logger.info(
        u"Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" %
        (authcount, plural(authcount), bookcount, plural(bookcount),
         skipcount, plural(skipcount)))
def processCSV(search_dir=None):
    """Find a csv file in search_dir and process all the books in it.

    Authors are added to the database if not found, and each matched book
    is marked as "Wanted".
    Returns False on a bad directory or missing csv file.
    """
    if not search_dir:
        logger.warn("Alternate Directory must not be empty")
        return False
    if not os.path.isdir(search_dir):
        # A missing directory cannot contain a csv file; fail early instead
        # of letting the directory scan blow up
        logger.warn("Alternate Directory [%s] not found" % search_dir)
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn("No CSV file found in %s" % search_dir)
        # Bail out here: continuing with headers == None would raise
        # TypeError in the membership test below
        return False

    logger.debug('Reading file %s' % csvFile)
    # context manager so the csv file handle is always closed
    with open(csvFile) as csvdata:
        reader = csv.reader(csvdata)
        for row in reader:
            if reader.line_num == 1:
                # Create the headers list from the first row by taking a
                # slice from item 1 as we don't need the very first header.
                headers = row[1:]
            else:
                # The key in the content dictionary is the first item in the
                # row (the bookID); the sub-dictionary zips the rest.
                content[row[0]] = dict(zip(headers, row[1:]))

    # Guard against an empty file (headers still None) as well as a file
    # with the wrong columns
    if not headers or 'Author' not in headers or 'Title' not in headers:
        logger.warn('Invalid CSV file found %s' % csvFile)
        return

    myDB = database.DBConnection()
    bookcount = 0
    authcount = 0
    skipcount = 0
    logger.debug("CSV: Found %s entries in csv file" % len(content.keys()))
    for bookid in content.keys():
        authorname = content[bookid]['Author']
        # Parameterised query instead of string interpolation: author names
        # containing quotes would otherwise break (or inject into) the SQL
        authmatch = myDB.action('SELECT * FROM authors where AuthorName=?',
                                (authorname,)).fetchone()
        if authmatch:
            logger.debug("CSV: Author %s found in database" % (authorname))
        else:
            logger.debug("CSV: Author %s not found, adding to database" % (authorname))
            importer.addAuthorToDB(authorname)
            authcount = authcount + 1

        bookmatch = 0
        isbn10 = ""
        isbn13 = ""
        bookname = content[bookid]['Title']
        if 'ISBN' in headers:
            isbn10 = content[bookid]['ISBN']
        if 'ISBN13' in headers:
            isbn13 = content[bookid]['ISBN13']

        # try to find book in our database using isbn, or if that fails,
        # fuzzy name matching
        if formatter.is_valid_isbn(isbn10):
            bookmatch = myDB.action('SELECT * FROM books where BookIsbn=?',
                                    (isbn10,)).fetchone()
        if not bookmatch:
            if formatter.is_valid_isbn(isbn13):
                bookmatch = myDB.action('SELECT * FROM books where BookIsbn=?',
                                        (isbn13,)).fetchone()
        if not bookmatch:
            bookid = librarysync.find_book_in_db(myDB, authorname, bookname)
            if bookid:
                bookmatch = myDB.action('SELECT * FROM books where BookID=?',
                                        (bookid,)).fetchone()
        if bookmatch:
            authorname = bookmatch['AuthorName']
            bookname = bookmatch['BookName']
            bookid = bookmatch['BookID']
            bookstatus = bookmatch['Status']
            if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                logger.info('Found book %s by %s, already marked as "%s"' %
                            (bookname, authorname, bookstatus))
            else:  # skipped/ignored
                logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"Status": "Wanted"}
                myDB.upsert("books", newValueDict, controlValueDict)
                bookcount = bookcount + 1
        else:
            logger.warn("Skipping book %s by %s, not found in database" % (bookname, authorname))
            skipcount = skipcount + 1
    logger.info("Added %i new authors, marked %i books as 'Wanted', %i books not found" %
                (authcount, bookcount, skipcount))
def processCSV(search_dir=None):
    """Find a csv file in search_dir and process all the books in it.

    Authors are added to the database if not found, and each matched book
    is marked as "Wanted".
    Returns False on a bad directory or missing csv file.
    """
    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn(u"Alternate Directory must not be empty")
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn(u"No CSV file found in %s" % search_dir)
        # Bail out here: continuing with headers == None would raise
        # TypeError in the membership test below
        return False

    logger.debug(u'Reading file %s' % csvFile)
    # context manager so the csv file handle is always closed
    with open(csvFile) as csvdata:
        reader = csv.reader(csvdata)
        for row in reader:
            if reader.line_num == 1:
                # Create the headers list from the first row by taking a
                # slice from item 1 as we don't need the very first header.
                headers = row[1:]
            else:
                # The key in the content dictionary is the first item in the
                # row (the bookID); the sub-dictionary zips the rest.
                content[row[0]] = dict(zip(headers, row[1:]))

    # Guard against an empty file (headers still None) as well as a file
    # with the wrong columns
    if not headers or 'Author' not in headers or 'Title' not in headers:
        logger.warn(u'Invalid CSV file found %s' % csvFile)
        return

    myDB = database.DBConnection()
    bookcount = 0
    authcount = 0
    skipcount = 0
    logger.debug(u"CSV: Found %s entries in csv file" % len(content.keys()))
    for bookid in content.keys():
        authorname = formatter.latinToAscii(content[bookid]['Author'])
        # Parameterised query instead of string interpolation: author names
        # containing quotes would otherwise break (or inject into) the SQL
        authmatch = myDB.action('SELECT * FROM authors where AuthorName=?',
                                (authorname,)).fetchone()
        if authmatch:
            logger.debug(u"CSV: Author %s found in database" % (authorname))
        else:
            logger.debug(u"CSV: Author %s not found, adding to database" % (authorname))
            importer.addAuthorToDB(authorname)
            authcount = authcount + 1

        bookmatch = 0
        isbn10 = ""
        isbn13 = ""
        bookname = formatter.latinToAscii(content[bookid]['Title'])
        if 'ISBN' in headers:
            isbn10 = content[bookid]['ISBN']
        if 'ISBN13' in headers:
            isbn13 = content[bookid]['ISBN13']

        # try to find book in our database using isbn, or if that fails,
        # name matching
        if formatter.is_valid_isbn(isbn10):
            bookmatch = myDB.action('SELECT * FROM books where BookIsbn=?',
                                    (isbn10,)).fetchone()
        if not bookmatch:
            if formatter.is_valid_isbn(isbn13):
                bookmatch = myDB.action('SELECT * FROM books where BookIsbn=?',
                                        (isbn13,)).fetchone()
        if not bookmatch:
            bookid = librarysync.find_book_in_db(myDB, authorname, bookname)
            if bookid:
                bookmatch = myDB.action('SELECT * FROM books where BookID=?',
                                        (bookid,)).fetchone()
        if bookmatch:
            authorname = bookmatch['AuthorName']
            bookname = bookmatch['BookName']
            bookid = bookmatch['BookID']
            bookstatus = bookmatch['Status']
            if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                logger.info(u'Found book %s by %s, already marked as "%s"' %
                            (bookname, authorname, bookstatus))
            else:  # skipped/ignored
                logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"Status": "Wanted"}
                myDB.upsert("books", newValueDict, controlValueDict)
                bookcount = bookcount + 1
        else:
            logger.warn(u"Skipping book %s by %s, not found in database" % (bookname, authorname))
            skipcount = skipcount + 1
    logger.info(u"Added %i new authors, marked %i books as 'Wanted', %i books not found" %
                (authcount, bookcount, skipcount))