Exemple #1
0
def import_CSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"
    """
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir)

        headers = None
        content = {}

        if not csvFile:
            msg = "No CSV file found in %s" % search_dir
            logger.warn(msg)
            return msg
        else:
            logger.debug(u'Reading file %s' % csvFile)
            reader = csv.reader(open(csvFile))
            for row in reader:
                if reader.line_num == 1:
                    # If we are on the first line, create the headers list from the first row
                    headers = row
                else:
                    # Otherwise, the key in the content dictionary is the first item in the
                    # row and we can create the sub-dictionary by using the zip() function.
                    # we include the key in the dictionary as our exported csv files use
                    # bookid as the key
                    content[row[0]] = dict(zip(headers, row))

            # We can now get to the content by using the resulting dictionary, so to see
            # the list of lines, we can do: print content.keys()  to get a list of keys
            # To see the list of fields available for each book:  print headers

            if 'Author' not in headers or 'Title' not in headers:
                msg = 'Invalid CSV file found %s' % csvFile
                logger.warn(msg)
                return msg

            myDB = database.DBConnection()
            bookcount = 0
            authcount = 0
            skipcount = 0
            logger.debug(u"CSV: Found %s book%s in csv file" % (len(content.keys()), plural(len(content.keys()))))
            for item in content.keys():
                authorname = content[item]['Author']
                if isinstance(authorname, str) and hasattr(authorname, "decode"):
                    authorname = authorname.decode(lazylibrarian.SYS_ENCODING)
                authorname = formatAuthorName(authorname)
                title = content[item]['Title']
                if isinstance(title, str) and hasattr(title, "decode"):
                    title = title.decode(lazylibrarian.SYS_ENCODING)

                authmatch = myDB.match('SELECT * FROM authors where AuthorName=?', (authorname,))

                if authmatch:
                    logger.debug(u"CSV: Author %s found in database" % authorname)
                else:
                    logger.debug(u"CSV: Author %s not found" % authorname)
                    newauthor, authorid, new = addAuthorNameToDB(author=authorname,
                                                                 addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                    if len(newauthor) and newauthor != authorname:
                        logger.debug("Preferred authorname changed from [%s] to [%s]" % (authorname, newauthor))
                        authorname = newauthor
                    if new:
                        authcount += 1

                bookmatch = finditem(content[item], authorname, headers)
                result = ''
                if bookmatch:
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    bookstatus = bookmatch['Status']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info(u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        bookcount += 1
                else:
                    searchterm = "%s <ll> %s" % (title, authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                            logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                      result['authorname'], result['bookname']))
                            import_book(result['bookid'])
                            bookcount += 1
                            bookmatch = True

                if not bookmatch:
                    msg = "Skipping book %s by %s" % (title, authorname)
                    if not result:
                        msg += ', No results returned'
                        logger.warn(msg)
                    else:
                        msg += ', No match found'
                        logger.warn(msg)
                        msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                    result['authorname'], result['bookname'])
                        logger.warn(msg)
                    skipcount += 1
            msg = "Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" % \
                  (authcount, plural(authcount), bookcount, plural(bookcount), skipcount, plural(skipcount))
            logger.info(msg)
            return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
Exemple #2
0
def import_CSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"
    """

    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn(u"Please check Alternate Directory setting")
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn(u"No CSV file found in %s" % search_dir)
    else:
        logger.debug(u'Reading file %s' % csvFile)
        reader = csv.reader(open(csvFile))
        for row in reader:
            if reader.line_num == 1:
                # If we are on the first line, create the headers list from the first row
                headers = row
            else:
                # Otherwise, the key in the content dictionary is the first item in the
                # row and we can create the sub-dictionary by using the zip() function.
                # we include the key in the dictionary as our exported csv files use
                # bookid as the key
                content[row[0]] = dict(zip(headers, row))

        # We can now get to the content by using the resulting dictionary, so to see
        # the list of lines, we can do:
        # print content.keys() # to get a list of keys
        # To see the list of fields available for each book
        # print headers

        if 'Author' not in headers or 'Title' not in headers:
            logger.warn(u'Invalid CSV file found %s' % csvFile)
            return

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        logger.debug(u"CSV: Found %s book%s in csv file" % (len(content.keys()), plural(len(content.keys()))))
        for item in content.keys():
            authorname = content[item]['Author']
            if hasattr(authorname, 'decode'):
                authorname = authorname.decode(lazylibrarian.SYS_ENCODING)

            authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone()

            if authmatch:
                newauthor = False
                logger.debug(u"CSV: Author %s found in database" % (authorname))
            else:
                newauthor = True
                logger.debug(u"CSV: Author %s not found, adding to database" % (authorname))
                addAuthorToDB(authorname)
                authcount = authcount + 1

            bookmatch = finditem(content[item], headers)

            # if we didn't find it, maybe author info is stale
            if not bookmatch and not newauthor:
                addAuthorToDB(authorname, refresh=True)
                bookmatch = finditem(content[item], headers)

            if bookmatch:
                authorname = bookmatch['AuthorName']
                bookname = bookmatch['BookName']
                bookid = bookmatch['BookID']
                bookstatus = bookmatch['Status']
                if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                    logger.info(u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                else:  # skipped/ignored
                    logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"Status": "Wanted"}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    bookcount = bookcount + 1
            else:
                logger.warn(u"Skipping book %s by %s, not found in database" % (bookname, authorname))
                skipcount = skipcount + 1
        logger.info(u"Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" %
                    (authcount, plural(authcount), bookcount, plural(bookcount), skipcount, plural(skipcount)))
Exemple #3
0
def import_CSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"
    """

    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn(u"Please check Alternate Directory setting")
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn(u"No CSV file found in %s" % search_dir)
    else:
        logger.debug(u'Reading file %s' % csvFile)
        reader = csv.reader(open(csvFile))
        for row in reader:
            if reader.line_num == 1:
                # If we are on the first line, create the headers list from the first row
                headers = row
            else:
                # Otherwise, the key in the content dictionary is the first item in the
                # row and we can create the sub-dictionary by using the zip() function.
                # we include the key in the dictionary as our exported csv files use
                # bookid as the key
                content[row[0]] = dict(zip(headers, row))

        # We can now get to the content by using the resulting dictionary, so to see
        # the list of lines, we can do:
        # print content.keys() # to get a list of keys
        # To see the list of fields available for each book
        # print headers

        if 'Author' not in headers or 'Title' not in headers:
            logger.warn(u'Invalid CSV file found %s' % csvFile)
            return

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        logger.debug(u"CSV: Found %s book%s in csv file" %
                     (len(content.keys()), plural(len(content.keys()))))
        for item in content.keys():
            authorname = content[item]['Author']
            if hasattr(authorname, 'decode'):
                authorname = authorname.decode(lazylibrarian.SYS_ENCODING)

            authmatch = myDB.action(
                'SELECT * FROM authors where AuthorName="%s"' %
                (authorname)).fetchone()

            if authmatch:
                newauthor = False
                logger.debug(u"CSV: Author %s found in database" %
                             (authorname))
            else:
                newauthor = True
                logger.debug(u"CSV: Author %s not found, adding to database" %
                             (authorname))
                addAuthorToDB(authorname)
                authcount = authcount + 1

            bookmatch = finditem(content[item], headers)

            # if we didn't find it, maybe author info is stale
            if not bookmatch and not newauthor:
                addAuthorToDB(authorname, refresh=True)
                bookmatch = finditem(content[item], headers)

            if bookmatch:
                authorname = bookmatch['AuthorName']
                bookname = bookmatch['BookName']
                bookid = bookmatch['BookID']
                bookstatus = bookmatch['Status']
                if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                    logger.info(
                        u'Found book %s by %s, already marked as "%s"' %
                        (bookname, authorname, bookstatus))
                else:  # skipped/ignored
                    logger.info(u'Found book %s by %s, marking as "Wanted"' %
                                (bookname, authorname))
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"Status": "Wanted"}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    bookcount = bookcount + 1
            else:
                logger.warn(u"Skipping book %s by %s, not found in database" %
                            (bookname, authorname))
                skipcount = skipcount + 1
        logger.info(
            u"Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found"
            % (authcount, plural(authcount), bookcount, plural(bookcount),
               skipcount, plural(skipcount)))
def processCSV(search_dir=None):        
    """ Find a csv file in the search_dir and process all the books in it, 
    adding authors to the database if not found, and marking the books as "Wanted" """
     
    if not search_dir:
        logger.warn("Alternate Directory must not be empty")
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn("No CSV file found in %s" % search_dir)
    else:
        logger.debug('Reading file %s' % csvFile)
        reader=csv.reader(open(csvFile))
        for row in reader:
            if reader.line_num == 1:
                # If we are on the first line, create the headers list from the first row
                # by taking a slice from item 1  as we don't need the very first header.
                headers = row[1:]
            else:
                # Otherwise, the key in the content dictionary is the first item in the
                # row and we can create the sub-dictionary by using the zip() function.
                content[row[0]] = dict(zip(headers, row[1:]))
            
        # We can now get to the content by using the resulting dictionary, so to see
        # the list of lines, we can do:
        #print content.keys() # to get a list of bookIDs
        # To see the list of fields available for each book
        #print headers
        
        if 'Author' not in headers or 'Title' not in headers:
            logger.warn('Invalid CSV file found %s' % csvFile)
            return
            
        myDB = database.DBConnection() 
        bookcount = 0
        authcount = 0
        skipcount = 0  
        logger.debug("CSV: Found %s entries in csv file" % len(content.keys()))  
        for bookid in content.keys():
            
            authorname = content[bookid]['Author']
            authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone()
        
            if authmatch:
                logger.debug("CSV: Author %s found in database" % (authorname))
            else:
                logger.debug("CSV: Author %s not found, adding to database" % (authorname))
                importer.addAuthorToDB(authorname)
                authcount = authcount + 1

            bookmatch = 0
            isbn10=""
            isbn13=""    
            bookname = content[bookid]['Title']
            if 'ISBN' in headers:
                isbn10 = content[bookid]['ISBN']
            if 'ISBN13' in headers:
                isbn13 = content[bookid]['ISBN13']

            # try to find book in our database using isbn, or if that fails, fuzzy name matching
            if formatter.is_valid_isbn(isbn10):
                bookmatch = myDB.action('SELECT * FROM books where Bookisbn=%s' % (isbn10)).fetchone()
            if not bookmatch:
                if formatter.is_valid_isbn(isbn13):
                    bookmatch = myDB.action('SELECT * FROM books where BookIsbn=%s' % (isbn13)).fetchone()
            if not bookmatch: 
                bookid = librarysync.find_book_in_db(myDB, authorname, bookname)
                if bookid:
                    bookmatch = myDB.action('SELECT * FROM books where BookID="%s"' % (bookid)).fetchone()
            if bookmatch:
                authorname = bookmatch['AuthorName']
                bookname = bookmatch['BookName']
                bookid = bookmatch['BookID']
                bookstatus = bookmatch['Status']
                if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                    logger.info('Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                else: # skipped/ignored
                    logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"Status": "Wanted"}                  
                    myDB.upsert("books", newValueDict, controlValueDict)
                    bookcount = bookcount + 1
            else:    
                logger.warn("Skipping book %s by %s, not found in database" % (bookname, authorname))
                skipcount = skipcount + 1
        logger.info("Added %i new authors, marked %i books as 'Wanted', %i books not found" % (authcount, bookcount, skipcount))    
Exemple #5
0
def processCSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
    adding authors to the database if not found, and marking the books as "Wanted" """

    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn(u"Alternate Directory must not be empty")
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn(u"No CSV file found in %s" % search_dir)
    else:
        logger.debug(u'Reading file %s' % csvFile)
        reader = csv.reader(open(csvFile))
        for row in reader:
            if reader.line_num == 1:
                # If we are on the first line, create the headers list from the first row
                # by taking a slice from item 1  as we don't need the very first header.
                headers = row[1:]
            else:
                # Otherwise, the key in the content dictionary is the first item in the
                # row and we can create the sub-dictionary by using the zip() function.
                content[row[0]] = dict(zip(headers, row[1:]))

        # We can now get to the content by using the resulting dictionary, so to see
        # the list of lines, we can do:
        # print content.keys() # to get a list of bookIDs
        # To see the list of fields available for each book
        # print headers

        if 'Author' not in headers or 'Title' not in headers:
            logger.warn(u'Invalid CSV file found %s' % csvFile)
            return

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        logger.debug(u"CSV: Found %s entries in csv file" % len(content.keys()))
        for bookid in content.keys():

            authorname = formatter.latinToAscii(content[bookid]['Author'])
            authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone()

            if authmatch:
                logger.debug(u"CSV: Author %s found in database" % (authorname))
            else:
                logger.debug(u"CSV: Author %s not found, adding to database" % (authorname))
                importer.addAuthorToDB(authorname)
                authcount = authcount + 1

            bookmatch = 0
            isbn10 = ""
            isbn13 = ""
            bookname = formatter.latinToAscii(content[bookid]['Title'])
            if 'ISBN' in headers:
                isbn10 = content[bookid]['ISBN']
            if 'ISBN13' in headers:
                isbn13 = content[bookid]['ISBN13']

            # try to find book in our database using isbn, or if that fails, name matching
            if formatter.is_valid_isbn(isbn10):
                bookmatch = myDB.action('SELECT * FROM books where Bookisbn=%s' % (isbn10)).fetchone()
            if not bookmatch:
                if formatter.is_valid_isbn(isbn13):
                    bookmatch = myDB.action('SELECT * FROM books where BookIsbn=%s' % (isbn13)).fetchone()
            if not bookmatch:
                bookid = librarysync.find_book_in_db(myDB, authorname, bookname)
                if bookid:
                    bookmatch = myDB.action('SELECT * FROM books where BookID="%s"' % (bookid)).fetchone()
            if bookmatch:
                authorname = bookmatch['AuthorName']
                bookname = bookmatch['BookName']
                bookid = bookmatch['BookID']
                bookstatus = bookmatch['Status']
                if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                    logger.info(u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                else:  # skipped/ignored
                    logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"Status": "Wanted"}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    bookcount = bookcount + 1
            else:
                logger.warn(u"Skipping book %s by %s, not found in database" % (bookname, authorname))
                skipcount = skipcount + 1
        logger.info(u"Added %i new authors, marked %i books as 'Wanted', %i books not found" %
                    (authcount, bookcount, skipcount))