Beispiel #1
0
def get_client():
    """Build a Goodreads client from credentials in the environment.

    The key and secret are read with ``os.getenv`` from
    ``GOODREADS_CLIENT_ID`` and ``GOODREADS_CLIENT_SECRET``; either value is
    ``None`` when the variable is unset.

    Returns:
        The constructed ``client.GoodreadsClient``. ``authenticate()`` is
        deliberately not called on it here.
    """
    client_id = os.getenv('GOODREADS_CLIENT_ID')
    client_secret = os.getenv('GOODREADS_CLIENT_SECRET')
    return client.GoodreadsClient(client_id, client_secret)
Beispiel #2
0
def main():
    """Collect Goodreads data repeatedly until the configured run time elapses.

    Reads the API key/secret, the data directory and the timing parameters
    from the module-level ``config`` mapping, then calls
    ``grcollector.fetch_data()`` in a loop, sleeping ``WAIT_TIME`` seconds
    between attempts, until ``END_TIME`` seconds have passed.

    A failed fetch is logged with its traceback and does not stop the loop.
    """
    logging.debug("Creating good reads client.")
    grclient = client.GoodreadsClient(config['GOODREADKEYS']['KEY'], config['GOODREADKEYS']['SECRET'])
    grcollector = collectdata.GoodreadsCollect(grclient, config['DATA_DIR_PATH']['PATH'])

    end_after = int(config['TIMEPARAM']['END_TIME'])
    wait_time = int(config['TIMEPARAM']['WAIT_TIME'])

    logging.debug(f"Execution started at {datetime.now()}")
    end_time = datetime.now() + timedelta(seconds=end_after)
    logging.debug(f"Execution will end at : {end_time}")

    while datetime.now() < end_time:
        logging.debug("Fetching data...........")

        try:
            grcollector.fetch_data()
        except Exception:
            # logging.exception() attaches the active traceback itself; passing
            # the exception as a positional argument without a matching
            # placeholder makes the logging module report a formatting error.
            logging.exception("Some exception occured : ")

        # Wait after failures as well, so a persistent error does not turn
        # the loop into a tight retry spin against the API.
        logging.debug(f"Waiting for {config['TIMEPARAM']['WAIT_TIME']} seconds")
        time.sleep(wait_time)

    logging.debug("Execution ended.")
Beispiel #3
0
 def __init__(self):
     """Create the Goodreads client and the Elasticsearch connection.

     Stores the client on ``self.gc`` and an ``Elasticsearch`` instance
     pointing at ``localhost:9200`` on ``self.es``.
     """
     # NOTE(review): the API key and secret are hard-coded in source; the
     # original author also noted the Goodreads client should not really
     # be needed here.
     goodreads_key = "NGr7Zl6XG9nTeNClLz9xA"
     goodreads_secret = "WILsiGKkWTEoKh4M7z11TF0P2ukSRcJ2OEFJMngDgY"
     self.gc = client.GoodreadsClient(goodreads_key, goodreads_secret)

     es_node = {'host': 'localhost', 'port': 9200}
     self.es = Elasticsearch([es_node])
Beispiel #4
0
def index(request):
    """Render the index page with the Goodreads books whose ids are 1 to 9.

    Each book is fetched with ``gc.book(<id>)`` and exposed to the template
    under ``first_book`` ... ``ninth_book``.

    Args:
        request: The incoming request object, passed through to ``render``.

    Returns:
        The result of rendering ``good_reads_api/index.html``.
    """
    gc = client.GoodreadsClient(key['api_key'], key['api_secret'])

    # The context keys are kept exactly as before (including the spelling
    # 'eigth_book') because they are the names the template receives.
    ordinals = (
        'first', 'second', 'third', 'fourth', 'fifth',
        'sixth', 'seventh', 'eigth', 'ninth',
    )
    # Building the context from one table guarantees that every key maps to
    # its own book; previously 'fifth_book' was bound to the first book.
    context = {
        ordinal + '_book': gc.book(book_id)
        for book_id, ordinal in enumerate(ordinals, start=1)
    }
    return render(request, 'good_reads_api/index.html', context)
Beispiel #5
0
def get_gr_page(author):
    """Return a parsed Goodreads page for *author* that contains quotes.

    Searches books by author, keeps the first three results, and tries them
    in random order until a downloaded page contains a
    ``<div class="quoteText">`` element.

    Args:
        author: Author name passed to ``search_books``.

    Returns:
        The ``BeautifulSoup`` document of the first page found with quotes,
        or ``None`` when the search is empty, a download fails, or none of
        the candidates has quotes.
    """
    gc = client.GoodreadsClient(goodreads_key, goodreads_secret)

    # order seems to be driven by likelihood of quotes so only the first
    # three results are considered
    candidates = gc.search_books(author, search_field='author')[:3]

    # Looping on the candidate list itself also covers an empty search
    # result, which would otherwise make choice() raise IndexError.
    while candidates:
        book_id = choice(candidates)

        try:
            # The context manager closes the HTTP response after reading.
            with urllib.request.urlopen(gr_url + book_id) as response:
                page = response.read()
        except Exception as e:
            print(f"Exception retrieving from goodreads: {e}")
            return None

        soup = BeautifulSoup(page, "lxml")
        if soup.find("div", class_="quoteText"):
            return soup
        candidates.remove(book_id)

    return None
def get_basic_book_info(isbn):
    """Look up a book by ISBN and return its basic details.

    Args:
        isbn: ISBN passed as the second positional argument of ``gc.book``
            (the first argument is ``None``).

    Returns:
        A list ``[title, first author, publisher, image URL, link]``.
    """
    goodreads = client.GoodreadsClient(Info.key, Info.secret)
    found = goodreads.book(None, isbn)
    return [
        found.title,
        found.authors[0],
        found.publisher,
        found.image_url,
        found.link,
    ]
Beispiel #7
0
def initiate_goodreads_lient():
    """Create the global Goodreads client from ``goodreads.ini``.

    Reads ``api_key`` and ``api_secret`` from the ``access_info`` section of
    ``goodreads.ini`` in the current directory, prints the section names
    that were found, and stores the new client in the module-level ``gc``.

    Returns:
        The newly created ``client.GoodreadsClient``.

    Raises:
        configparser.Error: If the ini file is missing or lacks the
            ``access_info`` section or either option (``config.read``
            silently ignores a missing file, so the failure surfaces in
            ``config.get``).
    """
    global gc
    # read api connection details from ini file.
    config = configparser.ConfigParser()
    # os.path.join picks the separator for the running platform; the former
    # hard-coded backslash only produced a valid path on Windows.
    ini_path = os.path.join(os.path.curdir, 'goodreads.ini')
    config.read(ini_path)
    print(str(config.sections()))
    api_key = config.get('access_info', 'api_key')
    api_secret = config.get('access_info', 'api_secret')
    gc = client.GoodreadsClient(api_key, api_secret)
    return gc
def main():
    """Score the books matching the command-line query and write a report.

    Fetches books through ``get_books``, computes the maximum average
    rating, maximum text-review count and maximum total rating count across
    them, wraps every book in ``Book`` with those maxima, and writes the
    books sorted by descending ``fitnessScore`` to ``results/results.txt``.

    Raises:
        ValueError: If ``get_books`` returns no books (there is no maximum
            to compute).
    """
    args = parse_args()

    # Setup stdout
    encoding = locale.getpreferredencoding()
    writer = getwriter(encoding)
    sys.stdout = writer(sys.stdout)

    args.query = to_unicode(args.query)

    grc = client.GoodreadsClient(api_key, api_secret)

    books = get_books(grc, args)
    # Only the maxima are needed, so a full sort per criterion is avoided.
    max_AR = max(float(b.average_rating) for b in books)
    max_NTR = max(float(b.text_reviews_count) for b in books)

    totaux = []
    for book in books:
        # rating_dist is parsed as '|'-separated "label:count" pairs.
        d = {
            star_label: int(star_number)
            for star_label, star_number in (
                tuple(rating.split(':'))
                for rating in book.rating_dist.split('|'))
        }
        # A distribution without a 'total' entry counts as infinity.
        totaux.append(float(d.pop('total', 'inf')))
    max_TR = max(totaux)

    Books = [Book(book, max_AR, max_NTR, max_TR) for book in books]

    if not os.path.isdir("results"):
        os.mkdir("results")
    result_file = os.path.join("results", "results.txt")
    with open(result_file, "w") as f:
        old, sys.stdout = sys.stdout, f
        try:
            print(
                "List of results, sorted by higher fitness:\n<Book title> : <Book fitness>"
            )
            print("-" * 56)
            for book in sorted(Books, key=lambda b: b.fitnessScore, reverse=True):
                print(book)
        finally:
            # Always restore stdout, even if printing a book fails; otherwise
            # later output would go to a closed file.
            sys.stdout = old
    print(
        "Find your results in the 'results.txt' file of the root directory of the script"
    )
Beispiel #9
0
def get_books_from_gr_api(query, api_key, api_secret):
	"""Return the distinct Goodreads ids of books matching *query*.

	Requests result pages 1 through 9 of an all-fields search. A page whose
	request fails is skipped, so the result is best effort.

	Args:
		query: Search text.
		api_key: Goodreads API key.
		api_secret: Goodreads API secret.

	Returns:
		A list of unique ``gid`` values in no particular order.
	"""
	gc = client.GoodreadsClient(api_key, api_secret)
	new_books = []
	for page in range(1, 10):
		try:
			new_books += gc.search_books(q=query, page=page, search_field='all')
		except Exception:
			# Skip failed pages, but let KeyboardInterrupt/SystemExit
			# propagate (a bare ``except`` would swallow those too).
			continue

	return list({book.gid for book in new_books})
def main():
    """Save Project Gutenberg texts and matching Goodreads metadata.

    For Gutenberg ids 1 through 9 that have a title, the stripped text is
    written to ``output/<sanitized title>.txt``. Each saved title is then
    searched on Goodreads by title, and metadata of the first hit is
    collected and dumped as JSON to ``output/log.json``.

    Raises:
        KeyError: If ``GOODREADS_KEY`` or ``GOODREADS_SECRET`` is not set in
            the environment.
    """
    # the API keys come from environment variables
    goodreads_key = os.environ['GOODREADS_KEY']
    goodreads_secret = os.environ['GOODREADS_SECRET']

    # creating a client for book search and information retrieval
    gc = client.GoodreadsClient(goodreads_key, goodreads_secret)

    output_dir = os.path.join(os.getcwd(), "output")

    gutenberg_titles = []

    # Getting the titles of Project Gutenberg books with ids 1 to 9
    for i in range(1, 10):
        title = list(get_metadata('title', i))
        if not title:
            continue
        # prepare the string for the file name: keep alphanumerics and spaces
        base_name = ''.join(
            e for e in title[0] if e.isalnum() or e == ' ')
        gutenberg_titles.append(base_name)
        text = strip_headers(load_etext(i)).strip()
        with open(os.path.join(output_dir, base_name + ".txt"),
                  "w") as output_file:
            output_file.write(text)

    titles = dict()
    # Searching for the books on Goodreads, reading their metadata
    for book_title in gutenberg_titles:
        try:
            lst = gc.search_books(book_title, search_field='title')
            if not lst:
                continue
            book = lst[0]

            titles[book.title] = (
                book_title + ".txt", str(book.popular_shelves),
                str(book.similar_books), str(book.authors),
                dict(dict(book.work)['original_publication_year'])['#text'])
        except (request.GoodreadsRequestException, KeyError, TypeError):
            # Skip books whose lookup fails or whose metadata is incomplete.
            continue

    # The context manager closes the log file even if serialisation fails.
    with open(os.path.join(output_dir, "log.json"), "w") as file:
        json.dump(titles, file, indent=4)
Beispiel #11
0
def recommend_book(feeling, FastText_model, book_kmeans, feelings_knn, glove):
    '''Recommend the 5 books most relevant to the user's query.

    Steps: map the query to a case, look up the book prescribed for that
    case in ``cure_short.csv``, embed that book's description, predict its
    cluster, then return the titles of the 5 nearest neighbours (cosine
    distance) among the books of that cluster in
    ``best_books_clustered.json``.

    input:
    feeling: the user query string
    FastText_model: the pre-trained FastText model on all book summaries from the Goodreads list 'Best Books Ever'
    book_kmeans: the pre-trained k-means clustering model on the vector descriptions of all book summaries from the Goodreads list 'Best Books Ever'
    feelings_knn: the pre-trained k-NN model on all cases
    glove: the dictionary of pre-trained GloVe word embeddings

    output:
    a list of 5 book titles that are most relevant to the user's query
    '''
    # NOTE(review): the Goodreads API credentials are hard-coded in source.
    good_client = client.GoodreadsClient(
        't6mIkabukH29jAey0381yA',
        '6IWvvO5CNFIAqUragee2Bb5HkEOvxIYqSXeXdFSHvM',
    )

    # table mapping cases to prescribed books
    cure_short = pd.read_csv('cure_short.csv')

    # case matching the query, and the Goodreads id prescribed for it
    case_indices = feeling_to_case(feeling, feelings_knn, glove)
    prescribed_id = cure_short.iloc[case_indices]['goodreads_id'].item()

    # vector representation of the prescribed book's description
    prescribed_book = good_client.book(prescribed_id)
    prescribed_vector = preprocess(prescribed_book.description, FastText_model)

    # books that share the prescribed book's cluster
    cluster_number = book_kmeans.predict(prescribed_vector).item()
    info_books = pd.read_json('best_books_clustered.json')
    in_cluster = info_books['cluster'] == cluster_number
    predicted_cluster = info_books[in_cluster]
    predicted_cluster.reset_index(drop=True, inplace=True)

    # k-NN over the description vectors of the cluster's books
    cluster_vectors = np.array(list(predicted_cluster['vector_description']))
    books_knn = NearestNeighbors(metric='cosine')  # should try 'mahalanobis'
    books_knn.fit(cluster_vectors)

    # the 5 closest books; the distances themselves are not used
    _, neighbour_indices = books_knn.kneighbors(prescribed_vector, n_neighbors=5)
    neighbour_indices = neighbour_indices.reshape(-1, )
    return list(predicted_cluster.iloc[neighbour_indices]['title'])
Beispiel #12
0
def getBookInfo(count=booksCount):
    """
    Fetch the books with ids 1 to ``count`` (inclusive) and return those
    that could be retrieved, in id order. Each retrieved book is printed as
    ``#<gid> <title>``; ids that fail are reported and skipped.
    """
    # NOTE(review): credentials are hard-coded, and the same key/secret pair
    # is also passed to authenticate() -- confirm that is intended.
    api_key = "JdH0YOXuBLIFXhXUGMNmA"
    api_secret = "XtIlhsavfhSICDEMLzWLRyKp509VNaeGbVUh7tJaQ"
    gc = client.GoodreadsClient(api_key, api_secret)
    gc.authenticate(api_key, api_secret)

    bookList = []
    for book_id in range(1, count + 1):
        try:
            book = gc.book(book_id)  # get id --> .gid
            bookList.append(book)
            print('#' + book.gid, book.title)
        except Exception:
            print("book", book_id, "cannot be found?")
    return bookList
Beispiel #13
0
def goodreads(isbn):
    """Return title, authors and rating data for a book as a JSON response.

    Args:
        isbn: ISBN of the book to look up.

    Returns:
        The ``jsonify`` response with keys ``title``, ``author`` (the
        ``repr`` of each author joined by ", "), ``avg_rating`` and
        ``rev_count``.
    """
    # NOTE(review): the API key and secret are hard-coded in source.
    cl = client.GoodreadsClient("YwoRrbmvD1xQgyfIIOPQ", "INo8jC4JeAVOiXFckzQPKqYYeuq2wFLJyBKQmidDy0")
    book = cl.book(isbn=isbn)

    # join() puts the separator only between authors, so the string no
    # longer ends with a dangling ", ".
    author = ", ".join(repr(a) for a in book.authors)

    # The dict literal previously lacked commas between its entries, which
    # made the whole module fail to compile.
    return jsonify({
        "title": book.title,
        "author": author,
        "avg_rating": book.average_rating,
        "rev_count": book.text_reviews_count,
    })
def set_genre_goodreads(books):
    """Fill the 'Hashed-Genre' column of *books* from Goodreads shelves.

    For every row whose 'Hashed-Genre' is missing, the book is looked up by
    its index value (used as ISBN). The first popular shelf that is not a
    generic bookkeeping shelf (e.g. 'to-read', 'owned') is hashed with
    ``hash_text`` and stored; ``0`` is stored when no such shelf remains.
    Books whose lookup fails are reported and left untouched.

    The frame is modified in place and saved with
    ``save_processed_dataframe`` after every 500 attempted books and once
    more at the end.

    Args:
        books: DataFrame indexed by ISBN with a 'Hashed-Genre' column.
    """
    # NOTE(review): the API key and secret are hard-coded in source.
    CONSUMER_KEY = 'gsyDVEcZ3ZOFqDvXxrA'
    CONSUMER_SECRET = 'U182SOVsGZRYY7IFJ7jqpltLyXkBvPwp9mUnLsWcek'
    gr = client.GoodreadsClient(CONSUMER_KEY, CONSUMER_SECRET)
    chunk_size = 500

    # Shelves that describe ownership/reading status rather than a genre.
    # Built once instead of on every loop iteration.
    undesired_shelves = {
        'to-read', 'own', 'books-i-own', 'currently-reading',
        'owned-books', 'default', 're-read', 'my-books', 'owned',
        'favorite', 'favorites', 'wishlist', 'kindle', 'to-buy', 'library',
        'ebook', 'maybe', 'my-book', 'e-book', 'e-books', 'books',
        'my-library', 'want-to-read', 'have', 'own-it', 'own-to-read',
        'giller-winners', 'recommended', 'wish-list', 'my-booshelf',
        'short-stories', 'didn-t-finish', 'read-in-2015', 'coming-of-age',
        'read-in-2009', 'read-in-2016', 'gave-up', 'dnf',
        'personal-library', 'book-club', 'not-available-on-overdrive',
        'abandoned', 'single-books', 'must-read',
    }

    already_processed = books.dropna(subset=['Hashed-Genre'], how='any')
    to_be_read = set(books.index) - set(already_processed.index)
    for i, isbn in enumerate(to_be_read):
        print("Reading book ", i, " of ", len(to_be_read))
        try:
            shelves = [str(x) for x in gr.book(isbn=isbn).popular_shelves]
        except Exception:
            print("Error in ISBN ", isbn)
            continue

        # Filtering (rather than list.remove) also drops repeated
        # occurrences of an undesired shelf while keeping the order.
        shelves = [shelf for shelf in shelves if shelf not in undesired_shelves]

        # DataFrame.set_value was removed from pandas; .at is the scalar
        # setter that replaces it.
        books.at[isbn, 'Hashed-Genre'] = hash_text(shelves[0]) if shelves else 0

        # save constantly the books dataset
        if (i + 1) % chunk_size == 0:
            save_processed_dataframe(books)
    save_processed_dataframe(books)
Beispiel #15
0
    def __init__(self, api_info, api_type='goodreads'):
        """
        Inits a knowledgeSeeker who will seek out book information.

        Creates the API client and opens the on-disk bookshelf (a ``shelve``
        database) used for this API type.

        :param api_info: key info for the api we are using; ``api_info[0]`` is
        the goodreads key and ``api_info[1]`` the goodreads secret.
        :param api_type: which api to use. Only 'goodreads' is supported.
        :raises ValueError: if ``api_type`` is anything other than 'goodreads'.
        """

        self.api_type = api_type
        self.current_path = None

        if api_type != 'goodreads':
            # ValueError is a subclass of Exception, so callers that caught
            # the former generic Exception keep working.
            raise ValueError("api_type must be \'goodreads\'.")

        self.api_client = client.GoodreadsClient(api_info[0], api_info[1])
        self.bookshelf_path = "../data/book_db/goodreads_bookshelf.db"

        self.bookshelf = shelve.open(self.bookshelf_path)
Beispiel #16
0
 def __init__(self,
              index,
              search_type,
              rating_weight=1,
              pages_weight=1,
              shelf_weight=1,
              regular_weight=1,
              idf=True):
     """Set up the search backends, scoring weights and shelf tables.

     Args:
         index: Stored on ``self.index``.
         search_type: Stored on ``self.type``.
         rating_weight: Stored on ``self.rating_weight``.
         pages_weight: Stored on ``self.pages_weight``.
         shelf_weight: Stored on ``self.shelf_weight``.
         regular_weight: Stored on ``self.regular_weight``.
         idf: Passed to ``self.read_shelf_file``, whose two return values
             become ``self.shelf_to_pos`` and ``self.shelf_to_score``.
     """
     #sets up good reads client and elastic search,
     #should not really need goodreads client
     # NOTE(review): the API key and secret are hard-coded in source.
     self.gc = client.GoodreadsClient(
         "NGr7Zl6XG9nTeNClLz9xA",
         "WILsiGKkWTEoKh4M7z11TF0P2ukSRcJ2OEFJMngDgY")
     self.es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
     self.index = index
     self.type = search_type
     self.shelf_to_pos, self.shelf_to_score = self.read_shelf_file(idf)
     self.rating_weight = rating_weight
     self.pages_weight = pages_weight
     self.regular_weight = regular_weight
     self.shelf_weight = shelf_weight
     # Raw string: "\w" in a normal string literal is an invalid escape
     # sequence and triggers a warning on current Python versions.
     self.reg = re.compile(r"\w+")
def main():
    """Save Project Gutenberg texts and log Goodreads publication years.

    For Gutenberg ids 1 through 9 that have a title, the stripped text is
    written to ``output/<sanitized title>.txt`` and a line is appended to
    ``output/log.txt``. Then, for Goodreads ids 1 through 19, the title and
    original publication year are appended to the same log; books that
    cannot be fetched or lack the year are skipped.

    Raises:
        KeyError: If ``GOODREADS_KEY`` or ``GOODREADS_SECRET`` is not set in
            the environment.
    """
    # the API keys come from environment variables
    goodreads_key = os.environ['GOODREADS_KEY']
    goodreads_secret = os.environ['GOODREADS_SECRET']

    # creating a client for book search and information retrieval
    gc = client.GoodreadsClient(goodreads_key, goodreads_secret)

    current_path = os.getcwd()

    # The context manager flushes and closes the log on every exit path;
    # the handle was previously never closed.
    with open(os.path.join(current_path, "output", "log.txt"), "a") as log_file:
        # Getting the titles of Project Gutenberg books with ids 1 to 9
        for i in range(1, 10):
            title = list(get_metadata('title', i))
            if not title:
                continue
            # prepare the string for the file name: alphanumerics only
            filename = ''.join(e for e in title[0] if e.isalnum()) + ".txt"
            text = strip_headers(load_etext(i)).strip()
            with open(os.path.join(current_path, "output", filename),
                      "w") as output_file:
                output_file.write(text)
            # Report the sanitized name actually written, not the raw title.
            log_file.write(f"{title[0]} plaintext saved to '{filename}'\n")

        # Getting the titles and publishing years for Goodreads ids 1 to 19
        # Pretty slow because Goodreads allows 1 request per second
        for i in range(1, 20):
            try:
                book = gc.book(i)
                log_file.write(
                    f"{book.title} - published in {dict(dict(book.work)['original_publication_year'])['#text']}\n"
                )
            except (request.GoodreadsRequestException, KeyError):
                continue
Beispiel #18
0
from goodreads import client
import pickle

# NOTE(review): the API key and secret are hard-coded in source.
gc = client.GoodreadsClient("ueUMoVwAkXNUPcOA4Y35yA", "R2N1HWPOdt4EEwQionNz0h3BgkY2z0yP5T3eXp8djc")

# Maps book id -> (publisher, description, average_rating).
book_dict = {}


def get_book(i):
    """Fetch book *i* and record its details in ``book_dict``.

    On success ``book_dict[i]`` is set to
    ``(publisher, description, average_rating)``. Any lookup error leaves
    ``book_dict`` unchanged. Always returns ``None``.
    """
    try:
        book = gc.book(str(i))
        book_dict[i] = (book.publisher, book.description, book.average_rating)
    except Exception:
        # Ids that cannot be fetched are skipped; KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except.
        return None


for i in range(10000):
    get_book(i)
    print(i)

# pickle.dump needs a binary file object; passing the path string raised
# TypeError after the whole crawl had finished.
with open("data.pkl", "wb") as pickle_file:
    pickle.dump(book_dict, pickle_file)
Beispiel #19
0
from flask import Flask, request, render_template
from goodreads import client

# NOTE(review): the API key and secret are hard-coded in source.
gc = client.GoodreadsClient('ZfHUtSBSMU3ZOTo82zl7wA','OlI30Km37ikK1EIgG7jwWDxeiaAJDX10Fm9zw9XTUA')

app = Flask(__name__)

@app.route('/')
def form():
	"""Serve the input form."""
	return render_template('form.html' )

@app.route('/', methods=['POST'])
def form_post():
	"""Show the submitted book followed by up to 7 similar books.

	Reads the ``input`` form field as an integer book id. Any failure
	(missing field, non-numeric input, lookup error) renders ``404.html``.
	"""
	try:
		# The submitted value was read into one name but parsed from an
		# undefined ``text``; it is now parsed once from the form field.
		book_id = int(request.form['input'].strip())
		# Fetch the book once and reuse it for both the list head and its
		# similar books.
		book = gc.book(book_id)
		recommendations = book.similar_books[:7]
		recommendations.insert(0, book)
		print(recommendations)
		# The template previously received an undefined ``links``.
		return render_template('index.html', len = 7, recommendations=recommendations)
	except Exception:
		return render_template('404.html')

if __name__=="__main__":
	app.run()


def main(argv):
    """Extend the book table in ``books.md`` with Goodreads attributes.

    Each table row is split on ``' | '``. Rows whose fourth column contains
    a ``[Goodreads]`` link get title, average rating, ratings count, page
    count, publication date, publisher and ISBN from Goodreads; attributes
    that are ``None`` (and rows without a link) get default values. The
    rows are sorted by average rating, highest first, and written through
    ``write_markdown`` and ``write_data``.

    Args:
        argv: Script arguments; ``argv[1]`` is the Goodreads client key and
            ``argv[2]`` the client secret (see https://www.goodreads.com/api).
    """
    # since we are reading only, no need to authenticate
    gc = client.GoodreadsClient(argv[1], argv[2])

    # parse the markdown document (table) into lines; the context manager
    # closes the file, which was previously left open
    with open('books.md') as markdown_file:
        lines = [line.strip() for line in markdown_file]

    # create new headings starting with the old markdown file header
    headings = create_new_headings(lines[0])

    # (attribute key, default) in the order the columns are appended.
    # The default rating is '0.0' and the default counts are '0'.
    defaults = (
        ("title", ''),
        ("average rating", '0.0'),
        ("ratings count", '0'),
        ("number pages", '0'),
        ("publication date", '1/1/1000'),
        ("publisher", ''),
        ("isbn", ''),
    )

    # create a list for the parsed and extended book records
    records = []

    # lines[1] is the markdown separator under the table header, so data
    # rows start at lines[2]
    for idx, line in enumerate(lines[2:], start=1):
        print(idx)
        # within each line use the table column markdown to split into fields
        record = line.split(' | ')

        values = [default for _, default in defaults]

        # we can only use the Goodreads API to get the ratings data if a link to the book in Goodreads exists
        if "[Goodreads]" in record[3]:
            book_id = find_book_id(record[3])
            attributes = get_book_attributes(gc, book_id)
            values = [
                default if attributes[attr] is None else attributes[attr]
                for attr, default in defaults
            ]

        record.extend(values)
        records.append(record)

    # sort by average rating (column 5 of the extended record): highest to lowest
    records.sort(key=lambda x: float(x[5]), reverse=True)

    # write the output
    write_markdown("books_ratings.md", headings, records)
    write_data("books_ratings", headings, records)
Beispiel #21
0
from goodreads import client
import json
import time
import requests

# Goodreads API credentials; both are empty strings in this file.
api_key = ''
api_token = ''
# Module-level state handed to API_json(): collected book records, collected
# titles, and the Goodreads book id at which the crawl starts.
book_list = []
all_book_title = []
book_number = 1

# Client built from the credentials above; api_token is passed in the
# position of the client secret.
gc = client.GoodreadsClient(api_key, api_token)


def API_json(gc, book_number, all_book_title, book_list):
    while (book_number < 10000000):
        try:
            book = gc.book(book_number)
            dist = book.rating_dist
            #seprator = ';'
            #KharCode = str(book.title) + seprator + str(book.authors[0]) + seprator + str(book.isbn) + seprator + str(book.language_code) + seprator + str(book.publication_date) + seprator + str(book.publisher) + seprator + str(book.num_pages) + seprator + str(book.rating_dist) + seprator + str(book.average_rating) + seprator + str(book.format) + seprator + str(book.is_ebook) + seprator + str(book.text_reviews_count) + seprator + str(book.gid) + "\n"
            book_dic = {
                "Id": str(book_number),
                "Name": str(book.title),
                "Authors": str(book.authors[0]),
                "ISBN": book.isbn,
                "Rating": float(book.average_rating),
                "PublishYear": int(book.publication_date[2]),
                "PublishMonth": int(book.publication_date[1]),
                "PublishDay": int(book.publication_date[0]),
                "Publisher": book.publisher,
Beispiel #22
0
from goodreads import client
from database import firebase
from FMRI import main_method

# Cache object and the Flask application (templates live in 'Templates').
cache = SimpleCache()
app = Flask(__name__, template_folder='Templates')
# NOTE(review): the Flask secret key is a hard-coded placeholder string.
app.secret_key = 'super secret key'
# Timestamp taken once, when this module is imported.
now = datetime.datetime.now()
# NOTE(review): Goodreads credentials (current and commented-out previous
# ones) are hard-coded in source.
#GR_KEY = 'hCgnomVsyFTNe73zLW7Q'
GR_KEY = 'HeN7az3JVPSRALsq6Jxpfg'
#GR_SECRET = 'SR9wYYUa5tzHx0vQrVZEDghDoMBjtkItshkYwVkCcQ'
GR_SECRET = 'J2qXu5Q1XSvSYusFWxEEAXeqNr72fEScyzhDk2Gx8'
# NOTE(review): both access-token values are set to the Flask secret key,
# which looks like a placeholder -- confirm before relying on them.
GR_ACCESS_TOKEN = app.secret_key
GR_ACCESS_TOKEN_SECRET = app.secret_key

# Goodreads client shared by the module.
gc = client.GoodreadsClient(GR_KEY, GR_SECRET)

# Firebase service handles: authentication, database and storage.
auth = firebase.auth()
db = firebase.database()
db_storage = firebase.storage()


@app.route('/')
def index():
    """Render the home page."""
    return render_template('home.html')


@app.route('/about')
def about():
    """Render the about page."""
    return render_template('about.html')
# Smoke-test script: configure logging, fetch book 1 from Goodreads, log its
# title, then run the client's authenticate() step.
import logging
import logging.config
# Logging setup is read from 'logging.conf' in the working directory.
logging.config.fileConfig('logging.conf')

# create logger
# NOTE(review): this named logger is created but the calls below go through
# the root-level logging.* functions instead.
logger = logging.getLogger('mainLMM')

logging.info("__________START______")
from goodreads import client
# NOTE(review): the API key and secret are hard-coded in source.
gc = client.GoodreadsClient('gOCk7w1mOcYTs97j9BmCkw',
                            'JlcnVqvmuoyznIO06huEbreIcT9mNw4qyFAbjVu3WI')
# The debug messages are in French: "apres X" means "after X".
logging.debug("apres gc =")

# Fetch the book with Goodreads id 1 and log its title.
book = gc.book(1)
logging.debug("apres gc.book")
msg = "BOKK TITLE : " + book.title
logging.info(msg)
logging.debug("apres book.title")

# Called without access tokens.
gc.authenticate()
logging.debug("apres authenticate")
Beispiel #24
0
def book_search(q, page=1, search_field='all'):
    """Return the title and average rating of the first search hit.

    Args:
        q: Query; converted with ``str()`` before searching.
        page: Result page passed to ``search_books``.
        search_field: Field passed to ``search_books``.

    Returns:
        A ``(title, average_rating)`` tuple for the first result.

    Raises:
        IndexError: If the search returns no results.
    """
    goodreads = client.GoodreadsClient(key, secret)
    query_text = str(q)
    top_hit = goodreads.search_books(query_text, page, search_field)[0]
    return top_hit.title, top_hit.average_rating
Beispiel #25
0
 def authenticate(self):
     """Create the Goodreads client and store it on ``self.auth_client``.

     The client is built from ``self._client_key`` and
     ``self._client_secret``.
     """
     key, secret = self._client_key, self._client_secret
     self.auth_client = client.GoodreadsClient(key, secret)
# Load variables from a .env file so the os.getenv() calls below can see them.
load_dotenv()

# Settings read from the environment; each is None when the variable is unset.
DISCORD_TOKEN = os.getenv('DISCORD_TOKEN')
GOOD_READS_KEY = os.getenv('GOOD_READS_KEY')
GOOD_READS_SECRET = os.getenv('GOOD_READS_SECRET')
FOLLOWERS_LIST_HANDLE = os.getenv('FOLLOWERS_LIST_HANDLE')
# Title and introduction text of the bot's help message (no placeholders are
# interpolated even though the literals are f-strings).
HELP_TEXT_TITLE = f'''grdbot: The handsomest D-Bot for accessing GoodReads'''
HELP_TEXT_HEADER = f'''
This is the GoodReads D-bot speaking, thanks for forcing me to be your slave.
Feel free to exploit my labor in other servers, [Invite Link](https://discord.com/api/oauth2/authorize?client_id=753716371992608828&permissions=26624&scope=bot)

I currently respond to the following commands:

'''

# Goodreads client built from the environment credentials.
good_reads_client = client.GoodreadsClient(GOOD_READS_KEY, GOOD_READS_SECRET)

# Shared module-level objects: the command queue (given the Goodreads
# client), the Discord client and the follow manager.
command_q = commands.CommandQueue(good_reads_client)
discord_client = discord.Client()
follow_manager = FollowManager()

COMMANDS = {
    '\\activity': {
        'args': ['[username]'],
        'description':
        'Will return the most recent GoodReads activity for the given user.',
        'error': commands.ACTIVITY_COMMAND_ERROR,
        'fx': commands.activity_command,
        'name': 'activity',
        'test': test.run_activity_command_test
    },
Beispiel #27
0
# Shelf names that are reported under one canonical genre name.
_GENRE_ALIASES = {
    'academia': 'academic',
    'classics': 'classic',
    'classical': 'classic',
    'nonfiction': 'non-fiction',
    'scifi': 'sci-fi',
    'science-fiction': 'sci-fi',
}

# Combined shelf names that stand for two separate genres.
_COMPOSITE_GENRES = {
    'mystery-thriller': ('thriller', 'mystery'),
    'sci-fi-fantasy': ('sci-fi', 'fantasy'),
}


def _canonical_genres(shelf_names, valid_genres):
    """Return the sorted, de-duplicated canonical genres found in *shelf_names*."""
    shelves = set(shelf_names)
    found = set()
    for genre in valid_genres:
        if genre not in shelves:
            continue
        if genre in _COMPOSITE_GENRES:
            found.update(_COMPOSITE_GENRES[genre])
        else:
            found.add(_GENRE_ALIASES.get(genre, genre))
    return sorted(found)


def get_the_genre(id):
    """Return the genres of a Goodreads book, derived from its popular shelves.

    Shelf names that match the list of valid genres are kept, spelling
    variants are mapped to one canonical name (e.g. 'scifi' -> 'sci-fi'),
    and combined shelves such as 'mystery-thriller' are split into their
    parts.

    Args:
        id: Goodreads book id passed to ``gc.book``.

    Returns:
        A sorted list of unique genre names. An empty list is returned, and
        a message printed, when the lookup fails for any reason.
    """
    #list of all possible valid genres
    # NOTE(review): 'langauge' looks like a misspelling of 'language'; it is
    # kept as-is because it is matched against shelf names -- confirm.
    genre_list = [
        'academia', 'academic', 'adult', 'adventure', 'biography', 'classic',
        'classical', 'classics', 'counselling', 'crime', 'fantasy', 'fiction',
        'historical', 'horror', 'humor', 'langauge', 'law', 'mystery',
        'mystery-thriller', 'non-fiction', 'nonfiction', 'over-18', 'religion',
        'romance', 'sci-fi', 'sci-fi-fantasy', 'science', 'science-fiction',
        'scifi', 'self-help', 'social', 'thriller', 'tourism', 'travel', 'war',
        'young-adult'
    ]

    try:
        # NOTE(review): the API key and secret are hard-coded in source.
        gc = client.GoodreadsClient(
            'UWKOBM9QcXAw8V1TzcI62g',
            'XX8aOKqAjebooLjSWK3Rx8gn2MauCjWgy9IGAIEG4')
        book = gc.book(id)
        shelves_list = [str(val) for val in book.popular_shelves]
        return _canonical_genres(shelves_list, genre_list)
    except Exception:
        # Best effort: report and fall back to "no genres". Unlike a bare
        # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        print("There is an error getting genre")
        return []
Beispiel #28
0
import time
import pandas as pd
from goodreads import client

# Settings for data scraping
restart = False        # when True, all classification columns are reset below
write = True
classify_count = 25
backup_cycle = 1000
start_index = 250000

# current time, used to track run time.
t0 = time.time()

# initialize client for goodreads
# NOTE(review): credentials are hard-coded, and the same key/secret pair is
# also passed to authenticate() -- confirm that is intended.
gc = client.GoodreadsClient('fm1h6CpTTcPHEgwUAvOD1w',
                            'l62WO2kz3lmJifV7VkGZY4SAcWzkjtUw3Unj7zjE')
gc.authenticate('fm1h6CpTTcPHEgwUAvOD1w',
                'l62WO2kz3lmJifV7VkGZY4SAcWzkjtUw3Unj7zjE')

# load in datafiles
# NOTE(review): `error_bad_lines` is deprecated in recent pandas releases in
# favour of `on_bad_lines` -- check the pandas version this runs on.
df = pd.read_csv('Compiled_Data.csv', low_memory=False)
settings = pd.read_csv('Settings.csv', error_bad_lines=False, low_memory=False)

# restart classification data if desired: every column named in
# settings['Genres'] and the 'Classified' flag are set to False for all rows
if restart:
    for genre in settings['Genres']:
        df[genre] = False
    df['Classified'] = False

#pulls data from goodreads website
#starting settings
Beispiel #29
0
import os

from goodreads import client

# Module-level Goodreads client. Key and secret come from the environment;
# os.environ.get() yields None for any variable that is not set.
CLIENT = client.GoodreadsClient(
    os.environ.get('GOODREADS_KEY'),
    os.environ.get('GOODREADS_SECRET'),
)
# Authenticate at import time with an access token pair that is also read
# from the environment.
CLIENT.authenticate(
    access_token=os.environ.get('GOODREADS_ACCESS_TOKEN'),
    access_token_secret=os.environ.get('GOODREADS_ACCESS_SECRET'),
)
# Result of CLIENT.user() for the authenticated client, fetched once at import.
USER = CLIENT.user()
import os
import pickle
from pprint import pprint as pp
import sys
import xml.dom.minidom
import xmltodict

from goodreads import client

from authorize import authorize

# Consumer credentials come from the environment (None when unset).
CONSUMER_KEY = os.environ.get('GR_KEY')
CONSUMER_SECRET = os.environ.get('GR_SECRET')
# Path of the pickled session file.
SESSION = 'session'

# Reuse a previously pickled session when one exists, otherwise run the
# authorization flow.
# NOTE(review): pickle.load executes code embedded in the file; only load a
# session file this program wrote itself.
try:
    # The context manager closes the file handle, which was previously
    # leaked by passing open(...) straight to pickle.load.
    with open(SESSION, "rb") as session_file:
        session = pickle.load(session_file)
except FileNotFoundError:
    session = authorize()

ACCESS_TOKEN = session.access_token
ACCESS_TOKEN_SECRET = session.access_token_secret

gc = client.GoodreadsClient(CONSUMER_KEY, CONSUMER_SECRET)
gc.authenticate(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# Fetch the friends' updates feed through the session, convert the XML to a
# dict and pretty-print it.
resp = session.get("/updates/friends.xml")
#xml_out = xml.dom.minidom.parseString(resp.content)
d = xmltodict.parse(resp.content)
pp(d)