def get_goodreads_details(url):
    """Given a goodreads URL, get title, creator, etc.

    ``.xml`` and the API key from ``GOODREADS['KEY']`` are appended to *url*,
    the resulting document is fetched with ``urllib2`` and parsed with
    ``objectify``.

    Returns a dict that always contains ``format`` (``'book'``) and, when the
    matching elements are present in the response, ``title``, ``creator``,
    ``pub_date``, ``isbn`` and ``measurement_page_numeric``.

    Errors raised by the HTTP request or the XML parser propagate to the
    caller.
    """
    # Look for something like
    # http://www.goodreads.com/book/show/40694.Ray_Bradbury_s_Fahrenheit_451
    # TODO: we should do some validation/error handling here
    details = {}

    query = urllib.urlencode({'key': GOODREADS['KEY']})
    request = urllib2.Request(url + '.xml' + '?' + query)

    response = urllib2.urlopen(request)
    try:
        raw_response = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    root = objectify.fromstring(raw_response)
    book = root.book

    details['format'] = 'book'

    if hasattr(book, 'title'):
        details['title'] = book.title

    if hasattr(book.authors.author, 'name'):
        details['creator'] = book.authors.author.name

    if hasattr(book.work, 'original_publication_year'):
        details['pub_date'] = get_year_from_raw_date(
            book.work.original_publication_year)

    if hasattr(book, 'isbn'):
        details['isbn'] = book.isbn

    # An empty <num_pages/> element has ``.text`` set to None, so calling
    # len() on it would raise TypeError; a truthiness test covers both None
    # and the empty string.
    if hasattr(book, 'num_pages') and book.num_pages.text:
        details['measurement_page_numeric'] = book.num_pages

    return details
def get_goodreads_details(url):
    """Given a goodreads URL, get title, creator, etc.

    ``.xml`` and the API key from ``GOODREADS['KEY']`` are appended to *url*,
    the resulting document is fetched with ``urllib2`` and parsed with
    ``objectify``.

    Returns a dict that always contains ``format`` (``'book'``) and, when the
    matching elements are present in the response, ``title``, ``creator``,
    ``pub_date``, ``isbn`` and ``measurement_page_numeric``.

    Errors raised by the HTTP request or the XML parser propagate to the
    caller.
    """
    # Look for something like
    # http://www.goodreads.com/book/show/40694.Ray_Bradbury_s_Fahrenheit_451
    # TODO: we should do some validation/error handling here
    details = {}

    query = urllib.urlencode({'key': GOODREADS['KEY']})
    request = urllib2.Request(url + '.xml' + '?' + query)

    response = urllib2.urlopen(request)
    try:
        raw_response = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    root = objectify.fromstring(raw_response)
    book = root.book

    details['format'] = 'book'

    if hasattr(book, 'title'):
        details['title'] = book.title

    if hasattr(book.authors.author, 'name'):
        details['creator'] = book.authors.author.name

    if hasattr(book.work, 'original_publication_year'):
        details['pub_date'] = get_year_from_raw_date(
            book.work.original_publication_year)

    if hasattr(book, 'isbn'):
        details['isbn'] = book.isbn

    # An empty <num_pages/> element has ``.text`` set to None, so calling
    # len() on it would raise TypeError; a truthiness test covers both None
    # and the empty string.
    if hasattr(book, 'num_pages') and book.num_pages.text:
        details['measurement_page_numeric'] = book.num_pages

    return details
def get_amazon_details(url):
    """Given an Amazon URL, get title, creator, etc.

    The ASIN is taken from the first 10-character ``[A-Z0-9]`` path segment
    of *url* and looked up with ``bottlenose`` (``ItemLookup`` with the
    ``Large`` response group), using the credentials in ``AMZ``.

    Returns a dict that always contains ``format`` and, when the matching
    item attributes are present, ``creator``, ``title``, ``isbn``,
    ``measurement_page_numeric``, ``measurement_height_numeric`` and
    ``pub_date``.

    Raises ``ValueError`` if no ASIN can be found in *url*. Errors from the
    lookup or the XML parser propagate to the caller.
    """
    matches = re.search(r'\/([A-Z0-9]{10})($|\/)', url)
    if matches is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('No ASIN found in Amazon URL: {0}'.format(url))
    asin = matches.group(1)

    details = {}

    amazon = bottlenose.Amazon(
        AMZ['KEY'], AMZ['SECRET_KEY'], AMZ['ASSOCIATE_TAG'])
    response = amazon.ItemLookup(
        ItemId=asin, ResponseGroup="Large", IdType="ASIN")

    root = objectify.fromstring(response)
    attrs = root.Items.Item.ItemAttributes

    # The first creator-like attribute that is present wins.
    if hasattr(attrs, 'Author'):
        details['creator'] = attrs.Author
    elif hasattr(attrs, 'Artist'):
        details['creator'] = attrs.Artist
    elif hasattr(attrs, 'Director'):
        details['creator'] = attrs.Director

    if hasattr(attrs, 'Title'):
        details['title'] = attrs.Title

    if hasattr(attrs, 'ISBN'):
        details['isbn'] = attrs.ISBN.text

    if hasattr(attrs, 'NumberOfPages'):
        details['measurement_page_numeric'] = attrs.NumberOfPages

    if (hasattr(attrs, 'PackageDimensions')
            and hasattr(attrs.PackageDimensions, 'Length')):
        amz_length = int(attrs.PackageDimensions['Length'])
        # NOTE(review): presumably Length is reported in hundredths of an
        # inch, which would make this value centimetres -- confirm units.
        height = '{0:.2g}'.format((amz_length / 100.0) * 2.54)
        details['measurement_height_numeric'] = height

    if hasattr(attrs, 'ProductGroup'):
        product_group = attrs.ProductGroup.text
        if product_group == 'Music':
            details['format'] = 'Sound Recording'
        elif product_group == 'DVD':
            details['format'] = 'Video/Film'
        else:
            details['format'] = 'book'

    if hasattr(attrs, 'PublicationDate'):
        details['pub_date'] = get_year_from_raw_date(
            attrs.PublicationDate.text)

    return details
def get_amazon_details(url):
    """Given an Amazon URL, get title, creator, etc.

    The ASIN is taken from the first 10-character ``[A-Z0-9]`` path segment
    of *url* and looked up with ``bottlenose`` (``ItemLookup`` with the
    ``Large`` response group), using the credentials in ``AMZ``.

    Returns a dict that always contains ``format`` and, when the matching
    item attributes are present, ``creator``, ``title``, ``isbn``,
    ``measurement_page_numeric``, ``measurement_height_numeric`` and
    ``pub_date``.

    Raises ``ValueError`` if no ASIN can be found in *url*. Errors from the
    lookup or the XML parser propagate to the caller.
    """
    matches = re.search(r'\/([A-Z0-9]{10})($|\/)', url)
    if matches is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('No ASIN found in Amazon URL: {0}'.format(url))
    asin = matches.group(1)

    details = {}

    amazon = bottlenose.Amazon(
        AMZ['KEY'], AMZ['SECRET_KEY'], AMZ['ASSOCIATE_TAG'])
    response = amazon.ItemLookup(
        ItemId=asin, ResponseGroup="Large", IdType="ASIN")

    root = objectify.fromstring(response)
    attrs = root.Items.Item.ItemAttributes

    # The first creator-like attribute that is present wins.
    if hasattr(attrs, 'Author'):
        details['creator'] = attrs.Author
    elif hasattr(attrs, 'Artist'):
        details['creator'] = attrs.Artist
    elif hasattr(attrs, 'Director'):
        details['creator'] = attrs.Director

    if hasattr(attrs, 'Title'):
        details['title'] = attrs.Title

    if hasattr(attrs, 'ISBN'):
        details['isbn'] = attrs.ISBN.text

    if hasattr(attrs, 'NumberOfPages'):
        details['measurement_page_numeric'] = attrs.NumberOfPages

    if (hasattr(attrs, 'PackageDimensions')
            and hasattr(attrs.PackageDimensions, 'Length')):
        amz_length = int(attrs.PackageDimensions['Length'])
        # NOTE(review): presumably Length is reported in hundredths of an
        # inch, which would make this value centimetres -- confirm units.
        height = '{0:.2g}'.format((amz_length / 100.0) * 2.54)
        details['measurement_height_numeric'] = height

    if hasattr(attrs, 'ProductGroup'):
        product_group = attrs.ProductGroup.text
        if product_group == 'Music':
            details['format'] = 'Sound Recording'
        elif product_group == 'DVD':
            details['format'] = 'Video/Film'
        else:
            details['format'] = 'book'

    if hasattr(attrs, 'PublicationDate'):
        details['pub_date'] = get_year_from_raw_date(
            attrs.PublicationDate.text)

    return details
def get_musicbrainz_details(url):
    """Given a musicbrainz URL, get title, creator, etc.

    The resource type and id are extracted from *url* and looked up through
    the ``http://musicbrainz.org/ws/2/`` web service with ``inc=artists``.

    Returns a dict that always contains ``format`` (``'Sound Recording'``)
    and, when the matching elements are present in the response, ``title``,
    ``creator`` and ``pub_date``.

    Raises ``ValueError`` if *url* does not look like a musicbrainz resource
    URL. Errors from the HTTP request or the XML parser propagate to the
    caller.
    """
    # Look for something like
    # http://musicbrainz.org/release-group/20e845b1-c6b5-44f7-ab7e-cbc0e33767b5
    # or http://musicbrainz.org/release/b21c3cb7-a82d-4d9d-aef7-56569ca734be
    # See http://musicbrainz.org/doc/XML_Web_Service/Version_2#Lookups
    matches = re.search(r'musicbrainz\.org/([a-zA-Z\-]+)/([0\w\-]+)', url)
    if matches is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('Not a musicbrainz resource URL: {0}'.format(url))
    mb_resource = matches.group(1)
    mb_id = matches.group(2)

    details = {}

    query = urllib.urlencode({'inc': 'artists'})
    lookup_url = ('http://musicbrainz.org/ws/2/' + mb_resource + '/' + mb_id
                  + '?' + query)

    response = urllib2.urlopen(urllib2.Request(lookup_url))
    try:
        raw_response = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    tree = ElementTree(fromstring(raw_response))
    namespace = "{http://musicbrainz.org/ns/mmd-2.0#}"

    title_element = tree.find('*/{0}title'.format(namespace))
    name_element = tree.find(
        '*/{0}artist-credit/{0}name-credit/{0}artist/{0}name'.format(
            namespace))
    date_element = tree.find('*/{0}date'.format(namespace))

    # If we don't find the date (date element only occurs in the release
    # type), try the first-release-date.
    # TODO: There's probably a clever xpath way to do this
    if date_element is None:
        date_element = tree.find('*/{0}first-release-date'.format(namespace))

    details['format'] = 'Sound Recording'

    # find() returns None for a missing element; skip those keys rather than
    # crashing on ``None.text``, matching how the other lookups in this file
    # return partial results.
    if title_element is not None:
        details['title'] = title_element.text
    if name_element is not None:
        details['creator'] = name_element.text
    if date_element is not None:
        details['pub_date'] = get_year_from_raw_date(date_element.text)

    return details
def get_musicbrainz_details(url):
    """Given a musicbrainz URL, get title, creator, etc.

    The resource type and id are extracted from *url* and looked up through
    the ``http://musicbrainz.org/ws/2/`` web service with ``inc=artists``.

    Returns a dict that always contains ``format`` (``'Sound Recording'``)
    and, when the matching elements are present in the response, ``title``,
    ``creator`` and ``pub_date``.

    Raises ``ValueError`` if *url* does not look like a musicbrainz resource
    URL. Errors from the HTTP request or the XML parser propagate to the
    caller.
    """
    # Look for something like
    # http://musicbrainz.org/release-group/20e845b1-c6b5-44f7-ab7e-cbc0e33767b5
    # or http://musicbrainz.org/release/b21c3cb7-a82d-4d9d-aef7-56569ca734be
    # See http://musicbrainz.org/doc/XML_Web_Service/Version_2#Lookups
    matches = re.search(r'musicbrainz\.org/([a-zA-Z\-]+)/([0\w\-]+)', url)
    if matches is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('Not a musicbrainz resource URL: {0}'.format(url))
    mb_resource = matches.group(1)
    mb_id = matches.group(2)

    details = {}

    query = urllib.urlencode({'inc': 'artists'})
    lookup_url = ('http://musicbrainz.org/ws/2/' + mb_resource + '/' + mb_id
                  + '?' + query)

    response = urllib2.urlopen(urllib2.Request(lookup_url))
    try:
        raw_response = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    tree = ElementTree(fromstring(raw_response))
    namespace = "{http://musicbrainz.org/ns/mmd-2.0#}"

    title_element = tree.find('*/{0}title'.format(namespace))
    name_element = tree.find(
        '*/{0}artist-credit/{0}name-credit/{0}artist/{0}name'.format(
            namespace))
    date_element = tree.find('*/{0}date'.format(namespace))

    # If we don't find the date (date element only occurs in the release
    # type), try the first-release-date.
    # TODO: There's probably a clever xpath way to do this
    if date_element is None:
        date_element = tree.find('*/{0}first-release-date'.format(namespace))

    details['format'] = 'Sound Recording'

    # find() returns None for a missing element; skip those keys rather than
    # crashing on ``None.text``, matching how the other lookups in this file
    # return partial results.
    if title_element is not None:
        details['title'] = title_element.text
    if name_element is not None:
        details['creator'] = name_element.text
    if date_element is not None:
        details['pub_date'] = get_year_from_raw_date(date_element.text)

    return details