def get_json(topic): r = requests.get("http://itunes.apple.com/search?term=" + topic + "&entity=software&limit=300") j = simplejson.loads(r.content) for i in j["results"]: if not Apps.objects.filter(name=get_name(i)): if (i["primaryGenreName"] in ["Education", "Reference"]) or ( ("Education" in i["genres"]) and (i["primaryGenreName"] in acceptable_topics) ): try: m1 = Apps( name=get_name(i), description=get_description(i), creator=get_creator(i), subject=i["primaryGenreName"], price=get_price(i), rating=get_rating(i), artwork=get_artwork(i), link=get_link(i), ) # date_added=datetime.datetime.now()) <-- check if needed m1.save() print get_name(i) + " was added to the database! ", topic except: print get_name(i), get_link(i) raise else: print get_name(i) + " is not an Education app! " + str(i["genres"])
def add_update(link, val):
    """Scrape the app page at ``link`` and add or update its database row.

    The page is fetched with ``get_soup`` and its fields are read with the
    ``get_*`` helpers. Apps whose subject is ``'NotEducation'`` are reported
    and left out of the database.

    Args:
        link: URL of the app page.
        val: ``0`` to insert a new ``Apps`` row (stamped with the current
            time), ``1`` to update the rows whose ``link`` matches. Any other
            value only prints a complaint.
    """
    # future: find downloads, age, comment
    page = get_soup(link)
    name = get_name(page)
    creator = get_creator(page)
    platform = get_platform(page)
    subject = get_subject(page)

    # change to "creator == 'NotEducation', set in classifier"
    if subject == 'NotEducation':
        print("The app '%s' is not Education-related and was not added to the database!" % name)
        return

    # Fields shared by the insert and the update path.
    fields = dict(
        name=name,
        platform=platform,
        creator=creator,
        subject=subject,
        price=get_price(page),
        rating=get_rating(page),
        artwork=get_artwork(page),
        link=link,
    )

    if val == 0:
        record = Apps(date_added=datetime.datetime.now(), **fields)
        record.save()
        print("The app '%s' was successfully added to the database!" % name)
    elif val == 1:
        Apps.objects.filter(link=link).update(**fields)
        print("The app '%s' was successfully updated!" % name)
    else:
        print("What the hell are you doing with your code!?")
def get_json(topic): r = requests.get('http://itunes.apple.com/search?term=' + topic + '&entity=software&limit=300') j = simplejson.loads(r.content) for i in j['results']: if not Apps.objects.filter(name=get_name(i)): if (i['primaryGenreName'] in ['Education', 'Reference']) or ( ('Education' in i['genres']) and (i['primaryGenreName'] in acceptable_topics)): try: m1 = Apps(name=get_name(i), description=get_description(i), creator=get_creator(i), subject=i['primaryGenreName'], price=get_price(i), rating=get_rating(i), artwork=get_artwork(i), link=get_link(i)) # date_added=datetime.datetime.now()) <-- check if needed m1.save() print get_name(i) + " was added to the database! ", topic except: print get_name(i), get_link(i) raise else: print get_name(i) + " is not an Education app! " + str( i['genres'])
def add_to_db(d):
    """Insert every app described in ``d`` that is not stored yet.

    Args:
        d: Mapping of app name to a dict providing the keys ``platform``,
            ``creator``, ``subject``, ``price``, ``rating``, ``artwork`` and
            ``link``.

    Apps whose name already exists in the database are reported and skipped.
    """
    for entry in d:
        # exists() avoids loading the matching rows just to test whether
        # there are any.
        if Apps.objects.filter(name=entry).exists():
            print("The app '%s' is already in the database!" % entry)
            continue

        info = d[entry]
        m1 = Apps(
            name=entry,
            platform=info['platform'],
            creator=info['creator'],
            subject=info['subject'],
            price=info['price'],
            rating=info['rating'],
            artwork=info['artwork'],
            link=info['link'],
        )
        m1.save()
def add_update(link, val):
    """Scrape the app page at ``link`` and add or update its database row.

    The page is fetched with ``get_soup`` and its fields are read with the
    ``get_*`` helpers. Apps whose subject is ``'NotEducation'`` are reported
    and left out of the database.

    Args:
        link: URL of the app page.
        val: ``0`` to insert a new ``Apps`` row (stamped with the current
            time), ``1`` to update the rows whose ``link`` matches. Any other
            value only prints a complaint.
    """
    # future: find downloads, age, comment
    soup = get_soup(link)
    name = get_name(soup)
    creator = get_creator(soup)
    platform = get_platform(soup)
    subject = get_subject(soup)

    # change to "creator == 'NotEducation', set in classifier"
    if subject == 'NotEducation':
        print("The app '%s' is not Education-related and was not added to the database!" % name)
        return

    price = get_price(soup)
    rating = get_rating(soup)
    artwork = get_artwork(soup)

    if val == 0:
        m1 = Apps(name=name, platform=platform, creator=creator,
                  subject=subject, price=price, rating=rating,
                  artwork=artwork, link=link,
                  date_added=datetime.datetime.now())
        m1.save()
        print("The app '%s' was successfully added to the database!" % name)
    elif val == 1:
        # update() is a QuerySet method; a model instance returned by
        # objects.get() does not have it, so the rows must be selected with
        # filter() for the update to run at all.
        Apps.objects.filter(link=link).update(
            name=name, platform=platform, creator=creator, subject=subject,
            price=price, rating=rating, artwork=artwork, link=link)
        print("The app '%s' was successfully updated!" % name)
    else:
        print("What the hell are you doing with your code!?")
def page_to_db(link):
    """Scrape the app page at ``link`` and store it as a new ``Apps`` row.

    The page is downloaded and parsed, then name, creator, platform, subject,
    price, rating and artwork are extracted one after another. Extraction is
    best effort: when a field cannot be read a message is printed and a
    fallback is stored instead (``link`` for the name, ``''`` for creator,
    platform, subject and artwork, ``None`` for price and rating). The row is
    always saved, with ``crawl_binary=0`` and the current time as
    ``date_added``.

    Args:
        link: URL of the app page to scrape.
    """
    # future: downloads, age, comment
    foobar = requests.get(link).text
    soup = BeautifulSoup(foobar)

    # get_name
    try:
        a = re.compile(r'<title>.+</title>').findall(str(soup))
        title = a[0]
        begin, end = title.index('>') + 1, title.index('<', 1)
        full_name = title[begin:end]
        if 'App Store - ' in full_name:
            name = full_name[full_name.index('Store - ') + 8:]
        elif ' for iP' in full_name:
            name = full_name[:full_name.index(' for iP')]
        else:
            name = full_name
    except (AttributeError, IndexError, ValueError, TypeError):
        # IndexError: no <title>...</title> match was found on the page.
        name = link
        print("Error retrieving name from %s." % link)

    # get_creator
    try:
        a = soup.find('div', {'class': 'lockup product application'})
        b = str(a.find('li', {'class': 'copyright'}).previous_sibling.get_text())
        creator = b[8:]  # drop the first 8 characters of the element text
    except (AttributeError, ValueError, TypeError):
        creator = ''
        print("Error retrieving creator for %s." % name)

    # get_platform
    try:
        tag_text = re.compile(r'>[^<]*<')
        pills = soup.find_all('a', {'metrics-loc': 'Pill_'})
        platform = '/'.join(
            tag_text.findall(str(pill))[0].lstrip('>').rstrip('<')
            for pill in pills
        )
    except (AttributeError, IndexError, ValueError, TypeError):
        # IndexError: a matching <a> element had no ">text<" section.
        platform = ''
        print("Error retrieving platform for %s." % name)

    # get_subject
    try:
        a = soup.find('div', {'class': 'lockup product application'})
        subject = str(a.find('span', {'class': 'label'}).next_sibling.get_text())
    except (AttributeError, ValueError, TypeError):
        subject = ''
        print("Error retrieving subject for %s." % name)

    # get_price
    try:
        price_text = soup.find('div', {'class': 'price'}).get_text()
        try:
            price = float(price_text[1:])  # get rid of $ in front
        except ValueError:
            # ("Free" or "free") evaluates to just "Free", so compare
            # case-insensitively to really accept both spellings.
            if price_text.strip().lower() == 'free':
                price = float(0)
            else:
                price = None
                print("Error retrieving float-type price for %s." % name)
    except (AttributeError, TypeError):
        price = None
        print("Error retrieving price for %s." % name)

    # get_rating
    rating_re = re.compile(r'[0-5]\.?[0-9]*')
    try:
        rating_text = soup.find('div', {'class': 'rating'}).get('aria-label')
        rating_num = rating_re.findall(rating_text)
        rating = float(rating_num[0])
    except AttributeError:
        # No <div class="rating">: try the alternative "app-rating" markup.
        # A failure of this fallback is silent.
        try:
            rating_text = soup.find('div', {'class': 'app-rating'}).a.get_text()
            rating_num = rating_re.findall(rating_text)
            rating = float(rating_num[0])
        except (AttributeError, IndexError, TypeError, ValueError):
            rating = None
    except (IndexError, ValueError, TypeError):
        rating = None
        print("Error retrieving rating for %s." % name)

    # get_artwork
    try:
        artwork = str(soup.find('img', {'class': 'artwork'}).get('src'))
    except (AttributeError, ValueError, TypeError):
        artwork = ''
        print("Error retrieving artwork for %s." % name)

    m1 = Apps(name=name, platform=platform, creator=creator, subject=subject,
              price=price, rating=rating, artwork=artwork, link=link,
              crawl_binary=0, date_added=datetime.datetime.now())
    m1.save()
    print("The app '%s' was successfully added to the database!" % name)
def page_to_db(link):
    """Scrape the app page at ``link`` and store it as a new ``Apps`` row.

    The page is downloaded and parsed, then name, creator, platform, subject,
    price, rating and artwork are extracted one after another. Extraction is
    best effort: when a field cannot be read a message is printed and a
    fallback is stored instead (``link`` for the name, ``''`` for creator,
    platform, subject and artwork, ``None`` for price and rating). The row is
    always saved, with ``crawl_binary=0`` and the current time as
    ``date_added``.

    Args:
        link: URL of the app page to scrape.
    """
    # future: downloads, age, comment
    foobar = requests.get(link).text
    soup = BeautifulSoup(foobar)

    # get_name
    try:
        a = re.compile(r'<title>.+</title>').findall(str(soup))
        title = a[0]
        begin, end = title.index('>') + 1, title.index('<', 1)
        full_name = title[begin:end]
        if 'App Store - ' in full_name:
            name = full_name[full_name.index('Store - ') + 8:]
        elif ' for iP' in full_name:
            name = full_name[:full_name.index(' for iP')]
        else:
            name = full_name
    except (AttributeError, IndexError, ValueError, TypeError):
        # IndexError: no <title>...</title> match was found on the page.
        name = link
        print("Error retrieving name from %s." % link)

    # get_creator
    try:
        a = soup.find('div', {'class': 'lockup product application'})
        b = str(a.find('li', {'class': 'copyright'}).previous_sibling.get_text())
        creator = b[8:]  # drop the first 8 characters of the element text
    except (AttributeError, ValueError, TypeError):
        creator = ''
        print("Error retrieving creator for %s." % name)

    # get_platform
    try:
        tag_text = re.compile(r'>[^<]*<')
        pills = soup.find_all('a', {'metrics-loc': 'Pill_'})
        platform = '/'.join(
            tag_text.findall(str(pill))[0].lstrip('>').rstrip('<')
            for pill in pills
        )
    except (AttributeError, IndexError, ValueError, TypeError):
        # IndexError: a matching <a> element had no ">text<" section.
        platform = ''
        print("Error retrieving platform for %s." % name)

    # get_subject
    try:
        a = soup.find('div', {'class': 'lockup product application'})
        subject = str(a.find('span', {'class': 'label'}).next_sibling.get_text())
    except (AttributeError, ValueError, TypeError):
        subject = ''
        print("Error retrieving subject for %s." % name)

    # get_price
    try:
        price_text = soup.find('div', {'class': 'price'}).get_text()
        try:
            price = float(price_text[1:])  # get rid of $ in front
        except ValueError:
            # ("Free" or "free") evaluates to just "Free", so compare
            # case-insensitively to really accept both spellings.
            if price_text.strip().lower() == 'free':
                price = float(0)
            else:
                price = None
                print("Error retrieving float-type price for %s." % name)
    except (AttributeError, TypeError):
        price = None
        print("Error retrieving price for %s." % name)

    # get_rating
    rating_re = re.compile(r'[0-5]\.?[0-9]*')
    try:
        rating_text = soup.find('div', {'class': 'rating'}).get('aria-label')
        rating_num = rating_re.findall(rating_text)
        rating = float(rating_num[0])
    except AttributeError:
        # No <div class="rating">: try the alternative "app-rating" markup.
        # A failure of this fallback is silent.
        try:
            rating_text = soup.find('div', {'class': 'app-rating'}).a.get_text()
            rating_num = rating_re.findall(rating_text)
            rating = float(rating_num[0])
        except (AttributeError, IndexError, TypeError, ValueError):
            rating = None
    except (IndexError, ValueError, TypeError):
        rating = None
        print("Error retrieving rating for %s." % name)

    # get_artwork
    try:
        artwork = str(soup.find('img', {'class': 'artwork'}).get('src'))
    except (AttributeError, ValueError, TypeError):
        artwork = ''
        print("Error retrieving artwork for %s." % name)

    m1 = Apps(name=name, platform=platform, creator=creator, subject=subject,
              price=price, rating=rating, artwork=artwork, link=link,
              crawl_binary=0, date_added=datetime.datetime.now())
    m1.save()
    print("The app '%s' was successfully added to the database!" % name)