Example #1
0
def get_json(topic):
    r = requests.get("http://itunes.apple.com/search?term=" + topic + "&entity=software&limit=300")
    j = simplejson.loads(r.content)
    for i in j["results"]:
        if not Apps.objects.filter(name=get_name(i)):
            if (i["primaryGenreName"] in ["Education", "Reference"]) or (
                ("Education" in i["genres"]) and (i["primaryGenreName"] in acceptable_topics)
            ):
                try:
                    m1 = Apps(
                        name=get_name(i),
                        description=get_description(i),
                        creator=get_creator(i),
                        subject=i["primaryGenreName"],
                        price=get_price(i),
                        rating=get_rating(i),
                        artwork=get_artwork(i),
                        link=get_link(i),
                    )
                    # date_added=datetime.datetime.now()) <-- check if needed
                    m1.save()
                    print get_name(i) + " was added to the database! ", topic
                except:
                    print get_name(i), get_link(i)
                    raise
            else:
                print get_name(i) + " is not an Education app! " + str(i["genres"])
Example #2
0
def add_update(link, val):
	# future: find downloads, age, comment
	soup = get_soup(link)

	name = get_name(soup)
	creator = get_creator(soup)
	platform = get_platform(soup)
	subject = get_subject(soup)
	if subject == 'NotEducation': # change to "creator == 'NotEducation', set in classifier"
		print "The app '%s' is not Education-related and was not added to the database!" % name
	else:
		price = get_price(soup)
		rating = get_rating(soup)
		artwork = get_artwork(soup)

		if val == 0:
			m1 = Apps(name=name, platform=platform, 
				creator=creator, subject=subject, 
				price=price, rating=rating,
				artwork=artwork, link=link,
				date_added=datetime.datetime.now())
			m1.save()
			print "The app '%s' was successfully added to the database!" % name
		elif val == 1:
			m1 = Apps.objects.filter(link=link)
			m1.update(name=name, platform=platform, 
				creator=creator, subject=subject, 
				price=price, rating=rating,
				artwork=artwork, link=link)
			print "The app '%s' was successfully updated!" % name
		else:
			print "What the hell are you doing with your code!?"
Example #3
0
def get_json(topic):
    r = requests.get('http://itunes.apple.com/search?term=' + topic +
                     '&entity=software&limit=300')
    j = simplejson.loads(r.content)
    for i in j['results']:
        if not Apps.objects.filter(name=get_name(i)):
            if (i['primaryGenreName'] in ['Education', 'Reference']) or (
                ('Education' in i['genres']) and
                (i['primaryGenreName'] in acceptable_topics)):
                try:
                    m1 = Apps(name=get_name(i),
                              description=get_description(i),
                              creator=get_creator(i),
                              subject=i['primaryGenreName'],
                              price=get_price(i),
                              rating=get_rating(i),
                              artwork=get_artwork(i),
                              link=get_link(i))
                    # date_added=datetime.datetime.now()) <-- check if needed
                    m1.save()
                    print get_name(i) + " was added to the database! ", topic
                except:
                    print get_name(i), get_link(i)
                    raise
            else:
                print get_name(i) + " is not an Education app! " + str(
                    i['genres'])
Example #4
0
def add_to_db(d):
	for entry in d:
		if not Apps.objects.filter(name=entry):
			m1 = Apps(name=entry, platform=d[entry]['platform'], 
				creator=d[entry]['creator'], subject=d[entry]['subject'], 
				price=d[entry]['price'], rating=d[entry]['rating'],
				artwork=d[entry]['artwork'], link=d[entry]['link'])
			m1.save()
		else:
			print "The app '%s' is already in the database!" % entry
Example #5
0
def add_update(link, val):
    # future: find downloads, age, comment
    soup = get_soup(link)

    name = get_name(soup)
    creator = get_creator(soup)
    platform = get_platform(soup)
    subject = get_subject(soup)
    if subject == 'NotEducation':  # change to "creator == 'NotEducation', set in classifier"
        print "The app '%s' is not Education-related and was not added to the database!" % name
    else:
        price = get_price(soup)
        rating = get_rating(soup)
        artwork = get_artwork(soup)

        if val == 0:
            m1 = Apps(name=name,
                      platform=platform,
                      creator=creator,
                      subject=subject,
                      price=price,
                      rating=rating,
                      artwork=artwork,
                      link=link,
                      date_added=datetime.datetime.now())
            m1.save()
            print "The app '%s' was successfully added to the database!" % name
        elif val == 1:
            m1 = Apps.objects.get(link=link)
            m1.update(name=name,
                      platform=platform,
                      creator=creator,
                      subject=subject,
                      price=price,
                      rating=rating,
                      artwork=artwork,
                      link=link)
            print "The app '%s' was successfully updated!" % name
        else:
            print "What the hell are you doing with your code!?"
Example #6
0
def page_to_db(link):
    """Scrape the app page at ``link`` and save it as a new ``Apps`` row.

    Name, creator, platform, subject, price, rating and artwork are parsed
    independently of each other.  When a field cannot be parsed a fallback
    is stored instead (the link for the name, '' for the other text fields,
    None for price and rating) and, in most cases, a message is printed, so
    that one unparseable field does not stop the row from being saved.

    Args:
        link: URL of the app page; also stored in the row's ``link`` field.
    """
    # downloads, age, comment

    html = requests.get(link).text
    soup = BeautifulSoup(html)

    # get_name
    try:
        a = re.compile('<title>.+</title>').findall(str(soup))
        # Raises IndexError when no <title>...</title> on one line was found.
        title = a[0]
        begin, end = title.index('>') + 1, title.index('<', 1)
        full_name = title[begin:end]
        if 'App Store - ' in full_name:
            # Keep what follows 'Store - ' (8 characters long).
            name = full_name[full_name.index('Store - ') + 8:]
        elif ' for iP' in full_name:
            name = full_name[:full_name.index(' for iP')]
        else:
            name = full_name
    except (AttributeError, IndexError, ValueError, TypeError):
        name = link
        print "Error retrieving name from %s." % link

    # get_creator
    try:
        a = soup.find('div', {'class': 'lockup product application'})
        b = str(a.find('li', {'class': 'copyright'}).previous_sibling.get_text())
        # Drops the first 8 characters -- presumably a fixed label prefix;
        # TODO confirm against a live page.
        creator = b[8:]
    except (AttributeError, ValueError, TypeError):
        creator = ''
        print "Error retrieving creator for %s." % name

    # get_platform
    try:
        tag_text = re.compile('>[^<]*<')
        pills = soup.find_all('a', {'metrics-loc': 'Pill_'})
        # Text between the first '>' and the next '<' of every pill link.
        platform = '/'.join(
            tag_text.findall(str(pill))[0].lstrip('>').rstrip('<')
            for pill in pills)
    except (AttributeError, IndexError, ValueError, TypeError):
        platform = ''
        print "Error retrieving platform for %s." % name

    # get_subject
    try:
        a = soup.find('div', {'class': 'lockup product application'})
        subject = str(a.find('span', {'class': 'label'}).next_sibling.get_text())
    except (AttributeError, ValueError, TypeError):
        subject = ''
        print "Error retrieving subject for %s." % name

    # get_price
    try:
        price_text = soup.find('div', {'class': 'price'}).get_text()
        try:
            price = float(price_text[1:])  # get rid of $ in front
        except ValueError:
            # Compare case-insensitively so both "Free" and "free" count as 0.
            if price_text.strip().lower() == "free":
                price = float(0)
            else:
                price = None
                print "Error retrieving float-type price for %s." % name
    except (AttributeError, TypeError):
        price = None
        print "Error retrieving price for %s." % name

    # get_rating
    rating_pattern = re.compile(r'[0-5]\.?[0-9]*')
    try:
        rating_text = soup.find('div', {'class': 'rating'}).get('aria-label')
        rating_num = rating_pattern.findall(rating_text)
        rating = float(rating_num[0])
    except AttributeError:
        # No 'rating' div: fall back to the 'app-rating' block's link text.
        try:
            rating_text = soup.find('div', {'class': 'app-rating'}).a.get_text()
            rating_num = rating_pattern.findall(rating_text)
            rating = float(rating_num[0])
        except (AttributeError, IndexError, TypeError, ValueError):
            rating = None
    except (IndexError, ValueError, TypeError):
        rating = None
        print "Error retrieving rating for %s." % name

    # get_artwork
    try:
        artwork = str(soup.find('img', {'class': 'artwork'}).get('src'))
    except (AttributeError, ValueError, TypeError):
        artwork = ''
        print "Error retrieving artwork for %s." % name

    m1 = Apps(name=name,
              platform=platform,
              creator=creator,
              subject=subject,
              price=price,
              rating=rating,
              artwork=artwork,
              link=link,
              crawl_binary=0,
              date_added=datetime.datetime.now())
    m1.save()

    print "The app '%s' was successfully added to the database!" % name
def page_to_db(link):
    """Scrape the app page at ``link`` and save it as a new ``Apps`` row.

    Name, creator, platform, subject, price, rating and artwork are parsed
    independently of each other.  When a field cannot be parsed a fallback
    is stored instead (the link for the name, '' for the other text fields,
    None for price and rating) and, in most cases, a message is printed, so
    that one unparseable field does not stop the row from being saved.

    Args:
        link: URL of the app page; also stored in the row's ``link`` field.
    """
    # downloads, age, comment

    html = requests.get(link).text
    soup = BeautifulSoup(html)

    # get_name
    try:
        a = re.compile('<title>.+</title>').findall(str(soup))
        # Raises IndexError when no <title>...</title> on one line was found.
        title = a[0]
        begin, end = title.index('>') + 1, title.index('<', 1)
        full_name = title[begin:end]
        if 'App Store - ' in full_name:
            # Keep what follows 'Store - ' (8 characters long).
            name = full_name[full_name.index('Store - ') + 8:]
        elif ' for iP' in full_name:
            name = full_name[:full_name.index(' for iP')]
        else:
            name = full_name
    except (AttributeError, IndexError, ValueError, TypeError):
        name = link
        print "Error retrieving name from %s." % link

    # get_creator
    try:
        a = soup.find('div', {'class': 'lockup product application'})
        b = str(
            a.find('li', {
                'class': 'copyright'
            }).previous_sibling.get_text())
        # Drops the first 8 characters -- presumably a fixed label prefix;
        # TODO confirm against a live page.
        creator = b[8:]
    except (AttributeError, ValueError, TypeError):
        creator = ''
        print "Error retrieving creator for %s." % name

    # get_platform
    try:
        tag_text = re.compile('>[^<]*<')
        pills = soup.find_all('a', {'metrics-loc': 'Pill_'})
        # Text between the first '>' and the next '<' of every pill link.
        platform = '/'.join(
            tag_text.findall(str(pill))[0].lstrip('>').rstrip('<')
            for pill in pills)
    except (AttributeError, IndexError, ValueError, TypeError):
        platform = ''
        print "Error retrieving platform for %s." % name

    # get_subject
    try:
        a = soup.find('div', {'class': 'lockup product application'})
        subject = str(
            a.find('span', {
                'class': 'label'
            }).next_sibling.get_text())
    except (AttributeError, ValueError, TypeError):
        subject = ''
        print "Error retrieving subject for %s." % name

    # get_price
    try:
        price_text = soup.find('div', {'class': 'price'}).get_text()
        try:
            price = float(price_text[1:])  # get rid of $ in front
        except ValueError:
            # Compare case-insensitively so both "Free" and "free" count as 0.
            if price_text.strip().lower() == "free":
                price = float(0)
            else:
                price = None
                print "Error retrieving float-type price for %s." % name
    except (AttributeError, TypeError):
        price = None
        print "Error retrieving price for %s." % name

    # get_rating
    rating_pattern = re.compile(r'[0-5]\.?[0-9]*')
    try:
        rating_text = soup.find('div', {'class': 'rating'}).get('aria-label')
        rating_num = rating_pattern.findall(rating_text)
        rating = float(rating_num[0])
    except AttributeError:
        # No 'rating' div: fall back to the 'app-rating' block's link text.
        try:
            rating_text = soup.find('div', {
                'class': 'app-rating'
            }).a.get_text()
            rating_num = rating_pattern.findall(rating_text)
            rating = float(rating_num[0])
        except (AttributeError, IndexError, TypeError, ValueError):
            rating = None
    except (IndexError, ValueError, TypeError):
        rating = None
        print "Error retrieving rating for %s." % name

    # get_artwork
    try:
        artwork = str(soup.find('img', {'class': 'artwork'}).get('src'))
    except (AttributeError, ValueError, TypeError):
        artwork = ''
        print "Error retrieving artwork for %s." % name

    m1 = Apps(name=name,
              platform=platform,
              creator=creator,
              subject=subject,
              price=price,
              rating=rating,
              artwork=artwork,
              link=link,
              crawl_binary=0,
              date_added=datetime.datetime.now())
    m1.save()

    print "The app '%s' was successfully added to the database!" % name