Example no. 1
def main():
    # Attempt to retrieve and process the data from the Unofficial Hacker News API
    for i in range(RETRY_ATTEMPTS + 1):
        try:
            response = urllib2.urlopen(HACKER_NEWS_API_URL)
            status_code = response.code
        except urllib2.HTTPError as e:
            status_code = e.code

        # If the service errored, hit it again
        if status_code != 200:
            if i < RETRY_ATTEMPTS:
                print("An error occurred while retrieving the data, retrying (%d)..." % (i + 1), file=sys.stderr)
            continue

        # If everything went ok, try to load the data
        try:
            items = json.load(response)["items"]
            break
        except ValueError as e:
            if i < RETRY_ATTEMPTS:
                print("An error occurred while loading the data, retrying (%d)..." % (i + 1), file=sys.stderr)
            continue
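    # The else clause only runs if the for loop exhausted every attempt without a break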
    else:
        sys.exit("Too many errors occurred while attempting to retrieve the data")

    # Add the stories to the database
    moderator = User.objects.get(username=USERNAME)
    for item in items:
        story = Story(title=item["title"], url=item["url"], points=item["points"], moderator=moderator)
        story.save()
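        # Set created_at after the first save and save again; presumably this avoids the
        # model's own default (e.g. auto_now_add) overwriting the timestamp from the API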
        story.created_at = created_at(item)
        story.save()
Example no. 2
def save_item(json_item):
	global STORY_COUNT
	# Add the stories to the database
	moderator = User.objects.get(username='******')

	for item in json_item:
		STORY_COUNT += 1
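		# Each Reddit listing item nests its fields under 'data' (title, permalink, created_utc)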
		print item['data']['title'].encode('utf-8')
		print "http://www.reddit.com" + item['data']['permalink'].encode('utf-8')
		print datetime.utcfromtimestamp(item['data']['created_utc']).strftime('%Y-%m-%d %H:%M:%S.000000+00:00'), '\n'

		story = Story(title=item['data']['title'],
			url="http://www.reddit.com" + item['data']['permalink'],
			points=1,
			moderator=moderator)
		story.save()
		story.created_at = datetime.utcfromtimestamp(item['data']['created_utc']).strftime('%Y-%m-%d %H:%M:%S.000000+00:00')
		story.save()
Example no. 3
def main():
    # Get the top 100 story IDs as a list of ints
    top_stories_ids = get_data(HACKER_NEWS_API_TOP_STORIES_URL)

    # Look up the moderator once, then fetch & load the JSON data of the top 30 stories by their IDs
    moderator = User.objects.get(username=USERNAME)
    for story_id in top_stories_ids[:30]:
        story = get_data(HACKER_NEWS_API_STORY_URL % story_id)
        story_created_at = created_at(story['time'])

        # Add the story to the database
        story = Story(
            title=story['title'],
            url=story['url'],
            points=story['score'],
            moderator=moderator,)
        story.save()
        story.created_at = story_created_at
        story.save()
Example no. 4
def main():
    # Attempt to retrieve and process the data from the Unofficial Hacker News API
    for i in range(RETRY_ATTEMPTS + 1):
        try:
            response = urlopen(HACKER_NEWS_API_URL)
            status_code = response.code
        except HTTPError as e:
            status_code = e.code
            print(e.code, file=sys.stderr)

        # If the service errored, hit it again
        if status_code != 200:
            if i < RETRY_ATTEMPTS:
                print("An error occurred while retrieving the data, retrying (%d)..." % (i + 1), file=sys.stderr)
                print(status_code, file=sys.stderr)
            continue

        # If everything went ok, try to load the data
        try:
            items = json.loads(response.read().decode('utf-8'))['items']
            break
        except ValueError as e:
            if i < RETRY_ATTEMPTS:
                print("An error occurred while loading the data, retrying (%d)..." % (i + 1), file=sys.stderr)
            continue
    else:
        sys.exit("Too many errors occurred while attempting to retrieve the data")

    # Add the stories to the database
    moderator = User.objects.get(username=USERNAME)
    for item in items:
        story = Story(
            title=item['title'],
            url=item['url'],
            points=item['points'],
            moderator=moderator)
        story.save()
        story.created_at = created_at(item)
        story.save()
Example no. 5
        # If everything went ok, try to load the data
        try:
            items = json.loads(response.content)['items']
            break
        except ValueError as e:
            if i < RETRY_ATTEMPTS:
                print(
                    "An error occurred while loading the data, retrying (%d)..."
                    % (i + 1),
                    file=sys.stderr)
            continue
    else:
        sys.exit(
            "Too many errors occurred while attempting to retrieve the data")

    # Add the stories to the database
    moderator = User.objects.get(username=USERNAME)
    for item in items:
        story = Story(title=item['title'],
                      url=item['url'],
                      points=item['points'],
                      moderator=moderator)
        story.save()
        story.created_at = created_at(item)
        story.save()


if __name__ == '__main__':
    main()
Example no. 6
        # If the service errored, hit it again
        if status_code != 200:
            if i < RETRY_ATTEMPTS:
                print("An error occurred while retrieving the data, retrying (%d)..." % (i + 1), file=sys.stderr)
            continue

        # If everything went ok, try to load the data
        try:
            items = json.load(response)['items']
            break
        except ValueError as e:
            if i < RETRY_ATTEMPTS:
                print("An error occurred while loading the data, retrying (%d)..." % (i + 1), file=sys.stderr)
            continue
    else:
        sys.exit("Too many errors occurred while attempting to retrieve the data")

    # Add the stories to the database
    moderator = User.objects.get(username=USERNAME)
    for item in items:
        story = Story(
            title=item['title'],
            url=item['url'],
            points=item['points'],
            moderator=moderator)
        story.save()
        story.created_at = created_at(item)
        story.save()

if __name__ == '__main__':
    main()
Example no. 7
    def handle(self, *args, **options):
        if options['flush']:
            old = Story.objects.all()
            confirm = raw_input(
                'This will delete all %d existing stories. Are you sure? [y/N] '
                % old.count())
            if confirm == 'y':
                old.delete()

        # get input_file from stdin
        input_file = fileinput.input(args)
        temp_file = tempfile.TemporaryFile()
        # save to temp storage for json parsing
        for line in input_file:
            temp_file.write(line)
        temp_file.seek(0)

        with temp_file as jsonfile:
            stories = json.load(jsonfile)
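            # Expecting a JSON object whose 'data' list holds story records
            # (Content, UserName, Employed, Latitude/Longitude, Timestamp, ...)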

            n_s, n_a = (0, 0)
            for data in stories['data']:
                story = Story(content=data.get('Content'))

                author, new_author = Author.objects.get_or_create_user(
                    user__name=data.get('UserName'))
                if new_author:
                    n_a = n_a + 1
                    author.part_time = bool(data.get('PartTime'))
                    author.employed = bool(data.get('Employed'))
                    author.employer = data.get('Employer')
                    author.occupation = data.get('Occupation')
                    if author.user.last_name.lower() == "anonymous":
                        author.anonymous = True

                    author.save()
                story.author = author

                if data.get('Truncated'):
                    story.truncated = True

                if data.get('Latitude') and data.get('Longitude'):
                    location, new_location = Location.objects.get_or_create(
                        city=data.get('City'), state=data.get('State'))
                    if new_location:
                        location.lat = data.get('Latitude')
                        location.lon = data.get('Longitude')
                        location.geocoded = True
                    location.save()
                    story.location = location
                story.save()
                if data.get('Timestamp'):
                    story.created_at = data['Timestamp']
                else:
                    # old, put it before anything else
                    story.created_at = datetime(2013, 7, 1, 0, 0)
                story.updated_at = datetime.now()
                story.save()

                n_s = n_s + 1

            self.stdout.write("imported %d stories by %d authors" % (n_s, n_a))
Example no. 8
    if timeName in ('hour', 'hours'):
        createTime = timeNow - datetime.timedelta(hours=timeValue)
    elif timeName in ('minute', 'minutes'):
        createTime = timeNow - datetime.timedelta(minutes=timeValue)
    else:
        # Anything else (e.g. 'day'/'days') falls back to days
        createTime = timeNow - datetime.timedelta(days=timeValue)

    existingUser = User.objects.filter(username=user)[:1]
    if existingUser:
        existingUser = existingUser[0]
    else:
        existingUser = User(username=user, password='******')
        existingUser.save()

    # Check if story already present
    existingStory = Story.objects.filter(url=url)[:1]
    if not existingStory:
        existingStory = Story()
    else:
        existingStory = existingStory[0]

    existingStory.title = title
    existingStory.url = url
    existingStory.points = points
    existingStory.created_at = createTime
    existingStory.moderator = existingUser
    existingStory.save()

    print title, '::', existingUser.username, '::', points
    def handle(self, *args, **options):
        if options['flush']:
            old = Story.objects.filter(employer__startswith="Walmart")
            confirm = raw_input(
                'This will delete all %d existing Walmart stories. Are you sure? [y/N] '
                % old.count())
            if confirm == 'y':
                old.delete()

        # get input_file from stdin
        input_file = fileinput.input(args)
        temp_file = tempfile.TemporaryFile()
        # save to temp storage for json parsing
        for line in input_file:
            temp_file.write(line)
        temp_file.seek(0)

        with temp_file as jsonfile:
            stories = json.load(jsonfile)

            n_s, n_a, n_l = (0, 0, 0)
            for data in stories:
                try:
                    story, new_story = Story.objects.get_or_create(
                        content=data.get('story'))
                except Story.MultipleObjectsReturned:
                    duplicates = Story.objects.filter(
                        content=data.get('story'))
                    duplicates.delete()

                    story = Story(content=data.get('story'))
                    # get_or_create never completed, so flag the replacement story as new
                    new_story = True

                first_name = unidecode(data.get('fname'))
                last_name = unidecode(data.get('lname'))

                author, new_author = Author.objects.get_or_create_user(
                    first_name=first_name, last_name=last_name)
                if new_author:
                    n_a = n_a + 1

                    if data.get('email'):
                        author.user.email = data.get('email')

                    author.user.is_active = False
                    author.user.save()

                    if data.get('store'):
                        author.employer = "Walmart #" + data.get('store')
                    else:
                        author.employer = "Walmart"

                    if data.get('associate'):
                        author.occupation = "Associate"

                    if data.get('anonymous'):
                        author.anonymous = True

                    author.save()

                story.author = author

                if data.get('zip'):
                    zipcode = data.get('zip')

                    # do zip -> city lookup inline
                    zip_lookup = requests.get("http://api.zippopotam.us/us/" +
                                              zipcode)
                    print "lookup", zipcode
                    place = zip_lookup.json().get('places', [{}])[0]
                    city = place.get('place name')
                    state = place.get('state abbreviation')
                    lat, lon = place.get('latitude'), place.get('longitude')

                    try:
                        location, new_location = Location.objects.get_or_create(
                            city__iexact=city, state=state)
                    except Location.MultipleObjectsReturned:
                        duplicate_locations = Location.objects.filter(
                            city__iexact=city, state=state)
                        stories_at_location = duplicate_locations.values_list(
                            'story', flat=True)
                        duplicate_locations.delete()

                        location = Location(city=city, state=state)
                        location.save()

                        for reset_id in stories_at_location:
                            try:
                                reset_story = Story.objects.get(id=reset_id)
                                reset_story.location = location
                                reset_story.save()
                            except Story.DoesNotExist:
                                pass

                        new_location = True

                    if new_location and lat and lon:
                        location.city = city
                        location.state = state
                        location.lat = lat
                        location.lon = lon
                        location.geocoded = True
                        location.save()

                        n_l = n_l + 1

                    story.location = location
                story.save()

                # export date from OurWalmart
                story.created_at = datetime(2013, 9, 4, 0, 0)
                story.updated_at = datetime.now()

                story.save()

                if new_story:
                    n_s = n_s + 1

            self.stdout.write(
                "imported %d stories by %d authors in %d locations" %
                (n_s, n_a, n_l))