Exemple #1
0
def load_tumblr_posts():
	"""Fetch titled posts from the Tumblr API and queue them as Post rows.

	Reads the blog name and API key from the TUMBLR and TUMBLR_API_KEY
	environment variables, then pages through the blog's posts (20 per
	request) until a short page is returned, the API reports a non-200
	status, or a post whose title is already stored is encountered.

	Returns True when loading ran (or was skipped because nothing is new),
	and False when the required environment variables are missing.
	"""
	import time

	b = Blog.query.get( 1 )

	# if there's nothing new to load, skip it.  Compare as integers: the
	# previous code compared the stored value against a strftime string,
	# which is not a numeric comparison.
	# NOTE(review): assumes b.last_updated holds a Unix timestamp (it is
	# filled from the Tumblr 'updated' field by load_blog_info) -- confirm.
	if b.posts_last_updated and b.last_updated is not None:
		last_fetch = int( time.mktime( b.posts_last_updated.timetuple() ) )
		if int( b.last_updated ) <= last_fetch:
			return True

	# .get() so that a missing variable reaches the message below instead of
	# raising KeyError
	blogname = environ.get( 'TUMBLR' )
	api_key = environ.get( 'TUMBLR_API_KEY' )

	if not blogname or not api_key:
		print( 'TUMBLR and TUMBLR_API_KEY must be defined in your .env file!' )
		return False

	tumblr_url = 'http://api.tumblr.com/v2/blog/' + blogname + '/posts/'

	# load posts until we run out, or find one we already loaded
	posts_to_request = 20
	found_posts = False

	# query string for the posts endpoint; 'offset' advances one page per loop
	args = { 'api_key': api_key, 'offset': 0, 'limit': posts_to_request }

	while not found_posts:
		fp = urlopen( tumblr_url + '?' + urlencode( args ) )
		try:
			results = json.load( fp )
		finally:
			fp.close()

		# check return value.  TODO handle this better.
		# NOTE(review): posts_last_updated is still refreshed below after an
		# error, so a failed fetch is not retried until the blog changes.
		if 200 != results['meta']['status']:
			print( "error fetching posts; no new posts loaded" )
			break

		page = results['response']['posts']

		for post in page:

			if 'title' in post:
				if Post.query.filter( Post.title == post['title'] ).count() > 0:
					found_posts = True
					break

				db_session.add( Post( post['title'], post['post_url'] ) )

			else:  # no title
				print( post )

		# a short page is the last one; test this only after its posts have
		# been processed so they are not dropped
		if len( page ) < posts_to_request:
			break

		args['offset'] += posts_to_request

	b.posts_last_updated = datetime.now()

	db_session.commit()

	return True
Exemple #2
0
def load_blog_info():
    """Refresh the stored Blog record (id 1) from WordPress or Tumblr.

    The refresh is skipped when the record exists and was updated within the
    last five minutes.  Exactly one of the WORDPRESS / TUMBLR environment
    variables must be set; TUMBLR additionally requires TUMBLR_API_KEY.

    Returns True when the record is fresh or was refreshed, and False when
    the environment configuration is invalid.
    """
    blog = Blog.query.get(1)

    # only update info every five minutes
    if blog is not None:
        age = datetime.utcnow() - blog.info_last_updated
        if age <= timedelta(minutes=5):
            return True

    wp_site = environ.get('WORDPRESS', False)
    tumblr_blog = environ.get('TUMBLR', False)
    tumblr_key = environ.get('TUMBLR_API_KEY', False)

    # input validation: collect every problem so all of them get reported
    problems = []
    if not (wp_site or tumblr_blog):
        problems.append(
            'Please define a blog to connect to (TUMBLR or WORDPRESS)!')
    if tumblr_blog and not tumblr_key:
        problems.append('TUMBLR_API_KEY must be defined in your .env file!')
    if wp_site and tumblr_blog:
        problems.append('Please define one of WORDPRESS or TUMBLR, not both!')

    if problems:
        for message in problems:
            print(message)
        return False

    # get blog info
    if wp_site:
        results = connect_wordpress(wp_site)
    else:
        results = connect_tumblr(tumblr_blog, tumblr_key)

    # first run: there is no record yet, so create one
    if blog is None:
        blog = Blog()
        db_session.add(blog)

    # (model attribute, response key) pairs, applied in order
    if wp_site:
        source = results
        field_map = (
            ('title', 'name'),
            ('description', 'description'),
            ('url', 'URL'),
        )
    else:
        source = results['response']['blog']
        field_map = (
            ('title', 'title'),
            ('total_posts', 'posts'),
            ('last_updated', 'updated'),
            ('description', 'description'),
            ('url', 'url'),
        )

    for attr, key in field_map:
        setattr(blog, attr, source[key])

    blog.info_last_updated = datetime.utcnow()
    db_session.commit()

    return True
Exemple #3
0
def load_tumblr_posts():
    """Fetch titled posts from the Tumblr API and queue them as Post rows.

    Reads the blog name and API key from the TUMBLR and TUMBLR_API_KEY
    environment variables, then pages through the blog's posts (20 per
    request) until a short page is returned, the API reports a non-200
    status, or a post whose title is already stored is encountered.

    Returns True when loading ran (or was skipped because nothing is new),
    and False when the required environment variables are missing.
    """
    import time

    b = Blog.query.get(1)

    # if there's nothing new to load, skip it.  Compare as integers: the
    # previous code compared the stored value against a strftime string,
    # which is not a numeric comparison.
    # NOTE(review): assumes b.last_updated holds a Unix timestamp (it is
    # filled from the Tumblr 'updated' field by load_blog_info) -- confirm.
    if b.posts_last_updated and b.last_updated is not None:
        last_fetch = int(time.mktime(b.posts_last_updated.timetuple()))
        if int(b.last_updated) <= last_fetch:
            return True

    # .get() so that a missing variable reaches the message below instead of
    # raising KeyError
    blogname = environ.get('TUMBLR')
    api_key = environ.get('TUMBLR_API_KEY')

    if not blogname or not api_key:
        print('TUMBLR and TUMBLR_API_KEY must be defined in your .env file!')
        return False

    tumblr_url = 'http://api.tumblr.com/v2/blog/' + blogname + '/posts/'

    # load posts until we run out, or find one we already loaded
    posts_to_request = 20
    found_posts = False

    # query string for the posts endpoint; 'offset' advances one page per loop
    args = {'api_key': api_key, 'offset': 0, 'limit': posts_to_request}

    while not found_posts:
        fp = urlopen(tumblr_url + '?' + urlencode(args))
        try:
            results = json.load(fp)
        finally:
            fp.close()

        # check return value.  TODO handle this better.
        # NOTE(review): posts_last_updated is still refreshed below after an
        # error, so a failed fetch is not retried until the blog changes.
        if 200 != results['meta']['status']:
            print("error fetching posts; no new posts loaded")
            break

        page = results['response']['posts']

        for post in page:

            if 'title' in post:
                if Post.query.filter(Post.title == post['title']).count() > 0:
                    found_posts = True
                    break

                db_session.add(Post(post['title'], post['post_url']))

            else:  # no title
                print(post)

        # a short page is the last one; test this only after its posts have
        # been processed so they are not dropped
        if len(page) < posts_to_request:
            break

        args['offset'] += posts_to_request

    b.posts_last_updated = datetime.now()

    db_session.commit()

    return True
Exemple #4
0
def load_blog_info():
	"""Update the Blog record (id 1) with metadata from the configured blog.

	Does nothing when the record exists and was refreshed less than five
	minutes ago.  The source is chosen from the WORDPRESS / TUMBLR
	environment variables (exactly one must be set; TUMBLR also needs
	TUMBLR_API_KEY).

	Returns True on success or when no refresh was needed, False when the
	environment configuration is invalid.
	"""
	blog = Blog.query.get( 1 )

	# only update info every five minutes
	recently_refreshed = (
		blog is not None
		and datetime.utcnow() - blog.info_last_updated <= timedelta( minutes=5 )
	)
	if recently_refreshed:
		return True

	wordpress = environ.get( 'WORDPRESS', False )
	tumblr = environ.get( 'TUMBLR', False )
	api_key = environ.get( 'TUMBLR_API_KEY', False )

	# input validation -- every failing rule is reported before giving up
	config_ok = True

	if not wordpress and not tumblr:
		print( 'Please define a blog to connect to (TUMBLR or WORDPRESS)!' )
		config_ok = False

	if tumblr and not api_key:
		print( 'TUMBLR_API_KEY must be defined in your .env file!' )
		config_ok = False

	if wordpress and tumblr:
		print( 'Please define one of WORDPRESS or TUMBLR, not both!' )
		config_ok = False

	if not config_ok:
		return False

	# get blog info
	results = connect_wordpress( wordpress ) if wordpress else connect_tumblr( tumblr, api_key )

	# first run: no record yet, so create one and register it with the session
	if blog is None:
		blog = Blog()
		db_session.add( blog )

	if not wordpress:
		# Tumblr nests the blog metadata under response/blog
		info = results['response']['blog']
		blog.title = info['title']
		blog.total_posts = info['posts']
		blog.last_updated = info['updated']
		blog.description = info['description']
		blog.url = info['url']
	else:
		blog.title = results['name']
		blog.description = results['description']
		blog.url = results['URL']

	blog.info_last_updated = datetime.utcnow()

	db_session.commit()

	return True
Exemple #5
0
def load_wordpress_posts():
	"""Fetch posts from the WordPress.com REST API and queue them as Post rows.

	Pages through the site named by the WORDPRESS environment variable,
	100 posts per request, until a short page is returned or a post whose
	title is already stored is encountered.  When the Blog record has a
	posts_last_updated value, only posts after that time are requested.
	Writes one '.' to stderr per request as a progress indicator, then
	updates the Blog record's posts_last_updated and total_posts and commits.

	Returns True.  A missing WORDPRESS variable raises KeyError.
	"""
	b = Blog.query.get( 1 )
	blogname = environ['WORDPRESS']
	wp_url = 'http://public-api.wordpress.com/rest/v1/sites/' + blogname + '/posts'

	count = 0
	posts_to_request = 100
	found_posts = False

	args = { 'offset': 0, 'fields': 'title,URL', 'number': posts_to_request }

	# subtract 7 because the blog is in PST and we're saving timestamps in UTC
	# NOTE(review): a 7-hour offset corresponds to PDT (UTC-7), not PST
	# (UTC-8) -- confirm which is intended.
	if b.posts_last_updated:
		args['after'] = ( b.posts_last_updated - timedelta( hours=7 ) ).isoformat()

	while True:
		sys.stderr.write( '.' )
		fp = urlopen( wp_url + '?' + urlencode( args ) )
		try:
			results = json.load( fp )
		finally:
			# release the HTTP response even if the body is not valid JSON
			fp.close()

		for post in results['posts']:
			# stop at the first title that is already stored
			if Post.query.filter( Post.title == post['title'] ).count() > 0:
				found_posts = True
				break

			db_session.add( Post( post['title'], post['URL'] ) )
			count += 1

		if len( results['posts'] ) < posts_to_request or found_posts:
			break

		args['offset'] += posts_to_request

	b.posts_last_updated = datetime.utcnow()
	b.total_posts = Post.query.count()
	db_session.commit()

	print( 'Done! ' + str( count ) + ' posts loaded.' )
	return True
Exemple #6
0
def load_wordpress_posts():
    """Fetch posts from the WordPress.com REST API and queue them as Post rows.

    Pages through the site named by the WORDPRESS environment variable,
    100 posts per request, until a short page is returned or a post whose
    title is already stored is encountered.  When the Blog record has a
    posts_last_updated value, only posts after that time are requested.
    Writes one '.' to stderr per request as a progress indicator, then
    updates the Blog record's posts_last_updated and total_posts and commits.

    Returns True.  A missing WORDPRESS variable raises KeyError.
    """
    b = Blog.query.get(1)
    blogname = environ['WORDPRESS']
    wp_url = 'http://public-api.wordpress.com/rest/v1/sites/' + blogname + '/posts'

    count = 0
    posts_to_request = 100
    found_posts = False

    args = {'offset': 0, 'fields': 'title,URL', 'number': posts_to_request}

    # subtract 7 because the blog is in PST and we're saving timestamps in UTC
    # NOTE(review): a 7-hour offset corresponds to PDT (UTC-7), not PST
    # (UTC-8) -- confirm which is intended.
    if b.posts_last_updated:
        args['after'] = (b.posts_last_updated - timedelta(hours=7)).isoformat()

    while True:
        sys.stderr.write('.')
        fp = urlopen(wp_url + '?' + urlencode(args))
        try:
            results = json.load(fp)
        finally:
            # release the HTTP response even if the body is not valid JSON
            fp.close()

        for post in results['posts']:
            # stop at the first title that is already stored
            if Post.query.filter(Post.title == post['title']).count() > 0:
                found_posts = True
                break

            db_session.add(Post(post['title'], post['URL']))
            count += 1

        if len(results['posts']) < posts_to_request or found_posts:
            break

        args['offset'] += posts_to_request

    b.posts_last_updated = datetime.utcnow()
    b.total_posts = Post.query.count()
    db_session.commit()

    print('Done! ' + str(count) + ' posts loaded.')
    return True