def test_insert_docs():
    """
    DESCR: gets three blogs and inserts each one; the third url repeats the
           first so the duplicate-key path is exercised. Problems are
           reported on stdout and the loop moves on to the next url.
    INPUT: none
    OUTPUT: none
    """
    test1_url = 'randomjohn.wordpress.com'
    test2_url = 'hollyfitness.wordpress.com'
    test3_url = 'randomjohn.wordpress.com'  # deliberate repeat of test1_url
    stuff = [test1_url, test2_url, test3_url]

    for url in stuff:
        blog_info = get_whole_blog(url)

        # A falsy result is treated as a failed fetch.
        if not blog_info:
            print("ERROR: Problem get blog for url: {}".format(url))
            continue

        # Label used in error messages. Starts as the url so the handlers
        # below never have to index blog_info again (a missing 'ID' key is
        # one of the ways to end up in the generic handler).
        blog_id = url
        try:
            blog_id = blog_info['ID']
            blog_info['_id'] = blog_id  # to have specified key
            table.insert(blog_info)
        except DuplicateKeyError:
            print("ERROR: Duplicate error on insert of {}".format(blog_id))
        except Exception:
            # Deliberately not a bare `except:` so KeyboardInterrupt and
            # SystemExit still propagate.
            print("ERROR: unknown insert error of {}".format(blog_id))
def get_and_insert_doc(url):
    """
    DESCR: looks up the post count for one blog; when the count lies within
           BLOG_SIZE_MIN..BLOG_SIZE_MAX (inclusive) the whole blog is
           fetched and inserted into mongodb. Every rejection or failure is
           appended as one line to errors.txt.
    INPUT: url - string, domain of wordpress blog
    OUTPUT: none
    """
    def log_error(template, *fields):
        # Append one formatted line to the shared error log.
        with open('errors.txt', "a") as log:
            log.write(template.format(*fields))

    try:
        length = get_num_posts_byurl(url)
        print(length)

        # Reject blogs outside the accepted size window.
        if length > BLOG_SIZE_MAX:
            print("Blog too big({}): {}".format(length, url))
            log_error("Blog too big({}): {}\n", length, url)
            return
        if length < BLOG_SIZE_MIN:
            print("Blog too small({}): {}".format(length, url))
            log_error("Blog too small({}): {}\n", length, url)
            return

        # Size is acceptable, so pull the full blog.
        blog_info = get_whole_blog(url)
        if not blog_info:
            # An empty blog object means the fetch failed.
            log_error("ERROR: problem getting info on {}\n", url)
            return

        try:
            blog_info['_id'] = blog_info['ID']  # to have specified key
            table.insert(blog_info)
        except DuplicateKeyError:
            print("ERROR: Duplicate error on insert of {}".format(blog_info['ID']))
            log_error("ERROR: Duplicate error on insert of {}\n", url)
        except Exception as e:
            print("ERROR: {} when inserting {}".format(str(e), blog_info['ID']))
            log_error("ERROR: {} when inserting {}\n", str(e), url)
    except Exception as e:
        # Catch-all for anything raised above, including failures while
        # reporting an earlier error.
        with open('errors.txt', "a") as f:
            f.write('problem with blog {} retrieval, exception: {}\n'.format(url, str(e)))