def process_lesson_plans():
    """Scrape each lesson-plan URL and pass the result to ``insert_item``.

    URLs come from ``get_item_urls('lesson-plans', ...)``; every URL is run
    through ``scrape_lesson_plan`` and the scraped value is inserted with the
    ``'lesson-plans'`` label.
    """
    lesson_plan_urls = get_item_urls(
        'lesson-plans', 'a[data-type="lesson-plans"]')
    for lesson_plan_url in lesson_plan_urls:
        insert_item(scrape_lesson_plan(lesson_plan_url), 'lesson-plans')
def process_activities():
    """Scrape each activity URL, insert it, and print running progress.

    The URL collection is printed once up front; after every insert a
    ``"<done> of <total>"`` line is printed.
    """
    activity_urls = get_item_urls('activity', 'a[data-type="activity"]', 0)
    print(activity_urls)
    total = len(activity_urls)

    # TODO remove debugging code
    for position, activity_url in enumerate(activity_urls, start=1):
        insert_item(scrape_activity(activity_url), 'activities')
        print(str(position) + ' of ' + str(total))
def test_insert_item_not_unique_url(self):
    """Inserting a second item that reuses an existing URL must fail.

    The first insert is expected to succeed; the second one, which shares the
    same URL, must raise ``sqlite3.IntegrityError``.
    """
    db_conn = database.init(database_file="insert_item_test.db")
    # try/finally so the connection is released even when an insert or the
    # assertion fails; otherwise a failing test leaks the handle.
    try:
        title_1 = "Test title 1"
        url_1 = "test_url_1"
        category_1 = "test_category"
        database.insert_item(db_conn, title_1, url_1, category_1)

        title_2 = "Test title 1"
        category_2 = "test_category"
        # It should raise an integrity error. Only the call under test lives
        # inside the context manager so nothing else can satisfy it.
        with self.assertRaises(sqlite3.IntegrityError):
            database.insert_item(db_conn, title_2, url_1, category_2)
    finally:
        db_conn.close()
def process_worksheets(save=True):
    """Scrape each worksheet URL and optionally insert the result.

    Args:
        save: The scraped worksheet is passed to ``insert_item`` only when
            this is exactly ``True`` (identity check, not truthiness).
    """
    # this grabs content from education.com/worksheets and uses that selector
    # to grab the library of links
    worksheet_urls = get_item_urls('worksheets', 'a[data-type="worksheet"]')

    # TODO remove debugging code
    for count, worksheet_url in enumerate(worksheet_urls, start=1):
        worksheet = scrape_worksheet(worksheet_url)
        if save is True:
            insert_item(worksheet)
        # TODO remove debugging code
        print(count)
def test_insert_item(self):
    """An inserted item can be read back from ``items`` with the same fields."""
    db_conn = database.init(database_file="insert_item_test.db")
    # try/finally so the connection is closed even when an assertion fails.
    try:
        title = "Test title"
        url = "test_url"
        category = "test_category"
        database.insert_item(db_conn, title, url, category)

        # The content should exist in the database.
        # Bind the URL as a parameter instead of formatting it into the SQL
        # text (assumes db_conn is a sqlite3 connection, i.e. qmark style).
        c = db_conn.cursor()
        c.execute(
            "SELECT title, url, category FROM items WHERE url=?", (url,))
        result = c.fetchone()

        # fetchone() yields None when nothing matched; fail with a clear
        # message rather than a TypeError on result[0].
        self.assertIsNotNone(result, "inserted item was not found")

        # Make sure it's correct
        self.assertEqual(result[0], title)
        self.assertEqual(result[1], url)
        self.assertEqual(result[2], category)
    finally:
        db_conn.close()
def submit():
    """Validate the submission salt in the JSON payload and insert the item.

    Returns:
        The failure response (status 666) when the payload is not a JSON
        object, lacks ``submission_salt``, or the salt does not equal
        ``current_app.config['SUBMISSION_SALT']``. Otherwise, whatever
        ``insert_item`` returns.

    Raises:
        KeyError: If an accepted payload lacks ``url`` or ``person``.
    """
    # The body is declared as application/json, so it must be valid JSON:
    # JSON spells the boolean as lowercase `true`, not Python's `True`.
    failure_response = (
        Response('{"F*****g Burn": true}', mimetype="application/json"),
        666,
    )

    try:
        payload = request.json
        if payload['submission_salt'] != current_app.config['SUBMISSION_SALT']:
            return failure_response
    except (KeyError, TypeError):
        # KeyError: salt (or config key) missing.
        # TypeError: payload is None or a non-object JSON value (list,
        # string, number), which cannot be subscripted with a string key.
        return failure_response

    url = payload["url"]
    person = payload["person"]
    title = payload.get("title", "")
    return insert_item(url, person, g.db_file, title)
def collect_ml_stories_from_hn():
    """Fetch ml stories from hacker news and insert each into the database.

    Each story is stored with category ``"ml"``. Per-story failures are
    printed and do not stop the remaining stories from being processed; an
    ``sqlite3.IntegrityError`` is reported as the item already existing.
    """
    print("[ml][hn] Collecting the ml stories from hacker news ...")

    # Initialize database
    db_conn = database.init(config.DATABASE_FILE)
    # try/finally so the connection is closed even if fetching the stories
    # (or anything outside the per-item handlers) raises.
    try:
        # Get ml stories from hacker news
        stories = source.get_ml_stories_from_hn()
        if not stories:
            print("[ml][hn] No stories")

        for story in stories:
            # Insert story to the database
            try:
                database.insert_item(db_conn, story.title, story.url, "ml")
                print("[ml][hn] Item {} inserted".format(story.url))
            except sqlite3.IntegrityError:
                print("[ml][hn] Item {} already exists".format(story.url))
            except Exception as e:
                # Deliberate best-effort: report and move on to the next one.
                print("[ml][hn] Insert item {} failed {}".format(story.url, e))
    finally:
        # Close the database connection
        db_conn.close()
def collect_rust_stories_from_lobsters():
    """Fetch rust stories from lobsters and insert each into the database.

    Each story is stored with category ``"rust"``. Per-story failures are
    printed and do not stop the remaining stories from being processed; an
    ``sqlite3.IntegrityError`` is reported as the item already existing.
    """
    print("[rust][lobsters] Collecting the rust stories from lobsters ...")

    # Initialize database
    db_conn = database.init(config.DATABASE_FILE)
    # try/finally so the connection is closed even if fetching the stories
    # (or anything outside the per-item handlers) raises.
    try:
        # Get rust stories from lobsters
        stories = source.get_rust_stories_from_lobsters()
        if not stories:
            print("[rust][lobsters] No stories")

        for story in stories:
            # Insert story to the database
            try:
                database.insert_item(db_conn, story.title, story.url, "rust")
                print("[rust][lobsters] Item {} inserted".format(story.url))
            except sqlite3.IntegrityError:
                print("[rust][lobsters] Item {} already exists".format(
                    story.url))
            except Exception as e:
                # Deliberate best-effort: report and move on to the next one.
                print("[rust][lobsters] Insert item {} failed {}".format(
                    story.url, e))
    finally:
        # Close the database connection
        db_conn.close()