def scrape():
    # Run the scrape function
    mars_data = web_scrape.scrape()
    # Update the Mongo database using update and upsert=True
    mongo.db.collection.update({}, mars_data, upsert=True)
    # Redirect back to home page
    return redirect("/")
def mars():
    mars = mongo.db.mars
    mars_info = web_scrape.scrape()
    # A single update with upsert=True both inserts and replaces, so no
    # separate call is needed to clear the document first.
    mars.update({}, mars_info, upsert=True)
    return redirect("/", code=302)
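# Note: Collection.update() is deprecated and was removed in PyMongo 4.x.
# A minimal sketch of the modern equivalent, assuming the same `mongo`
# object and scraped dict as above (`scrape_modern` is a hypothetical name):
def scrape_modern():
    mars_data = web_scrape.scrape()
    # replace_one with upsert=True swaps in the new document, or creates it
    # when the collection is empty -- the same effect as the legacy update().
    mongo.db.mars.replace_one({}, mars_data, upsert=True)
    return redirect("/")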
#!/usr/bin/env python3
import web_scrape
from collections import defaultdict
import random
from typing import Dict, List, Tuple

words: List[str] = web_scrape.scrape()

starting_words: List[str] = []
# Build the trigram table: each adjacent word pair maps to the list of
# words that have been seen following it. (next_word avoids shadowing the
# built-in next().)
trigrams: Dict[Tuple[str, str], List[str]] = defaultdict(list)
for prev, current, next_word in zip(words, words[1:], words[2:]):
    if prev == ".":
        starting_words.append(current)
    trigrams[(prev, current)].append(next_word)

sentence: List[str] = []
current: Tuple[str, str] = (".", random.choice(starting_words))
while True:
    sentence.append(current[1])
    if len(trigrams[current]) == 0:
        break
    current = (current[1], random.choice(trigrams[current]))
    if current[1] == ".":
        break

print(" ".join(sentence))
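# A toy illustration of the table built above (hard-coded words instead of
# web_scrape.scrape(); illustrative only):
#   words = ["i", "like", "cats", ".", "i", "like", "dogs", "."]
# yields trigrams[("i", "like")] == ["cats", "dogs"], so after the pair
# ("i", "like") the chain can branch to either "cats" or "dogs", and
# starting_words == ["i"] because "i" is the only word following a ".".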
def scrape():
    mars = mongo.db.mars
    mars_data = web_scrape.scrape()
    mars.update({}, mars_data, upsert=True)
    # Redirecting to an absolute localhost URL only works in development;
    # a relative "/" would follow the app wherever it is hosted.
    return redirect("http://localhost:5000/", code=302)
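# The route handlers above are shown without their surrounding app setup.
# A minimal sketch of the wiring they appear to assume, using Flask-PyMongo
# (the URI, database name, and route path are assumptions):
from flask import Flask, redirect
from flask_pymongo import PyMongo
import web_scrape

app = Flask(__name__)
app.config["MONGO_URI"] = "mongodb://localhost:27017/mars_app"  # assumed URI
mongo = PyMongo(app)

@app.route("/scrape")
def scrape():
    mars_data = web_scrape.scrape()
    mongo.db.mars.replace_one({}, mars_data, upsert=True)
    return redirect("/")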
#!/usr/bin/env python3
import web_scrape
from collections import defaultdict
import random
from typing import Dict, List

words = web_scrape.scrape()

# Build the bigram table: each word maps to the list of words that have
# been seen following it.
pairs: Dict[str, List[str]] = defaultdict(list)
for prev, current in zip(words, words[1:]):
    pairs[prev].append(current)

# Start from ".", so the first generated word is one that begins a sentence.
current = "."
sentence: List[str] = []
while True:
    possible_words = pairs[current]
    current = random.choice(possible_words)
    sentence.append(current)
    if current == ".":
        break

print(" ".join(sentence))
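# If the scraped text does not end in ".", the chain can reach a word with
# no recorded successor, and random.choice([]) raises IndexError. A
# defensive variant of the loop above (a sketch, assuming the same `pairs`
# table and starting state):
while True:
    possible_words = pairs[current]
    if not possible_words:  # dead end: this word was never followed by anything
        break
    current = random.choice(possible_words)
    sentence.append(current)
    if current == ".":
        break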
for my_url in string_bank:
    # Extract the date from the URL (thanks, WayBack Machine, for
    # standardizing URLs).
    my_date = my_url[28:32] + "-" + my_url[32:34] + "-" + my_url[34:36]
    temp_data = web_scrape_v2.scrape_v2(my_url)
    print("successful scrape")
    if len(temp_data) == 0:
        print("temp_data is empty", my_date)
        continue
    # Enter the data into the CSV file.
    for row in temp_data:
        row.append(my_date)
        w.writerow(row)

# Do the same thing as above, but with the site's newer format.
# The new HTML format uses a different scraper (the original one).
string_bank = bank.top_rank_bank_with_new_format
for my_url in string_bank:
    # Extract the date from the URL.
    my_date = my_url[28:32] + "-" + my_url[32:34] + "-" + my_url[34:36]
    temp_data = web_scrape.scrape(my_url)
    print("successful scrape")
    if len(temp_data) == 0:
        print("temp_data is empty", my_date)
        continue
    # Enter the data into the CSV file.
    for row in temp_data:
        row.append(my_date)
        w.writerow(row)

# Close the output file now that both loops are done.
f.close()
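# The hard-coded slice indices above assume every snapshot URL has an
# identical prefix length. A more robust sketch would parse the 14-digit
# WayBack timestamp directly (this regex helper is an assumption, not part
# of the original code):
import re

def wayback_date(url: str) -> str:
    # WayBack snapshot URLs embed a YYYYMMDDhhmmss timestamp after "/web/".
    match = re.search(r"/web/(\d{4})(\d{2})(\d{2})\d*", url)
    if match is None:
        raise ValueError(f"no WayBack timestamp found in {url!r}")
    return "-".join(match.groups())  # e.g. "2019-01-05"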