Code Example #1
def scrape():

    # Run the scrape function
    mars_data = web_scrape.scrape()

    # Update the Mongo database using update and upsert=True
    mongo.db.collection.update({}, mars_data, upsert=True)

    # Redirect back to home page
    return redirect("/")
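This route handler, like the similar ones in Examples #2 and #4 below, assumes a Flask app and a Flask-PyMongo connection configured elsewhere in app.py. A minimal sketch of that surrounding setup, with an assumed route path, database name, and Mongo URI, might look like this:

#!/usr/bin/env python3
# Illustrative wiring only: the route path, database name, and Mongo URI
# are assumptions, not taken from the original projects.
from flask import Flask, redirect
from flask_pymongo import PyMongo
import web_scrape

app = Flask(__name__)
app.config["MONGO_URI"] = "mongodb://localhost:27017/mars_app"
mongo = PyMongo(app)

@app.route("/scrape")
def scrape():
    mars_data = web_scrape.scrape()
    # Legacy PyMongo call as used in the examples; PyMongo 4.x replaces
    # collection.update with replace_one / update_one.
    mongo.db.mars.update({}, mars_data, upsert=True)
    return redirect("/")

if __name__ == "__main__":
    app.run()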
Code Example #2
def mars():
    # Make sure the collection exists before refreshing it
    mongo.db.mars.update({}, {}, upsert=True)
    mars = mongo.db.mars
    # Run the scraper and upsert its results into the mars collection
    mars_info = web_scrape.scrape()
    mars.update({}, mars_info, upsert=True)
    # Send the user back to the index page
    return redirect("/", code=302)
Code Example #3
#!/usr/bin/env python3

import web_scrape
from collections import defaultdict
import random
from typing import Dict, List, Tuple

words: List[str] = web_scrape.scrape()
starting_words: List[str] = []
# Map each pair of consecutive words to the words observed immediately after it
trigrams: Dict[Tuple[str, str], List[str]] = defaultdict(list)
for prev, current, nxt in zip(words, words[1:], words[2:]):
    if prev == ".":
        starting_words.append(current)

    trigrams[(prev, current)].append(nxt)

# Pick a random sentence opener and follow the trigram chain until a "."
sentence: List[str] = []
current: Tuple[str, str] = (".", random.choice(starting_words))
while True:
    sentence.append(current[1])
    if len(trigrams[current]) == 0:
        break
    current = (current[1], random.choice(trigrams[current]))
    if current[1] == ".":
        break

print(" ".join(sentence))
Code Example #4
File: app.py  Project: as4321/HW12
def scrape():
    mars = mongo.db.mars
    # Scrape fresh data and upsert it into the mars collection
    mars_data = web_scrape.scrape()
    mars.update({}, mars_data, upsert=True)
    return redirect("http://localhost:5000/", code=302)
Code Example #5
File: bigram.py  Project: gregpaton08/dsfs
#!/usr/bin/env python3

import web_scrape
from collections import defaultdict
import random
from typing import Dict, List

words = web_scrape.scrape()

# Map each word to every word observed immediately after it
pairs: Dict[str, List[str]] = defaultdict(list)
for prev, current in zip(words, words[1:]):
    pairs[prev].append(current)

# Start at a sentence boundary and walk the bigram chain until the next "."
current = "."
sentence: List[str] = []
while True:
    possible_words = pairs[current]
    current = random.choice(possible_words)
    sentence.append(current)
    if current == ".":
        break

print(" ".join(sentence))
Code Example #6
for my_url in string_bank:
    # extract the date of the url (thanks WayBack machine for standardizing urls)
    my_date = my_url[28:32] + "-" + my_url[32:34] + "-" + my_url[34:36]
    temp_data = web_scrape_v2.scrape_v2(my_url)
    print("successful scrape")
    if len(temp_data) == 0:
        print("temp_data is empty", my_date)
        continue
    # entering data into the csv file
    for row in temp_data:
        row.append(my_date)
        w.writerow(row)

# Do the same thing as above, but with the site's newer format.
# The new HTML format uses a different scraper (the original one).
string_bank = bank.top_rank_bank_with_new_format
for my_url in string_bank:
    # extract the date of the url (thanks WayBack machine for standardizing urls)
    my_date = my_url[28:32] + "-" + my_url[32:34] + "-" + my_url[34:36]
    temp_data = web_scrape.scrape(my_url)
    print("successful scrape")
    if len(temp_data) == 0:
        print("temp_data is empty", my_date)
        continue
    # entering data into the csv file
    for row in temp_data:
        row.append(my_date)
        w.writerow(row)
# Close the output file
f.close()
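Example #6 relies on names defined earlier in the same script: bank (a module holding lists of WayBack Machine URLs), f (the open CSV output file), and w (a csv.writer over it). A hedged sketch of that setup, with an assumed file name and an assumed attribute name for the first URL list, might be:

# Illustrative setup only: the output file name and the
# top_rank_bank_with_old_format attribute are assumptions.
import csv
import bank
import web_scrape
import web_scrape_v2

f = open("rankings.csv", "w", newline="")
w = csv.writer(f)
# URLs saved in the site's older HTML format (hypothetical attribute name);
# the second loop then switches to bank.top_rank_bank_with_new_format.
string_bank = bank.top_rank_bank_with_old_format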