def main(current_time):
    """Collect recent entries from the Utah Foundation news page.

    Downloads the news index, reads the date printed with each entry and
    keeps the entries for which ``date_subtracter.main`` reports ``1`` in
    the first slot of its result.

    Args:
        current_time: Reference timestamp; ``current_time[1][0..2]`` is read
            for the time-of-day fields (presumably
            ``[[month, day, year], [hour, min, sec]]`` -- confirm with caller).

    Returns:
        dict mapping article URL to its ``[date_fields, time_fields]`` stamp.
    """
    site_root = "http://www.utahfoundation.org"
    page = Article('http://www.utahfoundation.org/news/')
    page.download()
    soup = BeautifulSoup(page.html)

    now = current_time[:]
    recent = {}
    for entry in soup.find_all("article", class_="row news-entry col-sm-11"):
        link = site_root + entry.a["href"]
        tokens = entry.i.text.split()
        # The page only shows a date, so the time of day is borrowed from
        # the reference timestamp.
        stamp = [
            [tokens[0], tokens[1][:-1], tokens[2]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp
    return recent
def main(current_time):
    """Collect recent posts from the Utah Datapoints front page.

    Each ``div.entry-meta`` block supplies both the link (its first anchor)
    and the date tokens (words 2-4 of its text).

    Args:
        current_time: Reference timestamp; ``current_time[1][0..2]`` is
            copied into the time-of-day part of every stamp.

    Returns:
        dict mapping article URL to its ``[date_fields, time_fields]`` stamp
        for entries where ``date_subtracter.main(...)[0] == 1``.
    """
    front = Article('http://utahdatapoints.com/')
    front.download()
    markup = BeautifulSoup(front.html)

    reference = current_time[:]
    found = {}
    for meta in markup.find_all("div", class_="entry-meta"):
        href = meta.a["href"]
        words = meta.text.split()
        date_part = [words[2], words[3][:-1], words[4]]
        time_part = [reference[1][0][:], reference[1][1][:], reference[1][2][:]]
        stamp = [date_part, time_part]
        verdict = date_subtracter.main(reference, stamp)
        if verdict[0] == 1:
            found[href] = stamp
    return found
def main(current_time):
    """Collect recent Executive Branch stories from Utah Political Capitol.

    Headlines (``h2.entry-title.taggedlink``) and publication stamps
    (``time.published``) are read from the category page and paired in
    document order.

    Args:
        current_time: Reference timestamp. ``current_time[0][2]`` supplies
            the year (the page stamp is parsed for month and day only) and
            ``current_time[1][0..2]`` supplies the time-of-day fields.

    Returns:
        dict mapping article URL to its ``[date_fields, time_fields]`` stamp
        for entries where ``date_subtracter.main(...)[0] == 1``.
    """
    page = Article('http://utahpoliticalcapitol.com/category/on-the-hill/executive-branch/')
    page.download()
    soup = BeautifulSoup(page.html)

    now = current_time[:]
    recent = {}
    headlines = soup.find_all("h2", class_="entry-title taggedlink")
    stamps = soup.find_all("time", class_="published")

    # zip() stops at the shorter list, so a surplus of <time> tags no longer
    # raises IndexError when looking up the matching headline.
    for headline, stamp_tag in zip(headlines, stamps):
        anchor = headline.a
        if anchor is None or not anchor.get("href"):
            continue
        # Read the href attribute directly.  The previous code tokenised the
        # rendered tag and used str.strip('href=">'), which strips a *set* of
        # characters and could eat trailing letters of the URL.
        link = anchor["href"]

        tokens = stamp_tag.encode('utf-8').replace(">", " ").split()
        # NOTE(review): the year is taken from the reference time, so a
        # December story seen in January gets the wrong year.
        stamp = [
            [tokens[3], tokens[4][:-1], now[0][2][:]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp
    return recent
def main(current_time):
    """Collect recent posts from the Utah Reps front page.

    Every ``<article>`` element is inspected; the one linking to
    ``/ourperspective`` is skipped.  The date comes from the first ``<p>``
    of each article.

    Args:
        current_time: Reference timestamp; ``current_time[1][0..2]`` is
            copied into the time-of-day part of every stamp.

    Returns:
        dict mapping article URL to its ``[date_fields, time_fields]`` stamp
        for entries where ``date_subtracter.main(...)[0] == 1``.
    """
    landing = Article('http://www.utahreps.net/')
    landing.download()
    markup = BeautifulSoup(landing.html)

    reference = current_time[:]
    matches = {}
    for post in markup.find_all("article"):
        href = post.a["href"]
        if href == "http://www.utahreps.net/ourperspective":
            continue
        parts = post.p.text.split()
        stamp = [
            [parts[0], parts[1][:-1], parts[2]],
            [reference[1][0][:], reference[1][1][:], reference[1][2][:]],
        ]
        if date_subtracter.main(reference, stamp)[0] == 1:
            matches[href] = stamp
    return matches
def main(current_time):
    """Collect recent politics stories from the Daily Utah Chronicle.

    The category page provides the links; each story is then downloaded
    separately to read its ``span.td-post-date``.

    Args:
        current_time: Reference timestamp; ``current_time[1][0..2]`` is
            copied into the time-of-day part of every stamp.

    Returns:
        dict mapping article URL to its ``[date_fields, time_fields]`` stamp
        for entries where ``date_subtracter.main(...)[0] == 1``.
    """
    index = Article('http://dailyutahchronicle.com/category/news/politics/')
    index.download()
    index_soup = BeautifulSoup(index.html)

    now = current_time[:]
    recent = {}
    for thumb in index_soup.find_all("div", class_="td-module-thumb"):
        link = thumb.a["href"]

        # The listing does not carry the date; fetch the story itself.
        story = Article(link)
        story.download()
        story_soup = BeautifulSoup(story.html)
        date_spans = story_soup.find_all("span", class_="td-post-date")
        parts = date_spans[0].text.split()

        stamp = [
            [parts[0], parts[1][:-1], parts[2]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        outcome = date_subtracter.main(now, stamp)
        if outcome[0] == 1:
            recent[link] = stamp
    return recent
def main(current_time):
    """Collect recent politics stories from St George News.

    Each ``div.recent-story`` block supplies the link and, counted from the
    end of its text, the date tokens.

    Args:
        current_time: Reference timestamp; ``current_time[1][0..2]`` is
            copied into the time-of-day part of every stamp.

    Returns:
        dict mapping article URL to its ``[date_fields, time_fields]`` stamp
        for entries where ``date_subtracter.main(...)[0] == 1``.
    """
    archive = Article(
        'https://www.stgeorgeutah.com/news/archive/category/news/politics/')
    archive.download()
    markup = BeautifulSoup(archive.html)

    reference = current_time[:]
    kept = {}
    for story in markup.find_all("div", class_="recent-story"):
        href = story.a["href"]
        words = story.text.split()
        # The day token loses its last three characters here.
        date_part = [words[-4], words[-3][:-3], words[-2]]
        time_part = [reference[1][0][:], reference[1][1][:], reference[1][2][:]]
        stamp = [date_part, time_part]
        if date_subtracter.main(reference, stamp)[0] == 1:
            kept[href] = stamp
    return kept
def main(current_time):
    """Collect recent Washington State Legislature stories from KUOW.

    The listing shows either an absolute date or a relative stamp such as
    "5 minutes ago" / "3 hours ago".  Relative stamps are converted to a
    date using the reference time.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]`` (string fields).

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    page = Article('http://kuow.org/term/washington-state-legislature')
    page.download()
    soup = BeautifulSoup(page.html)

    now = current_time[:]
    site_root = "http://kuow.org"
    recent = {}
    for teaser in soup.find_all("div", class_="large-12 columns"):
        link = site_root + teaser.find("div", class_="title-info").a['href']
        tokens = teaser.find("span", class_="pub-date").text.replace(":", " ").split()

        if tokens[2] == "ago":
            unit = tokens[1]
            crossed_midnight = None
            if unit in ("minute", "minutes"):
                minutes_today = int(now[1][0]) * 60 + int(now[1][1])
                crossed_midnight = minutes_today - int(tokens[0]) < 0
            elif unit in ("hour", "hours"):
                crossed_midnight = int(now[1][0]) - int(tokens[0]) < 0

            if crossed_midnight is not None:
                if crossed_midnight:
                    # NOTE(review): no month roll-over; on the 1st this
                    # yields day 0, as the original code did.
                    tokens[1] = str(int(now[0][1]) - 1) + ","
                else:
                    # Same calendar day.  The previous code wrote the current
                    # *minute* (current_time[1][1]) into the day slot here.
                    tokens[1] = now[0][1] + ","
            # Other units are left untouched, as before.
            tokens[0] = now[0][0]
            tokens[2] = now[0][2]

        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]; the time
        # of day is borrowed from the reference timestamp.
        stamp = [
            [tokens[0][:], tokens[1][:-1], tokens[2][:]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp
    return recent
def main(current_time):
    """Collect recent politics stories from the Kitsap Sun.

    Links come from the hero headline and the two headline lists on the
    section page; each story is downloaded to read the publication time
    from ``span.asset-metabar-time`` (the part before the first ``|``).

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; only the seconds
            field is copied into the stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    section = Article('http://www.kitsapsun.com/news/politics/')
    section.download()
    soup = BeautifulSoup(section.html)

    now = current_time[:]
    site_root = "http://www.kitsapsun.com"
    recent = {}

    teasers = soup.find_all(
        "h1",
        class_="hero-hed hero-headline-pack-hed hero-text-hed placeholder-hide"
    )
    teasers = teasers + soup.find_all("li", class_="hero-list-item")
    teasers = teasers + soup.find_all("li", class_="hgpm-item")

    for teaser in teasers:
        link = site_root + teaser.a['href']
        story = Article(link)
        story.download()
        story_soup = BeautifulSoup(story.html)

        published = story_soup.find("span", class_="asset-metabar-time").text.split("|")
        tokens = published[0].replace(":", " ").split()

        # 12-hour -> 24-hour.  12 p.m. stays 12 (it used to become 24) and
        # 12 a.m. becomes 0.
        if tokens[3] == "p.m.":
            hour = int(tokens[1])
            if hour != 12:
                tokens[1] = str(hour + 12)
        elif tokens[3] == "a.m." and tokens[1] == "12":
            tokens[1] = "0"

        # Drop the trailing period of an abbreviated month ("Nov.").
        if tokens[5].endswith("."):
            tokens[5] = tokens[5][:-1]

        stamp = [
            [tokens[5], tokens[6][:-1], tokens[7]],
            [tokens[1], tokens[2], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp
    return recent
def main(current_time):
    """Collect recent politics stories from King 5.

    Links are gathered from three headline containers on the section page;
    each story is downloaded to read ``span.author__date``.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; only the seconds
            field is copied into the stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.  On any error a message is
        printed and ``None`` is returned (unchanged from the original).
    """
    try:
        front = Article('http://www.king5.com/politics')
        front.download()
        soup = BeautifulSoup(front.html)

        now = current_time[:]
        site_root = "http://www.king5.com"
        recent = {}

        teasers = soup.find_all("div", "story-snapshot-with-abstract__headline")
        teasers = teasers + soup.find_all("li", class_="headline-list-with-abstract__item")
        teasers = teasers + soup.find_all("div", class_="text-only-headline-list__headline")

        for teaser in teasers:
            link = site_root + teaser.a["href"]
            story = Article(link)
            story.download()
            story_soup = BeautifulSoup(story.html)
            tokens = story_soup.find("span", class_="author__date").text.replace(":", " ").split()

            # 12-hour -> 24-hour.  12 PM stays 12 (it used to become 24)
            # and 12 AM becomes 0.
            if tokens[2] == "PM":
                hour = int(tokens[0])
                if hour != 12:
                    tokens[0] = str(hour + 12)
            elif tokens[2] == "AM" and tokens[0] == "12":
                tokens[0] = "0"

            stamp = [
                [tokens[4], tokens[5][:-1], tokens[6]],
                [tokens[0], tokens[1], now[1][2][:]],
            ]
            if date_subtracter.main(now, stamp)[0] == 1:
                recent[link] = stamp
        return recent
    except Exception:
        # Best-effort scraper: report and carry on.  `except Exception`
        # (rather than a bare except) lets Ctrl-C / SystemExit propagate.
        print("ERROR: An error occured while grabber for articles in \n King 5 \n")
        # NOTE(review): sibling scrapers always return a dict; confirm the
        # caller copes with None here.
        return None
def main(current_time):
    """Collect recent politics stories from Q13 Fox.

    Links come from the ``h2``/``h4`` ``entry-title`` headings on the
    category page; each story is downloaded to read ``span.posted-time``.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; only the seconds
            field is copied into the stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    category = Article('http://q13fox.com/category/news/politics/')
    category.download()
    soup = BeautifulSoup(category.html)

    now = current_time[:]
    recent = {}

    teasers = soup.find_all("h2", class_="entry-title")
    teasers = teasers + soup.find_all("h4", class_="entry-title")

    for teaser in teasers:
        link = teaser.a['href']
        story = Article(link)
        story.download()
        story_soup = BeautifulSoup(story.html)
        tokens = story_soup.find("span", class_="posted-time").text.replace(":", " ").split()

        # 12-hour -> 24-hour.  The hour token is a string, so it must be
        # converted before adding; the old `tokens[1] += 12` raised
        # TypeError for every PM story.  12 PM stays 12, 12 AM becomes 0.
        if tokens[3] == "PM":
            hour = int(tokens[1])
            if hour != 12:
                tokens[1] = str(hour + 12)
        elif tokens[3] == "AM" and tokens[1] == "12":
            tokens[1] = "0"

        stamp = [
            [tokens[4], tokens[5][:-1], tokens[6][:-1]],
            [tokens[1], tokens[2], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp
    return recent
def main(current_time):
    """Collect recent politics stories from Fox 13.

    Links come from every ``h4.entry-title`` heading plus the first
    ``h2.entry-title`` (featured) heading of the category page.  Each story
    is downloaded and its ``span.posted-time`` markup is tokenised to get
    the time of day, month and day.

    Args:
        current_time: Reference timestamp. ``current_time[0][2]`` supplies
            the year and ``current_time[1][2]`` the seconds field.

    Returns:
        dict mapping article URL to its ``[date_fields, time_fields]`` stamp
        for entries where ``date_subtracter.main(...)[0] == 1``.
    """
    category = Article('http://fox13now.com/category/news/politics/')
    category.download()
    soup = BeautifulSoup(category.html)

    now = current_time[:]
    recent = {}

    featured = soup.find_all("h2", class_="entry-title")
    regular = soup.find_all("h4", class_="entry-title")

    # Regular headlines first, then the first featured one (same order as
    # before).  Slicing avoids the IndexError the old `featured[0]` raised
    # when the page has no featured headline.
    headings = list(regular) + list(featured[:1])

    # Read the href attribute directly.  The previous code tokenised the
    # rendered tag and used str.strip('href=">'), which strips a *set* of
    # characters and could eat trailing letters of the URL.
    links = [
        heading.a["href"]
        for heading in headings
        if heading.a is not None and heading.a.get("href")
    ]

    for link in links:
        story = Article(link)
        story.download()
        story_soup = BeautifulSoup(story.html)
        posted = story_soup.find_all("span", class_="posted-time")

        # The whitespace tokenisation of the rendered result list is kept
        # exactly as before; token positions depend on the site's markup.
        tokens = str(posted).encode('utf-8').split()
        tokens[5] = tokens[5][:-1]
        clock = tokens[2].encode('utf-8').replace(":", " ").split()

        # 12-hour -> 24-hour.  12 pm stays 12 (it used to become 24) and
        # 12 am becomes 0.
        if tokens[3] == "pm":
            hour = int(clock[0])
            if hour != 12:
                clock[0] = str(hour + 12)
        elif tokens[3] == "am" and clock[0] == "12":
            clock[0] = "0"

        stamp = [
            [tokens[4], tokens[5], now[0][2][:]],
            [clock[0], clock[1], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp
    return recent
def main(current_time):
    """Collect recent posts from the TVW blog (Capitol Record).

    The date is embedded in each ``h2.entry-title`` text (tokens 3-5).
    Entries whose month token is exactly ``"Nov.1st,"`` are skipped.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; the time-of-day
            fields are copied into every stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    blog = Article('https://www.tvw.org/blog/')
    blog.download()
    markup = BeautifulSoup(blog.html)

    reference = current_time[:]
    site_root = "https://www.tvw.org"
    kept = {}
    for heading in markup.find_all("h2", class_="entry-title"):
        link = site_root + heading.a['href']
        words = heading.text.split()

        # Trim a trailing "." from the month and a trailing ":" from the year.
        if words[3].endswith("."):
            words[3] = words[3][:-1]
        if words[5].endswith(":"):
            words[5] = words[5][:-1]

        if words[3] == "Nov.1st,":
            continue

        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
        stamp = [
            [words[3], words[4][:-3], words[5]],
            [reference[1][0][:], reference[1][1][:], reference[1][2][:]],
        ]
        if date_subtracter.main(reference, stamp)[0] == 1:
            kept[link] = stamp
    return kept
def main(current_time):
    """Collect recent Washington government stories from the Spokesman-Review.

    Each listed article's ``header.mb3`` holds the link and a date line; a
    leading ``UPDATED:`` token is dropped before the date is read.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; the time-of-day
            fields are copied into every stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    section = Article('http://www.spokesman.com/washington-government/')
    section.download()
    markup = BeautifulSoup(section.html)

    reference = current_time[:]
    site_root = "http://www.spokesman.com"
    kept = {}
    for card in markup.find_all("article", class_="mb5 cf cb pb5 bb b--black-10"):
        header = card.find("header", class_="mb3")
        link = site_root + header.a['href']
        words = header.find(
            "p", class_="mt0 f6 tu gray sans-serif ").text.split()

        if words[0] == "UPDATED:":
            words = words[1:]
        if words[3].endswith(","):
            words[3] = words[3][:-1]

        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
        date_part = [words[1][:-1], words[2][:-1], words[3]]
        time_part = [reference[1][0][:], reference[1][1][:], reference[1][2][:]]
        stamp = [date_part, time_part]
        if date_subtracter.main(reference, stamp)[0] == 1:
            kept[link] = stamp
    return kept
def main(current_time):
    """Collect recent politics/government stories from The Olympian.

    Each linked story is downloaded; stories without a
    ``p.published-date`` element are ignored.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; the time-of-day
            fields are copied into every stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    section = Article('http://www.theolympian.com/news/politics-government/')
    section.download()
    section_soup = BeautifulSoup(section.html)

    now = current_time[:]
    recent = {}
    for heading in section_soup.find_all("h4", class_="title "):
        link = heading.a['href']
        story = Article(link)
        story.download()
        story_soup = BeautifulSoup(story.html)

        published = story_soup.find("p", class_="published-date")
        if not published:
            continue

        words = published.text.replace(":", " ").split()
        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
        stamp = [
            [words[0], words[1][:-1], words[2]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp
    return recent
def main(current_time):
    """Collect recent politics/government stories from The News Tribune.

    Story links come from ``article.politics_government`` cards on the local
    news page; each story is downloaded to read ``p.published-date``.  PM
    hours other than 12 are shifted by twelve.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; only the seconds
            field is copied into the stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    local_page = Article('http://www.thenewstribune.com/news/local/')
    local_page.download()
    listing = BeautifulSoup(local_page.html)

    now = current_time[:]
    recent = {}
    for card in listing.find_all("article", class_="politics_government media "):
        link = card.find("h4", class_="title ").a['href']
        story = Article(link)
        story.download()
        story_soup = BeautifulSoup(story.html)
        words = story_soup.find("p", class_="published-date").text.replace(":", " ").split()

        # Date [Month,Day,Year]
        date_part = [words[0], words[1][:-1], words[2]]

        # Time [Hour,Min,Sec] -- convert the hour to 24-hour form.
        is_afternoon = words[5] == "PM" and words[3] != "12"
        if is_afternoon:
            words[3] = str(int(words[3]) + 12)
        time_part = [words[3], words[4], now[1][2][:]]

        stamp = [date_part, time_part]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp
    return recent
def main(current_time):
    """Collect recent stories from the KSL politics listing.

    Headlines (``div.headline``) and short date spans (``span.short``) are
    read from the same page and matched by position.

    Args:
        current_time: Reference timestamp. ``current_time[0][2]`` supplies
            the year and ``current_time[1][2]`` the seconds field.

    Returns:
        dict mapping article URL to its ``[date_fields, time_fields]`` stamp
        for entries where ``date_subtracter.main(...)[0] == 1``.
    """
    html = urllib.urlopen('http://www.ksl.com/?nid=599').read()
    site_root = "https://www.ksl.com/"
    markup = BeautifulSoup(html)
    headlines = markup.find_all("div", class_="headline")
    date_spans = markup.find_all("span", class_="short")

    reference = current_time[:]
    kept = {}
    for position, headline in enumerate(headlines):
        link = site_root + headline.a["href"]
        raw = date_spans[position].text
        parts = raw.replace("-", " ").replace(":", " ").split()

        # Day token loses its last two characters.
        parts[1] = parts[1][:-2]

        # The minute token still carries "am"/"pm" at this point.
        if parts[3][2:] == "pm" and int(parts[2]) != 12:
            parts[2] = str(int(parts[2]) + int("12"))
        parts[3] = parts[3].replace("a", "").replace("m", "").replace("p", "")

        stamp = [
            [parts[0], parts[1], reference[0][2][:]],
            [parts[2], parts[3], reference[1][2][:]],
        ]
        if date_subtracter.main(reference, stamp)[0] == 1:
            kept[link] = stamp
    return kept
def main(current_time):
    """Collect recent "Our Voice" posts from the Washington Education Association.

    Both the featured post block and the list items are scanned; the date
    comes from ``span.icon.date`` with ``/`` separators.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; the time-of-day
            fields are copied into every stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    page = Article('https://www.washingtonea.org/ourvoice/')
    page.download()
    markup = BeautifulSoup(page.html)

    reference = current_time[:]
    kept = {}

    posts = markup.find_all("div", class_="featured-post rtecontent")
    posts = posts + markup.find_all("li", class_="list_item")

    for post in posts:
        link = post.a['href']
        fields = post.find("span", class_="icon date").text.replace("/", " ").split()
        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
        date_part = [fields[0], fields[1], fields[2]]
        time_part = [reference[1][0][:], reference[1][1][:], reference[1][2][:]]
        stamp = [date_part, time_part]
        if date_subtracter.main(reference, stamp)[0] == 1:
            kept[link] = stamp
    return kept
def main(current_time):
    """Collect recent politics stories from the Salt Lake Tribune.

    Each ``div.extras`` block supplies a link (its first anchor, with the
    last 14 characters removed -- presumably a ``#disqus_thread`` fragment,
    confirm against the live page) and the date/time tokens.  At most 24
    linked blocks are examined.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; only the seconds
            field is copied into the stamp.

    Returns:
        dict mapping article URL to its ``[date_fields, time_fields]`` stamp
        for entries where ``date_subtracter.main(...)[0] == 1``.
    """
    index = Article('http://www.sltrib.com/news/politics/')
    index.download()
    soup = BeautifulSoup(index.html)

    now = current_time[:]
    max_articles = 24
    examined = 0
    recent = {}
    for extras in soup.find_all("div", class_="extras"):
        anchor = extras.a
        # Blocks without an anchor or href are skipped instead of raising.
        href = anchor.get("href") if anchor is not None else None
        if href:
            link = href[:-14]
            examined += 1
            tokens = extras.text.replace(":", " ").split()

            # 12-hour -> 24-hour.  12 pm stays 12 (it used to become 24)
            # and 12 am becomes 0.
            if tokens[6] == "pm":
                hour = int(tokens[4])
                if hour != 12:
                    tokens[4] = str(hour + 12)
            elif tokens[6] == "am" and tokens[4] == "12":
                tokens[4] = "0"

            stamp = [
                [tokens[1], tokens[2], tokens[3]],
                [tokens[4], tokens[5], now[1][2][:]],
            ]
            if date_subtracter.main(now, stamp)[0] == 1:
                recent[link] = stamp
        if examined == max_articles:
            break
    return recent
def main(current_time):
    """Collect recent news items from the Washington State Democrats site.

    Titles (``h2.node-title``) and dates (``p.submitted``) are read
    separately and matched by position.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; the time-of-day
            fields are copied into every stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    news = Article('https://www.wa-democrats.org/news')
    news.download()
    markup = BeautifulSoup(news.html)

    reference = current_time[:]
    site_root = "https://www.wa-democrats.org"
    kept = {}

    titles = markup.find_all("h2", class_="node-title")
    submitted = markup.find_all("p", class_="submitted")
    links = [site_root + title.a['href'] for title in titles]

    for position, date_tag in enumerate(submitted):
        words = date_tag.text.split()
        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
        stamp = [
            [words[0], words[1][:-1], words[2]],
            [reference[1][0][:], reference[1][1][:], reference[1][2][:]],
        ]
        if date_subtracter.main(reference, stamp)[0] == 1:
            kept[links[position]] = stamp
    return kept
def main(current_time):
    """Collect recent posts from the Utah Senate Site blog.

    Post titles (``h2.blog-shortcode-post-title``) are matched by position
    with ``span.updated`` stamps, starting at the *second* stamp.  The
    stamps are ISO-like (``YYYY-MM-DDTHH:MM:SS+offset``), so the full date
    and time come from the page.

    Args:
        current_time: Reference timestamp handed to ``date_subtracter.main``
            for comparison.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    blog = Article('http://www.senatesite.com/2017/blog/')
    blog.download()
    soup = BeautifulSoup(blog.html)

    now = current_time[:]
    site_root = "http://www.senatesite.com"
    recent = {}

    titles = soup.find_all("h2", class_="blog-shortcode-post-title entry-title")
    updated = soup.find_all("span", class_="updated")

    # NOTE(review): stamps are read from index 1 onward, as in the original;
    # presumably the first span.updated is not tied to a post -- confirm.
    for offset, title in enumerate(titles, 1):
        link = site_root + title.a["href"]
        pieces = updated[offset].text.replace("T", " ").replace("+", " ").split()
        ymd = pieces[0].replace("-", " ").split()
        hms = pieces[1].replace(":", " ").split()

        stamp = [
            [ymd[1], ymd[2], ymd[0]],
            [hms[0], hms[1], hms[2]],
        ]
        # Every sibling scraper keeps an article when the first slot of the
        # result is 1; this one tested for 0, which looks like a leftover
        # debugging toggle and is aligned with the others here.
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp
    return recent
def main(current_time):
    """Collect recent "Today at Utah Policy" articles.

    Each ``td.list-title`` cell of the index links to an article; the
    article page is downloaded and every ``dd.create`` element on it is
    tokenised to obtain the creation date.

    Args:
        current_time: Reference timestamp; ``current_time[1][0..2]`` is
            copied into the time-of-day part of every stamp.

    Returns:
        dict mapping article URL to its ``[date_fields, time_fields]`` stamp
        for entries where ``date_subtracter.main(...)[0] == 1``.
    """
    index = Article(
        'http://utahpolicy.com/index.php/features/today-at-utah-policy')
    index.download()
    index_soup = BeautifulSoup(index.html)

    now = current_time[:]
    site_root = "https://utahpolicy.com"
    recent = {}
    for cell in index_soup.find_all("td", class_="list-title"):
        anchor = cell.a
        if anchor is None or not anchor.get("href"):
            continue
        # Read the href attribute directly.  The previous code used
        # str.strip('href=">') on a token of the rendered markup; strip()
        # removes any of those *characters*, so slugs ending in e, f, h or r
        # were silently truncated.
        link = site_root + anchor["href"]

        story = Article(link)
        story.download()
        story_soup = BeautifulSoup(story.html)
        for created in story_soup.find_all("dd", class_="create"):
            # Token positions depend on the rendered <dd> markup; kept as-is.
            tokens = created.encode('utf-8').split()
            stamp = [
                [tokens[9], tokens[8], tokens[10]],
                [now[1][0][:], now[1][1][:], now[1][2][:]],
            ]
            if date_subtracter.main(now, stamp)[0] == 1:
                recent[link] = stamp
    return recent
def main(current_time):
    """Collect recent press releases from the Washington State Republican Party.

    Titles (``h2.green-text``) and dates (``div.postmonth``) are read
    separately and matched by position.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; the time-of-day
            fields are copied into every stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    releases = Article('https://wsrp.org/media/press-releases/')
    releases.download()
    markup = BeautifulSoup(releases.html)

    reference = current_time[:]
    kept = {}

    titles = markup.find_all("h2", class_="green-text")
    month_boxes = markup.find_all("div", class_="postmonth")
    links = [title.a['href'] for title in titles]

    for position, box in enumerate(month_boxes):
        words = box.text.split()
        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
        date_part = [words[0][:-1], words[1], words[2]]
        time_part = [reference[1][0][:], reference[1][1][:], reference[1][2][:]]
        stamp = [date_part, time_part]
        if date_subtracter.main(reference, stamp)[0] == 1:
            kept[links[position]] = stamp
    return kept
def main(current_time):
    """Collect recent government/politics stories from Northwest News Network.

    Each ``div.title-info`` block supplies the relative link and a
    ``span.pub-date`` with the date.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; the time-of-day
            fields are copied into every stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    category = Article(
        'http://nwnewsnetwork.org/category/government-and-politics')
    category.download()
    markup = BeautifulSoup(category.html)

    reference = current_time[:]
    site_root = "http://nwnewsnetwork.org"
    kept = {}
    for info in markup.find_all("div", class_="title-info"):
        link = site_root + info.a['href']
        words = info.find("span", class_="pub-date").text.split()
        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
        stamp = [
            [words[0], words[1][:-1], words[2]],
            [reference[1][0][:], reference[1][1][:], reference[1][2][:]],
        ]
        verdict = date_subtracter.main(reference, stamp)
        if verdict[0] == 1:
            kept[link] = stamp
    return kept
def main(current_time):
    """Collect recent local politics stories from Seattle PI.

    Each story wrapper supplies the link (inside ``div.story-content``) and
    a ``span.story-date``.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; the time-of-day
            fields are copied into every stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    section = Article('http://www.seattlepi.com/local/politics/')
    section.download()
    markup = BeautifulSoup(section.html)

    reference = current_time[:]
    kept = {}
    for wrapper in markup.find_all("div", class_="story-content-wrapper equal-group"):
        link = wrapper.find("div", class_="story-content").a["href"]
        words = wrapper.find("span", class_="story-date").text.replace(":", " ").split()
        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
        date_part = [words[0], words[1][:-1], words[2]]
        time_part = [reference[1][0][:], reference[1][1][:], reference[1][2][:]]
        stamp = [date_part, time_part]
        if date_subtracter.main(reference, stamp)[0] == 1:
            kept[link] = stamp
    return kept
def main(current_time):
    """Collect links from the 2017 flagged-bills status page.

    URLs are taken from the fourth whitespace token of each rendered
    ``td.column-7`` cell; ``time.published`` tags are tokenised for the
    date and matched to the URLs by position.

    Args:
        current_time: Reference timestamp; ``current_time[1][0..2]`` is
            copied into the time-of-day part of every stamp.

    Returns:
        dict mapping URL to its ``[date_fields, time_fields]`` stamp for
        entries where ``date_subtracter.main(...)[0] == 1``.
    """
    page = Article(
        'http://utahpoliticalcapitol.com/2017-session-status-of-flagged-bills/'
    )
    page.download()
    link_soup = BeautifulSoup(page.html)
    time_soup = BeautifulSoup(page.html)

    reference = current_time[:]
    kept = {}
    lead = "h"

    cells = link_soup.find_all("td", class_="column-7")
    published = time_soup.find_all("time", class_="published")

    # strip() removes the leading "h" of "http" along with the attribute
    # syntax, so it is put back via `lead`.
    links = [
        lead + str(cell).split()[3].encode('utf-8').strip('href=">')
        for cell in cells
    ]

    for position, time_tag in enumerate(published):
        rendered = time_tag.encode('utf-8').replace(">", " ")
        tokens = rendered.encode('utf-8').replace("<", " ").split()
        stamp = [
            [tokens[3], tokens[4][:-1], tokens[5]],
            [reference[1][0][:], reference[1][1][:], reference[1][2][:]],
        ]
        if date_subtracter.main(reference, stamp)[0] == 1:
            kept[links[position]] = stamp
    return kept
def main(current_time):
    """Collect recent politics stories from Crosscut.

    Each ``div.inner`` block supplies a link and, as the first three words
    of its text, the date.  Blocks linking to the bare site root are
    skipped.

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; the time-of-day
            fields are copied into every stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    category = Article('http://crosscut.com/category/politics/')
    category.download()
    markup = BeautifulSoup(category.html)

    reference = current_time[:]
    kept = {}
    for block in markup.find_all("div", class_="inner"):
        link = block.a['href']
        if link == "http://crosscut.com":
            continue
        words = block.text.split()[:3]
        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
        stamp = [
            [words[0], words[1][:-1], words[2]],
            [reference[1][0][:], reference[1][1][:], reference[1][2][:]],
        ]
        if date_subtracter.main(reference, stamp)[0] == 1:
            kept[link] = stamp
    return kept
def main(current_time):
    """Collect recent news posts from the Utah House Democrats site.

    Titles (``h1.entry-title.p-name``) give the links; the matching
    ``header.entry-header`` (same position) gives the date as words 2-4 of
    its text.

    Args:
        current_time: Reference timestamp; ``current_time[1][0..2]`` is
            copied into the time-of-day part of every stamp.

    Returns:
        dict mapping article URL to its ``[date_fields, time_fields]`` stamp
        for entries where ``date_subtracter.main(...)[0] == 1``.
    """
    news = Article('http://www.utahhousedemocrats.org/news/')
    news.download()
    markup = BeautifulSoup(news.html)

    reference = current_time[:]
    site_root = "http://www.utahhousedemocrats.org"
    kept = {}

    entry_headers = markup.find_all("header", class_="entry-header")
    titles = markup.find_all("h1", class_="entry-title p-name")

    for position, title in enumerate(titles):
        link = site_root + title.a["href"]
        words = entry_headers[position].text.split()[2:5]
        date_part = [words[0], words[1][:-1], words[2]]
        time_part = [reference[1][0][:], reference[1][1][:], reference[1][2][:]]
        stamp = [date_part, time_part]
        if date_subtracter.main(reference, stamp)[0] == 1:
            kept[link] = stamp
    return kept
def main(current_time):
    """Collect recent Washington stories from Public News Service.

    Each ``div.group_info`` block supplies the relative link
    (``div.title``) and the run date (``div.rundate``, words 1-3).

    Args:
        current_time: Reference timestamp in the form
            ``[[month, day, year], [hour, min, sec]]``; the time-of-day
            fields are copied into every stamp.

    Returns:
        dict mapping article URL to its ``[[month, day, year],
        [hour, min, sec]]`` stamp for entries where
        ``date_subtracter.main(...)[0] == 1``.
    """
    state_page = Article('http://www.publicnewsservice.org/state-washington/WA')
    state_page.download()
    markup = BeautifulSoup(state_page.html)

    reference = current_time[:]
    site_root = "http://www.publicnewsservice.org"
    kept = {}
    for group in markup.find_all("div", class_="group_info"):
        link = site_root + group.find("div", class_="title").a['href']
        words = group.find("div", class_="rundate").text.split()
        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
        stamp = [
            [words[1], words[2][:-1], words[3]],
            [reference[1][0][:], reference[1][1][:], reference[1][2][:]],
        ]
        verdict = date_subtracter.main(reference, stamp)
        if verdict[0] == 1:
            kept[link] = stamp
    return kept
def main(current_time):
    """Collect recent blog posts from the Utah Senate Democrats site.

    Titles (``h4.post-title``) give the links; the ``div.post-footer`` at
    the same position gives the date, whose first two words are swapped
    when the stamp is built.

    Args:
        current_time: Reference timestamp; ``current_time[1][0..2]`` is
            copied into the time-of-day part of every stamp.

    Returns:
        dict mapping article URL to its ``[date_fields, time_fields]`` stamp
        for entries where ``date_subtracter.main(...)[0] == 1``.
    """
    blog = Article('http://www.utahsenatedemocrats.org/home/main/blog')
    blog.download()
    markup = BeautifulSoup(blog.html)

    reference = current_time[:]
    kept = {}

    titles = markup.find_all("h4", class_="post-title")
    footers = markup.find_all("div", class_="post-footer")

    for position, title in enumerate(titles):
        link = title.a["href"]
        words = footers[position].text.split()
        # Footer order is word 0 then word 1; the stamp stores them swapped.
        date_part = [words[1], words[0], words[2]]
        time_part = [reference[1][0][:], reference[1][1][:], reference[1][2][:]]
        stamp = [date_part, time_part]
        if date_subtracter.main(reference, stamp)[0] == 1:
            kept[link] = stamp
    return kept