def import_covid_csv():
    # assumed context (not shown here): import csv; from urllib.request import urlretrieve as retrieve;
    # from flask import render_template
    url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us.csv'
    retrieve(url, 'covid_file/us.csv')
    with open('covid_file/us.csv', 'r') as covidfile:
        csv_read = csv.DictReader(covidfile)
        covid_list = []
        # Collect the last 7 days of COVID data
        for j in list(reversed(list(csv_read)))[0:7]:
            date = j['date']
            cases = j['cases']
            deaths = j['deaths']
            covid_list.append({'date': date, 'cases': cases, 'deaths': deaths})

    # Compute the COVID-19 death rate for every row of the file
    def compute_csv():
        data = []
        with open('covid_file/us.csv', 'r') as covidfile:
            csv_read = csv.DictReader(covidfile)
            for i in list(reversed(list(csv_read))):
                cases = i['cases']
                deaths = i['deaths']
                rate = int(deaths) / int(cases)
                data.append({'cases': cases, 'deaths': deaths, 'rate': rate})
        return data

    return render_template('covid.html', l=covid_list, k=compute_csv())
def protein_properties():
    # assumed context (not shown here): import os; import pandas as pd; from tempfile import mkdtemp;
    # from urllib.request import urlretrieve as retrieve; DATADIR defined elsewhere
    tmpdir = mkdtemp()
    retrieve('http://pax-db.org/data/abundances/4932-WHOLE_ORGANISM-integrated.txt',
             os.path.join(tmpdir, 'yeast-paxdb.txt'))
    retrieve(('http://www.uniprot.org/uniprot/?query='
              'taxonomy%3A559292%20AND%20reviewed%3Ayes&columns=id%2Cmass%2Cgenes(OLN)&format=tab'),
             os.path.join(tmpdir, 'swissprot.txt'))

    # PaxDb abundances: strip the "4932." taxon prefix and rescale ppm values to fractions
    pax = pd.read_csv(os.path.join(tmpdir, 'yeast-paxdb.txt'),
                      comment='#', sep='\t', names=['paxid', 'locus', 'abundance'])
    pax = pax[['locus', 'abundance']]
    pax['locus'] = pax['locus'].apply(lambda x: x.replace('4932.', ''))
    pax['abundance'] = pax['abundance'] / 1e6

    # SwissProt molecular weights, keyed by ordered locus name
    mw = pd.read_csv(os.path.join(tmpdir, 'swissprot.txt'), sep='\t', thousands=',')
    mw.columns = ['uniprot', 'mw', 'locus']

    proteins = pd.merge(mw, pax, on='locus', how='outer')
    proteins.index = proteins.uniprot
    (proteins[proteins['uniprot'].notnull()][['mw', 'abundance']]
     .fillna(proteins.mean())
     .to_csv(os.path.join(DATADIR, 'proteins.txt')))
def get_historic_data(ticker, reused=False):
    # assumed context (not shown here): import requests; from urllib.request import urlretrieve as retrieve
    ticker = ticker.replace('.', '-')
    ticker = ticker.upper()
    url = (f'https://query1.finance.yahoo.com/v7/finance/download/{ticker}'
           f'?period1=0&period2=1622332800&interval=1d&events=history&includeAdjustedClose=true')
    # Check whether the download URL exists before retrieving it
    request = requests.get(url)
    if request.status_code == 200:
        if reused:
            # The retried '-UN' ticker is saved under its original '-U' name
            ticker = ticker[:-1]
        retrieve(url, f'stock_data/{ticker}.csv')
    elif request.status_code == 404:
        if reused:
            return
        elif len(ticker) > 2 and ticker[-2:] == '-U':
            # Yahoo lists some unit tickers with a '-UN' suffix, so retry once with that form
            get_historic_data(ticker.replace("-U", "-UN"), reused=True)
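# Hypothetical usage sketch (assumes the 'stock_data/' directory already exists):
# get_historic_data('brk.b')   # saves the daily history to stock_data/BRK-B.csv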
def WHO_data(countries):
    # assumed context (not shown here): import pandas as pd; from urllib.request import urlretrieve as retrieve
    # ISO codes of the countries that can be requested
    countries_possible2 = {
        'AFG': 'Afghanistan',
        'BTN': 'Bhutan',
        'BGD': 'Bangladesh',
        'CHN': 'China',
        'NPL': 'Nepal',
        'IND': 'India',
        'PAK': 'Pakistan',
        'LKA': 'Sri Lanka',
        'MMR': 'Myanmar',
        'JPN': 'Japan'
    }
    # Retrieve the maternal-mortality data from WHO, one country at a time
    counter = 0
    for a in countries:
        URL = ('http://apps.who.int/gho/athena/api/GHO/MORT_MATERNALNUM.csv'
               '?filter=COUNTRY:' + ''.join(a))
        retrieve(URL, 'test.csv')
        data = pd.read_csv('test.csv')
        if counter == 0:
            data1 = data[["YEAR", "COUNTRY", "Numeric"]]
            data2 = data1.pivot(index='YEAR', columns='COUNTRY', values='Numeric')
            data2.rename(columns={a: countries_possible2[a]}, inplace=True)
            counter += 1
        else:
            data1 = data[["YEAR", "COUNTRY", "Numeric"]]
            data3 = data1.pivot(index='YEAR', columns='COUNTRY', values='Numeric')
            data4 = pd.Series(data3[a])
            data2[a] = data4
            data2.rename(columns={a: countries_possible2[a]}, inplace=True)
    # Return the data from 2008 onwards
    data_final = data2.loc[2008:]
    return data_final
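# Hypothetical usage sketch (the ISO codes must be keys of countries_possible2 above;
# each call overwrites test.csv with data fetched from the WHO API):
# maternal_deaths = WHO_data(['IND', 'BGD', 'NPL'])
# print(maternal_deaths.head())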
def get_csv():
    # assumed context (not shown here): import os; from flask import request, flash, redirect, url_for, render_template;
    # from werkzeug.utils import secure_filename; from urllib.request import urlretrieve as retrieve;
    # plus the app object and allowed_file() helper defined elsewhere
    url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us.csv'
    retrieve(url, 'covid_file/us.csv')
    if request.method == 'POST':
        # Check that the POST request actually has a file part
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        # If the user does not select a file, the browser submits an empty part without a filename
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            flash('Successful')
            return redirect(url_for('upload_file', filename=filename))
    return render_template('data.html', keys=request.args.get('filename'))
def datascrapeddwnld(request):
    # assumed context (not shown here): from django.shortcuts import render;
    # from urllib.request import urlretrieve as retrieve
    try:
        # ccl_data = request.session['ccl_data']
        # We will download the csv file
        # ccl_data = downloadScrapData(ccl_data)
        # NOTE: this is a local Windows path rather than a web URL, so retrieve() may not behave as intended
        url = 'C:/Users/jyoti/Desktop/exponentialxApp9/exponentialxApp/Downloads'
    except Exception as e:
        return render(request, 'createcontactlist.html', {'dwnldscrapdata': "Error"})
    else:
        return render(request, 'createcontactlist.html', {
            'dwnldscrapdata': "CSV Downloaded",
            'ccl_data': retrieve(url, 'contactlist.csv')
        })
import csv
from urllib.request import urlretrieve as retrieve
import matplotlib.pyplot as plt
from datetime import datetime

# Download data
url = 'https://raw.githubusercontent.com/tomwhite/covid-19-uk-data/master/data/covid-19-totals-uk.csv'
retrieve(url, 'covid_data.csv')

# Open data
with open('covid_data.csv', 'r') as read_data_file:
    csv_reader = csv.reader(read_data_file)
    header_row = next(csv_reader)

    # Get deaths from data
    dates, deaths = [], []
    yday_death = 0
    for row in csv_reader:
        current_date = datetime.strptime(row[0], '%Y-%m-%d')
        death = int(row[3]) - yday_death
        yday_death = int(row[3])
        dates.append(current_date)
        deaths.append(death)

# Plot the data
plt.style.use('seaborn')
fig, ax = plt.subplots()
ax.plot(dates, deaths, c='red')

# Format the graph
ax.set_title("Covid-19 UK Deaths")
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from urllib.request import urlretrieve as retrieve

url = "https://covid.ourworldindata.org/data/ecdc/full_data.csv"
retrieve(url, "full_data.csv")
dataset = pd.DataFrame(pd.read_csv('full_data.csv'))

# Select one country's rows
pais = "Argentina"
dataPais = dataset.loc[dataset['location'] == pais]

# Find the index of the first day with at least one confirmed case
casos = list(dataPais.total_cases)
inicio = None
for i in range(len(casos)):
    if casos[i] != 0:
        inicio = i
        break

# Keep only the data from that day onwards
casos = casos[inicio:]
nuevosCasos = list(dataPais.new_cases)[inicio:]
muertes = list(dataPais.total_deaths)[inicio:]
nuevasMuertes = list(dataPais.new_deaths)[inicio:]

# Build "DD/MM" labels from the ISO dates
fecha = []
for i in list(dataPais.date):
    fecha.append(str(i[8:10]) + "/" + str(i[5:7]))
fecha = fecha[inicio:]

plt.subplot(2, 2, 1)
def download_csv(request):
    # assumed context (not shown here): import os; import pandas as pd;
    # from collections import defaultdict; from urllib.request import urlretrieve as retrieve;
    # from django.shortcuts import render; plus the convert_csv_to_json() helper
    confirmed_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_' \
                    'series/time_series_covid19_confirmed_global.csv'
    deaths_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_' \
                 'series/time_series_covid19_deaths_global.csv'
    recovered_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_' \
                    'series/time_series_covid19_recovered_global.csv'
    confirmed_filename = 'confirmed_csv_data.csv'
    deaths_filename = 'deaths_csv_data.csv'
    recovered_filename = 'recovered_csv_data.csv'
    myPath = './Data/CSV'
    confirmed_fullpath = os.path.join(myPath, confirmed_filename)
    deaths_fullpath = os.path.join(myPath, deaths_filename)
    recovered_fullpath = os.path.join(myPath, recovered_filename)
    retrieve(confirmed_url, confirmed_fullpath)
    retrieve(deaths_url, deaths_fullpath)
    retrieve(recovered_url, recovered_fullpath)

    # Accumulate cumulative counts keyed as data[country][date]
    confirmed_data = pd.read_csv(confirmed_fullpath)
    confirmed_data = confirmed_data.drop(['Lat', 'Long'], axis=1)
    data = defaultdict(dict)
    for i in range(len(confirmed_data)):
        for j in range(2, len(confirmed_data.columns)):
            if confirmed_data.iloc[i][1] in data:
                if confirmed_data.columns[j] in data[confirmed_data.iloc[i][1]]:
                    data[confirmed_data.iloc[i][1]][confirmed_data.columns[j]]['Confirmed'] = \
                        confirmed_data.iloc[i][j] + \
                        data[confirmed_data.iloc[i][1]][confirmed_data.columns[j]]['Confirmed']
                else:
                    thisDict = {
                        'Confirmed': confirmed_data.iloc[i][j],
                        'Deaths': 0,
                        'Recovered': 0
                    }
                    data[confirmed_data.iloc[i][1]][confirmed_data.columns[j]] = thisDict
            else:
                thisDict = {
                    'Confirmed': confirmed_data.iloc[i][j],
                    'Deaths': 0,
                    'Recovered': 0
                }
                data[confirmed_data.iloc[i][1]][confirmed_data.columns[j]] = thisDict

    recovered_data = pd.read_csv(recovered_fullpath)
    recovered_data = recovered_data.drop(['Lat', 'Long'], axis=1)
    for i in range(len(recovered_data)):
        for j in range(2, len(recovered_data.columns)):
            if recovered_data.iloc[i][1] in data:
                if recovered_data.columns[j] in data[recovered_data.iloc[i][1]]:
                    data[recovered_data.iloc[i][1]][recovered_data.columns[j]]['Recovered'] = \
                        recovered_data.iloc[i][j] + \
                        data[recovered_data.iloc[i][1]][recovered_data.columns[j]]['Recovered']
                else:
                    thisDict = {
                        'Confirmed': 0,
                        'Deaths': 0,
                        'Recovered': recovered_data.iloc[i][j],
                    }
                    data[recovered_data.iloc[i][1]][recovered_data.columns[j]] = thisDict
            else:
                thisDict = {
                    'Confirmed': 0,
                    'Deaths': 0,
                    'Recovered': recovered_data.iloc[i][j],
                }
                data[recovered_data.iloc[i][1]][recovered_data.columns[j]] = thisDict

    deaths_data = pd.read_csv(deaths_fullpath)
    deaths_data = deaths_data.drop(['Lat', 'Long'], axis=1)
    for i in range(len(deaths_data)):
        for j in range(2, len(deaths_data.columns)):
            if deaths_data.iloc[i][1] in data:
                if deaths_data.columns[j] in data[deaths_data.iloc[i][1]]:
                    data[deaths_data.iloc[i][1]][deaths_data.columns[j]]['Deaths'] = \
                        deaths_data.iloc[i][j] + \
                        data[deaths_data.iloc[i][1]][deaths_data.columns[j]]['Deaths']
                else:
                    thisDict = {
                        'Confirmed': 0,
                        'Deaths': deaths_data.iloc[i][j],
                        'Recovered': 0
                    }
                    data[deaths_data.iloc[i][1]][deaths_data.columns[j]] = thisDict
            else:
                thisDict = {
                    'Confirmed': 0,
                    'Deaths': deaths_data.iloc[i][j],
                    'Recovered': 0
                }
                data[deaths_data.iloc[i][1]][deaths_data.columns[j]] = thisDict

    # Re-shape into per-country lists of daily records
    final_countrywise_data = defaultdict(list)
    for country, info in data.items():
        for i, j in info.items():
            print(i)
            thisDict = {
                'confirmed': int(j['Confirmed']),
                'recovered': int(j['Recovered']),
                'deaths': int(j['Deaths']),
                'date': i,
            }
            final_countrywise_data[country].append(thisDict)

    # Sum all countries into cumulative global totals per date
    tmp_global_data = defaultdict(dict)
    for country, info in data.items():
        for date, j in info.items():
            if date in tmp_global_data:
                tmp_global_data[date]['Confirmed'] = tmp_global_data[date]['Confirmed'] + j['Confirmed']
                tmp_global_data[date]['Deaths'] = tmp_global_data[date]['Deaths'] + j['Deaths']
                tmp_global_data[date]['Recovered'] = tmp_global_data[date]['Recovered'] + j['Recovered']
            else:
                thisDict = {
                    'Confirmed': j['Confirmed'],
                    'Deaths': j['Deaths'],
                    'Recovered': j['Recovered'],
                }
                tmp_global_data[date] = thisDict

    # Convert the cumulative global totals into day-over-day changes
    final_global_data = []
    lastDayConfirmed = 0
    lastDayDeaths = 0
    lastDayRecovered = 0
    for date, j in tmp_global_data.items():
        thisDict = {
            'confirmed': int(j['Confirmed'] - lastDayConfirmed),
            'deaths': int(j['Deaths'] - lastDayDeaths),
            'recovered': int(j['Recovered'] - lastDayRecovered),
            'date': date
        }
        lastDayConfirmed = int(j['Confirmed'])
        lastDayDeaths = int(j['Deaths'])
        lastDayRecovered = int(j['Recovered'])
        final_global_data.append(thisDict)

    convert_csv_to_json(request, final_countrywise_data, "db_country_wise")
    convert_csv_to_json(request, final_global_data, "db_global")
    response = {}
    return render(request, 'csv_downloader/end.html', response)
from urllib.request import urlretrieve as retrieve
import matplotlib.pyplot as plt
import csv

# RETRIEVE THE DOCUMENTS
url_death_counts = 'https://data.cdc.gov/api/views/9bhg-hcku/rows.csv?accessType=DOWNLOAD'
url_lockdown_policies = 'https://healthdata.gov/sites/default/files/state_policy_updates_20210114_1920.csv'
retrieve(url_death_counts, 'age_sex_state.csv')
retrieve(url_lockdown_policies, 'lockdowns.csv')

# rt = "read" and "text" mode
death_counts = open('age_sex_state.csv', 'rt')

x = 0
dths_by_age = list()
des_grps = [
    "0-17 years", "18-29 years", "30-49 years", "50-64 years",
    "65-74 years", "75-84 years", "85 years and over"
]
for row in death_counts:
    split_row = row.split(',')
    if split_row[5] in des_grps:
        print(f"Row: {row}")
        print(f"Split: {split_row}")
        dths_by_age.append(int(split_row[6]))
        x += 1
    if len(dths_by_age) >= len(des_grps):
        break

print(dths_by_age)
"VBZOni0vs5VJ&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_" "col_id=column-2&p_p_col_count=4&p_p_col_pos=2&_documents_WAR_publicationsportlet_" "INSTANCE_VBZOni0vs5VJ" "_javax.faces.resource=document&_documents_WAR_publicationsportlet_INSTANCE_VBZOni0vs5VJ_" "ln=downloadResources&_" "documents_WAR_publicationsportlet_INSTANCE_VBZOni0vs5VJ_documentID="+str(i)+ "&_documents_WAR_publicationsportlet_INSTANCE_VBZOni0vs5VJ_locale=el") for i in range(2011, 2016): urls_to_paths.append("Test"+str(i)+".xls") year_list.append(i) # Loop for transport urls from url_list to excel files who is in urls_to_paths with help of function retrieve. j = 0 for i in url_list: retrieve(i, urls_to_paths[j]) j += 1 Quarter_list = [] # Store population of tourists per three months. data_population = [] # Store population of tourists in period 2011-2015 Edit_list = [] data_air = [] data_train = [] data_ship = [] data_car = [] country_list = [] total_list1 = [] total_list2 = [] total_list3 = [] total_list4 = [] total_list5 = []
## This python script downloads a txt file with the nuclear data and strips it down to useful CSV files
from urllib.request import urlretrieve as retrieve
import re

# Location of the data. Downloading directly from the IAEA website gives a 403 error, so a mirror is used instead.
url = 'http://amdc.in2p3.fr/nubase/nubase2016.txt'
retrieve(url, 'Nudat.txt')  # Retrieve the file and save it in the root directory
## Needs an error-handling facility so that the url can be entered manually if needed, or a local file selected.

## Strip out the first line (the neutron), which messes up the column formatting.
## Not very elegant; this could be combined with the functions that strip and clean the data.
with open('Nudat.txt', 'r') as fin:
    data = fin.read().splitlines(True)
with open('Nudat2.txt', 'w') as fout:
    fout.writelines(data[1:])

## Cleans up the data to remove all unwanted columns.
## Writes a file NudatClean.txt with the cleaned-up tables.
fin = open("Nudat2.txt", "rt")
fout = open("NudatClean.txt", "wt")
for line in fin:
    if len(line) > 106:
        fout.write(line[:18] + line[61:80] + line[109:])
from my_function import *
from urllib.request import urlretrieve as retrieve

page_url = f"https://www.bnm.gov.my/-/monthly-highlights-and-statistics-in-{month}-2021"
url = "https://www.bnm.gov.my/documents/20124/4196742/1.1.xls"
# retrieve(url, "test.xls")

bnm_data = {
    "Reserve Money": "https://www.bnm.gov.my/documents/20124/4196742/1.1.xls",
    "Currency in Circulation by Denomination": "https://www.bnm.gov.my/documents/20124/4196742/1.2.xls",
    "Monetary Aggregates M1, M2 and M3": "https://www.bnm.gov.my/documents/20124/4495210/1.3.xls"
    # "Merchant/Investment Banks: Statement of Assets": "https://www.bnm.gov.my/documents/20124/4196742/1.7.4.xls",
}

# Download each table to an .xls file named after its series
for file_name, url in bnm_data.items():
    filename = file_name + ".xls"
    print(filename)
    retrieve(url, filename)
from urllib.request import urlretrieve as retrieve
import matplotlib.pyplot as plt
import csv

# RETRIEVE THE DOCUMENT
url_death_counts = 'https://data.cdc.gov/api/views/3apk-4u4f/rows.csv?accessType=DOWNLOAD'
retrieve(url_death_counts, 'total_age_deaths.csv')

# rt = "read" and "text" mode
death_counts = open('total_age_deaths.csv', 'rt')

x = 0
dths_by_age = list()
des_grps = [
    "0-17 years", "18-29 years", "30-49 years", "50-64 years",
    "65-74 years", "75-84 years", "85 years and over"
]

# Sum male and female deaths for each age group
first = True
age = 0
for row in death_counts:
    if first:
        first = False
        continue
    split_row = row.split(',')
    if split_row[3] == 'Male':
        dths_by_age.append(int(split_row[6]))
    else:
        dths_by_age[age] += int(split_row[6])
        age += 1
    print(f"Row: {split_row}")

print(dths_by_age)
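# A hedged alternative to the str.split(',') parsing above: csv.reader copes with quoted
# fields that themselves contain commas. The column indices are assumed to match the file
# layout used by the snippet above.
import csv

with open('total_age_deaths.csv', 'rt', newline='') as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    for split_row in reader:
        pass  # same per-row aggregation as above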
def downloadData(url):
    # assumed context (not shown here): from urllib.request import urlretrieve as retrieve
    retrieve(url, 'birthday_dict.csv')
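# Hypothetical usage sketch (the URL is a placeholder, not from the original):
# downloadData('https://example.com/birthdays.csv')   # writes birthday_dict.csv to the working directory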
def handle(self, *args, **options):
    # assumed context (not shown here): from json import loads; from time import sleep;
    # from urllib.parse import quote; from urllib.request import urlopen, urlretrieve as retrieve;
    # from urllib.error import HTTPError; Django's static() helper and the Publication model
    try:
        if options["all"]:
            # only process books that don't have an image yet
            books = Publication.objects.all().filter(img=None).all()
            iteration = 0
            _i = 0
            print("Number of books to process:", len(books))
            print("Processing", _i, "of", len(books), end="\r")

            def do(iteration=iteration, _i=_i):
                for i in books[iteration:]:
                    try:
                        # skip books that already have an image file on disk
                        open(static("books/image_" + i.slug + ".jpg"))
                        i.img = "https://everylibrary.co/static/books/image_" + i.slug + ".jpg"
                        i.save()
                        continue
                    except FileNotFoundError:
                        pass
                    try:
                        data = urlopen(
                            "https://www.googleapis.com/books/v1/volumes?q=title:" + quote(i.title),
                            data=None, timeout=5)
                        _i += 1
                        print("Processing", _i, "of", len(books), end="\r")
                        b_data = loads(data.read())["items"][0]["volumeInfo"]
                        if "imageLinks" not in b_data:
                            # no image was available for this book
                            continue
                        retrieve(b_data["imageLinks"]["thumbnail"],
                                 "../../../staticfiles/books/image_%s.jpg" % i.slug)
                        i.img = "https://everylibrary.co/static/books/image_" + i.slug + ".jpg"
                        i.save()
                    except HTTPError as e:
                        # we are not permitted to use the API
                        if "403" in str(e) or "503" in str(e):
                            # wait for a while, because the Google Books API
                            # doesn't like being spammed
                            sleep(5)
                            # skip most of the already-processed books on the retry
                            iteration += 1
                            print("Google books refusing to cooperate, retrying..", end="\n")
                            do()
                        else:
                            print("Error:", e)

            do()
            print("\nFinished!")
        elif options["title"]:
            print(options["title"])
            p = Publication.objects.get(slug=options["title"])
            f_to_save = "../../../staticfiles/books/image_%s.jpg" % p.slug
            try:
                data = urlopen(
                    "https://www.googleapis.com/books/v1/volumes?q=title:" + quote(p.title),
                    data=None, timeout=5)
                p_data = loads(data.read())["items"][0]["volumeInfo"]
                if "imageLinks" not in p_data:
                    print("No image could be scraped for book '%s'" % p.title)
                    return
                retrieve(p_data["imageLinks"]["thumbnail"], f_to_save)
                p.img = "https://everylibrary.co/" + f_to_save
                p.save()
                print("Scraped image for '%s', saved image at" % p.title, f_to_save)
            except HTTPError as e:
                print("Error:", e)
    except KeyboardInterrupt:
        print("\nQuitting...")
        return
def download(path, name):
    try:
        retrieve(path, name)
    except ContentTooShortError:
        raise DownloadError(path)
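# A minimal sketch of the pieces the download() helper above assumes but does not show.
# The real project's DownloadError may look different; this is only an illustration.
from urllib.error import ContentTooShortError
from urllib.request import urlretrieve as retrieve

class DownloadError(Exception):
    """Raised when a file cannot be fully downloaded from the given path."""
    def __init__(self, path):
        super().__init__("failed to download %s" % path)
        self.path = path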
import csv
from urllib.request import urlretrieve as retrieve
from plotly.graph_objs import Bar
from plotly import offline
from datetime import datetime

# Download data
url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/ecdc/total_deaths.csv'
retrieve(url, 'web_covid_data.csv')

# Open data
with open('web_covid_data.csv', 'r') as read_data_file:
    csv_reader = csv.reader(read_data_file)
    header_row = next(csv_reader)

    # Get daily deaths from the cumulative totals
    # (column index 200 is assumed to be one location's cumulative-deaths series)
    dates, change_in_deaths, rolling_avs = [], [], []
    yday_death = 0
    seven_day_list = []
    week_rolling_total = 0
    for row in csv_reader:
        if datetime.strptime(row[0], '%Y-%m-%d') >= datetime.strptime('2020-03-07', '%Y-%m-%d'):
            current_date = datetime.strptime(row[0], '%Y-%m-%d')
            change_in_death = int(row[200]) - yday_death
            yday_death = int(row[200])
            dates.append(current_date)
            change_in_deaths.append(change_in_death)

# Add line of best fit