def readfile(name):
    """Dump the pickled data store into ``./results/<name>_results.xlsx``.

    ``./data_store.pkl`` is read as an indexable object whose item 0 is used
    as the column labels and whose item 1 is used as the row data. The
    resulting DataFrame is written to the last sheet of the workbook whose
    path is returned by ``create_excel_file``, and the workbook is saved back
    to that same path.

    Args:
        name: Prefix substituted into the results workbook filename.
    """
    # NOTE: pickle.load can execute arbitrary code; only read trusted files.
    with open('./data_store.pkl', 'rb') as handle:
        stored = pickle.load(handle)

    frame = pd.DataFrame(data=stored[1], columns=stored[0])

    workbook_path = create_excel_file('./results/{}_results.xlsx'.format(name))
    workbook = openpyxl.load_workbook(workbook_path)
    last_sheet = workbook[workbook.sheetnames[-1]]
    print_df_to_excel(df=frame, ws=last_sheet)
    workbook.save(workbook_path)
# Fragment of the IDEAS country / city-state export (the enclosing loop that
# defines `authors` is not visible here). Visible steps, in order:
#   1. checkpoint [data_store_columns2, citystatedataoverall] to
#      'IDEAScitystatedata<indextostart>.pkl' and print a progress line;
#   2. write countrydataoverall (columns: data_store_columns) to the last sheet
#      of the 'IDEAScountrydata' results workbook;
#   3. write citystatedataoverall (columns: data_store_columns2) to the last
#      sheet of the 'IDEAScitystatedata' results workbook;
#   4. print the elapsed time since `start`, in hours.
# NOTE(review): the statements below are collapsed onto one physical line, so
# the '#if authors > 2:' debug comment swallows everything after it -- restore
# the original line breaks before running.
with open('IDEAScitystatedata{}.pkl'.format(indextostart), 'wb') as handle: pickle.dump([data_store_columns2, citystatedataoverall], handle, protocol=pickle.HIGHEST_PROTOCOL) print('Progress: {} out of {} done'.format(authors + indextostart, len(delimitedlocation))) #if authors > 2: # break write_excel = create_excel_file( './results/{}_results.xlsx'.format('IDEAScountrydata')) wb = openpyxl.load_workbook(write_excel) ws = wb[wb.sheetnames[-1]] print_df_to_excel(df=pd.DataFrame(data=countrydataoverall, columns=data_store_columns), ws=ws) wb.save(write_excel) write_excel = create_excel_file( './results/{}_results.xlsx'.format('IDEAScitystatedata')) wb = openpyxl.load_workbook(write_excel) ws = wb[wb.sheetnames[-1]] print_df_to_excel(df=pd.DataFrame(data=citystatedataoverall, columns=data_store_columns2), ws=ws) wb.save(write_excel) elapsed = (time.time() - start) / 3600 print(f"Elapsed time: {elapsed} hours")
# Fragment of the Google Scholar affiliation scrape (the enclosing loop and the
# branch that leads to the 'NA' defaults are not visible here). Visible steps:
#   1. default `title` and `email` to 'NA';
#   2. append name, title and email to `personaldetails`, then append that
#      record to `personaldata`;
#   3. checkpoint [data_store_columns, personaldata] to
#      'GSaffiliationscrap<indextostart>.pkl' and print a progress line;
#   4. write personaldata (columns: data_store_columns) to the last sheet of
#      the 'GSAffiliationScrap' results workbook;
#   5. print the elapsed time since `start`, in hours.
# NOTE(review): the statements below are collapsed onto one physical line, so
# the '#if authors > 3:' debug comment swallows everything after it -- restore
# the original line breaks before running.
title = 'NA' email = 'NA' personaldetails.append(name) personaldetails.append(title) personaldetails.append(email) personaldata.append(personaldetails) with open('GSaffiliationscrap{}.pkl'.format(indextostart), 'wb') as handle: pickle.dump([data_store_columns, personaldata], handle, protocol=pickle.HIGHEST_PROTOCOL) print('Progress: {} out of {} for {} done'.format(authors + indextostart, numberofauthors, name)) #if authors > 3: # break write_excel = create_excel_file( './results/{}_results.xlsx'.format('GSAffiliationScrap')) wb = openpyxl.load_workbook(write_excel) ws = wb[wb.sheetnames[-1]] print_df_to_excel(df=pd.DataFrame(data=personaldata, columns=data_store_columns), ws=ws) wb.save(write_excel) elapsed = (time.time() - start) / 3600 print(f"Elapsed time: {elapsed} hours")
# Fragment of the Google Scholar geocoding pass (the loop over `i` and the
# `try:` that pairs with the first `except:` are not visible here). Visible
# steps:
#   1. if GStitles[i] is NaN, record 'NA' for state1, state2 and country;
#   2. in the `except:` branch, geocode GStitles[i] with gmaps.geocode and
#      unpack (state1, state2, country) from getstatecountry; if that raises,
#      record 'Cannot retrieve' for all three;
#   3. append the three values to state1data / state2data / countrydata;
#   4. after collection, write the three lists as columns 'state1', 'state2',
#      'country' to the last sheet of the 'GScitystatedata' results workbook;
#   5. print the elapsed time since `start`, in hours.
# NOTE(review): presumably the outer `except:` exists because np.isnan raises
# on non-numeric (string) entries -- confirm against the missing `try:`.
# NOTE(review): both handlers are bare `except:`, so they also catch
# KeyboardInterrupt/SystemExit; consider narrowing them.
# NOTE(review): the statements below are collapsed onto one physical line, so
# the '#if i == 10:' debug comment swallows everything after it -- restore the
# original line breaks before running.
if np.isnan(GStitles[i]): state1 = 'NA' state2 = 'NA' country = 'NA' except: geocode_result = gmaps.geocode(GStitles[i]) try: state1, state2, country = getstatecountry(geocode_result) except: state1 = 'Cannot retrieve' state2 = 'Cannot retrieve' country = 'Cannot retrieve' state1data.append(state1) state2data.append(state2) countrydata.append(country) #if i == 10: # break data = {'state1': state1data, 'state2': state2data, 'country': countrydata} write_excel = create_excel_file( './results/{}_results.xlsx'.format('GScitystatedata')) wb = openpyxl.load_workbook(write_excel) ws = wb[wb.sheetnames[-1]] print_df_to_excel(df=pd.DataFrame(data), ws=ws) wb.save(write_excel) elapsed = (time.time() - start) / 3600 print(f"Elapsed time: {elapsed} hours")
# Merge the per-period twitterscraper JSON dumps for one account into a single
# DataFrame and write it to the last sheet of a results workbook.
#
# Commands used to produce the JSON parts (kept verbatim for reference):
# twitterscraper from:JustinWolfers -bd 2018-07-01 -ed 2019-07-01 --output=JustinWolferspart8.json ----error 1114
# twitterscraper from:JustinWolfers -bd 2019-07-01 -ed 2020-05-19 --output=JustinWolferspart9.json ----error 1383
# twitterscraper from:JustinWolfers -bd 2011-07-01 -ed 2012-07-01 --output=JustinWolferspart1.json &&
#   twitterscraper from:JustinWolfers -bd 2012-07-01 -ed 2013-07-01 --output=JustinWolferspart2.json &&
#   twitterscraper from:JustinWolfers -bd 2013-07-01 -ed 2014-07-01 --output=JustinWolferspart3.json &&
#   twitterscraper from:JustinWolfers -bd 2014-07-01 -ed 2015-07-01 --output=JustinWolferspart4.json &&
#   twitterscraper from:JustinWolfers -bd 2015-07-01 -ed 2016-07-01 --output=JustinWolferspart5.json &&
#   twitterscraper from:JustinWolfers -bd 2016-07-01 -ed 2017-07-01 --output=JustinWolferspart6.json &&
#   twitterscraper from:JustinWolfers -bd 2017-07-01 -ed 2018-07-01 --output=JustinWolferspart7.json &&
#   twitterscraper from:JustinWolfers -bd 2018-07-01 -ed 2019-07-01 --output=JustinWolferspart8.json &&
#   twitterscraper from:JustinWolfers -bd 2019-07-01 -ed 2020-05-19 --output=JustinWolferspart9.json
import codecs
import json

import openpyxl
import pandas as pd

from others import create_excel_file, print_df_to_excel

# Number of 'JustinWolferspart<N>.json' files to merge (parts 1..parts).
parts = 9
name = 'JustinWolfers'

frames = []
for partnumber in range(1, parts + 1):
    part_path = 'JustinWolferspart{}.json'.format(partnumber)

    # json.load() no longer accepts an `encoding` keyword (removed in
    # Python 3.9); codecs.open already decodes the stream as UTF-8.
    # `tweets` is not used further in this script; it is kept so the name
    # still holds the raw JSON of the last part read, as before.
    with codecs.open(part_path, 'r', 'utf-8') as f:
        tweets = json.load(f)

    dfread = pd.read_json(part_path, encoding='utf-8')
    print(dfread)
    frames.append(dfread)

# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame on every call; a single concat keeps each part's original index,
# matching the old append behaviour.
df = pd.concat(frames)

write_excel = create_excel_file('./results/{}_results.xlsx'.format(name))
wb = openpyxl.load_workbook(write_excel)
ws = wb[wb.sheetnames[-1]]
print_df_to_excel(df=df, ws=ws)
wb.save(write_excel)