def main(csv_path='city_aqi.csv'):
    """Summarise a city air-quality table and chart the five best cities.

    Reads *csv_path* (expected to contain at least 'City' and 'AQI'
    columns), prints basic frame info, AQI extremes and mean, the five
    best and five worst cities by AQI, and the rows with AQI > 40, then
    saves and shows a bar chart of the five best cities.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to analyse. Defaults to 'city_aqi.csv',
        matching the original hard-coded behaviour.
    """
    # Renamed from `csv` so the stdlib csv module name is not shadowed.
    df = pd.read_csv(csv_path)

    # Ascending AQI order, computed once and reused for every "best" view
    # (the original sorted the frame twice).
    by_aqi = df.sort_values(by='AQI')

    print(by_aqi[['City', 'AQI']].head(4))
    print(df.info())
    print(df.head())

    print('AQI最大值:', df['AQI'].max())
    print('AQI最小值:', df['AQI'].min())
    print('AQI均值:', df['AQI'].mean())

    top5_cities = by_aqi.head(5)
    print('空气质量最好的5个城市:')
    print(top5_cities)

    top5_bottom_cities = df.sort_values(by=['AQI'], ascending=False).head(5)
    print('空气质量最差的5个城市:')
    print(top5_bottom_cities)

    print(df[df['AQI'] > 40])

    top5_cities.plot(kind='bar', x='City', y='AQI',
                     title='空气质量最好的5个城市', figsize=(10, 10))
    plt.savefig('top5_aqi.png')
    plt.show()
def uploaded():
    """Render a preview of the CSV file recorded in the session.

    Reads the uploaded file's name from ``session['csvname']``, opens it
    with :class:`CSVHandler`, and renders ``csvpreview.html`` with the
    first 20 rows, the ontology terms, and the column count.

    Returns the rendered template, or a plain error string when the file
    cannot be opened.
    """
    error = None
    # NOTE(review): assumes 'csvname' is always present in the session; a
    # missing key would raise KeyError before the None check below --
    # confirm against the upload handler.
    filename = escape(session['csvname'])
    if filename is None:  # was `== None`; identity test is correct for None
        error = "Session error: filename is empty!"
        return render_template('csvpreview.html', error=error)
    # check for valid csv
    try:
        reader = CSVHandler(UPLOAD_FOLDER + "/" + filename)
    except IOError:
        return "File not found!"
    except Exception:
        # Bug fix: sys.exc_info()[0] is the exception *type*, and the
        # original concatenated it to a str, raising TypeError inside the
        # handler. Format it instead.
        return "Unexpected error: %s" % sys.exc_info()[0]
    try:
        data = reader.head(20)
        # get ontology terms
        allTerms = getOntologyClasses()
        # app.logger.debug(">>>"+str(reader.NF))
        # generate output
        msg = "File (" + filename + ") uploaded. Congratulations!"
        # render template
        r = render_template('csvpreview.html', msg=msg, data=data,
                            terms=allTerms, ncols=reader.NF)
    finally:
        # Close the CSV reader even if rendering raises (the original
        # leaked the handle on any exception after the open).
        reader.close()
    return r
def __init__(self, file, mono=True, cap_train=None, sort=False):
    """Load puzzle rows from *file* into ``self.data``.

    When *sort* is true, rows are ordered by how many empty cells
    ('0' characters) each puzzle string contains before the optional
    *cap_train* cap is applied; otherwise the first *cap_train* rows
    are read directly (all rows when *cap_train* is None).
    """
    if not sort:
        # Fast path: let pandas stop reading after cap_train rows.
        self.data = pd.read_csv(file, nrows=cap_train)
    else:
        frame = pd.read_csv(file)
        # Count of '0' characters == number of empty cells per puzzle.
        frame['num_empty'] = frame['puzzle'].apply(lambda s: s.count('0'))
        frame.sort_values(by="num_empty", inplace=True)
        self.data = frame if cap_train is None else frame.head(cap_train)
    self.mono = mono
    self.cap_train = cap_train
    self.edges = sudoku_edges()
api = tweepy.API(auth, wait_on_rate_limit=True)

##### United Airlines
# Collect tweets for the hashtag into a CSV of (username, tweet text).
# Bug fix: the original never closed the write handle before pandas read
# the file back, so buffered rows could be missing; a context manager
# flushes and closes it deterministically.
with open('tweets.csv', 'w') as csvFile:
    # Use csv Writer
    csvWriter = csv.writer(csvFile)
    for tweet in tweepy.Cursor(api.search, q="#ExploreMLBLR", count=100,
                               lang="en", since="2019-10-03").items():
        # print (tweet.created_at, tweet.text)
        csvWriter.writerow([tweet.user.screen_name,
                            tweet.text.encode('utf-8')])

# Read the collected tweets back. Renamed from `csv` so the stdlib csv
# module used above is not shadowed by the DataFrame.
tweets_df = pd.read_csv('tweets.csv', names=["Username", "Tweet"])
count = tweets_df['Username'].value_counts()[:]
tweets_df.head(10)
top2 = count.head(2)
top2

import matplotlib.pyplot as plt

colors = ["#E13F29", "#D69A80", "#D63B59", "#AE5552", "#CB5C3B",
          "#EB8076", "#96624E"]
# Pie chart of the two most active tweeters.
top2.plot.pie(y=top2.index,
              shadow=False,
              colors=colors,
              radius=1000,
              explode=(0, 0),  # exploding 'Friday'
              startangle=90,
              autopct='%1.1f%%',
              textprops={'fontsize': 10})
if __name__ == '__main__':
    # Bug fix: `pd.datetime` was deprecated in pandas 0.25 and removed in
    # pandas 2.0 -- use the stdlib datetime class directly.
    from datetime import datetime

    # NOTE(review): `date_parser` itself is deprecated in pandas 2.x in
    # favour of `date_format` -- confirm the pinned pandas version.
    date_parser = lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
    directory = '2012'
    files_names = os.listdir(directory)

    # Load every per-file tweet dump under the year directory.
    csv_list = []
    for name in files_names:
        frame = pd.read_csv(
            '%s/%s' % (directory, name),
            parse_dates=['datetime'],
            date_parser=date_parser,
            encoding='utf-8',
        )
        csv_list.append(frame)

    # Merge, order chronologically, and write one combined file.
    result = pd.concat(csv_list, sort=False, ignore_index=True)
    result.sort_values(by='datetime', ascending=True, inplace=True)
    result.to_csv('%s/tweets_bitcoin.csv' % directory, index=False)

    # Re-read the combined file as a sanity check. Renamed from `csv` so
    # the stdlib module name is not shadowed.
    combined = pd.read_csv('%s/tweets_bitcoin.csv' % directory,
                           parse_dates=['datetime'],
                           date_parser=date_parser,
                           encoding='utf-8')
    print(combined.head())
    print(len(combined))
# NOTE(review): this span begins mid-function -- the tail of what appears
# to be a retry loop inside collect_followers(); its `while`/`try`/`for`
# openers are outside this view, so the indentation below is a best-effort
# reconstruction. TODO confirm against the full source.
            # Append one follower id per line to the open output file.
            f.write(str(follower) + '\n')
        print("finished!")
        break
    except tweepy.TweepError as e:
        # Rate-limit (or other API) error: report and wait out the
        # standard 15-minute window before retrying.
        print("error checking limits: %s" % e)
        # remain_search_limits = 0
        time.sleep(15 * 60)

# eg - applying function to a single user
collect_followers('BimAfolami')
# it works!
BimAfolami = api.get_user('BimAfolami')
BimAfolami.followers_count  # N = 8934

# check that length of this new followers_....csv corresponds to followers.count
# NOTE(review): `csv` here rebinds the name to a DataFrame; if the stdlib
# csv module is used elsewhere in this file, it is shadowed from here on.
csv = pd.read_csv('followers_BimAfolami.csv', header=None)
csv.head()
csv.shape  # output: 8933 rows, 1 col - they NEARLY match...

# eg - try another one
collect_followers('JamesCleverly')
csv = pd.read_csv('followers_JamesCleverly.csv', header=None)
csv.head()
csv.shape  # 73881, 1
JamesCleverly = api.get_user('JamesCleverly')
JamesCleverly.followers_count  # N = 73881 - they match

########### 3 - apply collect_followers() to every elite, saving into separate files ###########
# Now I can run collect_followers() on every elite from filtered 'my_elites.csv' file
my_elites = pd.read_csv('my_elites.csv', index_col=0)
my_elites.shape  # 420, 7