# Example 1
def main():
    """Load the city AQI table, print summary statistics, and chart the
    five cities with the best air quality."""
    df = pd.read_csv('city_aqi.csv')

    # Quick look: the four best cities, the schema, and the first rows.
    print(df[['City', 'AQI']].sort_values('AQI').head(4))
    print(df.info())

    print(df.head())

    # Basic AQI statistics.
    print('AQI最大值:', df['AQI'].max())
    print('AQI最小值:', df['AQI'].min())
    print('AQI均值:', df['AQI'].mean())

    # Lower AQI means cleaner air, so an ascending sort puts the best first.
    best5 = df.sort_values('AQI').head(5)
    print('空气质量最好的5个城市:')
    print(best5)

    worst5 = df.sort_values('AQI', ascending=False).head(5)
    print('空气质量最差的5个城市:')
    print(worst5)

    # Every city whose AQI exceeds 40.
    print(df[df['AQI'] > 40])

    # Bar chart of the five cleanest cities; save the figure before showing it.
    best5.plot(kind='bar',
               x='City',
               y='AQI',
               title='空气质量最好的5个城市',
               figsize=(10, 10))
    plt.savefig('top5_aqi.png')
    plt.show()
# Example 2
def uploaded():
    """Render a preview of the uploaded CSV file.

    Reads the uploaded file's name from the session, opens it with
    CSVHandler, and renders the first 20 rows together with the available
    ontology terms. Returns an error page (missing session value) or a
    plain error string (unreadable file) on failure.
    """
    # Fetch the raw session value before escaping: escape(None) yields the
    # literal string 'None', so the original `escape(...) == None` check
    # could never fire. session.get also avoids a KeyError when the key
    # is absent.
    raw_name = session.get('csvname')
    if raw_name is None:
        error = "Session error: filename is empty!"
        return render_template('csvpreview.html', error=error)
    filename = escape(raw_name)

    # check for valid csv
    # NOTE(review): filename originates from the client session; consider
    # os.path.basename() here to rule out path traversal — TODO confirm
    # upstream sanitization.
    try:
        csv = CSVHandler(UPLOAD_FOLDER + "/" + filename)
    except IOError:
        return "File not found!"
    except Exception as exc:
        # str(exc) fixes the original TypeError: sys.exc_info()[0] is a
        # type object and cannot be concatenated to a str.
        return "Unexpected error: " + str(exc)

    try:
        data = csv.head(20)

        # get ontology terms
        allTerms = getOntologyClasses()

        # generate output
        msg = "File ("+filename+") uploaded. Congratulations!"

        # render template
        r = render_template('csvpreview.html', msg=msg, data=data, terms=allTerms, ncols=csv.NF)
    finally:
        # close the CSV reader even if head()/render_template raises
        csv.close()

    return r
# Example 3
    def __init__(self, file, mono=True, cap_train=None, sort=False):
        """Load the puzzle CSV into self.data.

        When sort is True, rows are ordered by the number of empty cells
        in each puzzle; cap_train (if given) limits the number of rows
        kept in either mode.
        """
        if sort:
            frame = pd.read_csv(file)
            # Rank puzzles by how many blank cells ('0' characters) they
            # contain, easiest-looking first.
            frame['num_empty'] = frame['puzzle'].apply(lambda puz: puz.count('0'))
            frame.sort_values(by="num_empty", inplace=True)

            if cap_train is not None:
                frame = frame.head(cap_train)

            self.data = frame
        else:
            # Unsorted path: let pandas stop reading after cap_train rows.
            self.data = pd.read_csv(file, nrows=cap_train)
        self.mono = mono
        self.cap_train = cap_train
        self.edges = sudoku_edges()
api = tweepy.API(auth, wait_on_rate_limit=True)

##### United Airlines
# Stream tweets matching the hashtag into tweets.csv. The `with` block
# guarantees the file is flushed and closed before pandas reads it back
# below — the original left it open, so buffered rows could be missing
# from the subsequent read_csv.
with open('tweets.csv', 'w') as csvFile:
    csvWriter = csv.writer(csvFile)
    for tweet in tweepy.Cursor(api.search, q="#ExploreMLBLR", count=100,
                               lang="en",
                               since="2019-10-03").items():
        # print(tweet.created_at, tweet.text)
        csvWriter.writerow([tweet.user.screen_name, tweet.text.encode('utf-8')])

# NOTE(review): this rebinding shadows the stdlib `csv` module used above;
# the name is kept so later references to `csv` still resolve.
csv = pd.read_csv('tweets.csv', names=["Username", "Tweet"])
count = csv['Username'].value_counts()[:]
csv.head(10)

# The two most frequent tweeters.
top2 = count.head(2)
top2

import matplotlib.pyplot as plt

colors = ["#E13F29", "#D69A80", "#D63B59", "#AE5552", "#CB5C3B", "#EB8076", "#96624E"]
top2.plot.pie(y=top2.index,
              shadow=False,
              colors=colors,
              radius=1000,
              explode=(0, 0),
              startangle=90,
              autopct='%1.1f%%',
              textprops={'fontsize': 10})
# Example 5
if __name__ == '__main__':
    # pd.datetime was deprecated in pandas 1.x and removed in 2.0; parse
    # timestamps with the stdlib datetime class instead.
    from datetime import datetime

    date_parser = lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
    directory = '2012'
    files_names = os.listdir(directory)

    # Parse every per-file CSV in the directory, converting the
    # 'datetime' column to real timestamps on the way in.
    csv_list = [
        pd.read_csv(
            '%s/%s' % (directory, name),
            parse_dates=['datetime'],
            date_parser=date_parser,
            encoding='utf-8',
        )
        for name in files_names
    ]

    # Merge everything into one chronologically ordered file.
    result = pd.concat(csv_list, sort=False, ignore_index=True)
    result.sort_values(by='datetime', ascending=True, inplace=True)
    result.to_csv('%s/tweets_bitcoin.csv' % directory, index=False)

    # Round-trip sanity check: re-read the merged file and show a sample.
    csv = pd.read_csv('%s/tweets_bitcoin.csv' % directory,
                      parse_dates=['datetime'],
                      date_parser=date_parser,
                      encoding='utf-8')

    print(csv.head())
    print(len(csv))
                    f.write(str(follower) + '\n')
                print("finished!")
                break
            except tweepy.TweepError as e:
                print("error checking limits: %s" % e)
                # remain_search_limits = 0
                time.sleep(15 * 60)


# eg - applying function to a single user
collect_followers('BimAfolami')  # it works!
BimAfolami = api.get_user('BimAfolami')
BimAfolami.followers_count  #N = 8934
# check that length of this new followers_....csv corresponds to followers.count
# NOTE(review): counts are off by one below (8933 rows vs 8934 reported) —
# presumably a follower changed between collection and this check; confirm
# before relying on exact totals.
csv = pd.read_csv('followers_BimAfolami.csv', header=None)
csv.head()
csv.shape  #output: 8933 rows, 1 col - they NEARLY match...

# eg - try another one
collect_followers('JamesCleverly')
csv = pd.read_csv('followers_JamesCleverly.csv', header=None)
csv.head()
csv.shape  # 73881, 1
JamesCleverly = api.get_user('JamesCleverly')
JamesCleverly.followers_count  #N = 73881 - they match

########### 3 - apply collect_followers() to every elite, saving into separate files ###########

# Now I can run collect_followers() on every elite from filtered 'my_elites.csv' file
my_elites = pd.read_csv('my_elites.csv', index_col=0)
my_elites.shape  # 420, 7