def retweets_vs_likes(data):
    """Plot the Likes and RTs columns of `data` over time on one figure."""
    likes_over_time = pd.Series(data['Likes'].values, index=data['Date'])
    rts_over_time = pd.Series(data['RTs'].values, index=data['Date'])

    # Draw both curves on the same axes so the two trends can be compared.
    for series, label in ((likes_over_time, "Likes"),
                          (rts_over_time, "Retweets")):
        series.plot(figsize=(16, 4), label=label, legend=True)

    plt.show()
def nombre_tweets_candidates(data, candidate1, candidate2):
    """Plot the Likes over time of tweets mentioning each candidate.

    Side effect: adds two boolean columns to `data`
    ('Has_candidate1' / 'Has_candidate2') flagging, row by row, whether the
    tweet text contains each candidate's name.
    """
    # Bug fix: the original used `candidate1 in data['tweet_textual_content']`,
    # which tests membership in the Series *index* (a single bool broadcast to
    # every row), not per-tweet substring presence. Use vectorized
    # str.contains with regex=False for a literal-substring check.
    data['Has_candidate1'] = data['tweet_textual_content'].str.contains(
        candidate1, regex=False)
    data['Has_candidate2'] = data['tweet_textual_content'].str.contains(
        candidate2, regex=False)

    # Build (date -> likes) series for each candidate's tweets. Use .values
    # for the data (as retweets_vs_likes does): passing a Series together
    # with a new index would silently reindex and produce NaNs.
    subset1 = data[data['Has_candidate1']]
    subset2 = data[data['Has_candidate2']]
    tcandidate1 = pd.Series(data=subset1['Likes'].values,
                            index=subset1['Date'])
    tcandidate2 = pd.Series(data=subset2['Likes'].values,
                            index=subset2['Date'])

    # Visualization
    tcandidate1.plot(figsize=(16, 4), label="Candidate1", legend=True)
    tcandidate2.plot(figsize=(16, 4), label="Candidate2", legend=True)

    plt.show()
def generate_Z_msr_org(numOfBuses, numOfLines, bus_data_df, topo_mat,
                       file_name):
    """Build the original measurement vector Z_msr_org for DC state estimation.

    Balances load vs. generation for a lossless DC system, estimates the
    states from the bus powers via the pseudo-inverse of the reduced topology
    matrix, appends the results to the Excel workbook `file_name`, and
    returns (Z_msr_org, bus_data).
    """
    import pandas as pd
    import numpy as np
    import openpyxl  # required as the ExcelWriter engine below

    # Measurement data used to run state estimation.
    # .copy() so the rename/edits below don't act on a view of bus_data_df
    # (the original sliced view triggered SettingWithCopyWarning).
    bus_data = bus_data_df[[
        'Remote controlled bus number', 'Load MW', 'Generation MW'
    ]].copy()
    bus_data.columns = ['Bus number', 'Load', 'Generation']

    # Correcting the load/generation imbalance for a lossless DC system.
    correction_load = sum(bus_data['Load']) - sum(bus_data['Generation'])
    print("correction_load: ", correction_load)

    # Add the correction load to the largest generator. Single .loc call
    # (row, column) instead of chained indexing, so the write is guaranteed
    # to hit the frame and not a temporary copy.
    bus_data.loc[bus_data['Generation'].idxmax(),
                 'Generation'] += correction_load

    # Bus Power = Bus Gen - Bus Load
    bus_data['Bus Power'] = bus_data['Generation'] - bus_data['Load']

    print("bus_data:\n", bus_data.head())

    # Padding 0 on top of the data for the reference bus.
    Z_data_bus_power = pd.DataFrame(
        pd.concat([pd.Series([0]), bus_data['Bus Power']]))

    # Topology matrix restricted to the bus-power rows plus the reference bus.
    B_mat_bus_power = pd.concat(
        [topo_mat.loc[0:0], topo_mat.loc[numOfLines * 2 + 1:]])

    # Estimating the states from the bus power data (least squares via pinv).
    state_original = np.linalg.pinv(B_mat_bus_power) @ Z_data_bus_power

    # Z_msr_org = full topology matrix applied to the estimated states.
    Z_msr_org = topo_mat @ state_original
    Z_msr_org.columns = ['Data']

    # Append the two sheets to the existing workbook. mode='a' with
    # if_sheet_exists='replace' supersedes the old load_workbook /
    # writer.book hack, which pandas >= 1.5 rejects; the context manager
    # also replaces the removed writer.save().
    with pd.ExcelWriter(file_name, engine='openpyxl', mode='a',
                        if_sheet_exists='replace') as writer:
        Z_msr_org.to_excel(writer, sheet_name="Measurement Data", index=False)
        bus_data.to_excel(writer, sheet_name="Bus Data", index=False)
    # saving complete !

    print("Z_msr_org:\n", Z_msr_org.head())

    return Z_msr_org, bus_data
# Quick exploratory tour of a temperatures DataFrame `df`
# (assumes df was loaded earlier, presumably from temperatures.csv — TODO confirm).
df.head()  # shows the first 5 rows

df.tail()  # shows the last 5 rows

df.values  # the underlying data as a NumPy array

df["temperature"]  # the temperature column as a Series
df["day"].head()  # first 5 entries of the day column

df["temperature"] > 20  # boolean mask: True where temperature is ABOVE 20 degrees
df["temperature"] < 0  # boolean mask: True where temperature is below 0 degrees

df_cool = df[df["temperature"] < 0]  # keep only the sub-zero rows
df_cool.head()

df_cool.to_csv("cool.csv")  # saves a new csv file named cool.csv

# save the filtered rows into a new file
df["temperature"].mean()  # the average temperature
df["temperature"].max()  # the maximum temperature
df["temperature"].min()  # the minimum temperature
df["temperature"].value_counts(
)  # counts how many times each value occurs in the column

df["temperature"].value_counts().head()
snacks = pd.Series(["Mars", "Twix", "Oreo"])  # small example Series
snacks.value_counts()  # each snack occurs once

df["temperature"].plot()  # line chart (grafiek) of the temperatures
# Exemple #5 (score: 0)
# Element-wise addition of two Series aligns on the index.
import pandas as pd  # fixed: the package is `pandas`, not `panda`

ps = pd.Series([1, 2, 3, 4, 5])

ps1 = pd.Series([5, 6, 7, 8, 9])

ps + ps1  # -> 6, 8, 10, 12, 14
# Exemple #6 (score: 0)
# Hierarchical (MultiIndex) indexing notes.
# Fixed: MultiIndex must be qualified as pd.MultiIndex, and the `labels`
# argument was renamed `codes` in pandas 0.24 (removed since 1.0).
pd.MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
              codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
# set index names
df.index.names  # check how many names you need
df.index.names = ['Group', 'Num']
# select by the first index level
df.xs('G1')
df.xs(['G1', 1])
df.xs(1, level='Num')

# GROUP BY function
df.groupby('Company')  # follow with an aggregation such as .mean(), .std()

# Create a Series in pandas — a Series is very similar to a NumPy array,
# but it can be indexed by a label instead of just a number location, and
# it can hold any arbitrary Python object, not only numeric data.
pd.Series(data=my_list)  # pass labels with index=labels
# Normal arithmetic works just like with matrices.

# Operations to find unique values.
# Fixed: unique/nunique are methods — without () they only return the bound
# method object, not the values.
df['ColName'].unique()  # array of all unique values
df['ColName'].nunique()  # the number of unique values
df['col2'].value_counts()  # the count of each unique value

# Applying a user-defined function with .apply
def times2(x):
    """Return the argument doubled."""
    return 2 * x
df['col1'].apply(times2)  # apply the custom function to every element
df['col3'].apply(len)  # length of each element

# Get column and index names.
# Fixed: the original squeezed three statements and a bare string onto one
# line, which is a syntax error.
df.columns  # return the columns
df.head()  # return the top 5 rows
>>> df.apply(np.cumsum)
                   A         B         C         D
2013-01-01  0.070158  0.629035  0.199517 -0.157134
2013-01-02 -0.587144 -0.100147  1.749539 -0.137846
2013-01-03  0.380278  0.125308  2.642619 -0.686718
2013-01-04  1.600488  1.067800  2.316169 -0.986318
2013-01-05  1.730403 -0.289345  0.987189 -0.602161
2013-01-06  2.386028 -0.005460  2.269278 -1.798823
>>> df.apply(lambda x: x.max() - x.min())
A    1.877512
B    2.299638
C    2.879002
D    1.580820
dtype: float64
>>> #HISTOGRAMMING
>>> s=pd.Series(np.random.randint(0,7,size=10))
>>> s
0    4
1    4
2    2
3    4
4    1
5    6
6    2
7    2
8    1
9    1
dtype: int32
>>> s.value_counts()
4    3
2    3