def generate_Z_msr_org(numOfBuses, numOfLines, bus_data_df, topo_mat, file_name):
    import pandas as pd
    import numpy as np

    # Creating measurement data to run state estimation
    bus_data = bus_data_df[['Remote controlled bus number', 'Load MW',
                            'Generation MW']].copy()
    bus_data.columns = ['Bus number', 'Load', 'Generation']

    # Correcting the load/generation balance for a lossless DC system
    correction_load = sum(bus_data['Load']) - sum(bus_data['Generation'])
    print("correction_load: ", correction_load)

    # Adding the correction load to the largest generator
    # (.loc with a row label avoids the chained-assignment warning)
    bus_data.loc[bus_data['Generation'].idxmax(), 'Generation'] += correction_load

    # correction_check = sum(bus_data['Load']) - sum(bus_data['Generation'])
    # print("correction_check: ", correction_check)

    # Bus Power = Bus Gen - Bus Load
    bus_data['Bus Power'] = bus_data['Generation'] - bus_data['Load']
    print("bus_data:\n", bus_data.head())

    # Padding a 0 on top of the data for the reference bus
    Z_data_bus_power = pd.DataFrame(
        pd.concat([pd.Series([0]), bus_data['Bus Power']]))

    # Topology matrix containing only the bus-power rows along with the reference bus
    B_mat_bus_power = pd.concat(
        [topo_mat.loc[0:0], topo_mat.loc[numOfLines * 2 + 1:]])

    # Estimating the states from the bus power data
    state_original = np.linalg.pinv(B_mat_bus_power) @ Z_data_bus_power

    # Calculating Z_msr_org using the topology matrix and the states
    Z_msr_org = topo_mat @ state_original
    Z_msr_org.columns = ['Data']

    # Saving the data: append the sheets to the existing workbook
    # (mode='a' replaces the deprecated writer.book = load_workbook(...) pattern)
    with pd.ExcelWriter(file_name, engine='openpyxl', mode='a') as writer:
        Z_msr_org.to_excel(writer, sheet_name="Measurement Data", index=False)
        bus_data.to_excel(writer, sheet_name="Bus Data", index=False)
    # saving complete!

    print("Z_msr_org:\n", Z_msr_org.head())
    return Z_msr_org, bus_data
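# A minimal usage sketch for generate_Z_msr_org, assuming a hypothetical
# 3-bus case. topo_mat is assumed to be the DC topology matrix built elsewhere
# (reference-bus row, line-flow rows, then bus-power rows), and 'case.xlsx'
# must already exist, since the writer appends sheets to it.
import pandas as pd

bus_df = pd.DataFrame({
    'Remote controlled bus number': [1, 2, 3],   # hypothetical bus ids
    'Load MW': [0.0, 50.0, 70.0],                # hypothetical loads
    'Generation MW': [100.0, 0.0, 0.0],          # hypothetical generation
})
# topo_mat = build_topo_mat(...)  # assumed helper, defined elsewhere
# Z_msr_org, bus_data = generate_Z_msr_org(3, 3, bus_df, topo_mat, 'case.xlsx')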
import csv
import pandas as pd


def read(self):
    """Load rows from one CSV file, or merge several into a DataFrame."""
    try:
        if len(self.csv_file) == 1:
            # Single file: a plain DictReader is enough
            # (the original tested an undefined name, file_csv)
            self._rows = csv.DictReader(open(self.csv_file[0]))
        else:
            # Several files: use pandas to stack them into one DataFrame
            df_merged = pd.concat(
                [pd.read_csv(f, sep=',') for f in self.csv_file],
                ignore_index=True, sort=False)
            self._rows = df_merged
    except Exception as err:
        print('[CRITICAL] ' + str(err))
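# A sketch of how this method might be attached and used, assuming a
# hypothetical CsvReader class whose constructor stores a list of paths on
# self.csv_file (the attribute the method expects).
class CsvReader:
    def __init__(self, paths):
        self.csv_file = list(paths)
        self._rows = None

CsvReader.read = read  # attach the function defined above as a method

reader = CsvReader(['a.csv', 'b.csv'])  # hypothetical file names
reader.read()
print(reader._rows)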
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.25,
                                                    random_state=1)
train_x.shape
test_x.shape
train_y.shape
test_y.shape

linear_model = LinearRegression()
linear_model
linear_model.fit(train_x, train_y)
test_prediction = linear_model.predict(test_x)
print(linear_model.coef_)

df_model = pd.DataFrame({'features': x.columns, 'coeff': linear_model.coef_})
df_model = df_model.sort_values(by=['coeff'])
df_model
df_model.plot(x='features', y='coeff', kind='bar', figsize=(15, 10))
plt.show()

fdf = pd.concat([test_x, test_y], axis=1)
fdf['Predicted'] = np.round(test_prediction, 1)  # was predict_test, an undefined name
# Prediction error against the target column (the original left fdf[''] as a
# placeholder, suggesting fdf['Death']); test_y.name covers either case
fdf['Prediction_Error'] = fdf[test_y.name] - fdf['Predicted']
fdf
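# An optional follow-up: quantify the fit on the held-out set with standard
# sklearn.metrics calls, reusing the names from the snippet above.
from sklearn.metrics import mean_absolute_error, r2_score

mae = mean_absolute_error(test_y, test_prediction)
r2 = r2_score(test_y, test_prediction)
print(f"MAE: {mae:.3f}, R^2: {r2:.3f}")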
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt

votes = []
for line in open("ELECTION_ID"):
    year = line.split(" ")[0]
    # The first data row holds the real column names; read it separately
    header = pd.read_csv(year + ".csv", nrows=1).dropna(axis=1)
    d = header.iloc[0].to_dict()
    df = pd.read_csv(year + ".csv", index_col=0, thousands=",", skiprows=[1])
    df.rename(inplace=True, columns=d)
    df.dropna(inplace=True, axis=1)
    df["Year"] = int(year)  # was hardcoded to 2004, which broke the plot below
    votes.append(df)

voteShare = None
for i in range(len(votes)):
    # head(1) keeps the county-level summary row for each election year
    majorVote = votes[i][['Democratic', 'Republican',
                          'Total Votes Cast', 'Year']].head(1)
    majorVote['Republican Vote Share'] = (majorVote['Republican'] /
                                          majorVote['Total Votes Cast'])
    if i == 0:
        voteShare = majorVote
    else:
        voteShare = pd.concat([voteShare, majorVote], axis=0)

ax = voteShare.plot(x='Year', y="Republican Vote Share",
                    title="President General Election Results in Accomack County, Virginia")
ax.get_figure().savefig("accomack.png")
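# For reference, the loop above assumes ELECTION_ID is a plain-text file whose
# lines begin with a year matching a CSV in the working directory; hypothetical
# contents might look like:
#
#   2004 president_general
#   2008 president_general
#
# so that "2004.csv", "2008.csv", ... exist alongside it.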
df['col1'].apply(times2)
df['col3'].apply(len)  # length of each entry

# Get column and index names:
df.columns   # return the columns
df.head()    # return the top 5 rows
df.index

# Important functions
df.sort_values(by='col2')  # sort
df.isnull()  # return null values as True/False
df.dropna()  # drop all rows with NaN; thresh=2 keeps rows with at least 2 non-NaN values
df.fillna("STRING OR NUMBER of your choice")
df.pivot_table(values='D', index=['A', 'B'], columns=['C'])  # pivot table

# Concatenation, Merging, Joining

# Concatenation basically glues together DataFrames. Keep in mind that
# dimensions should match along the axis you are concatenating on.
pd.concat([df1, df2, df3])  # pass axis=1 to concatenate along columns

# Merging allows you to merge DataFrames together using similar logic to SQL joins
pd.merge(left, right, how='inner', on='key')

# join() combines two potentially differently-indexed DataFrames into a single result
left.join(right)  # also accepts how='outer', etc.

# EXAMPLE:
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                     'B': ['B0', 'B1', 'B2']},
                    index=['K0', 'K1', 'K2'])
right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],
                      'D': ['D0', 'D2', 'D3']},
                     index=['K0', 'K2', 'K3'])

# SciPy
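# Checking the join example above: with indices K0-K3 as defined, the default
# (left) join keeps K0, K1, K2 and fills the missing right-hand values with NaN.
print(left.join(right))
#      A   B    C    D
# K0  A0  B0   C0   D0
# K1  A1  B1  NaN  NaN
# K2  A2  B2   C2   D2
print(left.join(right, how='outer'))  # additionally includes K3 from `right`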
# sparse is set to False to ensure that the encoded columns are returned as a
# numpy array as opposed to a sparse matrix
# (in scikit-learn >= 1.2 the parameter is named sparse_output)
OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
OH_cols_train = pd.DataFrame(OH_encoder.fit_transform(train_X[object_cols]))
OH_cols_valid = pd.DataFrame(OH_encoder.transform(val_X[object_cols]))

# one-hot encoding removes the index; adding it back
OH_cols_train.index = train_X.index
OH_cols_valid.index = val_X.index

# remove categorical columns (will replace with one-hot encoding)
num_train_X = train_X.drop(object_cols, axis=1)
num_val_X = val_X.drop(object_cols, axis=1)

# add one-hot encoded columns to numerical features
OH_train_X = pd.concat([num_train_X, OH_cols_train], axis=1)
OH_val_X = pd.concat([num_val_X, OH_cols_valid], axis=1)

'''Pipeline benefits: cleaner code, fewer bugs, easier to productionize,
more options for model validation
'''
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# preprocessing for numerical data
numerical_transformer = SimpleImputer(strategy='constant')

# preprocessing for categorical data
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])
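# Completing the pipeline sketch: bundle the two transformers in a
# ColumnTransformer and chain it with a model. numerical_cols is assumed to be
# the list of numeric feature names (it is not defined above); train_y is
# assumed to be the training target from the original split.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_transformer, numerical_cols),
    ('cat', categorical_transformer, object_cols),
])

my_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', RandomForestRegressor(n_estimators=100, random_state=0)),
])

my_pipeline.fit(train_X, train_y)
preds = my_pipeline.predict(val_X)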
# getting sheet names
xls = pd.ExcelFile('listings.xlsx')
exchanges = xls.sheet_names
print(exchanges)

# read excel files (a list for sheet_name returns a dict of DataFrames;
# the keywords are sheet_name / na_values, not sheetname / na_value)
listings = pd.read_excel('listings.xlsx', sheet_name=['amex', 'nasdaq'],
                         na_values='n/a')
listings['nasdaq'].info()

# concatenate 2 sheets
amex = pd.read_excel('list.xlsx', sheet_name='amex', na_values='n/a')
nyse = pd.read_excel('list.xlsx', sheet_name='nyse', na_values='n/a')
pd.concat([amex, nyse]).info()

amex['Exchange'] = 'AMEX'  # add column referencing the source
nyse['Exchange'] = 'NYSE'
listings = pd.concat([amex, nyse])

# to concatenate all sheets
xls = pd.ExcelFile('listings.xlsx')
exchanges = xls.sheet_names
frames = []  # create an empty list (named so it is not shadowed in the loop)
for exchange in exchanges:
    df = pd.read_excel(xls, sheet_name=exchange)
    df['Exchange'] = exchange  # add reference column
    frames.append(df)  # add DataFrame to the list
listings = pd.concat(frames)
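# A shorter alternative for the "all sheets" case: sheet_name=None reads every
# sheet at once into a dict of DataFrames keyed by sheet name (standard pandas
# behaviour; same file name as above).
all_sheets = pd.read_excel('listings.xlsx', sheet_name=None, na_values='n/a')
for name, frame in all_sheets.items():
    frame['Exchange'] = name
listings = pd.concat(all_sheets.values(), ignore_index=True)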
>>> df = pd.DataFrame(np.random.randn(10, 4))
>>> df
          0         1         2         3
0  0.084157 -1.101703 -1.707957  0.753288
1  1.405378 -0.425304 -0.063559  0.765215
2 -0.606850 -0.756825  0.954330 -1.167361
3  1.259957  0.829929  0.246907  0.541862
4  0.726531 -1.009214 -0.421701 -1.029575
5 -0.065955  0.068362 -1.770766  1.142800
6 -0.322989  0.075362 -1.828991  1.053472
7 -0.404658  0.134083 -0.711269 -1.139436
8  0.826835 -0.311875  0.702583 -1.542086
9 -0.217703  0.159495 -0.077834 -1.304036
>>> pieces = [df[:3], df[3:7], df[7:]]
>>> pd.concat(pieces)
          0         1         2         3
0  0.084157 -1.101703 -1.707957  0.753288
1  1.405378 -0.425304 -0.063559  0.765215
2 -0.606850 -0.756825  0.954330 -1.167361
3  1.259957  0.829929  0.246907  0.541862
4  0.726531 -1.009214 -0.421701 -1.029575
5 -0.065955  0.068362 -1.770766  1.142800
6 -0.322989  0.075362 -1.828991  1.053472
7 -0.404658  0.134083 -0.711269 -1.139436
8  0.826835 -0.311875  0.702583 -1.542086
9 -0.217703  0.159495 -0.077834 -1.304036
>>> left = pd.DataFrame({'key': ['jo', 'ays'], 'lval': [1, 2]})
>>> right = pd.DataFrame({'key': ['jo', 'ays'], 'lval': [8, 5]})
>>> left
   key  lval
0   jo     1
1  ays     2
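The transcript stops just before the merge it sets up; with the same definitions, the continuation would look like this (since both frames name their value column lval, pandas disambiguates with the _x/_y suffixes):

>>> pd.merge(left, right, on='key')
   key  lval_x  lval_y
0   jo       1       8
1  ays       2       5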
import pandas as pd
from numpy.random import randn

df1 = pd.DataFrame({'A': 'A0 A1'.split(), 'B': 'B0 B1'.split()}, index=[0, 1])
df2 = pd.DataFrame({'A': 'A2 A3'.split(), 'B': 'B2 B3'.split()}, index=[2, 3])
df3 = pd.DataFrame({'A': 'A4 A5'.split(), 'B': 'B4 B5'.split()}, index=[4, 5])

# What we are going to do is combine DataFrames in different ways.
# The first is concatenation; keep in mind that the dimensions need to
# match along the axis you are concatenating on.
pd.concat([df1, df2, df3])
#     A   B
# 0  A0  B0
# 1  A1  B1
# 2  A2  B2
# 3  A3  B3
# 4  A4  B4
# 5  A5  B5

# We can join them along columns by changing axis to 1
pd.concat([df1, df2, df3], axis=1)
#      A    B    A    B    A    B
# 0   A0   B0  NaN  NaN  NaN  NaN
# 1   A1   B1  NaN  NaN  NaN  NaN
# 2  NaN  NaN   A2   B2  NaN  NaN
# 3  NaN  NaN   A3   B3  NaN  NaN
# 4  NaN  NaN  NaN  NaN   A4   B4
# 5  NaN  NaN  NaN  NaN   A5   B5
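# A further variant (not in the original notes, but standard pandas): passing
# keys= labels each input frame in the resulting MultiIndex, which makes it
# easy to trace rows back to their source.
combined = pd.concat([df1, df2, df3], keys=['one', 'two', 'three'])
print(combined.loc['two'])
#     A   B
# 2  A2  B2
# 3  A3  B3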