def generate_Z_msr_org(numOfBuses, numOfLines, bus_data_df, topo_mat, file_name):
    import pandas as pd
    import numpy as np

    # Creating measurement data to run state estimation
    bus_data = bus_data_df[['Remote controlled bus number', 'Load MW',
                            'Generation MW']].copy()
    bus_data.columns = ['Bus number', 'Load', 'Generation']

    # Correcting the load/generation balance for a lossless DC system
    correction_load = sum(bus_data['Load']) - sum(bus_data['Generation'])
    print("correction_load: ", correction_load)

    # Adding the correction load to the largest generator
    # (.loc with a row label avoids the chained-assignment warning)
    bus_data.loc[bus_data['Generation'].idxmax(), 'Generation'] += correction_load

    # correction_check = sum(bus_data['Load']) - sum(bus_data['Generation'])
    # print("correction_check: ", correction_check)

    # Bus Power = Bus Gen - Bus Load
    bus_data['Bus Power'] = bus_data['Generation'] - bus_data['Load']
    print("bus_data:\n", bus_data.head())

    # Padding a 0 on top of the data for the reference bus
    Z_data_bus_power = pd.DataFrame(
        pd.concat([pd.Series([0]), bus_data['Bus Power']]))

    # Topology matrix containing only the bus-power rows along with the reference bus
    B_mat_bus_power = pd.concat(
        [topo_mat.loc[0:0], topo_mat.loc[numOfLines * 2 + 1:]])

    # Estimating the states from the bus power data
    state_original = np.linalg.pinv(B_mat_bus_power) @ Z_data_bus_power

    # Calculating Z_msr_org using the topology matrix and the states
    Z_msr_org = topo_mat @ state_original
    Z_msr_org.columns = ['Data']

    # Saving the data: append the sheets to the existing workbook
    # (mode='a' replaces the deprecated writer.book = load_workbook(...) pattern)
    with pd.ExcelWriter(file_name, engine='openpyxl', mode='a') as writer:
        Z_msr_org.to_excel(writer, sheet_name="Measurement Data", index=False)
        bus_data.to_excel(writer, sheet_name="Bus Data", index=False)
    # saving complete!

    print("Z_msr_org:\n", Z_msr_org.head())
    return Z_msr_org, bus_data
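# A minimal usage sketch for generate_Z_msr_org, assuming a hypothetical
# 3-bus case. topo_mat is assumed to be the DC topology matrix built elsewhere
# (reference-bus row, line-flow rows, then bus-power rows), and 'case.xlsx'
# must already exist, since the writer appends sheets to it.
import pandas as pd

bus_df = pd.DataFrame({
    'Remote controlled bus number': [1, 2, 3],   # hypothetical bus ids
    'Load MW': [0.0, 50.0, 70.0],                # hypothetical loads
    'Generation MW': [100.0, 0.0, 0.0],          # hypothetical generation
})
# topo_mat = build_topo_mat(...)  # assumed helper, defined elsewhere
# Z_msr_org, bus_data = generate_Z_msr_org(3, 3, bus_df, topo_mat, 'case.xlsx')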
import csv
import pandas as pd


def read(self):
    """Load rows from one CSV file, or merge several into a DataFrame."""
    try:
        if len(self.csv_file) == 1:
            # Single file: a plain DictReader is enough
            # (the original tested an undefined name, file_csv)
            self._rows = csv.DictReader(open(self.csv_file[0]))
        else:
            # Several files: use pandas to stack them into one DataFrame
            df_merged = pd.concat(
                [pd.read_csv(f, sep=',') for f in self.csv_file],
                ignore_index=True, sort=False)
            self._rows = df_merged
    except Exception as err:
        print('[CRITICAL] ' + str(err))
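# A sketch of how this method might be attached and used, assuming a
# hypothetical CsvReader class whose constructor stores a list of paths on
# self.csv_file (the attribute the method expects).
class CsvReader:
    def __init__(self, paths):
        self.csv_file = list(paths)
        self._rows = None

CsvReader.read = read  # attach the function defined above as a method

reader = CsvReader(['a.csv', 'b.csv'])  # hypothetical file names
reader.read()
print(reader._rows)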
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.25,
                                                    random_state=1)
train_x.shape
test_x.shape
train_y.shape
test_y.shape

linear_model = LinearRegression()
linear_model
linear_model.fit(train_x, train_y)
test_prediction = linear_model.predict(test_x)
print(linear_model.coef_)

df_model = pd.DataFrame({'features': x.columns, 'coeff': linear_model.coef_})
df_model = df_model.sort_values(by=['coeff'])
df_model
df_model.plot(x='features', y='coeff', kind='bar', figsize=(15, 10))
plt.show()

fdf = pd.concat([test_x, test_y], axis=1)
fdf['Predicted'] = np.round(test_prediction, 1)  # was predict_test, an undefined name
# Prediction error against the target column (the original left fdf[''] as a
# placeholder, suggesting fdf['Death']); test_y.name covers either case
fdf['Prediction_Error'] = fdf[test_y.name] - fdf['Predicted']
fdf
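# An optional follow-up: quantify the fit on the held-out set with standard
# sklearn.metrics calls, reusing the names from the snippet above.
from sklearn.metrics import mean_absolute_error, r2_score

mae = mean_absolute_error(test_y, test_prediction)
r2 = r2_score(test_y, test_prediction)
print(f"MAE: {mae:.3f}, R^2: {r2:.3f}")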
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt

votes = []
for line in open("ELECTION_ID"):
    year = line.split(" ")[0]
    # The first data row holds the real column names; read it separately
    header = pd.read_csv(year + ".csv", nrows=1).dropna(axis=1)
    d = header.iloc[0].to_dict()
    df = pd.read_csv(year + ".csv", index_col=0, thousands=",", skiprows=[1])
    df.rename(inplace=True, columns=d)
    df.dropna(inplace=True, axis=1)
    df["Year"] = int(year)  # was hardcoded to 2004, which broke the plot below
    votes.append(df)

voteShare = None
for i in range(len(votes)):
    # head(1) keeps the county-level summary row for each election year
    majorVote = votes[i][['Democratic', 'Republican',
                          'Total Votes Cast', 'Year']].head(1)
    majorVote['Republican Vote Share'] = (majorVote['Republican'] /
                                          majorVote['Total Votes Cast'])
    if i == 0:
        voteShare = majorVote
    else:
        voteShare = pd.concat([voteShare, majorVote], axis=0)

ax = voteShare.plot(x='Year', y="Republican Vote Share",
                    title="President General Election Results in Accomack County, Virginia")
ax.get_figure().savefig("accomack.png")
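# For reference, the loop above assumes ELECTION_ID is a plain-text file whose
# lines begin with a year matching a CSV in the working directory; hypothetical
# contents might look like:
#
#   2004 president_general
#   2008 president_general
#
# so that "2004.csv", "2008.csv", ... exist alongside it.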
df['col1'].apply(times2)
df['col3'].apply(len)  # length of each entry

# Get column and index names:
df.columns   # return the columns
df.head()    # return the top 5 rows
df.index

# Important functions
df.sort_values(by='col2')  # sort
df.isnull()  # return null values as True/False
df.dropna()  # drop all rows with NaN; thresh=2 keeps rows with at least 2 non-NaN values
df.fillna("STRING OR NUMBER of your choice")
df.pivot_table(values='D', index=['A', 'B'], columns=['C'])  # pivot table

# Concatenation, Merging, Joining

# Concatenation basically glues together DataFrames. Keep in mind that
# dimensions should match along the axis you are concatenating on.
pd.concat([df1, df2, df3])  # pass axis=1 to concatenate along columns

# Merging allows you to merge DataFrames together using similar logic to SQL joins
pd.merge(left, right, how='inner', on='key')

# join() combines two potentially differently-indexed DataFrames into a single result
left.join(right)  # also accepts how='outer', etc.

# EXAMPLE:
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                     'B': ['B0', 'B1', 'B2']},
                    index=['K0', 'K1', 'K2'])
right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],
                      'D': ['D0', 'D2', 'D3']},
                     index=['K0', 'K2', 'K3'])

# SciPy
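# Checking the join example above: with indices K0-K3 as defined, the default
# (left) join keeps K0, K1, K2 and fills the missing right-hand values with NaN.
print(left.join(right))
#      A   B    C    D
# K0  A0  B0   C0   D0
# K1  A1  B1  NaN  NaN
# K2  A2  B2   C2   D2
print(left.join(right, how='outer'))  # additionally includes K3 from `right`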
# sparse is set to False to ensure that the encoded columns are returned as a
# numpy array as opposed to a sparse matrix
# (in scikit-learn >= 1.2 the parameter is named sparse_output)
OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
OH_cols_train = pd.DataFrame(OH_encoder.fit_transform(train_X[object_cols]))
OH_cols_valid = pd.DataFrame(OH_encoder.transform(val_X[object_cols]))

# one-hot encoding removes the index; adding it back
OH_cols_train.index = train_X.index
OH_cols_valid.index = val_X.index

# remove categorical columns (will replace with one-hot encoding)
num_train_X = train_X.drop(object_cols, axis=1)
num_val_X = val_X.drop(object_cols, axis=1)

# add one-hot encoded columns to numerical features
OH_train_X = pd.concat([num_train_X, OH_cols_train], axis=1)
OH_val_X = pd.concat([num_val_X, OH_cols_valid], axis=1)

'''Pipeline benefits: cleaner code, fewer bugs, easier to productionize,
more options for model validation
'''
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# preprocessing for numerical data
numerical_transformer = SimpleImputer(strategy='constant')

# preprocessing for categorical data
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])
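# Completing the pipeline sketch: bundle the two transformers in a
# ColumnTransformer and chain it with a model. numerical_cols is assumed to be
# the list of numeric feature names (it is not defined above); train_y is
# assumed to be the training target from the original split.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_transformer, numerical_cols),
    ('cat', categorical_transformer, object_cols),
])

my_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', RandomForestRegressor(n_estimators=100, random_state=0)),
])

my_pipeline.fit(train_X, train_y)
preds = my_pipeline.predict(val_X)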
# getting sheet names
xls = pd.ExcelFile('listings.xlsx')
exchanges = xls.sheet_names
print(exchanges)

# read excel files (a list for sheet_name returns a dict of DataFrames;
# the keywords are sheet_name / na_values, not sheetname / na_value)
listings = pd.read_excel('listings.xlsx', sheet_name=['amex', 'nasdaq'],
                         na_values='n/a')
listings['nasdaq'].info()

# concatenate 2 sheets
amex = pd.read_excel('list.xlsx', sheet_name='amex', na_values='n/a')
nyse = pd.read_excel('list.xlsx', sheet_name='nyse', na_values='n/a')
pd.concat([amex, nyse]).info()

amex['Exchange'] = 'AMEX'  # add column referencing the source
nyse['Exchange'] = 'NYSE'
listings = pd.concat([amex, nyse])

# to concatenate all sheets
xls = pd.ExcelFile('listings.xlsx')
exchanges = xls.sheet_names
frames = []  # create an empty list (named so it is not shadowed in the loop)
for exchange in exchanges:
    df = pd.read_excel(xls, sheet_name=exchange)
    df['Exchange'] = exchange  # add reference column
    frames.append(df)  # add DataFrame to the list
listings = pd.concat(frames)
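# A shorter alternative for the "all sheets" case: sheet_name=None reads every
# sheet at once into a dict of DataFrames keyed by sheet name (standard pandas
# behaviour; same file name as above).
all_sheets = pd.read_excel('listings.xlsx', sheet_name=None, na_values='n/a')
for name, frame in all_sheets.items():
    frame['Exchange'] = name
listings = pd.concat(all_sheets.values(), ignore_index=True)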
>>> df = pd.DataFrame(np.random.randn(10, 4))
>>> df
          0         1         2         3
0  0.084157 -1.101703 -1.707957  0.753288
1  1.405378 -0.425304 -0.063559  0.765215
2 -0.606850 -0.756825  0.954330 -1.167361
3  1.259957  0.829929  0.246907  0.541862
4  0.726531 -1.009214 -0.421701 -1.029575
5 -0.065955  0.068362 -1.770766  1.142800
6 -0.322989  0.075362 -1.828991  1.053472
7 -0.404658  0.134083 -0.711269 -1.139436
8  0.826835 -0.311875  0.702583 -1.542086
9 -0.217703  0.159495 -0.077834 -1.304036
>>> pieces = [df[:3], df[3:7], df[7:]]
>>> pd.concat(pieces)
          0         1         2         3
0  0.084157 -1.101703 -1.707957  0.753288
1  1.405378 -0.425304 -0.063559  0.765215
2 -0.606850 -0.756825  0.954330 -1.167361
3  1.259957  0.829929  0.246907  0.541862
4  0.726531 -1.009214 -0.421701 -1.029575
5 -0.065955  0.068362 -1.770766  1.142800
6 -0.322989  0.075362 -1.828991  1.053472
7 -0.404658  0.134083 -0.711269 -1.139436
8  0.826835 -0.311875  0.702583 -1.542086
9 -0.217703  0.159495 -0.077834 -1.304036
>>> left = pd.DataFrame({'key': ['jo', 'ays'], 'lval': [1, 2]})
>>> right = pd.DataFrame({'key': ['jo', 'ays'], 'lval': [8, 5]})
>>> left
   key  lval
0   jo     1
1  ays     2
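The transcript stops just before the merge it sets up; with the same definitions, the continuation would look like this (since both frames name their value column lval, pandas disambiguates with the _x/_y suffixes):

>>> pd.merge(left, right, on='key')
   key  lval_x  lval_y
0   jo       1       8
1  ays       2       5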
import pandas as pd
from numpy.random import randn

df1 = pd.DataFrame({'A': 'A0 A1'.split(), 'B': 'B0 B1'.split()}, index=[0, 1])
df2 = pd.DataFrame({'A': 'A2 A3'.split(), 'B': 'B2 B3'.split()}, index=[2, 3])
df3 = pd.DataFrame({'A': 'A4 A5'.split(), 'B': 'B4 B5'.split()}, index=[4, 5])

# What we are going to do is combine DataFrames in different ways.
# The first is concatenation; keep in mind that the dimensions need to
# match along the axis you are concatenating on.
pd.concat([df1, df2, df3])
#     A   B
# 0  A0  B0
# 1  A1  B1
# 2  A2  B2
# 3  A3  B3
# 4  A4  B4
# 5  A5  B5

# We can join them along columns by changing axis to 1
pd.concat([df1, df2, df3], axis=1)
#      A    B    A    B    A    B
# 0   A0   B0  NaN  NaN  NaN  NaN
# 1   A1   B1  NaN  NaN  NaN  NaN
# 2  NaN  NaN   A2   B2  NaN  NaN
# 3  NaN  NaN   A3   B3  NaN  NaN
# 4  NaN  NaN  NaN  NaN   A4   B4
# 5  NaN  NaN  NaN  NaN   A5   B5
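# A further variant (not in the original notes, but standard pandas): passing
# keys= labels each input frame in the resulting MultiIndex, which makes it
# easy to trace rows back to their source.
combined = pd.concat([df1, df2, df3], keys=['one', 'two', 'three'])
print(combined.loc['two'])
#     A   B
# 2  A2  B2
# 3  A3  B3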