def behavior_ext(windows):
    """Summarise each window of records as a flat list of descriptive statistics.

    Every non-empty window is loaded into a DataFrame and passed through
    ``DataFrame.describe()``.  For the columns labelled 1, 2 and 3, the
    statistics at row positions 1 and 3-7 of the ``describe()`` output (mean,
    min, 25%, 50%, 75% and max for numeric data) are collected statistic by
    statistic, giving 18 values per window.  Column 0, the count (position 0)
    and the standard deviation (position 2) are not used.

    Args:
        windows: Iterable of windows.  Each window is a sequence of records
            convertible with ``np.array`` (presumably rows with at least four
            numeric fields -- confirm against the caller).

    Returns:
        A list holding one feature list per window, in input order.  An empty
        window contributes an empty feature list.
    """
    # Row positions inside the describe() output; position 0 (count) and
    # position 2 (std) are deliberately left out.
    stat_positions = (1, 3, 4, 5, 6, 7)
    # Column labels that are summarised; column 0 is not used as a feature.
    feature_columns = (1, 2, 3)

    behavior_sequence = []
    for window in windows:
        behaviorFeature = []
        records = np.array(window)
        if len(records) != 0:
            summary = pandas.DataFrame(records).describe()
            # Columns are picked by label, statistics by position.  ``.iloc``
            # makes the positional lookup explicit; ``summary[col][pos]``
            # depends on the deprecated integer fallback of
            # Series.__getitem__ on a string index.
            behaviorFeature = [
                summary[column].iloc[position]
                for position in stat_positions
                for column in feature_columns
            ]
        # NOTE(review): the original formatting was lost, so it is unclear
        # whether empty windows were meant to be skipped instead of producing
        # an empty entry -- confirm against the callers.
        behavior_sequence.append(behaviorFeature)
    return behavior_sequence
Spyder Editor This is a temporary script file. """ #实训1 from sqlalchemy imort create_enqine import pandas as pd import numpy as np engine = create_engine('data\第4章\Training_Master.csv',sep=',',encoding="gbk") print(pd.ndim)#维度 print(pd.shape)#大小 print(pd.memory_usage())#内存信息 print(pd.describe()) def dropNullStd(data): beforelen = data.shape[1] colisNull = data.describe().loc['count'] == 0 for i in range(len(colisNull)): if colisNull[i]: data.drop(colisNull.index[i],axis=1,inplace=True) stdisZero = data.describe().loc['std'] == 0 for i in range(len(stdisZero)): if stdisZero[i]: data.drop(stdisZero.index[i],axis=1,inplace=True) afterlen = data.shape[1] print('剔除的列的数目为:',beforelen-afterlen) print('剔出后数据的形状为:',data.shape) dropNullStd(pd)
# class.mro(): 상속 관계를 확인할 수 있는 메서드
# 출력: [<class 'library.class'>, <class 'library.class'>, <class 'object'>]
# 새줄 문자: 텍스트의 한 줄이 끝남을 표시하는 문자 또는 문자열
# (개행 문자, 줄바꿈 문자, EOL과 같은 뜻)

import seaborn as sns
sns.pairplot(data, diag_kind='kde' = 커널밀도추정곡선, diag_kind='hist' = 히스토그램, hue='species', palette='색상')

import pandas as pd
pd.DataFrame(data) = 데이터 프레임으로 만들어줌
df.describe() = 연산 가능한 숫자를 가진 컬럼을 추출 -> 기본 통계량을 산출

What is Object Oriented Programming? (객체 지향 프로그래밍)
문제를 여러 개의 객체 단위로 나눠 작업하는 방식. 클래스를 이용해 함수(처리 부분)와 변수(데이터 부분)를 하나로 묶어 객체(인스턴스)를 생성해 사용하는 것이 특징이다.
객체: 실제 존재하는 모든 사물 또는 개념
클래스: 객체를 정의해 놓은 것
인스턴스: 객체와 비슷. 클래스로부터 객체를 만드는 과정을 '클래스의 인스턴스화'라고 부름
예) 객체 - 핸드폰, 클래스 - 핸드폰 설계도
# NOTE(review): in these cells ``pd`` is presumably a DataFrame bound earlier
# in the notebook, not the pandas module alias -- confirm at its definition.

# In[88]:
# Dimensions of the table.
pd.shape

# In[89]:
# Overview of the table as reported by info().
pd.info()

# In[90]:
# Summary statistics as reported by describe().
pd.describe()

# In[91]:
# ADVANCED
# 3 Print only the column age
pd.loc[:, "age"]

# In[93]:
# ADVANCED
# 4 Print only the columns age,children and charges
pd.loc[:, ["age", "children", "charges"]]
import pandas as p

# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# adds the same record and renumbers the index like ignore_index=True did.
new_row = p.DataFrame([{
    'Name': "Geetika",
    'Age': 20,
    'Email-id': "*****@*****.**",
    'Phone-No': 8295689593,
}])
df = p.concat([df, new_row], ignore_index=True)
print("\n", df)

# Q.2 - Download the dataset from this link ,
# Click Here
# Import the data and print the following :
# a.) First 5 rows of Dataframe
# b.) First 10 rows of the Dataframe
# c.) find basic statistics on the particular dataset.
# d.) Find the last 5 rows of the dataframe
# e.) Extract the 2nd column and find basic statistics on it.

df = p.read_csv("Weather.csv")
print("First 5 rows of Dataframe:-", df.head(5))
print("First 10 rows of the Dataframe:-", df.head(10))
print("find basic statistics on the particular dataset:-", df.describe())
print("Find the last 5 rows of the dataframe:-", df.tail(5))

# NOTE(review): this rebinds ``p`` from the pandas module to a Series, so any
# later ``p.read_csv``-style call would fail.  The binding is kept because
# code after this excerpt may rely on it; prefer a distinct name.
# Presumably "Location" is the 2nd column of Weather.csv -- verify.
p = df["Location"]
print("Extract the 2nd column and find basic statistics on it:-", p.describe())
x = pca.fit_transform(x_no)
y = y_no

# Repeated random hold-out evaluation of LDA: each iteration draws a fresh
# split with 30% of the samples held out for testing.
iteraciones = 1000
error = [None] * iteraciones
for i in range(iteraciones):
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
    lda = LDA()
    lda.fit(X_train, y_train)
    # Mean absolute difference between predicted and true labels
    # (presumably the labels are 0/1, making this the error rate -- confirm).
    error[i] = np.sum(abs(lda.predict(X_test) - y_test)) / len(y_test)

import pandas as pd

# Keep the ``pd`` module alias intact here.  The original rebound ``pd`` to
# the DataFrame and then called ``pd.describe(pd)``, which hands the frame
# itself to describe() as its ``percentiles`` argument.
lda_errors = pd.DataFrame(error)
print('Linear error', lda_errors.describe())

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA

# Same evaluation protocol for QDA.
iteraciones = 1000
error = [None] * iteraciones
for i in range(iteraciones):
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
    qda = QDA()
    qda.fit(X_train, y_train)
    error[i] = np.sum(abs(qda.predict(X_test) - y_test)) / len(y_test)

import pandas as pd

# NOTE(review): this rebinds ``pd`` from the pandas module to the DataFrame of
# QDA errors.  It is kept because code after this excerpt may rely on the
# binding; summarise it with ``pd.describe()`` (no argument) and prefer a
# distinct name.
pd = pd.DataFrame(error)
def describe(self):
    """Return the summary statistics of ``self.df`` as fixed-point strings.

    ``self.df.describe()`` is computed and every cell of the result is
    rendered with ``format(value, 'f')``, so values show six decimals instead
    of scientific notation.

    Returns:
        A DataFrame laid out like ``self.df.describe()`` whose cells are
        strings.

    Note:
        Assumes ``self.df`` is a pandas DataFrame -- confirm against the
        enclosing class.  pandas has no module-level ``describe`` function, so
        the method is called on the frame, and the formatted result is
        returned rather than discarded.
    """
    summary = self.df.describe()
    return summary.apply(
        lambda column: column.apply(lambda value: format(value, 'f'))
    )