def compatible(cls, spreadsheet_path):
    # Pick the pandas reader based on the file extension.
    if spreadsheet_path.endswith("csv"):
        table = pd.read_csv(spreadsheet_path)
    elif spreadsheet_path.endswith("tsv"):
        # pandas has no read_tsv; read the tab-separated file with read_csv.
        table = pd.read_csv(spreadsheet_path, sep="\t")
    elif spreadsheet_path.endswith("xls"):
        table = pd.read_excel(spreadsheet_path)
    else:
        table = pd.read_table(spreadsheet_path)
    object_headers = dict(Object.column_headings())
    for header in object_headers:
        # Just need one of the primary fields to be present, in theory.
        # Not 100% bulletproof, but it'll do.
        if header in table.columns and object_headers[header]:
            return True
    return False
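# A minimal usage sketch, assuming compatible() is declared as a @classmethod on a
# hypothetical SpreadsheetImporter class and that Object.column_headings() maps column
# headers to a "primary field" flag; the file name below is illustrative only.
if SpreadsheetImporter.compatible("samples.tsv"):
    print("samples.tsv contains at least one recognised primary column")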
name = args.name
file_type = args.type

# Build the file name from the requested type and load it with the matching pandas reader.
if file_type == 'xlsx':
    file_name = name + '.xlsx'
    data = pd.read_excel(file_name)
elif file_type == 'xls':
    file_name = name + '.xls'
    data = pd.read_excel(file_name)
elif file_type == 'csv':
    file_name = name + '.csv'
    data = pd.read_csv(file_name)
elif file_type == 'tsv':
    file_name = name + '.tsv'
    # pandas has no read_tsv; read tab-separated files with read_csv.
    data = pd.read_csv(file_name, sep='\t')

row = len(data)
col = len(data.iloc[0, :])
result = ''
already = set()  # marks cells that have already been processed
tran = {
    0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I',
import warnings
from collections import Counter

import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
import lightgbm as lgb
from scipy import stats
from scipy.stats import norm, skew
from scipy.stats.stats import pearsonr
from sklearn.linear_model import LinearRegression, LassoCV, Ridge, LassoLarsCV, ElasticNetCV
from sklearn.model_selection import GridSearchCV, cross_val_score, learning_curve
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler, Normalizer, RobustScaler

warnings.filterwarnings('ignore')
sns.set(style='white', context='notebook', palette='deep')

# Load train and test sets. The files are .csv, so read_csv is the right reader
# (pandas has no read_tsv).
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

print("Train data size before dropping Id feature is : {}".format(train.shape))
print("Test data size before dropping Id feature is : {}".format(test.shape))

train_ID = train['Id']
test_ID = test['Id']

# Now drop the 'Id' column since it's unnecessary for the prediction process.
train.drop('Id', axis=1, inplace=True)
test.drop('Id', axis=1, inplace=True)

print("Train data size after dropping Id feature is : {}".format(train.shape))
print("Test data size after dropping Id feature is : {}".format(test.shape))
def read_input_tsv(day):
    # pandas has no read_tsv; read the tab-separated input with read_csv.
    import pandas as pd
    target = f'input/day{day}.txt'
    return pd.read_csv(target, sep='\t')
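# A minimal usage sketch, assuming a tab-separated input/day1.txt exists relative to the
# working directory.
df = read_input_tsv(1)
print(df.head())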
import pandas as pd

def controller():
    # pandas has no read_tsv; read the tab-separated file with read_csv.
    data = pd.read_csv("Bot_Customization.tsv", sep='\t')
    print(data)
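# Illustrative entry point so the module can be run directly; this guard is an addition,
# not part of the original snippet.
if __name__ == "__main__":
    controller()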
import pandas as pd

# pandas has no read_tsv; the tab-separated training file is read with read_csv,
# keeping the original header/delimiter/quoting options.
train = pd.read_csv('labeledTrainData.tsv', header=0, delimiter='\t', quoting=3)
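# A quick sanity check on what was loaded; the column names are not shown in the snippet
# above, so printing them here is just one way to inspect the file's structure.
print(train.shape)
print(train.columns.values)
print(train.iloc[0])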
# pandas - working with the data
# Data
import numpy as np
import pandas as pd

# Read the data - GAP-5year.tsv (tab-separated).
# pandas has no read_tsv; use read_csv with sep='\t'.
gap = pd.read_csv('c:/JAVA/GAP-5year.tsv', sep='\t')
#print(gap)             # printing the whole frame is unwieldy
print(gap.head())       # first 5 rows
print(gap.tail())       # last 5 rows
print(gap.info())       # show the data structure
print(gap.describe())   # summary descriptive statistics

# Querying the data
# Look up the 2007 Korea rows from the statistics
kor = gap.query("country=='Korea,Rep.'")
print(kor)
kor = gap.query("country=='Korea,Rep.' and year==2007")
print(kor)

# Sorting the output
# Sort by year, then country, and print
# R dplyr - gap %>% arrange(year,country)
sort = gap.sort_values(by=['year', 'country'])
print(sort.head())

# Selecting a subset of columns
# Print population and per-capita GDP
# R dplyr - gap %>% select(pop,gdpPercap)
partcol = gap[['pop', 'gdpPercap']]
print(partcol.head())