Example #1
@classmethod
def compatible(cls, spreadsheet_path):
    if spreadsheet_path.endswith("csv"):
        table = pd.read_csv(spreadsheet_path)
    elif spreadsheet_path.endswith("tsv"):
        table = pd.read_csv(spreadsheet_path, sep='\t')  # pandas has no read_tsv
    elif spreadsheet_path.endswith("xls"):
        table = pd.read_excel(spreadsheet_path)
    else:
        table = pd.read_table(spreadsheet_path)
    object_headers = dict(Object.column_headings())
    for header in object_headers:
        # Just need one of the primary fields to be present, in theory.
        # Not 100% bulletproof, but it'll do.
        if header in table.columns and object_headers[header]:
            return True
    return False
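
Note that pandas ships no pd.read_tsv: reading a TSV goes through pd.read_csv(path, sep='\t') or pd.read_table, which defaults to tab separation. A minimal wrapper sketch, with the helper name read_tsv chosen purely for illustration:

import pandas as pd

def read_tsv(path, **kwargs):
    # pandas has no read_tsv, so delegate to read_csv with a tab
    # separator; extra keyword arguments pass straight through.
    return pd.read_csv(path, sep='\t', **kwargs)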
Example #2
import pandas as pd

# `args` is assumed to come from an argparse parser defined elsewhere.
name = args.name
file_type = args.type

if file_type == 'xlsx':
    file_name = name + '.xlsx'
    data = pd.read_excel(file_name)
elif file_type == 'xls':
    file_name = name + '.xls'
    data = pd.read_excel(file_name)
elif file_type == 'csv':
    file_name = name + '.csv'
    data = pd.read_csv(file_name)
elif file_type == 'tsv':
    file_name = name + '.tsv'
    data = pd.read_csv(file_name, sep='\t')  # pandas has no read_tsv
else:
    raise ValueError('unsupported file type: ' + file_type)

row, col = data.shape  # number of rows and columns
result = ''
already = set()  # used to mark what has already been processed
tran = {
    0: 'A',
    1: 'B',
    2: 'C',
    3: 'D',
    4: 'E',
    5: 'F',
    6: 'G',
    7: 'H',
    8: 'I',
}
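
The tran table above spells out an index-to-letter mapping entry by entry. Assuming the intent is spreadsheet-style column letters, a comprehension builds the same mapping without the manual listing; the range of 26 letters is an assumption:

# Assumed intent: map column indices 0..25 to column letters 'A'..'Z'.
tran = {i: chr(ord('A') + i) for i in range(26)}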
Example #3
import warnings

import pandas as pd
import seaborn as sns
import xgboost as xgb
import lightgbm as lgb
from scipy import stats
from scipy.stats import skew, pearsonr, norm
from collections import Counter
from sklearn.linear_model import LinearRegression, LassoCV, Ridge, LassoLarsCV, ElasticNetCV
from sklearn.model_selection import GridSearchCV, cross_val_score, learning_curve
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler, Normalizer, RobustScaler

warnings.filterwarnings('ignore')
sns.set(style='white', context='notebook', palette='deep')

# Load train and test sets
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

print("Train data size before dropping Id feature is : {}".format(train.shape))
print("Test data size before dropping Id feature is : {}".format(test.shape))

# Keep a copy of the Id columns for later use.
train_ID = train['Id']
test_ID = test['Id']

# Now drop the 'Id' column since it's unnecessary for the prediction process.
train.drop('Id', axis=1, inplace=True)
test.drop('Id', axis=1, inplace=True)

print("Train data size before dropping Id feature is : {}".format(train.shape))
print("Test data size before dropping Id feature is : {}".format(test.shape))
Example #4
import pandas as pd

def read_input_tsv(day):
    target = f'input/day{day}.txt'
    return pd.read_csv(target, sep='\t')  # pandas has no read_tsv
Example #5
import pandas as pd

def controller():
    data = pd.read_csv("Bot_Customization.tsv", sep='\t')  # pandas has no read_tsv
    print(data)
Example #6
import pandas as pd

# pandas has no read_tsv; read_csv with a tab delimiter does the job.
train = pd.read_csv('labeledTrainData.tsv',
                    header=0,
                    delimiter='\t',
                    quoting=3)
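
The quoting=3 argument is a magic number for csv.QUOTE_NONE, which disables quote handling entirely so stray quote characters in the text are kept as-is. The same call with the named constant:

import csv
import pandas as pd

train = pd.read_csv('labeledTrainData.tsv',
                    header=0,
                    delimiter='\t',
                    quoting=csv.QUOTE_NONE)  # equivalent to quoting=3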
Example #7
# Pandas - providing the data
# Data
import numpy as np
import pandas as pd

# Read the data in - GAP-5year.tsv (tab-separated)
gap = pd.read_csv('c:/JAVA/GAP-5year.tsv', sep='\t')  # pandas has no read_tsv
#print(gap)  # printing the whole frame is rather unwieldy
print(gap.head())  # first 5 rows
print(gap.tail())  # last 5 rows
print(gap.info())  # shows the data's structure
print(gap.describe())  # summary of descriptive statistics

# Querying the data
# Look up the 2007 Korea rows in the statistics
kor = gap.query("country=='Korea,Rep.'")
print(kor)
kor = gap.query("country=='Korea,Rep.' & year==2007")
print(kor)

# Sort and print
# Sort by year, then country, and print
# R dplyr - gap %>% arrange(year,country)
sort = gap.sort_values(by=['year', 'country'])
print(sort.head())

# Selecting a subset of columns
# Print population and per-capita GDP
# R dplyr - gap %>% select(pop,gdpPercap)
partcol = gap[['pop', 'gdpPercap']]
print(partcol.head())
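
The R dplyr pipe referenced in the comments has a pandas analogue in method chaining, which strings the query, sort, and column-selection steps above into one expression. A minimal sketch using the same gap frame and column names:

# dplyr: gap %>% filter(year == 2007) %>% arrange(country) %>% select(pop, gdpPercap)
chained = (gap.query("year == 2007")
              .sort_values(by='country')
              [['pop', 'gdpPercap']])
print(chained.head())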