class BoardPro:
    """ETL for the Kyobo notice board: load the raw CSV, refine it, save it.

    The pipeline entry point is :meth:`process`; saving is currently
    commented out there.
    """

    def __init__(self):
        # print(f'basedir: {basedir}')
        self.reader = FileReader()
        # NOTE(review): resolved relative to the CWD, not this file.
        self.datapath = os.path.abspath('com_stock_api/board')

    def process(self):
        """Load and refine the notice data; persisting is disabled for now."""
        file_data = self.get_data()
        data = self.refine_data(file_data)
        # self.save_data(data)
        return data

    def get_data(self):
        """Read kyobo_notice.csv from <datapath>/data into a DataFrame."""
        self.reader.context = os.path.join(self.datapath, 'data')
        self.reader.fname = 'kyobo_notice.csv'
        notice_file = self.reader.csv_to_dframe()
        return notice_file

    @staticmethod
    def refine_data(data):
        """Rename Korean columns to English, sort by date, add constant
        email/article-type columns, drop the URL, and scrub the content text.

        Returns the refined DataFrame (also printed for inspection).
        """
        # Column renames: Korean headers -> English.
        data = data.rename({'제목': 'title',
                            '내용': 'content',
                            '작성일자': 'regdate'}, axis='columns')
        data = data.sort_values(by=['regdate'], axis=0)
        data['email'] = '*****@*****.**'
        data['article_type'] = 'Notice'
        data = data.drop('url', axis=1)

        def _clean(text):
            # Strip complete HTML comments, then any dangling opener,
            # then swap the Korean brand name for the English one.
            cleaned = re.sub('<!--(.+?)-->', '', str(text))
            cleaned = cleaned.replace('<!--', '')
            return cleaned.replace('교보증권', 'Stock Psychic')

        # Fix: the old loop wrote through chained indexing
        # (data['content'][idx] = ...), which pandas may silently ignore,
        # and assumed a default 0..n-1 integer index. A vectorized map
        # applies the identical per-element cleaning safely.
        data['content'] = data['content'].map(_clean)
        print(data)
        return data

    def save_data(self, data):
        """Write the refined frame to <datapath>/saved_data as CSV."""
        self.reader.context = os.path.join(self.datapath, 'saved_data')
        self.reader.fname = 'kyobo_notice_database.csv'
        data.to_csv(self.reader.new_file(), index=False)
        print('file saved')
def __init__(self):
    """Create the CSV reader and resolve the member-model directory."""
    # <this module's directory>/models/member, as an absolute path.
    here = os.path.abspath(os.path.dirname(__file__))
    self.reader = FileReader()
    self.path = os.path.join(here, 'models', 'member')
def __init__(self):
    """Keep a FileReader plus the absolute path of this module's directory."""
    self.fileReader = FileReader()
    module_dir = os.path.dirname(__file__)
    self.datapath = os.path.abspath(module_dir)
def __init__(self):
    """Set up CSV access; the new-member flag starts out cleared."""
    # presumably flipped elsewhere when a fresh signup is handled — confirm
    self.isNewMember = False
    self.filereader = FileReader()
def __init__(self):
    """Initialise reader state and the ticker symbols this service handles."""
    # saved_data directory sitting next to this module
    saved_dir = __file__ + "/.." + "/saved_data"
    self.path = os.path.abspath(saved_dir)
    self.fileReader = FileReader()
    self.df = None       # filled later with a loaded DataFrame
    self.ticker = ''     # symbol currently being processed
    self.tickers = ['AAPL', 'TSLA']
def __init__(self):
    """Point a FileReader at the directory this module lives in."""
    module_dir = os.path.dirname(__file__)
    self.reader = FileReader()
    self.datapath = os.path.abspath(module_dir)
def __init__(self, k=0.5):
    """Store the coefficient ``k`` (default 0.5) and a FileReader.

    NOTE(review): the exact meaning of ``k`` is not visible here —
    confirm against the methods that consume it.
    """
    # Debug trace of the module-level base directory.
    print(f'basedir: {basedir}')
    self.reader = FileReader()
    self.k = k
def __init__(self):
    """Create a FileReader rooted at the member data directory."""
    self.fileReader = FileReader()
    # Resolved relative to the current working directory, not this file.
    self.datapath = os.path.abspath('com_stock_api/member')
def __init__(self):
    """A FileReader is the only collaborator needed up front."""
    self.filereader = FileReader()
def __init__(self):
    """Initialise reader state and the ticker codes this service handles."""
    # data directory sitting next to this module
    data_dir = __file__ + "/.." + "/data"
    self.path = os.path.abspath(data_dir)
    self.fileReader = FileReader()
    self.df = None  # filled later with a loaded DataFrame
    self.tickers = ['051910', '011070']
def __init__(self):
    """Set up the reader, the data directory, and the tickers to process."""
    self.reader = FileReader()
    data_dir = __file__ + "/.." + "/data/"
    self.data = os.path.abspath(data_dir)
    self.ticker = ''  # code currently being worked on
    self.tickers = ['051910', '011070']
class StockService():
    """Per-ticker LSTM price forecaster.

    For each code in ``tickers`` this loads ``<ticker>_dataset.csv``,
    trains an LSTM on scaled prices, rolls the forecast forward, and
    writes the trained model plus a prediction graph to disk.
    """

    def __init__(self):
        self.reader = FileReader()
        # data directory that lives next to this module
        self.data = os.path.abspath(__file__ + "/.." + "/data/")
        self.ticker = ''                     # code currently being processed
        self.tickers = ['051910', '011070']  # ticker codes to run

    def hook(self):
        """Run the full load/train/forecast/plot pipeline per ticker."""
        for tic in self.tickers:
            self.ticker = tic
            self.get_data()

    def get_data(self):
        """Load the ticker's CSV, train an LSTM, forecast, and save outputs.

        Side effects only: writes checkpoints, a ``.h5`` model and a PNG
        graph under the data/image directories; returns nothing.
        NOTE(review): training uses the 'open' column while the hold-out
        uses 'close' — confirm this asymmetry is intentional.
        """
        path = self.data
        self.reader.context = os.path.join(path)
        self.reader.fname = '/' + self.ticker + '_dataset.csv'
        df = self.reader.csv_to_dframe()
        print(df)
        #print(df.columns)
        """ date,open,close,high,low,volume,011070_open,011070_close,011070_high,011070_low,ko_cases,ko_deaths,se_cases,se_deaths """
        # First 120 rows train the model; the remainder is the hold-out set.
        num_shape = 120
        train = df[:num_shape][["open"]]
        test = df[num_shape:][["close"]]
        #print(type(df['date']))
        # print(df.columns)
        # print(df.shape)
        #train = df[["close"]]
        print(train.shape)
        #test = pd.DataFrame(df["close"])
        print('test:', test.shape)
        # Scale prices into [0, 1] for LSTM training.
        sc = MinMaxScaler(feature_range=(0, 1))
        train_scaled = sc.fit_transform(train)
        X_train = []
        #price on next day
        y_train = []
        # Sliding windows: 60 past prices predict the next one.
        window = 60
        for i in range(window, num_shape):
            try:
                X_train_ = np.reshape(train_scaled[i - window:i, 0],
                                      (window, 1))
                X_train.append(X_train_)
                y_train.append(train_scaled[i, 0])
            except:
                # NOTE(review): bare except silently drops failed windows.
                pass
        X_train = np.stack(X_train)
        #print(X_train.shape)
        y_train = np.stack(y_train)
        #print(y_train.shape)
        # Four stacked LSTM layers with dropout and one linear output unit.
        model = tf.keras.models.Sequential()
        model.add(
            LSTM(units=50,
                 return_sequences=True,
                 input_shape=(X_train.shape[1], 1)))
        model.add(Dropout(0.2))
        model.add(LSTM(units=50, return_sequences=True))
        model.add(Dropout(0.2))
        model.add(LSTM(units=50, return_sequences=True))
        model.add(Dropout(0.2))
        model.add(LSTM(units=50))
        model.add(Dropout(0.2))
        model.add(Dense(units=1))
        model.summary()
        # Checkpoint weights every 5 epochs under <data>/<ticker>_train/.
        checkpoint_path = os.path.join(path, self.ticker + '_train',
                                       self.ticker + '.ckpt')
        # NOTE(review): `period` is deprecated in tf.keras — `save_freq`
        # is the modern spelling; confirm against the pinned TF version.
        cp_callback = tf.keras.callbacks.ModelCheckpoint(
            checkpoint_path, save_weights_only=True, verbose=1, period=5)
        model.compile(optimizer='adam',
                      loss='mean_squared_error',
                      metrics=['accuracy'])
        #model.load_weights(checkpoint_path)
        model.save_weights(checkpoint_path.format(epoch=0))
        #tf.keras.Model.save_weights(path)
        hist = model.fit(X_train,
                         y_train,
                         callbacks=[cp_callback],
                         epochs=450,
                         batch_size=32)
        model.save(os.path.join(path, self.ticker + '_pred.h5'))
        #print("loss:"+ str(hist.history['loss']))
        # Full price series (train + hold-out) used for testing and plots.
        df_volume = np.vstack((train, test))
        # print(train.shape)
        # print(test.shape)
        # print(df_volume.shape)
        # Test windows: last `window` train rows followed by the hold-out.
        inputs = df_volume[df_volume.shape[0] - test.shape[0] - window:]
        inputs = inputs.reshape(-1, 1)
        inputs = sc.transform(inputs)
        num_2 = df_volume.shape[0] - num_shape + window
        X_test = []
        for i in range(window, num_2):
            X_test_ = np.reshape(inputs[i - window:i, 0], (window, 1))
            X_test.append(X_test_)
        X_test = np.stack(X_test)
        #print(X_test.shape)
        predict = model.predict(X_test)
        # Undo the MinMax scaling to recover actual prices.
        predict = sc.inverse_transform(predict)
        #df=df.sort_values(by=['date'])
        #print('======================')
        #print('[ test ] ',test.shape)
        #print('[ predict ] ',predict.shape)
        #print(df['date'][:])
        #print(f'type: {type(predict)}, value: {predict[:]}')
        #print(f'type: {type(test)}, value: {test[:]}')
        #print('======================')
        # Hold-out error metrics.
        diff = predict - test.astype(float)
        # y_pred = predict
        # y_test_ = np.argmax(test, axis = 1)
        # print(accuracy_score(y_pred, y_test_))
        print("MSE:", np.mean(diff**2))
        print("MAE:", np.mean(abs(diff)))
        print("RMSE:", np.sqrt(np.mean(diff**2)))
        # plt.figure(figsize=(20,7))
        # plt.plot(df['date'].values[:], df_volume[:], color = 'red', label = 'Real lgchem Stock Price')
        # plt.plot(df['date'][-predict.shape[0]:].values, predict, color = 'blue', label = 'Predicted lgchem Stock Price')
        # plt.xticks(np.arange(1000,df[:].shape[0],2000))
        # plt.title('lgchem Stock Price Prediction')
        # plt.xlabel('Date')
        # plt.ylabel('Price (₩)')
        # plt.legend()
        # plt.show()
        # Roll the forecast forward 20 steps, feeding each prediction back.
        pred_ = predict[-1].copy()
        #print(f'type:{type(pred_)}, value:{pred_[:]}')
        prediction_full = []
        window = 60
        df_copy = df.iloc[:, 2:3][1:].values
        #print(f'type:{type(df_copy)}, value:{df_copy[:]}')
        for j in range(20):
            df_ = np.vstack((df_copy, pred_))
            train_ = df_[:num_shape]
            test_ = df_[num_shape:]
            df_volume_ = np.vstack((train_, test_))
            inputs_ = df_volume_[df_volume_.shape[0] - test_.shape[0] -
                                 window:]
            inputs_ = inputs_.reshape(-1, 1)
            inputs_ = sc.transform(inputs_)
            X_test_2 = []
            for k in range(window, num_2):
                X_test_3 = np.reshape(inputs_[k - window:k, 0], (window, 1))
                X_test_2.append(X_test_3)
            X_test_ = np.stack(X_test_2)
            predict_ = model.predict(X_test_)
            pred_ = sc.inverse_transform(predict_)
            prediction_full.append(pred_[-1][0])
            # print(prediction_full)
            df_copy = df_[j:]
        # Known-range predictions followed by the 20 rolled-forward steps.
        prediction_full_new = np.vstack(
            (predict, np.array(prediction_full).reshape(-1, 1)))
        # Extend the date axis 30 days past the last known date.
        df_date = df[['date']]
        for h in range(30):
            df_date_add = pd.to_datetime(
                df_date['date'].iloc[-1]) + pd.DateOffset(days=1)
            df_date_add = pd.DataFrame([df_date_add.strftime("%Y-%m-%d")],
                                       columns=['date'])
            # NOTE(review): DataFrame.append was removed in pandas 2.0 —
            # this needs pd.concat on newer pandas; confirm pinned version.
            df_date = df_date.append(df_date_add)
        df_date = df_date.reset_index(drop=True)
        # Plot real vs predicted prices and save the PNG next to this module.
        plt.figure(figsize=(20, 7))
        plt.plot(df['date'].values[:],
                 df_volume[:],
                 color='red',
                 label='Real ' + self.ticker + ' Stock Price')
        plt.plot(df_date['date'][-prediction_full_new.shape[0]:].values,
                 prediction_full_new,
                 color='blue',
                 label='Predicted ' + self.ticker + ' Stock Price')
        plt.xticks(np.arange(100, df[:].shape[0], 20))
        #100, 20
        plt.title(self.ticker + ' Stock Price Prediction')
        plt.xlabel('date')
        plt.ylabel('Price (₩)')
        plt.legend()
        image_path = os.path.abspath(__file__ + "/.." + "/image/")
        graph_image = self.ticker + "_graph.png"
        output_image = os.path.join(image_path, graph_image)
        plt.savefig(output_image)
class MemberChurnPred:
    """Member-churn prediction.

    Trains a small Keras classifier on the refined member CSV for
    evaluation, then fits a LogisticRegression model and writes each
    member's churn probability to ``member_churn_prob.csv``.
    """

    # Split data and the Keras model, populated by get_data()/create_model().
    x_train: object = None
    y_train: object = None
    x_validation: object = None
    y_validation: object = None
    x_test: object = None
    y_test: object = None
    model: object = None

    def __init__(self):
        self.reader = FileReader()

    def hook(self):
        """Run the full pipeline: data, NN train/eval, churn probabilities."""
        self.get_data()
        self.create_model()
        self.train_model()
        self.eval_model()
        self.debug_model()
        self.get_prob()

    def create_train(self, this):
        # Features = everything except the churn label column.
        return this.drop('Exited', axis=1)

    def create_label(self, this):
        # Label = the 'Exited' churn flag.
        return this['Exited']

    def get_data(self):
        """Load member_refined.csv and split into train/validation/test.

        Assumes the label is the last column of the CSV — TODO confirm.
        """
        self.reader.context = os.path.join(baseurl, 'data')
        self.reader.fname = 'member_refined.csv'
        data = self.reader.csv_to_dframe()
        data = data.to_numpy()
        # print(data[:60])
        table_col = data.shape[1]
        y_col = 1
        x_col = table_col - y_col
        x = data[:, 0:x_col]
        y = data[:, x_col:]
        # 60/40 split, then the 40% is split again into test/validation.
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.4)
        x_test, x_validation, y_test, y_validation = train_test_split(
            x_test, y_test, test_size=0.4)
        self.x_train = x_train
        self.x_validation = x_validation
        self.x_test = x_test
        self.y_train = y_train
        self.y_validation = y_validation
        self.y_test = y_test

    # --- build the model ---
    def create_model(self):
        """Build a 16-unit ReLU + sigmoid classifier for binary churn."""
        print('********** create model **********')
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Dense(16, activation='relu'))
        model.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # output
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        self.model = model

    # --- train the model ---
    def train_model(self):
        """Fit on the training split, validating on each of the 20 epochs."""
        print('********** train model **********')
        self.model.fit(x=self.x_train,
                       y=self.y_train,
                       validation_data=(self.x_validation,
                                        self.y_validation),
                       epochs=20,
                       verbose=1)

    # --- evaluate the model ---
    def eval_model(self):
        """Report each compiled metric on the held-out test split."""
        print('********** eval model **********')
        results = self.model.evaluate(x=self.x_test,
                                      y=self.y_test,
                                      verbose=2)
        for name, value in zip(self.model.metrics_names, results):
            print('%s: %.3f' % (name, value))

    # --- debug the model ---
    def debug_model(self):
        """Dump the raw splits to stdout for inspection."""
        print(f'self.train_data: \n{(self.x_train, self.y_train)}')
        print(f'self.validation_data: \n{(self.x_validation, self.y_validation)}')
        print(f'self.test_data: \n{(self.x_test, self.y_test)}')

    # ---------- probabilities ----------
    # NOTE(review): mutable class-level defaults — shared by all instances
    # until get_prob() rebinds them on self.
    member_id_list = []
    model_y_list = []
    true_y_list = []
    prob_churn_list = []

    def get_prob(self):
        """Fit a LogisticRegression on the (scaled) training split and
        compute a churn probability for every member in saved_data."""
        self.reader.context = os.path.join(
            baseurl, os.path.join('member', 'saved_data'))
        self.reader.fname = 'member_refined.csv'
        data = self.reader.csv_to_dframe()
        y = data['Exited']
        member_ids = data['CustomerId']
        data = self.create_train(data)
        data = data.to_numpy()
        # Standardize features; the same scaler then transforms everything.
        scaler = StandardScaler()
        self.x_train = scaler.fit_transform(self.x_train)
        self.x_test = scaler.transform(self.x_test)
        new_model = LogisticRegression()
        new_model.fit(self.x_train, self.y_train)
        refine_data = scaler.transform(data)
        model_answers = new_model.predict(refine_data)
        self.member_id_list = member_ids.tolist()
        self.model_y_list = model_answers.tolist()
        # print(self.model_y_list)
        self.true_y_list = y.tolist()
        # Column 1 of predict_proba is the positive-class (churn) probability.
        proba = new_model.predict_proba(refine_data)
        print(proba)
        print(proba[1][0])
        churn_proba = np.array([proba[i][1] for i in range(len(proba))])
        # print(churn_proba)
        self.prob_churn_list = churn_proba.tolist()
        self.save_proba_file(data, churn_proba, proba)

    def save_proba_file(self, data, churn_proba, proba):
        """Write member IDs, predictions, truth and churn probability to CSV."""
        # NOTE(review): `columns` (Korean headers) is built but never used.
        columns = ['회원ID', '모델 답', '실제 답', '이탈 가능성']
        refined_dict = {
            'MemberID': self.member_id_list,
            'Model_Y': self.model_y_list,
            'True_Y': self.true_y_list,
            'Prob_churn': self.prob_churn_list
        }
        refined_data = pd.DataFrame(refined_dict)
        print(refined_data)
        context = os.path.join(os.path.join(baseurl, 'memberChurn_pred'),
                               'saved_data')
        refined_data.to_csv(os.path.join(context, 'member_churn_prob.csv'),
                            index=False)
        print('file saved')