class BoardPro:

    def __init__(self):
        # print(f'basedir: {basedir}')
        self.reader = FileReader()
        self.datapath = os.path.abspath('com_stock_api/board')

    def process(self):
        file_data = self.get_data()
        data = self.refine_data(file_data)
        # self.save_data(data)
        return data

    def get_data(self):
        self.reader.context = os.path.join(self.datapath, 'data')
        self.reader.fname = 'kyobo_notice.csv'
        notice_file = self.reader.csv_to_dframe()
        # print(notice_file)
        return notice_file
    
    @staticmethod
    def refine_data(data):
        # rename the Korean column headers to English
        data = data.rename({'제목': 'title', '내용': 'content', '작성일자': 'regdate'}, axis='columns')
        data = data.sort_values(by=['regdate'], axis=0)
        data['email'] = '*****@*****.**'
        data['article_type'] = 'Notice'
        data = data.drop('url', axis=1)

        # print(data['content'][1])
        # strip HTML comments from each notice body and swap the Kyobo brand name
        for idx in data.index:
            con = re.sub('<!--(.+?)-->', '', str(data.loc[idx, 'content']))
            con = con.replace('<!--', '')
            con = con.replace('교보증권', 'Stock Psychic')
            data.loc[idx, 'content'] = con
        # data['regdate'] = ['20'+ regdate for regdate in data['regdate']]

        print(data)
        return data

    def save_data(self, data):
        self.reader.context = os.path.join(self.datapath, 'saved_data')
        self.reader.fname = 'kyobo_notice_database.csv'
        data.to_csv(self.reader.new_file(), index=False)
        print('file saved')
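
All of these examples depend on a FileReader helper whose definition is not shown. As a minimal sketch (an assumption, not the project's actual class), the interface they rely on — a context directory, a fname attribute, csv_to_dframe() for reading and new_file() for building an output path — could look like the code below; note that Example #12 prefixes fname with '/', so the real helper may simply concatenate strings instead of using os.path.join.

import os
import pandas as pd

class FileReader:
    """Hypothetical stand-in for the project's FileReader (assumed interface)."""

    def __init__(self, context: str = '', fname: str = ''):
        self.context = context  # directory that holds the file
        self.fname = fname      # file name inside that directory

    def new_file(self) -> str:
        # Full path to the target file.
        return os.path.join(self.context, self.fname)

    def csv_to_dframe(self) -> pd.DataFrame:
        # Load context/fname into a DataFrame.
        return pd.read_csv(self.new_file())
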
Example #2
def __init__(self):
    self.reader = FileReader()
    self.path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                             'models', 'member')
Example #3
def __init__(self):
    # print(f'basedir: {basedir}')
    self.fileReader = FileReader()
    self.datapath = os.path.abspath(os.path.dirname(__file__))
Example #4
def __init__(self):
    self.filereader = FileReader()
    self.isNewMember = False
Example #5
def __init__(self):
    self.path = os.path.abspath(__file__ + "/.." + "/saved_data")
    self.fileReader = FileReader()
    self.df = None
    self.ticker = ''
    self.tickers = ['AAPL', 'TSLA']
Example #6
def __init__(self):
    self.reader = FileReader()
    self.datapath = os.path.abspath(os.path.dirname(__file__))
Example #7
def __init__(self, k=0.5):
    print(f'basedir: {basedir}')
    self.k = k
    # self.data = os.path.abspath("com_stock_api/naver_news/data")
    self.reader = FileReader()
Example #8
def __init__(self):
    # print(f'basedir: {basedir}')
    self.fileReader = FileReader()
    self.datapath = os.path.abspath('com_stock_api/member')
Example #9
def __init__(self):
    self.filereader = FileReader()
Example #10
def __init__(self):
    self.path = os.path.abspath(__file__ + "/.." + "/data")
    self.fileReader = FileReader()
    self.df = None
    # self.ticker = ''
    self.tickers = ['051910', '011070']
Example #11
def __init__(self):
    self.reader = FileReader()
    self.data = os.path.abspath(__file__ + "/.." + "/data/")
    self.ticker = ''
    self.tickers = ['051910', '011070']
Example #12
class StockService():
    def __init__(self):
        self.reader = FileReader()
        self.data = os.path.abspath(__file__ + "/.." + "/data/")
        self.ticker = ''
        self.tickers = ['051910', '011070']

    def hook(self):
        for tic in self.tickers:
            self.ticker = tic
            self.get_data()

    def get_data(self):
        path = self.data
        self.reader.context = os.path.join(path)
        self.reader.fname = '/' + self.ticker + '_dataset.csv'
        df = self.reader.csv_to_dframe()
        print(df)
        #print(df.columns)
        """
        date,open,close,high,low,volume,011070_open,011070_close,011070_high,011070_low,ko_cases,ko_deaths,se_cases,se_deaths
        """

        num_shape = 120
        train = df[:num_shape][["open"]]
        test = df[num_shape:][["close"]]

        #print(type(df['date']))
        # print(df.columns)
        # print(df.shape)
        #train = df[["close"]]
        print(train.shape)
        #test = pd.DataFrame(df["close"])
        print('test:', test.shape)

        sc = MinMaxScaler(feature_range=(0, 1))
        train_scaled = sc.fit_transform(train)

        X_train = []

        #price on next day
        y_train = []

        window = 60

        for i in range(window, num_shape):
            try:
                X_train_ = np.reshape(train_scaled[i - window:i, 0],
                                      (window, 1))
                X_train.append(X_train_)
                y_train.append(train_scaled[i, 0])
            except:
                pass

        X_train = np.stack(X_train)
        #print(X_train.shape)
        y_train = np.stack(y_train)
        #print(y_train.shape)

        model = tf.keras.models.Sequential()

        model.add(
            LSTM(units=50,
                 return_sequences=True,
                 input_shape=(X_train.shape[1], 1)))
        model.add(Dropout(0.2))

        model.add(LSTM(units=50, return_sequences=True))
        model.add(Dropout(0.2))

        model.add(LSTM(units=50, return_sequences=True))
        model.add(Dropout(0.2))

        model.add(LSTM(units=50))
        model.add(Dropout(0.2))

        model.add(Dense(units=1))
        model.summary()

        checkpoint_path = os.path.join(path, self.ticker + '_train',
                                       self.ticker + '.ckpt')
        cp_callback = tf.keras.callbacks.ModelCheckpoint(
            checkpoint_path, save_weights_only=True, verbose=1, period=5)

        model.compile(optimizer='adam',
                      loss='mean_squared_error',
                      metrics=['accuracy'])
        #model.load_weights(checkpoint_path)
        model.save_weights(checkpoint_path.format(epoch=0))
        #tf.keras.Model.save_weights(path)
        hist = model.fit(X_train,
                         y_train,
                         callbacks=[cp_callback],
                         epochs=450,
                         batch_size=32)
        model.save(os.path.join(path, self.ticker + '_pred.h5'))

        #print("loss:"+ str(hist.history['loss']))

        df_volume = np.vstack((train, test))
        # print(train.shape)
        # print(test.shape)
        # print(df_volume.shape)
        inputs = df_volume[df_volume.shape[0] - test.shape[0] - window:]
        inputs = inputs.reshape(-1, 1)
        inputs = sc.transform(inputs)
        num_2 = df_volume.shape[0] - num_shape + window

        X_test = []

        for i in range(window, num_2):
            X_test_ = np.reshape(inputs[i - window:i, 0], (window, 1))
            X_test.append(X_test_)

        X_test = np.stack(X_test)
        #print(X_test.shape)

        predict = model.predict(X_test)
        predict = sc.inverse_transform(predict)
        #df=df.sort_values(by=['date'])
        #print('======================')
        #print('[ test ] ',test.shape)
        #print('[ predict ] ',predict.shape)
        #print(df['date'][:])

        #print(f'type: {type(predict)}, value: {predict[:]}')
        #print(f'type: {type(test)}, value: {test[:]}')
        #print('======================')

        diff = predict - test.astype(float)

        # y_pred = predict
        # y_test_ = np.argmax(test, axis = 1)
        # print(accuracy_score(y_pred, y_test_))

        print("MSE:", np.mean(diff**2))
        print("MAE:", np.mean(abs(diff)))
        print("RMSE:", np.sqrt(np.mean(diff**2)))

        # plt.figure(figsize=(20,7))
        # plt.plot(df['date'].values[:], df_volume[:], color = 'red', label = 'Real lgchem Stock Price')
        # plt.plot(df['date'][-predict.shape[0]:].values, predict, color = 'blue', label = 'Predicted lgchem Stock Price')
        # plt.xticks(np.arange(1000,df[:].shape[0],2000))
        # plt.title('lgchem Stock Price Prediction')
        # plt.xlabel('Date')
        # plt.ylabel('Price (₩)')
        # plt.legend()
        # plt.show()

        pred_ = predict[-1].copy()
        #print(f'type:{type(pred_)}, value:{pred_[:]}')
        prediction_full = []
        window = 60
        df_copy = df.iloc[:, 2:3][1:].values
        #print(f'type:{type(df_copy)}, value:{df_copy[:]}')

        for j in range(20):
            df_ = np.vstack((df_copy, pred_))
            train_ = df_[:num_shape]
            test_ = df_[num_shape:]

            df_volume_ = np.vstack((train_, test_))

            inputs_ = df_volume_[df_volume_.shape[0] - test_.shape[0] -
                                 window:]
            inputs_ = inputs_.reshape(-1, 1)
            inputs_ = sc.transform(inputs_)

            X_test_2 = []

            for k in range(window, num_2):
                X_test_3 = np.reshape(inputs_[k - window:k, 0], (window, 1))
                X_test_2.append(X_test_3)

            X_test_ = np.stack(X_test_2)
            predict_ = model.predict(X_test_)
            pred_ = sc.inverse_transform(predict_)
            prediction_full.append(pred_[-1][0])
            # print(prediction_full)
            df_copy = df_[j:]

        prediction_full_new = np.vstack(
            (predict, np.array(prediction_full).reshape(-1, 1)))

        df_date = df[['date']]

        # extend the date axis by 30 calendar days, one day at a time
        for h in range(30):
            df_date_add = pd.to_datetime(
                df_date['date'].iloc[-1]) + pd.DateOffset(days=1)
            df_date_add = pd.DataFrame([df_date_add.strftime("%Y-%m-%d")],
                                       columns=['date'])
            # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
            df_date = pd.concat([df_date, df_date_add])

        df_date = df_date.reset_index(drop=True)

        plt.figure(figsize=(20, 7))
        plt.plot(df['date'].values[:],
                 df_volume[:],
                 color='red',
                 label='Real ' + self.ticker + ' Stock Price')
        plt.plot(df_date['date'][-prediction_full_new.shape[0]:].values,
                 prediction_full_new,
                 color='blue',
                 label='Predicted ' + self.ticker + ' Stock Price')
        plt.xticks(np.arange(100, df[:].shape[0], 20))  #100, 20
        plt.title(self.ticker + ' Stock Price Prediction')
        plt.xlabel('date')
        plt.ylabel('Price (₩)')
        plt.legend()

        image_path = os.path.abspath(__file__ + "/.." + "/image/")
        graph_image = self.ticker + "_graph.png"
        output_image = os.path.join(image_path, graph_image)
        plt.savefig(output_image)
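
A minimal way to exercise this example, assuming the snippet's imports (os, numpy as np, pandas as pd, tensorflow as tf, the Keras LSTM/Dropout/Dense layers, MinMaxScaler, matplotlib.pyplot as plt) and the project's FileReader are already in place, is a small hypothetical driver:

if __name__ == '__main__':
    service = StockService()
    service.hook()  # loops over self.tickers and runs get_data() for each one
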
Example #13
class MemberChurnPred:

    x_train: object = None
    y_train: object = None
    x_validation: object = None
    y_validation: object = None
    x_test: object = None
    y_test: object = None
    model: object = None

    def __init__(self):
        self.reader = FileReader()

    def hook(self):
        self.get_data()
        self.create_model()
        self.train_model()
        self.eval_model()
        self.debug_model()
        self.get_prob()

    def create_train(self, this):
        return this.drop('Exited', axis=1)

    def create_label(self, this):
        return this['Exited']

    def get_data(self):
        self.reader.context = os.path.join(baseurl, 'data')
        self.reader.fname = 'member_refined.csv'
        data = self.reader.csv_to_dframe()
        data = data.to_numpy()
        # print(data[:60])

        table_col = data.shape[1]
        y_col = 1
        x_col = table_col - y_col
        x = data[:, 0:x_col]
        y = data[:, x_col:]

        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.4)
        x_test, x_validation, y_test, y_validation = train_test_split(
            x_test, y_test, test_size=0.4)

        self.x_train = x_train
        self.x_validation = x_validation
        self.x_test = x_test
        self.y_train = y_train
        self.y_validation = y_validation
        self.y_test = y_test

    # build the model
    def create_model(self):
        print('********** create model **********')
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Dense(16, activation='relu'))
        model.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # output
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        self.model = model

    # train the model
    def train_model(self):
        print('********** train model **********')
        self.model.fit(x=self.x_train,
                       y=self.y_train,
                       validation_data=(self.x_validation, self.y_validation),
                       epochs=20,
                       verbose=1)

    # evaluate the model
    def eval_model(self):
        print('********** eval model **********')
        results = self.model.evaluate(x=self.x_test, y=self.y_test, verbose=2)
        for name, value in zip(self.model.metrics_names, results):
            print('%s: %.3f' % (name, value))

    # debug: inspect the data splits
    def debug_model(self):
        print(f'self.train_data: \n{(self.x_train, self.y_train)}')
        print(
            f'self.validation_data: \n{(self.x_validation, self.y_validation)}'
        )
        print(f'self.test_data: \n{(self.x_test, self.y_test)}')

    # ---------- churn probability ----------
    member_id_list = []
    model_y_list = []
    true_y_list = []
    prob_churn_list = []

    def get_prob(self):
        self.reader.context = os.path.join(
            baseurl, os.path.join('member', 'saved_data'))
        self.reader.fname = 'member_refined.csv'
        data = self.reader.csv_to_dframe()
        y = data['Exited']
        member_ids = data['CustomerId']
        data = self.create_train(data)

        data = data.to_numpy()

        scaler = StandardScaler()
        self.x_train = scaler.fit_transform(self.x_train)
        self.x_test = scaler.transform(self.x_test)

        new_model = LogisticRegression()
        new_model.fit(self.x_train, self.y_train)

        refine_data = scaler.transform(data)
        model_answers = new_model.predict(refine_data)

        self.member_id_list = member_ids.tolist()
        self.model_y_list = model_answers.tolist()
        # print(self.model_y_list)
        self.true_y_list = y.tolist()

        proba = new_model.predict_proba(refine_data)
        print(proba)
        print(proba[1][0])
        churn_proba = proba[:, 1]  # probability of the positive (churn) class
        # print(churn_proba)

        self.prob_churn_list = churn_proba.tolist()

        self.save_proba_file(data, churn_proba, proba)

    def save_proba_file(self, data, churn_proba, proba):
        # display labels (currently unused; refined_dict below defines the saved columns)
        columns = ['MemberID', 'Model answer', 'True answer', 'Churn probability']
        refined_dict = {
            'MemberID': self.member_id_list,
            'Model_Y': self.model_y_list,
            'True_Y': self.true_y_list,
            'Prob_churn': self.prob_churn_list
        }

        refined_data = pd.DataFrame(refined_dict)
        print(refined_data)

        context = os.path.join(os.path.join(baseurl, 'memberChurn_pred'),
                               'saved_data')
        refined_data.to_csv(os.path.join(context, 'member_churn_prob.csv'),
                            index=False)
        print('file saved')
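
As with the stock example, a small hypothetical driver is enough to run the whole pipeline, assuming baseurl points at the project's data directory and the snippet's imports (os, numpy as np, pandas as pd, tensorflow as tf, train_test_split, StandardScaler, LogisticRegression, FileReader) are available:

if __name__ == '__main__':
    predictor = MemberChurnPred()
    predictor.hook()  # get_data -> create_model -> train_model -> eval_model -> debug_model -> get_prob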