def similarity(email):
    """Return the euclidean-distance record of *email* against all comparable members.

    Loads the members table, filters out churned / zero- and low-balance members
    (never dropping the requesting member), builds a pairwise euclidean distance
    matrix over the preprocessed features, and returns the row for *email* as a
    dict mapping member email -> distance.

    Raises:
        IndexError: if *email* is not present in the filtered member set.
    """
    # BUG FIX: the original called engine.connect() twice — the second call
    # created a brand-new connection just to close it, leaking the connection
    # actually used by read_sql_table. Hold one connection and close that one.
    conn = engine.connect()
    try:
        members = pd.read_sql_table('members', conn)
    finally:
        conn.close()
    # Drop zero-balance members, but never the requesting member.
    zero_bal_idx = members[(members['balance'] == 0) & (members['email'] != email)].index
    members = members.drop(zero_bal_idx)
    preprocessing = RecommendStockPreprocessing()
    refined_members = preprocessing.hook_process(members)
    # Remove churned members.
    exited_idx = refined_members[refined_members['exited'] == 1].index
    refined_members = refined_members.drop(exited_idx)
    # Remove low-balance members (ordinal balance <= 2), keeping the requester.
    low_bal_idx = refined_members[(refined_members['balance'] <= 2) &
                                  (refined_members['email'] != email)].index
    refined_members = refined_members.drop(low_bal_idx)
    refined_members.set_index(refined_members['email'], inplace=True)
    refined_members = refined_members.drop(['email'], axis=1)
    base_index = refined_members.index
    # Pairwise euclidean distances between all remaining members.
    row_dist = pd.DataFrame(squareform(pdist(refined_members, metric='euclidean')),
                            columns=base_index,
                            index=base_index)
    this_mem = row_dist[row_dist.index == email].reset_index(drop=True)
    return this_mem.to_dict(orient='records')[-1]
def dataframe(self):
    """Build and persist the per-ticker KOSPI training dataset.

    Joins: the chosen KOSPI ticker's OHLCV history, the other configured
    ticker's renamed OHLC columns, and Korean covid counts, all restricted to
    2020-01-01 .. 2020-06-30, indexed by date. Missing covid counts become 0,
    remaining gaps are interpolated. The result is written to
    ``<ticker>_dataset.csv`` under ``self.path`` and returned.
    """
    df = pd.read_sql_table('korea_finance', engine.connect())
    df = df.loc[(df['ticker'] == self.ticker) & (df['date'] > '2019-12-31') &
                (df['date'] < '2020-07-01')]
    df['date'] = pd.to_datetime(df['date'])
    df = df.drop(['ticker', 'id'], axis=1)
    df = df[df['date'].notnull()].set_index('date')
    # The one other ticker being tracked alongside self.ticker.
    tic = [t for t in self.tickers if t != self.ticker]
    df2 = pd.read_sql_table('korea_finance', engine.connect())
    df2 = df2.loc[(df2['ticker'] == tic[0]) & (df2['date'] > '2019-12-31') &
                  (df2['date'] < '2020-07-01')]
    # BUG FIX: convert df2's dates to datetime as df's are — without this the
    # two frames index on different dtypes (datetime vs str) and the outer
    # join below cannot align any rows.
    df2['date'] = pd.to_datetime(df2['date'])
    df2 = df2.rename(
        columns={
            'open': tic[0] + '_open',
            'close': tic[0] + '_close',
            'high': tic[0] + '_high',
            'low': tic[0] + '_low'
        })
    df2 = df2.drop(['id', 'ticker', 'volume'], axis=1)
    df2 = df2[df2['date'].notnull()].set_index('date')
    covid_json = json.dumps(KoreaCovids.get()[0], default=lambda x: x.__dict__)
    df3 = pd.read_json(covid_json)
    df3 = df3.loc[(df3['date'] > '2019-12-31') & (df3['date'] < '2020-07-01')]
    df3 = df3.drop([
        'id', 'total_cases', 'total_deaths', 'seoul_cases', 'seoul_deaths'
    ], axis=1)
    df3 = df3[df3['date'].notnull()].set_index('date')
    main_df = df.join(df2, how='outer')
    main_df = main_df.join(df3, how='outer')
    # Days without covid reports mean zero new cases/deaths, not missing data.
    covid_cols = ['ko_cases', 'ko_deaths', 'se_cases', 'se_deaths']
    main_df[covid_cols] = main_df[covid_cols].fillna(value=0)
    # Fill remaining price gaps (non-trading days) by linear interpolation.
    main_df = main_df.astype(float).interpolate()
    main_df = main_df.fillna(value=0)
    print(main_df)
    print(main_df.columns)
    output_file = self.ticker + '_dataset.csv'
    result = os.path.join(self.path, output_file)
    main_df.to_csv(result)
    return main_df
def get_data(self, usd_krw_rate=1129.16):
    """Load members, KOSPI and NASDAQ tables; normalize KOSPI prices to USD.

    KOSPI prices are stored in KRW. Each OHLC column is divided by
    *usd_krw_rate* (default: the 2020-10-30 KRW/USD rate used by the original
    code) so they are comparable with the NASDAQ USD prices. Results are
    stored on ``self.members`` / ``self.kospis`` / ``self.nasdaqs``.

    Args:
        usd_krw_rate: KRW-per-USD conversion rate (keyword, backward-compatible).
    """
    # Reuse one connection for all three reads instead of opening three.
    conn = engine.connect()
    try:
        members = pd.read_sql_table('members', conn)
        kospis = pd.read_sql_table('korea_finance', conn)
        nasdaqs = pd.read_sql_table('yahoo_finance', conn)
    finally:
        conn.close()
    # Convert every KOSPI price column from KRW to USD, rounded to 4 places.
    for col in ('open', 'close', 'high', 'low'):
        kospis[col] = [round(float(v) / usd_krw_rate, 4) for v in kospis[col]]
    self.members = members
    self.kospis = kospis
    self.nasdaqs = nasdaqs
def similarity(email):
    """Return the euclidean-distance record of *email* against active members.

    Filters out churned and zero-balance members, computes the pairwise
    euclidean distance matrix over preprocessed member features, and returns
    the row for *email* as a dict mapping member email -> distance.

    Raises:
        IndexError: if *email* is not present in the filtered member set.
    """
    members = pd.read_sql_table('members', engine.connect())
    preprocessing = RecommendStockPreprocessing()
    refined_members = preprocessing.hook_process(members)
    # Remove churned members.
    exited_idx = refined_members[refined_members['exited'] == 1].index
    refined_members = refined_members.drop(exited_idx)
    # BUG FIX: never drop the requesting member, even at zero balance —
    # otherwise *email* is absent from the distance matrix and the final
    # record lookup raises IndexError (matches the sibling similarity()).
    zero_bal_idx = refined_members[(refined_members['balance'] == 0) &
                                   (refined_members['email'] != email)].index
    refined_members = refined_members.drop(zero_bal_idx)
    refined_members.set_index(refined_members['email'], inplace=True)
    refined_members = refined_members.drop(['email'], axis=1)
    base_index = refined_members.index
    # Pairwise euclidean distances between all remaining members.
    row_dist = pd.DataFrame(squareform(pdist(refined_members, metric='euclidean')),
                            columns=base_index,
                            index=base_index)
    this_mem = row_dist[row_dist.index == email].reset_index(drop=True)
    return this_mem.to_dict(orient='records')[-1]
def get_data(self):
    """Load the members table, preprocess it, and split into train/validation/test.

    The preprocessed frame's last column is the label; the rest are features.
    Splits 60/24/16 (train / test / validation) and stores all six arrays on
    ``self``.
    """
    raw = pd.read_sql_table('members', engine.connect())
    # Preprocess raw member rows into a purely numeric frame.
    modeling_data_process = MemberModelingDataPreprocessing()
    matrix = modeling_data_process.hook_process(raw).to_numpy()
    # Label is the single last column; everything before it is a feature.
    n_features = matrix.shape[1] - 1
    features = matrix[:, :n_features]
    labels = matrix[:, n_features:]
    # First split: 60% train, 40% held out; second split carves the held-out
    # portion into 60% test / 40% validation.
    x_train, x_rest, y_train, y_rest = train_test_split(features, labels,
                                                        test_size=0.4)
    x_test, x_validation, y_test, y_validation = train_test_split(
        x_rest, y_rest, test_size=0.4)
    self.x_train = x_train
    self.x_validation = x_validation
    self.x_test = x_test
    self.y_train = y_train
    self.y_validation = y_validation
    self.y_test = y_test
def substitute(self):
    # Purpose: for every non-churned member, pair their feature row with each
    # other member's row and feed both into the similarity model builder.
    members = pd.read_sql_table('members', engine.connect())
    preprocessing = RecommendStockPreprocessing()
    refined_members = preprocessing.hook_process(members)
    # Drop churned members before pairing.
    isExitedMem = refined_members[refined_members['exited'] == 1].index
    refined_members = refined_members.drop(isExitedMem)
    print(f'REFINED MEMBERS: \n{refined_members}')
    # Re-key rows by email so pairs can be addressed by member email.
    refined_members.set_index(refined_members['email'], inplace=True)
    refined_members = refined_members.drop(['email'], axis=1)
    print(f'REFINED MEMBERS AFTER EMAIL INDEXING: \n{refined_members}')
    base_columns = refined_members.columns
    for email in refined_members.index:
        # Current member as a one-row frame (.loc yields a Series; .T restores
        # the row orientation).
        this_member = pd.DataFrame(refined_members.loc[email, base_columns]).T
        # Every member except the current one.
        else_members = refined_members.loc[:, base_columns].drop(email, axis=0)
        for mem in else_members.index:
            # Feed dict consumed by create_similarity_model(): the current
            # member's features vs one candidate member's features.
            self._feed_dict = {
                'this_member': this_member.loc[email, base_columns],
                'target_member': else_members.loc[mem, base_columns]
            }
            self.create_similarity_model()
def similarMembersTradings(sim_members, email):
    """Recommend stocks traded by members similar to *email*.

    Args:
        sim_members: iterable of (member_email, similarity) pairs.
        email: the requesting member's email.

    Returns:
        List of {'stock_ticker', 'stock_type', 'email'} dicts for tickers the
        requesting member has not already traded, ordered by how many times
        the similar members traded them (descending).
    """
    tradings = pd.read_sql_table('tradings', engine.connect())
    this_members_tradings = list(tradings[tradings['email'] == email]['stock_ticker'])
    # PERF FIX: select all similar members' rows with one boolean mask instead
    # of pd.concat inside the loop (quadratic re-copying of the frame).
    # Assumes emails in sim_members are unique, as produced by a dict of
    # per-member similarities — duplicates would previously be double-counted.
    sim_emails = [mem for mem, prob in sim_members]
    match_tradings = tradings[tradings['email'].isin(sim_emails)]
    # Tickers ranked by trade count among similar members, most traded first.
    stocks_size = list(match_tradings.groupby('stock_ticker').size()
                       .sort_values(ascending=False).index)
    return [
        {
            'stock_ticker': s,
            'stock_type': str(match_tradings[match_tradings['stock_ticker'] == s]
                              ['stock_type'].unique()[0]),
            'email': email,
        }
        for s in stocks_size if s not in this_members_tradings
    ]
def hook_process(self, member_data):
    # Purpose: normalize member data into the numeric training frame used by
    # the churn model. Accepts either a full members DataFrame or a single
    # MemberDto (a new member to be scored against the existing population).
    this = self.filereader
    members = member_data
    this.train = members
    if isinstance(this.train, MemberDto):
        # Single new member: flatten the DTO into a one-row DataFrame, then
        # append it to the stored members table so the same encoding/binning
        # pipeline applies to old and new rows alike.
        m = this.train
        _data = {'email': m.email, 'password': m.password, 'name': m.name,
                 'geography': m.geography, 'gender': m.gender, 'age': int(m.age),
                 'profile': m.profile, 'tenure': int(m.tenure),
                 'stock_qty': int(m.stock_qty), 'balance': float(m.balance),
                 'has_credit': int(m.has_credit),
                 'credit_score': int(m.credit_score),
                 'is_active_member': int(m.is_active_member),
                 'estimated_salary': float(m.estimated_salary), 'role': m.role,
                 'probability_churn': float(m.probability_churn),
                 'exited': int(m.exited)}
        this.train = pd.DataFrame([_data])
        self.isNewMember = True
        members_data = pd.read_sql_table('members', engine.connect())
        this.train = pd.concat([members_data, this.train], ignore_index=True)
    # isAdmin = this.train['email'] == '*****@*****.**'
    # this.train = this.train[~isAdmin]
    # Drop identity/meta columns that carry no predictive signal.
    # (was: 컬럼 삭제 — "drop columns")
    this = self.drop_feature(this, 'email')
    this = self.drop_feature(this, 'password')
    this = self.drop_feature(this, 'name')
    this = self.drop_feature(this, 'profile')
    this = self.drop_feature(this, 'role')
    this = self.drop_feature(this, 'probability_churn')
    # Encode categorical features and bin numeric ones; order matters —
    # age_ordinal derives a binned column before the raw 'age' is dropped.
    # (was: 데이터 정제 — "data refinement")
    this = self.geography_nominal(this)
    this = self.gender_nominal(this)
    this = self.age_ordinal(this)
    this = self.drop_feature(this, 'age')
    this = self.creditScore_ordinal(this)
    this = self.balance_ordinal(this)
    this = self.estimatedSalary_ordinal(this)
    # Correlation between member churn and each column (offline analysis only).
    this = self.columns_relocation(this)  # move the label column to the end
    if self.isNewMember:
        # Only the newly appended member row is needed for prediction; the
        # rest of the table was only there to drive consistent encoding.
        this.train = this.train.tail(1)
        this.train.index = [0]
    print(f'EVERYTHING IS DONE: \n{this.train}')
    return this.train
def create_dataframe(self):
    # Purpose: assemble one training CSV for self.ticker by joining, on date:
    # (1) the ticker's own history plus derived features, (1-2) the other
    # NASDAQ ticker's prices, two KOSPI tickers' prices, (2) news sentiment,
    # and (3) US covid counts; then interpolate gaps and save to disk.
    main_df = self.df
    # 1. Bring history of the chosen ticker (2010-01-01 .. 2020-06-30).
    df = pd.read_sql_table('yahoo_finance', engine.connect())
    df = df.loc[(df['ticker'] == self.ticker) & (df['date'] > '2009-12-31') &
                (df['date'] < '2020-07-01')]
    df = df.drop(['ticker', 'id'], axis=1)
    # 1-1. Add three derived features: 50-day moving average, day-over-day
    # increase in volume (%), and day-over-day increase in adjclose (%).
    avg = []  # NOTE(review): unused — kept as-is
    df['moving_avg'] = df['adjclose'].rolling(window=50, min_periods=0).mean()
    i = 1
    vol_rate = [0]        # first row has no previous day; rate defaults to 0
    adjclose_rate = [0]
    while i < len(df):
        vol_rate.append((df.iloc[i]['volume'] - df.iloc[i - 1]['volume']) /
                        df.iloc[i - 1]['volume'])
        adjclose_rate.append(
            (df.iloc[i]['adjclose'] - df.iloc[i - 1]['adjclose']) /
            df.iloc[i - 1]['adjclose'])
        i += 1
    df['increase_rate_vol'] = vol_rate
    df['increase_rate_adjclose'] = adjclose_rate
    # 1-2. Bring history of the other NASDAQ ticker, columns prefixed with
    # its symbol so they survive the join.
    df4 = pd.read_sql_table('yahoo_finance', engine.connect())
    tic = [t for t in self.tickers if t != self.ticker]
    df4 = df4.loc[(df4['ticker'] == tic[0]) & (df4['date'] > '2009-12-31') &
                  (df4['date'] < '2020-07-01')]
    df4 = df4.rename(
        columns={
            'open': tic[0] + '_open',
            'high': tic[0] + '_high',
            'low': tic[0] + '_low',
            'close': tic[0] + '_close',
            'adjclose': tic[0] + '_adjclose'
        })
    df4 = df4.drop(['ticker', 'id', 'volume'], axis=1)
    temp_df = pd.DataFrame()
    # NOTE(review): this read is immediately overwritten inside the loop below.
    df5 = pd.read_sql_table('korea_finance', engine.connect())
    k_tickers = {
        'lgchem': '051910',
        'lginnotek': '011070'
    }  # LG Chem & LG Innotek
    for k_tic, v in k_tickers.items():
        # One pass per KOSPI ticker; columns prefixed with its name.
        df5 = pd.read_sql_table('korea_finance', engine.connect())
        df5['date'] = pd.to_datetime(df5['date'])
        df5 = df5.loc[(df5['ticker'] == v) & (df5['date'] > '2009-12-31') &
                      (df5['date'] < '2020-07-01')]
        df5 = df5.rename(
            columns={
                'open': k_tic + '_open',
                'close': k_tic + '_close',
                'high': k_tic + '_high',
                'low': k_tic + '_low'
            })
        df5 = df5.drop(['id', 'ticker', 'volume'], axis=1)
        df5 = df5[df5['date'].notnull() == True].set_index('date')
        temp_df = temp_df.join(df5, how='outer')
    # Restore 'date' as a column so temp_df can be re-indexed with the rest.
    temp_df['date'] = temp_df.index
    # 2. Bring news sentiment for the chosen ticker.
    if (self.ticker == 'AAPL'):
        apple_json = json.dumps(AppleSentiment.get()[0],
                                default=lambda x: x.__dict__)
        df2 = pd.read_json(apple_json)
        df2 = df2.drop(['id', 'pos', 'neg', 'neu'], axis=1)
    else:
        tesla_json = json.dumps(TeslaSentiment.get()[0],
                                default=lambda x: x.__dict__)
        df2 = pd.read_json(tesla_json)
        df2 = df2.drop(['id', 'pos', 'neg', 'neu'], axis=1)
    # 3. Bring US covid status per day (new cases/deaths only).
    covid_json = json.dumps(USCovids.get()[0], default=lambda x: x.__dict__)
    df3 = pd.read_json(covid_json)
    df3 = df3.loc[(df3['date'] > '2019-12-31') & (df3['date'] < '2020-07-01')]
    df3 = df3.drop(
        ['id', 'total_cases', 'total_deaths', 'ca_cases', 'ca_deaths'],
        axis=1)
    # 4. Combine all features in one csv file — index everything by date first.
    df = df[df['date'].notnull() == True].set_index('date')
    df4 = df4[df4['date'].notnull() == True].set_index('date')
    temp_df = temp_df[temp_df['date'].notnull() == True].set_index('date')
    df2 = df2[df2['date'].notnull() == True].set_index('date')
    df3 = df3[df3['date'].notnull() == True].set_index('date')
    if self.ticker == 'AAPL':
        main_df = df.join(df4, how='outer')
    else:
        main_df = df.join(df4, how='inner')
    main_df = main_df.join(temp_df, how='outer')
    main_df = main_df.join(df2, how='outer')
    main_df = main_df.join(df3, how='outer')
    # Missing covid reports mean zero new cases/deaths, not missing data.
    main_df[[
        'new_us_cases', 'new_us_deaths', 'new_ca_cases', 'new_ca_deaths'
    ]] = main_df[[
        'new_us_cases', 'new_us_deaths', 'new_ca_cases', 'new_ca_deaths'
    ]].fillna(value=0)
    # Fill NaN values in stock prices with interpolated values.
    main_df = main_df.astype(float).interpolate()
    main_df = main_df.fillna(value=0)
    # 6. Save to CSV file.
    output_file = self.ticker + '_dataset.csv'
    result = os.path.join(self.path, output_file)
    main_df.to_csv(result)
    print(main_df)
    return main_df
# NOTE(review): stray triple-quote from the original source — it appears to
# open a module-level string (likely commenting out code that follows this
# chunk); kept verbatim to preserve file behavior.
'''
def predict(self):
    # Purpose: assemble the current feature vector from live DB / covid data
    # and run it through the saved Keras checkpoint to produce a price value.
    K.clear_session()
    checkpoint_path = os.path.join(self.path, 'checkpoint.h5')
    with graph.as_default():
        model = keras.models.load_model(checkpoint_path)
        model.summary()
        print('====================5===========================')
        ######### PREPARE FOR PREDICTION FEATURES ###############
        # Feature order must match the training dataset's column order.
        x = []
        x.append(self.open)
        x.append(self.high)
        x.append(self.low)
        x.append((self.high + self.low) / 2)  # 'moving_avg' (approximated)
        x.append(0.05)  # 'increase_rate_vol' — fixed placeholder value
        x.append(0.05)  # 'increase_rate_adjclose' — fixed placeholder value
        # The other NASDAQ stock's latest prices from the DB.
        apple_df = pd.read_sql_table('yahoo_finance', engine.connect())
        op_tic = ['TSLA', 'AAPL']
        tic = [t for t in op_tic if t != self.ticker]
        apple_df = apple_df.loc[(apple_df['ticker'] == tic[0])].iloc[-1]
        x.append(apple_df['open'])
        x.append(apple_df['high'])
        x.append(apple_df['low'])
        x.append(apple_df['close'])
        x.append(apple_df['adjclose'])
        # The two KOSPI stocks' latest prices from the DB.
        KOSPI = {'lgchem': '051910', 'lginnotek': '011070'}
        for k_tic, v in KOSPI.items():
            kospi_df = pd.read_sql_table('korea_finance', engine.connect())
            kospi_df = kospi_df.loc[(kospi_df['ticker'] == v)].iloc[-1]
            x.append(float(kospi_df['open']))
            x.append(float(kospi_df['close']))
            x.append(float(kospi_df['high']))
            x.append(float(kospi_df['low']))
        # News-sentiment compound score for the prediction day.
        x.append(self.compound)
        # Append the latest covid case/death counts.
        covid_json = json.dumps(USCovids.get()[0],
                                default=lambda x: x.__dict__)
        covid_df = pd.read_json(covid_json)
        covid_df = covid_df.iloc[-1]
        x.append(covid_df['new_us_cases'])
        x.append(covid_df['new_us_deaths'])
        x.append(covid_df['new_ca_cases'])
        x.append(covid_df['new_ca_deaths'])
        x = [x]  # shape (1, n_features) for the scaler
        # normalize data
        # NOTE(review): fitting MinMaxScaler on this single sample makes every
        # feature's min == max, so fit_transform yields an all-zero vector —
        # the model never sees the actual feature values. The scaler fitted on
        # the TRAINING data should be persisted and loaded here instead;
        # confirm against the training pipeline.
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled = scaler.fit_transform(x)
        # LSTM-style input: (samples, timesteps=1, features).
        X = scaled.reshape((scaled.shape[0], 1, scaled.shape[1]))
        # Predict the scaled price, splice it into slot 0, and invert the
        # scaling to recover an actual-price-scale number.
        price = model.predict(X)
        scaled[0, 0] = price[0, 0]
        unscaled = scaler.inverse_transform(scaled)
        return round(unscaled[0, 0], 2)