def percent_growth(dest_path): print('Calculate growth per quarter ...') sendMessage('Begin calculating net profit growth at {0}'.format(today)) df = pd.DataFrame() for f in os.listdir(dest_path): if '.csv' in f: # read file s = pd.read_csv(dest_path + f) avg_med = s.groupby('stock_label').agg({ 'percent_change': ['mean', 'median'] }).reset_index() avg_med.columns = avg_med.columns.map(''.join) avg_med['last_report'] = s.tail(1)['quarter'].values[0] avg_med['last_pct_change'] = s.tail(1)['percent_change'].values[0] if df.empty: df = df.append(avg_med, ignore_index=True) else: df = pd.concat([df, avg_med], ignore_index=True) selected = df[(df.percent_changemean > 0) & (df.percent_changemedian > 0)] selected.columns = [ 'stock_label', 'profit_growth_mean', 'profit_growth_median', 'last_report', 'last_pct_change' ] selected.to_csv( '../data/preprocessed/net_profit_growth/percent_growth_{0}.csv'.format( today), index=False) print('Finish calculating net profit growth.') sendMessage('Finish calculating net profit growth at {0}.'.format(today)) return
#!/usr/bin/python3 import os import time import pandas as pd from selenium.webdriver import Chrome from selenium.webdriver.chrome.options import Options from selenium.webdriver.support.ui import Select from db import pg_connect from slack_message import sendMessage os.chdir('/home/ubuntu/indofin-core/') today = pd.to_datetime('today').strftime('%Y-%m-%d') snapshot_at = pd.to_datetime('today').date() sendMessage('Begin cron job at {0}'.format(today)) # setup opts = Options() opts.set_headless() opts.add_argument('log-level=3') # suppress warning opts.add_argument('--no-sandbox') assert opts.headless browser = Chrome('/usr/bin/chromedriver', options=opts) browser.implicitly_wait(1) print(os.getcwd()) try: # open web page
os.chdir('/home/ubuntu/indofin/src/') source_path = '../data/raw/stock_data/' apikey = os.getenv('API_TOKEN') today = pd.to_datetime('today').strftime('%Y-%m-%d') stock_df = pd.read_csv('../data/raw/kode_saham_{0}.csv'.format(today))[['Kode', 'Nama']] selected = pd.read_csv('../data/preprocessed/net_profit_growth/percent_growth_{0}.csv'.format(today)) joined_df = pd.merge(stock_df, selected, left_on='Kode', right_on='stock_label') joined_df = joined_df[['stock_label', 'Nama', 'profit_growth_mean', 'profit_growth_median', 'last_report', 'last_pct_change']] low_threshold = joined_df.describe()['profit_growth_median'][5] pct_change_threshold = joined_df.describe()['last_pct_change'][5] print('Median growth: {0} %'.format(str(pct_change_threshold))) sendMessage('Median growth: {0} %'.format(str(np.round(pct_change_threshold, 2)))) selected_df = joined_df[(joined_df['profit_growth_median'] > low_threshold) & (joined_df['last_pct_change'] > 0)] s_list = [] s_mean = [] s_median = [] last_price = [] print('Begin get top consideration company.') sendMessage('Begin get top consideration company') for f in os.listdir(source_path): if '.csv' in f: try: s_data = pd.read_csv(source_path + f) s_change_mean = s_data.percent_gain.describe()[1]
today)) joined_df = pd.merge(stock_df, selected, left_on='Kode', right_on='stock_label') joined_df = joined_df[[ 'stock_label', 'Nama', 'profit_growth_mean', 'profit_growth_median' ]] low_threshold = joined_df.describe()['profit_growth_median'][5] selected_df = joined_df[joined_df['profit_growth_median'] > low_threshold] print('Begin get stock adjusted price.') sendMessage('Begin get stock adjusted price.') for stock in selected_df.stock_label: try: dt = pd.read_csv( 'https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY_ADJUSTED&symbol={0}.JK&apikey={1}&datatype=csv' .format(stock, apikey), usecols=['timestamp', 'adjusted close']) dt.timestamp = pd.to_datetime(dt.timestamp) last_price = dt['adjusted close'][0] dt_sorted = dt.sort_values('timestamp').reset_index().drop( 'index', axis=1).copy() d = [] p = [] for i, s in dt_sorted.iterrows():
os.chdir('/home/ubuntu/indofin/src/') today = pd.to_datetime('today').strftime('%Y-%m-%d') # setup opts = Options() opts.set_headless() opts.add_argument('log-level=3') # suppress warning opts.add_argument('--no-sandbox') assert opts.headless browser = Chrome('/usr/bin/chromedriver', options=opts) browser.implicitly_wait(2) stock_list = pd.read_csv('../data/preprocessed/top_consideration/top_consideration_{0}.csv'.format(today)) sendMessage('Begin get EPS data.') print('Begin get EPS data.') for st in stock_list.stock_label: try: print('Open webpage for {0}'.format(st)) # open web page browser.get('https://www.indopremier.com/ipotmember/newsSmartSearch.php?code={0}'.format(st)) print('Click fundamental menu for {0}'.format(st)) # click menu fundamental browser.find_element_by_partial_link_text('fundamental').click() print('Select annual data for {0}'.format(st)) time.sleep(5) # select annual data select = Select(browser.find_element_by_id('quarterFundamental'))
df = pd.read_excel(url, sheet_name=3, skiprows=2, usecols='A:D') print(stock, year, quarter) stock_dt.append(stock) year_dt.append(year) q_dt.append(quarter) profit_dt.append( df[df[df.columns[3]] == 'Total profit (loss)'].iloc[:, 1].values[0]) time.sleep(1) except Exception: print('File not found for {0} - {1} - {2}'.format( stock, quarter, str(year))) pass raw_data = pd.DataFrame.from_dict({ 'stock_label': stock_dt, 'year': year_dt, 'quarter': q_dt, 'profit': profit_dt }) # os.mkdir('../data/raw/{0}'.format(stock)) raw_data.to_csv('../data/raw/financial_information/{0}.csv'.format(stock), index=False) sendMessage('Initial load finished.')
stock = [] avg_eps = [] last_eps = [] for f in os.listdir(eps_path): if '.csv' in f: stock.append(f.split('.')[0]) eps_df = pd.read_csv(eps_path + f) avg_eps.append(eps_df.eps.mean()) last_eps.append(eps_df.eps[0]) eps_data = pd.DataFrame.from_dict({ 'stock':stock, 'last_6_avg_eps':avg_eps, 'last_eps':last_eps }) eps_data['avg_eps_25'] = 25 * eps_data.last_6_avg_eps eps_data['last_eps_20'] = 20 * eps_data.last_eps merged_data = pd.merge(top_df, eps_data, left_on='stock_label', right_on='stock', how='left') merged_data.drop(['stock'], axis=1, inplace=True) merged_data['diff_percent_with_avg_eps'] = round(100.0 * ((merged_data.last_price - merged_data.avg_eps_25) / merged_data.last_price), 2) merged_data['diff_percent_with_last_eps'] = round(100.0 * ((merged_data.last_price - merged_data.last_eps_20) / merged_data.last_price), 2) selected_data = merged_data[(merged_data.diff_percent_with_avg_eps < 0.0) & (merged_data.diff_percent_with_last_eps < 0.0)].copy() selected_data.to_csv('../data/output/final_data_{0}.csv'.format(today), index=False) sm.sendMessage('Uploading output file') sm.uploadFile('../data/output/final_data_{0}.csv'.format(today), 'Output File')
skiprows=2, usecols='A:D') print(stock, year, quarter) profit = df[df[df.columns[3]] == 'Total profit (loss)'].iloc[:, 1].values[0] df = pd.DataFrame.from_dict({ 'stock_label': [stock], 'year': [int(last_year)], 'quarter': [quarter], 'profit': [profit] }) df.to_csv('{0}{1}.csv'.format(source_path, stock), index=False) counter += 1 except Exception as e: print('File not found for {0} - {1} - {2}'.format( stock, quarter, str(last_year))) print('Error Type:', e.__class__.__name__) print('Error Message:', e) time.sleep(1) pass print('There are {0} companies already submitted {1} report.'.format( str(counter), quarter)) return counter, quarter sendMessage('Begin incremental load at {0}'.format(date)) counter, quarter = incremental_load(stock_list, existing) sendMessage('There are {0} companies already submitted {1} report.'.format( str(counter), quarter))
def get_last_profit(company): today = datetime.today() last_profit_q = ''' SELECT * FROM "fundamental"."{company_code}" '''.format(company_code=company) print('Processing for {0}'.format(company)) # sendMessage('Processing for {0}'.format(company)) try: conn = pg_connect() last_profit = pd.read_sql(last_profit_q, conn).tail(1).reset_index() last_profit_report = last_profit.net_profit_report[0] last_quarter = last_profit.quarter[0] if last_quarter == 'TW3' and last_profit.year[ 0] == today.year - 1 and today.month in range(1, 7): new_data = process_response(company=company, last_quarter=last_quarter, last_profit_report=last_profit_report) new_data.to_sql('{0}'.format(company), conn, schema='fundamental', if_exists='append', index=False) new_data.to_sql('all_fundamental'.format(company), conn, schema='fundamental', if_exists='append', index=False) # print(new_data) elif last_quarter == 'Tahunan' and last_profit.year[ 0] == today.year - 1 and today.month in range(4, 10): new_data = process_response(company=company, last_quarter=last_quarter, last_profit_report=None) new_data.to_sql('{0}'.format(company), conn, schema='fundamental', if_exists='append', index=False) new_data.to_sql('all_fundamental'.format(company), conn, schema='fundamental', if_exists='append', index=False) # print(new_data) elif last_quarter == 'TW1' and last_profit.year[ 0] == today.year and today.month in range(7, 12): new_data = process_response(company=company, last_quarter=last_quarter, last_profit_report=last_profit_report) new_data.to_sql('{0}'.format(company), conn, schema='fundamental', if_exists='append', index=False) new_data.to_sql('all_fundamental'.format(company), conn, schema='fundamental', if_exists='append', index=False) # print(new_data) elif last_quarter == 'TW2' and last_profit.year[0] == today.year and ( today.month in range(10, 13) or today.month in range(1, 4)): new_data = process_response(company=company, last_quarter=last_quarter, last_profit_report=last_profit_report) new_data.to_sql('{0}'.format(company), conn, schema='fundamental', if_exists='append', index=False) new_data.to_sql('all_fundamental'.format(company), conn, schema='fundamental', if_exists='append', index=False) # print(new_data) except IndexError as e: print('Empty table for {0}'.format(company)) print('Error Type:', e.__class__.__name__) print('Error Message:', e) last_year = today.year - 1 if today.month in range(1, 7): try: new_data = process_response(company=company, last_quarter='TW3', last_profit_report=0) new_data.loc[0, 'net_profit_quarter'] = None new_data.to_sql('{0}'.format(company), conn, schema='fundamental', if_exists='append', index=False) new_data.to_sql('all_fundamental'.format(company), conn, schema='fundamental', if_exists='append', index=False) # print(new_data) except Exception as e: print('File not found for {0}'.format(company)) print('Error Type:', e.__class__.__name__) print('Error Message:', e) pass elif today.month in range(4, 10): try: new_data = process_response(company=company, last_quarter='Tahunan', last_profit_report=0) new_data.loc[0, 'net_profit_quarter'] = None new_data.to_sql('{0}'.format(company), conn, schema='fundamental', if_exists='append', index=False) new_data.to_sql('all_fundamental'.format(company), conn, schema='fundamental', if_exists='append', index=False) # print(new_data) except Exception as e: print('File not found for {0}'.format(company)) print('Error Type:', e.__class__.__name__) print('Error Message:', e) pass elif today.month in range(7, 12): try: new_data = process_response(company=company, last_quarter='TW1', last_profit_report=0) new_data.loc[0, 'net_profit_quarter'] = None new_data.to_sql('{0}'.format(company), conn, schema='fundamental', if_exists='append', index=False) new_data.to_sql('all_fundamental'.format(company), conn, schema='fundamental', if_exists='append', index=False) # print(new_data) except Exception as e: print('File not found for {0}'.format(company)) print('Error Type:', e.__class__.__name__) print('Error Message:', e) pass elif today.month in [1, 2, 3, 10, 11, 12]: try: new_data = process_response(company=company, last_quarter='TW2', last_profit_report=0) new_data.loc[0, 'net_profit_quarter'] = None new_data.to_sql('{0}'.format(company), conn, schema='fundamental', if_exists='append', index=False) new_data.to_sql('all_fundamental'.format(company), conn, schema='fundamental', if_exists='append', index=False) # print(new_data) except Exception as e: print('File not found for {0}'.format(company)) print('Error Type:', e.__class__.__name__) print('Error Message:', e) pass except AttributeError as e: print('Error Type:', e.__class__.__name__) # print('Error Message:', e) pass except s.exc.ProgrammingError as e: print('Table not exist for {0}'.format(company)) print('Error Type:', e.__class__.__name__) new_data = pd.DataFrame({ 'company_code': [], 'quarter': [], 'year': [], 'net_profit_report': [], 'net_profit_quarter': [] }) new_data.to_sql('{0}'.format(company), conn, schema='fundamental', if_exists='append', index=False) # print(new_data) pass except Exception as e: # print('File not found for {0}'.format(company)) print('Error Class:', e.__class__) print('Error Type:', e.__class__.__name__) print('Error Message:', e) sendMessage('Error Class: {0}'.format(e.__class__)) sendMessage('Error Type: {0}'.format(e.__class__.__name__)) sendMessage('Error Message: {0}'.format(e)) pass
def process_response(company, last_quarter, last_profit_report): today = datetime.today() last_year = today.year - 1 if last_quarter == 'TW3': try: url_main = 'https://www.idx.co.id/Portals/0/StaticData/ListedCompanies/Corporate_Actions/New_Info_JSX/Jenis_Informasi/01_Laporan_Keuangan/02_Soft_Copy_Laporan_Keuangan//Laporan%20Keuangan%20Tahun%20{year}/Audit/{stock}/FinancialStatement-{year}-Tahunan-{stock}.xlsx'.format( stock=company, year=last_year) response_main = get(url_main) if response_main.status_code == 200: # if Excel file submitted quarter_data = pd.read_excel(url_main, sheet_name=3, skiprows=2, usecols='A:D') profit = quarter_data[quarter_data[quarter_data.columns[3]] == 'Total profit (loss)'].iloc[:, 1].values[0] new_data = pd.DataFrame({ 'company_code': [company], 'quarter': ['Tahunan'], 'year': [last_year], 'net_profit_report': [profit], 'net_profit_quarter': [profit - last_profit_report] }) return new_data elif response_main.status_code != 200: try: url_pdf = 'https://www.idx.co.id/Portals/0/StaticData/ListedCompanies/Corporate_Actions/New_Info_JSX/Jenis_Informasi/01_Laporan_Keuangan/02_Soft_Copy_Laporan_Keuangan//Laporan%20Keuangan%20Tahun%20{year}/Audit/{stock}/FinancialStatement-{year}-Tahunan-{stock}.pdf'.format( stock=company, year=last_year) response_pdf = get(url_pdf) # check if PDF submitted if response_pdf.status_code == 200: print('{0} submitted in PDF for {1} annual report'. format(company, last_year)) sendMessage( '{0} submitted in PDF for {1} annual report'. format(company, last_year)) except Exception as e: print('PDF file not found for {0}'.format(company)) print('Error Type:', e.__class__.__name__) print('Error Message:', e) pass except Exception as e: print('File not found for {0}'.format(company)) print('Error Type:', e.__class__.__name__) print('Error Message:', e) pass elif last_quarter == 'Tahunan': try: url_main = 'https://www.idx.co.id/Portals/0/StaticData/ListedCompanies/Corporate_Actions/New_Info_JSX/Jenis_Informasi/01_Laporan_Keuangan/02_Soft_Copy_Laporan_Keuangan//Laporan%20Keuangan%20Tahun%20{year}/{quarter}/{stock}/FinancialStatement-{year}-I-{stock}.xlsx'.format( stock=company, year=str(today.year), quarter='TW1') response_main = get(url_main) if response_main.status_code == 200: # if Excel file submitted quarter_data = pd.read_excel(url_main, sheet_name=3, skiprows=2, usecols='A:D') profit = quarter_data[quarter_data[quarter_data.columns[3]] == 'Total profit (loss)'].iloc[:, 1].values[0] new_data = pd.DataFrame({ 'company_code': [company], 'quarter': ['TW1'], 'year': [today.year], 'net_profit_report': [profit], 'net_profit_quarter': [profit] }) return new_data elif response_main.status_code != 200: try: url_pdf = 'https://www.idx.co.id/Portals/0/StaticData/ListedCompanies/Corporate_Actions/New_Info_JSX/Jenis_Informasi/01_Laporan_Keuangan/02_Soft_Copy_Laporan_Keuangan//Laporan%20Keuangan%20Tahun%20{year}/{quarter}/{stock}/FinancialStatement-{year}-I-{stock}.pdf'.format( stock=company, year=str(today.year), quarter='TW1') response_pdf = get(url_pdf) # check if PDF submitted if response_pdf.status_code == 200: print( '{0} submitted in PDF for TW1 - {1} report'.format( company, str(today.year))) sendMessage( '{0} submitted in PDF for TW1 - {1} report'.format( company, str(today.year))) except Exception as e: print('PDF file not found for {0}'.format(company)) print('Error Type:', e.__class__.__name__) print('Error Message:', e) pass except Exception as e: print('File not found for {0}'.format(company)) print('Error Type:', e.__class__.__name__) print('Error Message:', e) pass elif last_quarter == 'TW1': try: url_main = 'https://www.idx.co.id/Portals/0/StaticData/ListedCompanies/Corporate_Actions/New_Info_JSX/Jenis_Informasi/01_Laporan_Keuangan/02_Soft_Copy_Laporan_Keuangan//Laporan%20Keuangan%20Tahun%20{year}/{quarter}/{stock}/FinancialStatement-{year}-II-{stock}.xlsx'.format( stock=company, year=str(today.year), quarter='TW2') response_main = get(url_main) if response_main.status_code == 200: # if Excel file submitted quarter_data = pd.read_excel(url_main, sheet_name=3, skiprows=2, usecols='A:D') profit = quarter_data[quarter_data[quarter_data.columns[3]] == 'Total profit (loss)'].iloc[:, 1].values[0] new_data = pd.DataFrame({ 'company_code': [company], 'quarter': ['TW2'], 'year': [today.year], 'net_profit_report': [profit], 'net_profit_quarter': [profit - last_profit_report] }) return new_data elif response_main.status_code != 200: try: url_pdf = 'https://www.idx.co.id/Portals/0/StaticData/ListedCompanies/Corporate_Actions/New_Info_JSX/Jenis_Informasi/01_Laporan_Keuangan/02_Soft_Copy_Laporan_Keuangan//Laporan%20Keuangan%20Tahun%20{year}/{quarter}/{stock}/FinancialStatement-{year}-II-{stock}.pdf'.format( stock=company, year=str(today.year), quarter='TW2') response_pdf = get(url_pdf) # check if PDF submitted if response_pdf.status_code == 200: print( '{0} submitted in PDF for TW2 - {1} report'.format( company, str(today.year))) sendMessage( '{0} submitted in PDF for TW2 - {1} report'.format( company, str(today.year))) except Exception as e: print('PDF file not found for {0}'.format(company)) print('Error Type:', e.__class__.__name__) print('Error Message:', e) pass except Exception as e: print('File not found for {0}'.format(company)) print('Error Type:', e.__class__.__name__) print('Error Message:', e) pass elif last_quarter == 'TW2': try: url_main = 'https://www.idx.co.id/Portals/0/StaticData/ListedCompanies/Corporate_Actions/New_Info_JSX/Jenis_Informasi/01_Laporan_Keuangan/02_Soft_Copy_Laporan_Keuangan//Laporan%20Keuangan%20Tahun%20{year}/{quarter}/{stock}/FinancialStatement-{year}-III-{stock}.xlsx'.format( stock=company, year=str(today.year), quarter='TW3') response_main = get(url_main) if response_main.status_code == 200: # if Excel file submitted quarter_data = pd.read_excel(url_main, sheet_name=3, skiprows=2, usecols='A:D') profit = quarter_data[quarter_data[quarter_data.columns[3]] == 'Total profit (loss)'].iloc[:, 1].values[0] new_data = pd.DataFrame({ 'company_code': [company], 'quarter': ['TW2'], 'year': [today.year], 'net_profit_report': [profit], 'net_profit_quarter': [profit - last_profit_report] }) return new_data elif response_main.status_code != 200: try: url_pdf = 'https://www.idx.co.id/Portals/0/StaticData/ListedCompanies/Corporate_Actions/New_Info_JSX/Jenis_Informasi/01_Laporan_Keuangan/02_Soft_Copy_Laporan_Keuangan//Laporan%20Keuangan%20Tahun%20{year}/{quarter}/{stock}/FinancialStatement-{year}-III-{stock}.pdf'.format( stock=company, year=str(today.year), quarter='TW3') response_pdf = get(url_pdf) # check if PDF submitted if response_pdf.status_code == 200: print( '{0} submitted in PDF for TW3 - {1} report'.format( company, str(today.year))) sendMessage( '{0} submitted in PDF for TW3 - {1} report'.format( company, str(today.year))) except Exception as e: print('PDF file not found for {0}'.format(company)) print('Error Type:', e.__class__.__name__) print('Error Message:', e) pass except Exception as e: print('File not found for {0}'.format(company)) print('Error Type:', e.__class__.__name__) print('Error Message:', e) pass
except Exception as e: # print('File not found for {0}'.format(company)) print('Error Class:', e.__class__) print('Error Type:', e.__class__.__name__) print('Error Message:', e) sendMessage('Error Class: {0}'.format(e.__class__)) sendMessage('Error Type: {0}'.format(e.__class__.__name__)) sendMessage('Error Message: {0}'.format(e)) pass if __name__ == "__main__": print_date = datetime.today().date().strftime('%Y-%m-%d') print('Begin incremental fundamental load at {0}'.format(print_date)) sendMessage('Begin incremental fundamental load at {0}'.format(print_date)) conn = pg_connect() active_company_q = ''' SELECT company_code FROM "company"."active_company" ORDER BY 1 ''' active_company = pd.read_sql(active_company_q, conn) company_code_list = list(set(active_company.company_code)) n_process = 5 #int(ceil(len(company_code_list) / 100)) mp = Pool(n_process) print('Start with {0} parallel'.format(n_process)) sendMessage('Start with {0} parallel processing'.format(n_process))