def auto_crawler_new():
    ACSD = AutoCrawlerStockDividend()
    ACSD.main()
    # save crawler process
    print('save crawler process')
    BasedClass.save_crawler_process('StockDividend')

def auto_crawler_new():
    ACII = AutoCrawlerInstitutionalInvestors()
    ACII.main()
    C2S = BasedClass.Crawler2SQL('InstitutionalInvestors', 'Financial_DataSet')
    C2S.upload2sql(ACII.data)
    print('save crawler process')
    BasedClass.save_crawler_process('InstitutionalInvestors')

def auto_crawler_new():
    dataset_name = 'CrudeOilPrices'
    ACCOP = AutoCrawlerCrudeOilPrices()
    ACCOP.main()
    C2S = BasedClass.Crawler2SQL(dataset_name, 'Financial_DataSet')
    C2S.upload2sql(ACCOP.data)
    # -------------------------------------------------
    print('save crawler process')
    BasedClass.save_crawler_process(dataset_name)

def auto_crawler_new():
    dataset_name = 'GoldPrice'
    CGP = CrawlerGoldPrice()
    CGP.main()
    C2S = BasedClass.Crawler2SQL(dataset_name, 'Financial_DataSet')
    C2S.upload2sql(CGP.data)
    # -------------------------------------------------
    print('save crawler process')
    BasedClass.save_crawler_process(dataset_name)

def auto_crawler_new():
    dataset_name = 'ExchangeRate'
    ACER = AutoCrawlerExchangeRate()
    ACER.main()
    C2S = BasedClass.Crawler2SQL(dataset_name, 'Financial_DataSet')
    C2S.upload2sql(ACER.data, no_float_col=['date', 'country'])
    # ------------------------------------------------------
    print('save crawler process')
    BasedClass.save_crawler_process(dataset_name)

def auto_crawler_new():
    dataset_name = 'InterestRate'
    ACIR = AutoCrawlerInterestRate()
    ACIR.main()
    C2S = BasedClass.Crawler2SQL(dataset_name, 'Financial_DataSet')
    C2S.upload2sql(ACIR.data,
                   no_float_col=['country', 'full_country_name', 'date'])
    # -------------------------------------------------
    print('save crawler process')
    BasedClass.save_crawler_process(dataset_name)

def auto_crawler_new():
    dataset_name = 'GovernmentBonds'
    ACGB = AutoCrawlerGovernmentBonds()
    ACGB.main()
    C2S = BasedClass.Crawler2SQL(dataset_name, 'Financial_DataSet')
    C2S.upload2sql(ACGB.data,
                   no_float_col=['Date', 'data_name', 'country', 'curr_id'])
    # -------------------------------------------------
    print('save crawler process')
    BasedClass.save_crawler_process(dataset_name)

def auto_crawler_new():
    dataset_name = 'StockPrice'
    ACSP = AutoCrawlerStockPrice()
    ACSP.main()
    print('crawler data and upload 2 sql')
    C2S = BasedClass.Crawler2SQL(dataset_name, 'Financial_DataSet')
    C2S.upload2sql(ACSP.new_data,
                   no_float_col=['date', 'stock'], int_col=['Volume'])
    # ------------------------------------------------------
    print('save crawler process')
    BasedClass.save_crawler_process(dataset_name)

def crawler_history():
    CCOP = CrawlerCrudeOilPrices()
    CCOP.main()
    C2S = BasedClass.Crawler2SQL('CrudeOilPrices', 'Financial_DataSet')
    try:
        C2S.create_table(CCOP.data.columns)
    except Exception:
        pass  # table already exists
    C2S.upload2sql(CCOP.data)
    print('create process table')
    BasedClass.create_datatable('CrudeOilPrices')

def crawler_history():
    dataset_name = 'ExchangeRate'
    CER = CrawlerExchangeRate()
    CER.main()
    C2S = BasedClass.Crawler2SQL(dataset_name, 'Financial_DataSet')
    try:
        C2S.create_table(CER.data.columns, text_col=['country'])
    except Exception:
        pass  # table already exists
    C2S.upload2sql(CER.data, no_float_col=['date', 'country'])
    print('create process table')
    BasedClass.create_datatable(dataset_name)

def main():
    database = 'Financial_DataSet'
    CSID = CrawlerStockID(Key.host, Key.user, Key.password, database)
    CSID.run()
    C2S = BasedClass.Crawler2SQL('StockInfo', 'Financial_DataSet')
    try:
        C2S.create_table(CSID.data.columns)
    except Exception:
        pass  # table already exists
    # upload stock info
    BasedClass.execute_sql2(database, 'TRUNCATE TABLE `StockInfo`')
    CSID.upload_stock_info2sql()

def crawler_history():
    CII = CrawlerInstitutionalInvestors()
    CII.main()
    C2S = BasedClass.Crawler2SQL('InstitutionalInvestors', 'Financial_DataSet')
    try:
        C2S.create_table(CII.data.columns)
    except Exception:
        pass  # table already exists
    C2S.upload2sql(CII.data)
    print('create process table')
    BasedClass.create_datatable('InstitutionalInvestors')

def crawler_history():
    dataset_name = 'GovernmentBonds'
    CGB = CrawlerGovernmentBonds()
    CGB.main()
    C2S = BasedClass.Crawler2SQL(dataset_name, 'Financial_DataSet')
    try:
        C2S.create_table(CGB.data.columns,
                         text_col=['data_name', 'country', 'curr_id'])
    except Exception:
        pass  # table already exists
    C2S.upload2sql(CGB.data,
                   no_float_col=['Date', 'data_name', 'country', 'curr_id'])
    print('create process table')
    BasedClass.create_datatable(dataset_name)

def get_curr_id_name(self):
    self.data_name = []
    curr_id = BasedClass.execute_sql2(
        self.database,
        'SELECT DISTINCT `curr_id` FROM `EnergyFuturesPrices` WHERE 1')
    self.curr_id = [c[0] for c in curr_id]
    for c in self.curr_id:
        sql_text = (
            'SELECT DISTINCT `data_name` FROM `EnergyFuturesPrices` '
            'WHERE `curr_id` = "' + c + '"')
        value = BasedClass.execute_sql2(self.database, sql_text)
        self.data_name.extend(d[0] for d in value)

def crawler_history():
    CIR = CrawlerInterestRate()
    CIR.main()
    C2S = BasedClass.Crawler2SQL('InterestRate', 'Financial_DataSet')
    try:
        C2S.create_table(CIR.data.columns,
                         text_col=['country', 'full_country_name'])
    except Exception:
        pass  # table already exists
    C2S.upload2sql(CIR.data,
                   no_float_col=['country', 'full_country_name', 'date'])
    print('create process table')
    BasedClass.create_datatable('InterestRate')

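# The crawler_history functions in this file all repeat the same steps:
# crawl, create the table if missing, upload, register the process table.
# A hypothetical consolidation sketch (upload_history does not exist in the
# original code); it only uses calls already shown in this file, and passes
# the optional column arguments only when given, since Crawler2SQL's keyword
# defaults are not visible from this section.
def upload_history(dataset_name, crawler, no_float_col=None, text_col=None):
    crawler.main()
    C2S = BasedClass.Crawler2SQL(dataset_name, 'Financial_DataSet')
    try:
        if text_col:
            C2S.create_table(crawler.data.columns, text_col=text_col)
        else:
            C2S.create_table(crawler.data.columns)
    except Exception:
        pass  # table already exists
    if no_float_col:
        C2S.upload2sql(crawler.data, no_float_col=no_float_col)
    else:
        C2S.upload2sql(crawler.data)
    print('create process table')
    BasedClass.create_datatable(dataset_name)

# e.g. the ExchangeRate history above could then be written as:
# upload_history('ExchangeRate', CrawlerExchangeRate(),
#                no_float_col=['date', 'country'], text_col=['country'])
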
def get_stock_id_set(self):
    data = BasedClass.execute_sql2(
        database=self.database,
        sql_text='SELECT DISTINCT `stock_id` FROM FinancialStatements')
    self.stock_id_set = [da[0] for da in data]

def crawler_history():
    CFS = CrawlerFinancialStatements()
    CFS.crawler()
    CFS.fix()
    # convert ROC (Minguo) years to Gregorian years
    CFS.stock_financial_statements['year'] = (
        CFS.stock_financial_statements['year'] + 1911)
    C2S = BasedClass.Crawler2SQL('FinancialStatements', 'Financial_DataSet')
    try:
        C2S.create_table()
    except Exception:
        pass  # table already exists
    C2S.upload2sql(CFS.stock_financial_statements,
                   no_float_col=['stock_id', 'url'],
                   int_col=['year', 'quar'])
    print('create process table')
    BasedClass.create_datatable('FinancialStatements')

def crawler_history():
    CSD = CrawlerStockDividend()
    CSD.main()
    C2S = BasedClass.Crawler2SQL('StockDividend', 'Financial_DataSet')
    try:
        C2S.create_table()
    except Exception:
        pass  # table already exists
    for i in range(len(CSD.url_set)):
        print(str(i) + '/' + str(len(CSD.url_set)))
        data = CSD.get_value(i)
        C2S.upload2sql(data,
                       no_float_col=['meeting_data',
                                     'Ex_right_trading_day',
                                     'Ex_dividend_transaction_day',
                                     'stock_id'])
    print('create process table')
    BasedClass.create_datatable('StockDividend')

def auto_crawler_new():
    ACFS = AutoCrawlerFinancialStatements(database='Financial_DataSet')
    ACFS.main()
    if len(ACFS.stock_financial_statements) != 0:
        try:
            ACFS.fix()
        except Exception:
            pass
        # a default integer column index means the frame came back
        # transposed; flip it so rows are records
        if ACFS.stock_financial_statements.columns[0] == 0:
            ACFS.stock_financial_statements = ACFS.stock_financial_statements.T
        C2S = BasedClass.Crawler2SQL('FinancialStatements', 'Financial_DataSet')
        C2S.upload2sql(ACFS.stock_financial_statements,
                       no_float_col=['stock_id', 'url'],
                       int_col=['year', 'quar'])
    # ------------------------------------------------------
    print('save crawler process')
    BasedClass.save_crawler_process('FinancialStatements')

def __init__(self):
    self.host = Key.host
    self.user = Key.user
    self.password = Key.password
    self.database = 'python'
    tem = BasedClass.execute_sql2(self.database, 'SHOW TABLES')
    tem = np.concatenate(tem, axis=0)
    self.datatable = list(tem)
    self.datatable.remove('new')

def crawler_history():
    dataset_name = 'GoldPrice'
    # get history by downloading the https://www.gold.org/data/gold-price file
    file_path = '/home/' + path + '/github/FinancialMining/CrawlerCode/'
    data = pd.read_csv(file_path + 'glod.csv', skiprows=1)
    date = [datetime.datetime.strptime(d, '%Y/%m/%d').date()
            for d in data.date]
    data = data[[d < datetime.datetime.strptime('2018-1-1', '%Y-%m-%d').date()
                 for d in date]]
    data['date'] = [d.replace('/', '-') + ' 00:00:00' for d in data.date]
    data.columns = ['datetime', 'Price']
    C2S = BasedClass.Crawler2SQL(dataset_name, 'Financial_DataSet')
    try:
        C2S.create_table(data.columns, dt_col=['datetime'])
    except Exception:
        pass  # table already exists
    C2S.upload2sql(data)
    print('create process table')
    BasedClass.create_datatable(dataset_name)

def get_curr_id_name(self):
    self.data_name = []
    curr_id = BasedClass.execute_sql2(
        self.database,
        'SELECT DISTINCT `curr_id` FROM `GovernmentBonds` WHERE 1')
    self.curr_id = [c[0] for c in curr_id]
    country = BasedClass.execute_sql2(
        self.database,
        'SELECT DISTINCT `country` FROM `GovernmentBonds` WHERE 1')
    country = [c[0] for c in country]
    for c in country:
        sql_text = ('SELECT DISTINCT `data_name` FROM `GovernmentBonds` '
                    'WHERE `country` = "' + c + '"')
        value = BasedClass.execute_sql2(self.database, sql_text)
        self.data_name.extend(c + ' ' + d[0] for d in value)

def start(self):
    date = []
    for stock in self.stock:
        print(stock)
        sql = ("SELECT MAX(`date`) FROM `StockPrice` "
               "WHERE `stock` = '" + stock + "'")
        tem = BasedClass.execute_sql2(database='Financial_DataSet',
                                      sql_text=sql)
        date.append(tem[0][0])
    start = str(max(date))
    return start

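# A minimal alternative sketch (start_single_query is not part of the
# original code): the per-stock MAX(`date`) loop above can be collapsed into
# one GROUP BY query, assuming the same `StockPrice` schema and execute_sql2
# helper used throughout this file.
def start_single_query(self):
    sql = 'SELECT `stock`, MAX(`date`) FROM `StockPrice` GROUP BY `stock`'
    rows = BasedClass.execute_sql2(database='Financial_DataSet', sql_text=sql)
    # rows are (stock, max_date) tuples; keep only the stocks this
    # instance tracks, then return the latest date, as start() does
    date = [r[1] for r in rows if r[0] in self.stock]
    return str(max(date))
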
def crawler_history():
    dataset_name = 'StockPrice'
    CHSP = CrawlerHistoryStockPrice()
    # CHSP.main()
    os.chdir('/home/linsam/github')
    CHSP.data = pd.read_csv('test.csv')
    print('crawler data and upload 2 sql')
    C2S = BasedClass.Crawler2SQL(dataset_name, 'Financial_DataSet')
    try:
        C2S.create_table(CHSP.data.columns, text_col=['stock'],
                         BIGINT_col=['Volume'])
    except Exception:
        pass  # table already exists
    C2S.upload2sql(CHSP.data,
                   no_float_col=['date', 'stock'], int_col=['Volume'])
    print('create process table')
    BasedClass.create_datatable(dataset_name)

def get_new(self):

    def UPDATE_sql(host, user, password, database, sql_text):
        conn = pymysql.connect(host=host, port=3306, user=user,
                               password=password, database=database,
                               charset='utf8')
        cursor = conn.cursor()
        try:
            for i in range(len(sql_text)):
                cursor.execute(sql_text[i])
            conn.commit()
            conn.close()
            return 1
        except Exception:
            conn.close()
            return 0

    old_date = Load.Load(database='StockDividend',
                         select=self.stock).sort_values('meeting_data')
    self.old_date = str(old_date.iloc[len(old_date) - 1]['meeting_data'])
    self.new_date = self.new_data['meeting_data']
    change_name = list(self.new_data.index)
    sql_text = []
    if self.old_date == self.new_date:
        for col in ['meeting_data', 'stock_id']:
            change_name.remove(col)
        self.get_data_id()
        for col in change_name:
            tem = self.change_sql_data(col)
            if tem != '':
                sql_text.append(tem)
        # update the existing row, because Ex_right_trading_day and
        # Ex_dividend_transaction_day are always announced later
        UPDATE_sql(Key.host, Key.user, Key.password, self.database, sql_text)
    elif self.old_date < self.new_date:
        # if the new date is later than the old one, append a new row
        data = pd.DataFrame(self.new_data).T
        C2S = BasedClass.Crawler2SQL('StockDividend', 'Financial_DataSet')
        C2S.upload2sql(data, no_float_col=['meeting_data',
                                           'Ex_right_trading_day',
                                           'Ex_dividend_transaction_day',
                                           'stock_id'])

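# A minimal hardening sketch (update_dividend_value is an assumption, not the
# original helper): pymysql can bind values server-side via
# cursor.execute(sql, params), which avoids building UPDATE statements by
# string concatenation as change_sql_data does above. The table and id column
# mirror the ones used in get_data_id below.
def update_dividend_value(database, data_id, col, value):
    conn = pymysql.connect(host=Key.host, port=3306, user=Key.user,
                           password=Key.password, database=database,
                           charset='utf8')
    try:
        with conn.cursor() as cursor:
            # column names cannot be bound as parameters, so col must come
            # from a trusted whitelist (e.g. the DataFrame's own index)
            sql = ('UPDATE `StockDividend` SET `' + col
                   + '` = %s WHERE `id` = %s')
            cursor.execute(sql, (value, data_id))
        conn.commit()
    finally:
        conn.close()
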
def CrawlerStatus(self):
    self.cdate = pd.DataFrame()
    for dt in self.datatable:
        text = ("SELECT name,CrawlerDate FROM `" + dt + "` "
                "WHERE id = ( SELECT max(id) FROM `" + dt + "` )")
        tem = BasedClass.execute_sql2(self.database, text)
        if len(tem) != 0:
            tem = pd.DataFrame(np.concatenate(tem, axis=0))
            self.cdate = self.cdate.append(tem.T)
    self.cdate.index = range(len(self.cdate))
    self.cdate.columns = ['name', 'date']
    date = [d.date() for d in self.cdate['date']]
    self.cdate['date'] = date

def get_yearquar(self, stock):
    sql_text = ("SELECT year,quar FROM FinancialStatements "
                "WHERE stock_id = " + stock)
    tem = BasedClass.execute_sql2(database=self.database, sql_text=sql_text)
    year, quar = [], []
    for te in tem:
        year.append(te[0] - 1911)  # convert Gregorian year back to ROC year
        quar.append(te[1])
    return year, quar

def select_new_data(self):
    new_data = pd.DataFrame()
    for stock in self.stock:
        sql = ("SELECT MAX(`date`) FROM `StockPrice` "
               "WHERE `stock` = '" + stock + "'")
        tem = BasedClass.execute_sql2(database='Financial_DataSet',
                                      sql_text=sql)
        max_date = tem[0][0]
        data = self.data[self.data['stock'] == stock]
        date = [datetime.datetime.strptime(d, '%Y-%m-%d').date() > max_date
                for d in data['date']]
        new_data = new_data.append(data[date])
    self.new_data = new_data

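# DataFrame.append was removed in pandas 2.0. On a newer pandas, the loop in
# select_new_data can collect the per-stock frames and concatenate once; a
# sketch under that assumption (select_new_data_concat is hypothetical, the
# attributes are the same as above):
def select_new_data_concat(self):
    frames = []
    for stock in self.stock:
        sql = ("SELECT MAX(`date`) FROM `StockPrice` "
               "WHERE `stock` = '" + stock + "'")
        max_date = BasedClass.execute_sql2(database='Financial_DataSet',
                                           sql_text=sql)[0][0]
        data = self.data[self.data['stock'] == stock]
        keep = [datetime.datetime.strptime(d, '%Y-%m-%d').date() > max_date
                for d in data['date']]
        frames.append(data[keep])
    self.new_data = pd.concat(frames) if frames else pd.DataFrame()
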
def create_stock(self):
    sql = 'SELECT DISTINCT `stock` FROM `StockPrice` WHERE 1'
    tem = BasedClass.execute_sql2(database='Financial_DataSet', sql_text=sql)
    self.stock = [t[0] for t in tem]

def get_data_id(self):
    sql_text = ("SELECT id FROM `StockDividend` WHERE `meeting_data` = '"
                + str(self.new_date) + "' AND `stock_id` LIKE "
                + self.new_data['stock_id'])
    self.data_id = BasedClass.execute_sql2(self.database, sql_text)[0][0]

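# BasedClass.execute_sql2 is called throughout this section but not defined
# here. A minimal sketch of what it presumably does, assuming the same
# pymysql connection pattern as UPDATE_sql in get_new (credentials from Key,
# port 3306, utf8 charset) and fetch-all semantics, since results are indexed
# as tem[0][0] above:
def execute_sql2(database, sql_text):
    conn = pymysql.connect(host=Key.host, port=3306, user=Key.user,
                           password=Key.password, database=database,
                           charset='utf8')
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql_text)
            result = cursor.fetchall()  # tuple of row tuples
        conn.commit()  # no-op for SELECT, needed for TRUNCATE/UPDATE callers
        return result
    finally:
        conn.close()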