# Historical-data pipeline for the JRJ news source: crawl the news, clean the
# stored rows, then build the per-stock news database.
import __init__  # NOTE(review): presumably imported only for its side effects - confirm

from Kite import config
from Killua.denull import DeNull
from Killua.deduplication import Deduplication
from Killua.buildstocknewsdb import GenStockNewsDB
from Gon.jrjspyder import JrjSpyder

# 1. Crawl the historical news.
jrj_spyder = JrjSpyder(config.DATABASE_NAME, config.COLLECTION_NAME_JRJ)
jrj_spyder.get_historical_news(
    config.WEBSITES_LIST_TO_BE_CRAWLED_JRJ,
    start_date="2015-01-01",
)

# 2. Deduplicate the historical data, then
# 3. drop the rows that contain null values.
# The order matters: each cleaner is constructed and run only after the
# previous one has finished.
for cleaning_step in (Deduplication, DeNull):
    cleaning_step(config.DATABASE_NAME, config.COLLECTION_NAME_JRJ).run()

# 4. Create a new database: for every stock, store all the news that involves
#    it, tagged with one of the labels "利好" (bullish), "利空" (bearish) or
#    "中性" (neutral).
gen_stock_news_db = GenStockNewsDB()
gen_stock_news_db.get_all_news_about_specific_stock(
    config.DATABASE_NAME,
    config.COLLECTION_NAME_JRJ,
)
# # nbd_spyder = NbdSpyder(config.DATABASE_NAME, config.COLLECTION_NAME_NBD) # nbd_spyder.get_historical_news(684) # 1.1 针对历史数据进行去重清洗 from Killua.deduplication import Deduplication # Deduplication("finnewshunter", "cnstock").run() # Deduplication("finnewshunter", "nbd").run() # Deduplication("finnewshunter", "jrj").run() # 暂时只有jrj需要去重 # 1.2 将历史数据中包含null值的行去掉 from Killua.denull import DeNull # DeNull("finnewshunter", "cnstock").run() # DeNull("finnewshunter", "nbd").run() # DeNull("finnewshunter", "jrj").run() # 2. 抽取出新闻中所涉及的股票,并保存其股票代码在collection中新的一列 from Leorio.tokenization import Tokenization # tokenization = Tokenization(import_module="jieba", user_dict="./Leorio/financedict.txt") # tokenization.update_news_database_rows(config.DATABASE_NAME, "cnstock") # tokenization.update_news_database_rows(config.DATABASE_NAME, "nbd") # tokenization.update_news_database_rows(config.DATABASE_NAME, "jrj") # 3. 创建新的数据库,针对每一个股票,将所有涉及该股票的新闻都保存在新的数据库 from Killua.buildstocknewsdb import GenStockNewsDB gen_stock_news_db = GenStockNewsDB() # gen_stock_news_db.get_all_news_about_specific_stock("finnewshunter", "cnstock") gen_stock_news_db.get_all_news_about_specific_stock("finnewshunter", "nbd") # gen_stock_news_db.get_all_news_about_specific_stock("finnewshunter", "jrj") # 4.
param = 0.10 elif 30 < n_days <= 60: param = 0.15 if close_price_this_date is not None and close_price_n_days_later is not None: if (close_price_n_days_later - close_price_this_date) / close_price_this_date > param: return "利好" elif (close_price_n_days_later - close_price_this_date) / close_price_this_date < -param: return "利空" else: return "中性" else: return "" def _stock_news_nums_stat(self): cols_list = self.database.connect_database(config.ALL_NEWS_OF_SPECIFIC_STOCK_DATABASE).list_collection_names(session=None) for sym in cols_list: if self.database.get_collection(config.ALL_NEWS_OF_SPECIFIC_STOCK_DATABASE, sym).estimated_document_count() > config.MINIMUM_STOCK_NEWS_NUM_FOR_ML: self.redis_client.lpush("stock_news_num_over_{}".format(config.MINIMUM_STOCK_NEWS_NUM_FOR_ML), sym) if __name__ == "__main__": from Kite import config from Killua.buildstocknewsdb import GenStockNewsDB gen_stock_news_db = GenStockNewsDB() # gen_stock_news_db.get_all_news_about_specific_stock(config.DATABASE_NAME, config.COLLECTION_NAME_CNSTOCK) # gen_stock_news_db.get_all_news_about_specific_stock(config.DATABASE_NAME, config.COLLECTION_NAME_NBD) # gen_stock_news_db.get_all_news_about_specific_stock(config.DATABASE_NAME, config.COLLECTION_NAME_JRJ) # gen_stock_news_db.listen_redis_queue()
# NOTE(review): this span opens partway through a method whose signature and
# first branch condition are not visible here; the code below is unchanged and
# only reformatted and commented.
            param = 0.03
        elif 10 < n_days <= 15:
            param = 0.05
        elif 15 < n_days <= 30:
            param = 0.10
        elif 30 < n_days <= 60:
            param = 0.15
        # NOTE(review): no else branch is visible on the chain above; unless
        # param is assigned earlier, n_days > 60 would leave it unbound - confirm.
        if close_price_this_date is not None and close_price_n_days_later is not None:
            # The label depends on the relative change of the later close
            # against this date's close, compared with +/- param.
            # NOTE(review): a close_price_this_date of 0 raises ZeroDivisionError here.
            if (close_price_n_days_later - close_price_this_date) / close_price_this_date > param:
                return "利好"  # rose by more than param
            elif (close_price_n_days_later - close_price_this_date) / close_price_this_date < -param:
                return "利空"  # fell by more than param
            else:
                return "中性"  # change stayed within +/- param
        else:
            # At least one of the two closing prices is missing.
            return ""


# Script entry point: build a GenStockNewsDB and call listen_redis_queue() on it.
if __name__ == "__main__":
    from Kite import config
    from Killua.buildstocknewsdb import GenStockNewsDB

    gen_stock_news_db = GenStockNewsDB()
    # The per-collection builds below are kept as commented-out alternatives.
    # gen_stock_news_db.get_all_news_about_specific_stock(config.DATABASE_NAME, config.COLLECTION_NAME_CNSTOCK)
    # gen_stock_news_db.get_all_news_about_specific_stock(config.DATABASE_NAME, config.COLLECTION_NAME_NBD)
    # gen_stock_news_db.get_all_news_about_specific_stock(config.DATABASE_NAME, config.COLLECTION_NAME_JRJ)
    gen_stock_news_db.listen_redis_queue()