Exemple #1
0
import __init__

from Kite import config

from Killua.denull import DeNull
from Killua.deduplication import Deduplication
from Killua.buildstocknewsdb import GenStockNewsDB

from Gon.jrjspyder import JrjSpyder

# Step 1: crawl the historical JRJ news, starting from 2015-01-01.
jrj_spyder = JrjSpyder(config.DATABASE_NAME, config.COLLECTION_NAME_JRJ)
jrj_spyder.get_historical_news(
    config.WEBSITES_LIST_TO_BE_CRAWLED_JRJ,
    start_date="2015-01-01",
)

# Steps 2 and 3: deduplicate the historical data, then drop the rows that
# contain null values. The tuple order is the execution order.
for cleaning_stage in (Deduplication, DeNull):
    cleaning_stage(config.DATABASE_NAME, config.COLLECTION_NAME_JRJ).run()

# Step 4: create a new database that stores, for every stock, all the news
# mentioning that stock, labelled "利好" (bullish), "利空" (bearish) or
# "中性" (neutral) -- per the original author's note.
gen_stock_news_db = GenStockNewsDB()
gen_stock_news_db.get_all_news_about_specific_stock(
    config.DATABASE_NAME,
    config.COLLECTION_NAME_JRJ,
)
Exemple #2
0
#
# nbd_spyder = NbdSpyder(config.DATABASE_NAME, config.COLLECTION_NAME_NBD)
# nbd_spyder.get_historical_news(684)

# 1.1 Deduplicate the historical data
from Killua.deduplication import Deduplication
# Deduplication("finnewshunter", "cnstock").run()
# Deduplication("finnewshunter", "nbd").run()
# Deduplication("finnewshunter", "jrj").run()  # for now only jrj needs deduplication

# 1.2 Drop the rows of the historical data that contain null values
from Killua.denull import DeNull
# DeNull("finnewshunter", "cnstock").run()
# DeNull("finnewshunter", "nbd").run()
# DeNull("finnewshunter", "jrj").run()

# 2. Extract the stocks mentioned in each news item and save their stock
#    codes in a new column of the collection
from Leorio.tokenization import Tokenization
# tokenization = Tokenization(import_module="jieba", user_dict="./Leorio/financedict.txt")
# tokenization.update_news_database_rows(config.DATABASE_NAME, "cnstock")
# tokenization.update_news_database_rows(config.DATABASE_NAME, "nbd")
# tokenization.update_news_database_rows(config.DATABASE_NAME, "jrj")

# 3. Create a new database: for every stock, save all the news that mentions
#    that stock in the new database
from Killua.buildstocknewsdb import GenStockNewsDB
gen_stock_news_db = GenStockNewsDB()
# Only the "nbd" collection is processed below; the "cnstock" and "jrj"
# calls are kept commented out.
# gen_stock_news_db.get_all_news_about_specific_stock("finnewshunter", "cnstock")
gen_stock_news_db.get_all_news_about_specific_stock("finnewshunter", "nbd")
# gen_stock_news_db.get_all_news_about_specific_stock("finnewshunter", "jrj")

# 4.
            param = 0.10
        elif 30 < n_days <= 60:
            param = 0.15
        if close_price_this_date is not None and close_price_n_days_later is not None:
            if (close_price_n_days_later - close_price_this_date) / close_price_this_date > param:
                return "利好"
            elif (close_price_n_days_later - close_price_this_date) / close_price_this_date < -param:
                return "利空"
            else:
                return "中性"
        else:
            return ""

    def _stock_news_nums_stat(self):
        """Push the names of well-populated stock collections onto a redis list.

        Lists the collections of ``config.ALL_NEWS_OF_SPECIFIC_STOCK_DATABASE``
        and, for each one whose estimated document count is strictly greater
        than ``config.MINIMUM_STOCK_NEWS_NUM_FOR_ML``, left-pushes the
        collection name onto the redis list
        ``stock_news_num_over_<threshold>``.
        """
        stock_db_name = config.ALL_NEWS_OF_SPECIFIC_STOCK_DATABASE
        stock_db = self.database.connect_database(stock_db_name)
        for collection_name in stock_db.list_collection_names(session=None):
            collection = self.database.get_collection(stock_db_name,
                                                      collection_name)
            news_count = collection.estimated_document_count()
            threshold = config.MINIMUM_STOCK_NEWS_NUM_FOR_ML
            if news_count > threshold:
                # The threshold is part of the key, so a changed threshold
                # writes to a different redis list.
                queue_key = "stock_news_num_over_{}".format(threshold)
                self.redis_client.lpush(queue_key, collection_name)


if __name__ == "__main__":
    # Script entry point: only constructs a GenStockNewsDB instance; every
    # call on it below is currently commented out.
    from Kite import config
    from Killua.buildstocknewsdb import GenStockNewsDB

    gen_stock_news_db = GenStockNewsDB()
    # Per-collection runs of get_all_news_about_specific_stock for the
    # CNSTOCK, NBD and JRJ collections (disabled).
    # gen_stock_news_db.get_all_news_about_specific_stock(config.DATABASE_NAME, config.COLLECTION_NAME_CNSTOCK)
    # gen_stock_news_db.get_all_news_about_specific_stock(config.DATABASE_NAME, config.COLLECTION_NAME_NBD)
    # gen_stock_news_db.get_all_news_about_specific_stock(config.DATABASE_NAME, config.COLLECTION_NAME_JRJ)

    # Redis queue listener (disabled).
    # gen_stock_news_db.listen_redis_queue()
            param = 0.03
        elif 10 < n_days <= 15:
            param = 0.05
        elif 15 < n_days <= 30:
            param = 0.10
        elif 30 < n_days <= 60:
            param = 0.15
        if close_price_this_date is not None and close_price_n_days_later is not None:
            if (close_price_n_days_later -
                    close_price_this_date) / close_price_this_date > param:
                return "利好"
            elif (close_price_n_days_later -
                  close_price_this_date) / close_price_this_date < -param:
                return "利空"
            else:
                return "中性"
        else:
            return ""


if __name__ == "__main__":
    # Script entry point: constructs a GenStockNewsDB instance and calls its
    # listen_redis_queue() method.
    from Kite import config
    from Killua.buildstocknewsdb import GenStockNewsDB

    gen_stock_news_db = GenStockNewsDB()
    # Per-collection runs of get_all_news_about_specific_stock for the
    # CNSTOCK, NBD and JRJ collections (disabled).
    # gen_stock_news_db.get_all_news_about_specific_stock(config.DATABASE_NAME, config.COLLECTION_NAME_CNSTOCK)
    # gen_stock_news_db.get_all_news_about_specific_stock(config.DATABASE_NAME, config.COLLECTION_NAME_NBD)
    # gen_stock_news_db.get_all_news_about_specific_stock(config.DATABASE_NAME, config.COLLECTION_NAME_JRJ)

    gen_stock_news_db.listen_redis_queue()