예제 #1
0
 def __init__(self, **kwarg):
     """Store the database settings passed as keywords and build the text-mining helper.

     Required keyword arguments: ``dbName``, ``collectionName`` (stored as
     ``colName``), ``IP`` and ``PORT``. A missing key raises ``KeyError``.
     """
     # (attribute name, keyword name) pairs, copied in this exact order.
     settings = (
         ("dbName", "dbName"),
         ("colName", "collectionName"),
         ("IP", "IP"),
         ("PORT", "PORT"),
     )
     for attribute, keyword in settings:
         setattr(self, attribute, kwarg[keyword])

     # presumably a probability threshold used by other methods -- TODO confirm
     self.Prob = 0.10
     self.realtimeNewsURL = []
     # NOTE(review): the helper is created with a fixed address rather than
     # the IP/PORT received above -- confirm this is intentional.
     self.tm = tm.TextMining(IP="10.3.5.58", PORT=27017)
 def __init__(self, **kwarg):
     """Store the thread count and database settings, then build the text-mining helper.

     Required keyword arguments: ``ThreadsNum``, ``dbName``,
     ``collectionName`` (stored as ``colName``), ``IP`` and ``PORT``.
     A missing key raises ``KeyError``.
     """
     # (attribute name, keyword name) pairs, copied in this exact order.
     settings = (
         ("ThreadsNum", "ThreadsNum"),
         ("dbName", "dbName"),
         ("colName", "collectionName"),
         ("IP", "IP"),
         ("PORT", "PORT"),
     )
     for attribute, keyword in settings:
         setattr(self, attribute, kwarg[keyword])

     # presumably a probability threshold used by other methods -- TODO confirm
     self.Prob = 0.5
     self.realtimeNewsURL = []
     # NOTE(review): the helper always connects to localhost:27017, not to
     # the IP/PORT received above -- confirm this is intentional.
     self.tm = tm.TextMining(IP="localhost", PORT=27017)
 def __init__(self, *arg, **kwarg):
     """Initialise the instance from two positional values and keyword settings.

     Positional arguments:
         arg[0]: stored as ``totalPages``.
         arg[1]: stored as ``Range``.

     Required keyword arguments: ``ThreadsNum``, ``dbName``,
     ``collectionName`` (stored as ``colName``), ``IP`` and ``PORT``.

     Raises:
         IndexError: if fewer than two positional arguments are given.
         KeyError: if a required keyword argument is missing.
     """
     self.totalPages = arg[0]
     self.Range = arg[1]
     self.ThreadsNum = kwarg['ThreadsNum']
     self.dbName = kwarg['dbName']
     self.colName = kwarg['collectionName']
     self.IP = kwarg['IP']
     self.PORT = kwarg['PORT']
     # The original assigned only the misspelled ``Porb``; the sibling
     # constructors all expose ``Prob``, so set the correctly spelled name.
     self.Prob = .5
     # Keep the misspelled attribute as an alias so existing readers of
     # ``Porb`` keep working.
     self.Porb = self.Prob
     self.realtimeNewsURL = []
     # NOTE(review): the helper always connects to localhost:27017, not to
     # the IP/PORT received above -- confirm this is intentional.
     self.tm = tm.TextMining(IP="localhost", PORT=27017)
예제 #4
0
 def __init__(self, *arg, **kwarg):
     """Store the date range, paging value and database settings, then build the helper.

     Positional arguments (in order): ``startDate``, ``endDate``, ``Range``.
     Fewer than three raises ``IndexError``.

     Required keyword arguments: ``ThreadsNum``, ``dbName``, ``user``,
     ``passwd``, ``collectionName`` (stored as ``colName``), ``IP`` and
     ``PORT``. A missing key raises ``KeyError``.
     """
     # Positional values are read one index at a time, in order.
     for position, attribute in enumerate(("startDate", "endDate", "Range")):
         setattr(self, attribute, arg[position])

     # (attribute name, keyword name) pairs, copied in this exact order.
     settings = (
         ("ThreadsNum", "ThreadsNum"),
         ("dbName", "dbName"),
         ("user", "user"),
         ("passwd", "passwd"),
         ("colName", "collectionName"),
         ("IP", "IP"),
         ("PORT", "PORT"),
     )
     for attribute, keyword in settings:
         setattr(self, attribute, kwarg[keyword])

     # presumably a probability threshold used by other methods -- TODO confirm
     self.Prob = 0.5
     self.realtimeNewsURL = []
     # NOTE(review): the helper always connects to localhost:27017, not to
     # the IP/PORT received above -- confirm this is intentional.
     self.tm = tm.TextMining(IP="localhost", PORT=27017)
예제 #5
0
        web_crawl_obj = WebCrawlFromjrj("2009-01-05","2018-02-03",100,ThreadsNum=4,IP="localhost",PORT=27017,\
         dbName="Jrj_Stock",collectionName="jrj_news_company")
        web_crawl_obj.classifyRealtimeStockNews()
    elif web == 'cnstock':
        web_crawl_obj = WebCrawlFromcnstock(IP="localhost",PORT=27017,ThreadsNum=4,\
         dbName="Cnstock_Stock",collectionName="cnstock_news_company")
        web_crawl_obj.classifyRealtimeStockNews()
    elif web == 'stcn':
        web_crawl_obj = WebCrawlFromstcn(IP="localhost",PORT=27017,ThreadsNum=4,\
         dbName="Stcn_Stock",collectionName="stcn_news_company")
        web_crawl_obj.classifyRealtimeStockNews()


if __name__ == '__main__':
    # Step 1. Create the text-mining helper (connects to localhost:27017).
    text_mining_obj = tm.TextMining(IP="localhost", PORT=27017)

    # Step 2. Extract the stock codes related to each news article, one
    # (database, collection) pair per news source.
    text_mining_obj.extractStockCodeFromArticle(
        "NBD_Stock", "nbd_news_company")  # extract related stock codes from National Business Daily (NBD) news
    text_mining_obj.extractStockCodeFromArticle(
        "Cnstock_Stock", "cnstock_news_company")  # extract related stock codes from China Securities Network (cnstock) news
    text_mining_obj.extractStockCodeFromArticle(
        "Stcn_Stock", "stcn_news_company")  # extract related stock codes from Securities Times (stcn) news
    text_mining_obj.extractStockCodeFromArticle(
        "Jrj_Stock", "jrj_news_company")  # extract related stock codes from JRJ (jrj.com) news

    # Step 3. Copy all news related to a specific stock into a new database
    # (this step takes a long time).
    # Read the ``code`` column of the "Basic_Info" collection in the "Stock" database.
    codeLst = text_mining_obj.extractData("Stock", "Basic_Info", ['code']).code
    # NOTE(review): Range and Idx look like a batch size and a start index for
    # code that follows this excerpt -- confirm against the rest of the script.
    Range = 10
    Idx = 0