def __init__(self, **kwarg):
    """Store the database settings and create the text-mining helper.

    Every keyword below is required; a missing one raises ``KeyError``.

    Keyword Args:
        dbName: stored as ``self.dbName``.
        collectionName: stored as ``self.colName``.
        IP: stored as ``self.IP``.
        PORT: stored as ``self.PORT``.
    """
    # (attribute name, keyword name) pairs. They are read in this order so
    # the first missing keyword is the one reported by the KeyError.
    required_settings = (
        ('dbName', 'dbName'),
        ('colName', 'collectionName'),
        ('IP', 'IP'),
        ('PORT', 'PORT'),
    )
    for attribute, keyword in required_settings:
        setattr(self, attribute, kwarg[keyword])

    # Presumably a probability threshold used elsewhere in the class --
    # TODO confirm against the code that reads it.
    self.Prob = 0.10
    self.realtimeNewsURL = []

    # NOTE(review): the TextMining endpoint is fixed here and does not use
    # the IP/PORT received above -- confirm that this is intended.
    self.tm = tm.TextMining(IP="10.3.5.58", PORT=27017)
def __init__(self, **kwarg):
    """Record the crawler settings and create the text-mining helper.

    Every keyword below is required; a missing one raises ``KeyError``.

    Keyword Args:
        ThreadsNum: stored as ``self.ThreadsNum``.
        dbName: stored as ``self.dbName``.
        collectionName: stored as ``self.colName``.
        IP: stored as ``self.IP``.
        PORT: stored as ``self.PORT``.
    """
    # Keyword -> attribute table. Iteration follows the order written here,
    # which is the order the keywords must be looked up in so that the
    # KeyError for a missing keyword names the same key as before.
    attribute_for_keyword = (
        ('ThreadsNum', 'ThreadsNum'),
        ('dbName', 'dbName'),
        ('collectionName', 'colName'),
        ('IP', 'IP'),
        ('PORT', 'PORT'),
    )
    for keyword, attribute in attribute_for_keyword:
        setattr(self, attribute, kwarg[keyword])

    # Presumably a probability threshold used elsewhere in the class --
    # TODO confirm against the code that reads it.
    self.Prob = 0.5
    self.realtimeNewsURL = []

    # NOTE(review): the helper always connects to localhost:27017 and does
    # not use the IP/PORT received above -- confirm that this is intended.
    self.tm = tm.TextMining(IP="localhost", PORT=27017)
def __init__(self, *arg, **kwarg):
    """Record the crawl range and settings and create the text-mining helper.

    Args:
        *arg: ``arg[0]`` is stored as ``self.totalPages`` and ``arg[1]`` as
            ``self.Range``.

    Keyword Args:
        ThreadsNum: stored as ``self.ThreadsNum``.
        dbName: stored as ``self.dbName``.
        collectionName: stored as ``self.colName``.
        IP: stored as ``self.IP``.
        PORT: stored as ``self.PORT``.

    Raises:
        IndexError: if fewer than two positional arguments are given.
        KeyError: if any of the keyword arguments above is missing.
    """
    self.totalPages = arg[0]
    self.Range = arg[1]
    self.ThreadsNum = kwarg['ThreadsNum']
    self.dbName = kwarg['dbName']
    self.colName = kwarg['collectionName']
    self.IP = kwarg['IP']
    self.PORT = kwarg['PORT']

    # The attribute used to be set only under the misspelled name ``Porb``,
    # so reading ``self.Prob`` (the spelling used by the other constructors
    # in this file) raised AttributeError. Set the correct name and keep the
    # old spelling as an alias for any code that still reads it.
    self.Prob = .5
    self.Porb = self.Prob

    self.realtimeNewsURL = []

    # NOTE(review): the helper always connects to localhost:27017 and does
    # not use the IP/PORT received above -- confirm that this is intended.
    self.tm = tm.TextMining(IP="localhost", PORT=27017)
def __init__(self, *arg, **kwarg):
    """Record the date range and settings and create the text-mining helper.

    Args:
        *arg: ``arg[0]`` is stored as ``self.startDate``, ``arg[1]`` as
            ``self.endDate`` and ``arg[2]`` as ``self.Range``.

    Keyword Args:
        ThreadsNum: stored as ``self.ThreadsNum``.
        dbName: stored as ``self.dbName``.
        user: optional; stored as ``self.user`` (``None`` when omitted).
        passwd: optional; stored as ``self.passwd`` (``None`` when omitted).
        collectionName: stored as ``self.colName``.
        IP: stored as ``self.IP``.
        PORT: stored as ``self.PORT``.

    Raises:
        IndexError: if fewer than three positional arguments are given.
        KeyError: if a required keyword argument is missing.
    """
    self.startDate = arg[0]
    self.endDate = arg[1]
    self.Range = arg[2]
    self.ThreadsNum = kwarg['ThreadsNum']
    self.dbName = kwarg['dbName']

    # Credentials are optional: a caller that passes three positional
    # arguments plus ThreadsNum/IP/PORT/dbName/collectionName but no
    # user/passwd used to fail here with KeyError. Callers that do pass
    # them get exactly the same attributes as before.
    self.user = kwarg.get('user')
    self.passwd = kwarg.get('passwd')

    self.colName = kwarg['collectionName']
    self.IP = kwarg['IP']
    self.PORT = kwarg['PORT']

    # Presumably a probability threshold used elsewhere in the class --
    # TODO confirm against the code that reads it.
    self.Prob = .5
    self.realtimeNewsURL = []

    # NOTE(review): the helper always connects to localhost:27017 and does
    # not use the IP/PORT received above -- confirm that this is intended.
    self.tm = tm.TextMining(IP="localhost", PORT=27017)
web_crawl_obj = WebCrawlFromjrj("2009-01-05","2018-02-03",100,ThreadsNum=4,IP="localhost",PORT=27017,\ dbName="Jrj_Stock",collectionName="jrj_news_company") web_crawl_obj.classifyRealtimeStockNews() elif web == 'cnstock': web_crawl_obj = WebCrawlFromcnstock(IP="localhost",PORT=27017,ThreadsNum=4,\ dbName="Cnstock_Stock",collectionName="cnstock_news_company") web_crawl_obj.classifyRealtimeStockNews() elif web == 'stcn': web_crawl_obj = WebCrawlFromstcn(IP="localhost",PORT=27017,ThreadsNum=4,\ dbName="Stcn_Stock",collectionName="stcn_news_company") web_crawl_obj.classifyRealtimeStockNews() if __name__ == '__main__': # Step 1. Initiate text_mining_obj = tm.TextMining(IP="localhost", PORT=27017) # Step 2. Extract relevant stock codes of news(articles/documents) from all database text_mining_obj.extractStockCodeFromArticle( "NBD_Stock", "nbd_news_company") # 从每经网的新闻中抽出相关的股票代码 text_mining_obj.extractStockCodeFromArticle( "Cnstock_Stock", "cnstock_news_company") # 从中国证券网的新闻中抽出相关的股票代码 text_mining_obj.extractStockCodeFromArticle( "Stcn_Stock", "stcn_news_company") # 从证券时报网的新闻中抽出相关的股票代码 text_mining_obj.extractStockCodeFromArticle( "Jrj_Stock", "jrj_news_company") # 从金融界网的新闻中抽出相关的股票代码 # Step 3. Extract all news related to specific stock to new database(this step will take long time) codeLst = text_mining_obj.extractData("Stock", "Basic_Info", ['code']).code Range = 10 Idx = 0