def main(outsourcedLogger=None):
    """Run the full gathering pipeline: crawl Twitter, merge into the database,
    and publish the statistics graphs.

    outsourcedLogger -- an already-configured logger to reuse; when None a
    fresh Log.Logger() is created for this run.
    """
    # Prefer the caller's logger; fall back to a private one.
    logger = Log.Logger() if outsourcedLogger is None else outsourcedLogger

    logger.printAndLog(const.MessageType.Summarize.value, "Gathering data...")

    # Step 1: collect fresh tweets.
    crawler = TwitterCrawler(logger)
    logger.printAndLog(const.MessageType.Header.value, "Collecting data from twitter...")
    crawler.crawlTwitter()

    # Step 2: fold the newly crawled data into the existing database.
    dbOps = DataBaseOperationsService(logger)
    logger.printAndLog(const.MessageType.Header.value, "Merging new data with current database...")
    dbOps.merge()

    # Step 3: regenerate the company / keyword graphs from the merged data.
    stats = DataBaseStatistics(logger)
    stats.PublishDataBaseCompaniesGraph()
    stats.PublishDataBaseCompaniesKeywordsGraph()

    logger.printAndLog(const.MessageType.Summarize.value, "Gathering finished...")
# --- Crawl pass 1: "irrel" (irrelevant) feed ------------------------------
# Crawler XML configuration, feeds log, and update-rate log for this pass.
configFileCrawler = ".\\TwitterCrawler\\Configurations\\Configurations_Irrel.xml"
feedsLogFile = ".\\TwitterCrawler\\Output\\feeds_irrel.xml"
updateRateLogFile = ".\\TwitterCrawler\\Output\\update_rate_log_irrel.log"

# Build the crawler for the irrel pass (serialization target comes from
# serializatoinFileNameIrrel, defined earlier in the file).
twitterCrawler = TwitterCrawler(configFileCrawler,
                                feedsLogFile,
                                updateRateLogFile,
                                serializatoinFileNameIrrel)
# Crawl deliberately disabled for this pass:
#twitterCrawler.Crawl('')

# --- Crawl pass 2: "rel" (relevant) feed ----------------------------------
# Same three settings, pointed at the rel configuration and logs.
configFileCrawler = ".\\TwitterCrawler\\Configurations\\Configurations_Rel.xml"
feedsLogFile = ".\\TwitterCrawler\\Output\\feeds_rel.xml"
# Update rate log file
# --- Per-stock crawl ------------------------------------------------------
# Crawler XML configuration plus per-stock feed and update-rate logs
# (file names embed the current `stock` symbol).
configFileCrawler = ".\\TwitterCrawler\\Configurations\\Configurations_Stock_By_Stock.xml"
feedsLogFile = ".\\TwitterCrawler\\Output\\feeds_" + stock + ".xml"
updateRateLogFile = ".\\TwitterCrawler\\Output\\update_rate_log_" + stock + "_irrel.log"

# Build the crawler; results are serialized to serializatoinFileName
# (defined earlier in the file).
twitterCrawler = TwitterCrawler(configFileCrawler,
                                feedsLogFile,
                                updateRateLogFile,
                                serializatoinFileName)

# The query is the stock symbol ANDed with a small Arabic finance lexicon
# (share/shares/point/trading) to filter for market-related tweets.
lexicon_query = stock + " AND (سهم OR اسهم OR أسهم OR نقطة OR تداول)"
twitterCrawler.queryArray = [lexicon_query]

# Kick off the crawl.
twitterCrawler.Crawl('')
Created on Dec 11, 2014 @author: asallab ''' import os os.environ["DJANGO_SETTINGS_MODULE"] = "DjangoWebProject1.settings" #print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXx") #print(os.environ.get("DJANGO_SETTINGS_MODULE")) from TwitterCrawler.TwitterCrawler import TwitterCrawler PROJECT_DIR = os.path.abspath(os.path.dirname(__file__)) configFileName = os.path.join(PROJECT_DIR, 'TwitterCrawler', 'Configurations', 'Configurations.xml') updateRateFileName = 'update_rate.txt' f_in = open(os.path.join('.', 'TwitterCrawler', 'stocks.txt'), 'r', encoding='utf-8') lines = f_in.readlines() for line in lines: stock = line.strip() serializatoinFileName = ".\\TwitterCrawler\\Output\\results_" + stock + ".bin" twitterCrawler = TwitterCrawler(configFileName, None, updateRateFileName, serializatoinFileName) twitterCrawler.queryArray = [] twitterCrawler.queryArray.append( stock + " AND (سهم OR اسهم OR أسهم OR تداول OR ارتفع OR ارتفاع OR انخفض OR انخفاض OR هدف OR دعم OR ارتداد OR نسبة OR % OR %)" ) twitterCrawler.stock = stock twitterCrawler.Crawl("q")