Ejemplo n.º 1
0
def main(outsourcedLogger=None):
    """Run the gathering pipeline: crawl Twitter, merge, publish graphs.

    Progress is reported through ``printAndLog`` on the logger in use.

    Args:
        outsourcedLogger: Logger to report through. When ``None``, a new
            ``Log.Logger()`` is created and used instead.
    """
    log = Log.Logger() if outsourcedLogger is None else outsourcedLogger

    log.printAndLog(const.MessageType.Summarize.value, "Gathering data...")
    crawler = TwitterCrawler(log)

    # Step 1: fetch tweets.
    log.printAndLog(const.MessageType.Header.value, "Collecting data from twitter...")
    crawler.crawlTwitter()

    # Step 2: merge the freshly collected data into the database.
    dbOperations = DataBaseOperationsService(log)
    log.printAndLog(const.MessageType.Header.value, "Merging new data with current database...")
    dbOperations.merge()

    # Step 3: publish the database statistics graphs.
    dbStatistics = DataBaseStatistics(log)
    dbStatistics.PublishDataBaseCompaniesGraph()
    dbStatistics.PublishDataBaseCompaniesKeywordsGraph()

    # Finished.
    log.printAndLog(const.MessageType.Summarize.value, "Gathering finished...")
Ejemplo n.º 2
0
    # Crawl for irrel.
    #----------------
    # Configurations file xml of the crawler
    configFileCrawler = ".\\TwitterCrawler\\Configurations\\Configurations_Irrel.xml"

    # Feeds file log
    feedsLogFile = ".\\TwitterCrawler\\Output\\feeds_irrel.xml"

    # Update rate log file
    updateRateLogFile = ".\\TwitterCrawler\\Output\\update_rate_log_irrel.log"

    # Start the TwitterCrawler
    #-------------------------
    twitterCrawler = TwitterCrawler(configFileCrawler, feedsLogFile,
                                    updateRateLogFile,
                                    serializatoinFileNameIrrel)

    # Start crawling
    #twitterCrawler.Crawl('')

    # Crawl for rel
    #--------------

    # Configurations file xml of the crawler
    configFileCrawler = ".\\TwitterCrawler\\Configurations\\Configurations_Rel.xml"

    # Feeds file log
    feedsLogFile = ".\\TwitterCrawler\\Output\\feeds_rel.xml"

    # Update rate log file
        # Crawl for the current stock (stock-by-stock configuration).
        #------------------------------------------------------------
        # XML configuration file of the crawler
        configFileCrawler = ".\\TwitterCrawler\\Configurations\\Configurations_Stock_By_Stock.xml"

        # Feeds file log
        feedsLogFile = ".\\TwitterCrawler\\Output\\feeds_" + stock + ".xml"

        # Update rate log file
        updateRateLogFile = ".\\TwitterCrawler\\Output\\update_rate_log_" + stock + "_irrel.log"

        # Start the TwitterCrawler
        #-------------------------
        twitterCrawler = TwitterCrawler(configFileCrawler, feedsLogFile,
                                        updateRateLogFile,
                                        serializatoinFileName)

        # The query array is just the current stock
        twitterCrawler.queryArray = []
        #lexicon_query = stock + " AND  (% OR مؤشر OR سهم OR اسهم OR أسهم OR نقطة OR تداول OR مال OR اموال OR أموال OR ريال OR سوق OR أ ارتفع OR ارتفاع OR انخفض OR انخفاض)"
        lexicon_query = stock + " AND (سهم OR اسهم OR أسهم OR نقطة OR تداول)"
        #lexicon_query = stock + " AND (سهم OR تداول)"
        #lexicon_query = stock + " AND (سهم OR شركة)"
        #lexicon_query = stock + " AND (سهم OR اسهم OR أسهم)"
        #lexicon_query = stock
        twitterCrawler.queryArray.append(lexicon_query)

        # Start crawling
        twitterCrawler.Crawl('')
Ejemplo n.º 4
0
Created on Dec 11, 2014

@author: asallab
'''

import os
os.environ["DJANGO_SETTINGS_MODULE"] = "DjangoWebProject1.settings"
#print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXx")
#print(os.environ.get("DJANGO_SETTINGS_MODULE"))

from TwitterCrawler.TwitterCrawler import TwitterCrawler
# Script: run one TwitterCrawler per stock name listed in stocks.txt.
PROJECT_DIR = os.path.abspath(os.path.dirname(__file__))
configFileName = os.path.join(PROJECT_DIR, 'TwitterCrawler', 'Configurations',
                              'Configurations.xml')
updateRateFileName = 'update_rate.txt'

# Read the stock list (one name per line). The context manager guarantees the
# file handle is closed even if reading fails.
with open(os.path.join('.', 'TwitterCrawler', 'stocks.txt'),
          'r',
          encoding='utf-8') as f_in:
    lines = f_in.readlines()

for line in lines:
    stock = line.strip()
    if not stock:
        # A blank line would yield a query without a stock name and an output
        # file called "results_.bin"; skip it.
        continue

    # Each stock gets its own serialization file.
    serializatoinFileName = ".\\TwitterCrawler\\Output\\results_" + stock + ".bin"
    twitterCrawler = TwitterCrawler(configFileName, None, updateRateFileName,
                                    serializatoinFileName)

    # The query array holds a single query: the stock name combined with an
    # OR-list of Arabic keywords.
    twitterCrawler.queryArray = [
        stock +
        " AND (سهم OR اسهم OR أسهم OR تداول OR ارتفع OR ارتفاع OR انخفض OR انخفاض OR هدف OR دعم OR ارتداد OR نسبة OR % OR %)‎"
    ]
    twitterCrawler.stock = stock
    twitterCrawler.Crawl("q")