Code Example #1
 def downloading_csv(self, url_address):
     """ Download the csv information from the url_address given.
     """
     cache.clear()
     url = URL(url_address)
     f = open(self.cur_quotes_csvfile, 'wb')  # write the downloaded csv to self.cur_quotes_csvfile
     f.write(url.download())
     f.close()
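These methods rely on the pattern.web module (URL, cache) and on attributes of an enclosing class, so they are not runnable on their own. Below is a minimal, self-contained sketch of the same download step with the imports it assumes; the URL and output path are placeholders, not values from the original project.

from pattern.web import URL, cache

def download_file(url_address, save_path):
    """Download url_address and write the raw response bytes to save_path."""
    cache.clear()                  # drop pattern.web's local download cache first
    url = URL(url_address)
    f = open(save_path, 'wb')
    f.write(url.download())
    f.close()

download_file('http://example.com/quotes.csv', 'quotes.csv')  # placeholder values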
Code Example #3
 def downloading_xml(self, url_address):
     """ Download the xml information from the url_address given.
     """
     cache.clear()
     url = URL(url_address)
     f = open(self.feeds_xmlfile, 'wb')  # write the downloaded xml to self.feeds_xmlfile
     f.write(url.download())
     f.close()
Code Example #6
    def download_json(self):
        """ Download the json file from the self.com_data_full_url.
            The save file is defaulted to the self.saved_json_file.

        """
        cache.clear()
        url = URL(self.com_data_full_url)
        f = open(self.saved_json_file, 'wb')  # write the downloaded json to self.saved_json_file
        f.write(url.download(timeout=50))  # longer timeout for this slower request
        f.close()
Code Example #7
    def download_json(self):
        """ Download the json file from the self.com_data_full_url.
            The save file is defaulted to the self.saved_json_file.
            Needs to take care of exceptions.

        """
        cache.clear()
        url = URL(self.com_data_full_url)
        f = open(self.saved_json_file, 'wb')  # write the downloaded json to self.saved_json_file
        try:
            data = url.download(timeout=50)  # longer timeout for this slower request
        except:
            data = ''
        f.write(data)
        f.close()
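The bare except above silently turns any failure into an empty write. Here is a hedged sketch of a slightly stricter variant, still assuming pattern.web: it narrows the handler to Exception and uses a context manager so the file is closed even if the write fails. The URL and path are placeholders.

from pattern.web import URL, cache

def download_json(full_url, save_path):
    """Download full_url and save the body to save_path; fall back to '' on failure."""
    cache.clear()
    try:
        data = URL(full_url).download(timeout=50)  # longer timeout for a slow endpoint
    except Exception:
        data = ''
    with open(save_path, 'wb') as f:               # file is closed even if write() raises
        f.write(data)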
Code Example #9
    def download_json(self):
        """ Download the json file from the self.com_data_full_url.
            The save file defaults to self.saved_json_file.

        """
        cache.clear()
        url = URL(self.com_data_full_url)
        f = open(self.saved_json_file, 'wb')  # write the downloaded json to self.saved_json_file
        try:
            url_data = url.download(timeout=50)
        except:
            url_data = ''

        f.write(url_data)
        f.close()
Code Example #11
 def parse_rss_sites(self):
     """ Function to parse the RSS sites.
         Results are stored in self.rss_results_dict with date as key.
     """
     self.rss_results_dict = {} 
     self.rss_title_list = []
     
     cache.clear()
     
     for rss_site_url in self.rss_sites:
         print "processing: ", rss_site_url
         try:
             results_list = Newsfeed().search(rss_site_url)[:self.num_feeds_parse_per_site]
         except:
             print 'Particular feeds have problems: ', rss_site_url
             continue
         for result in results_list:
             date_key = self.convert_date_str_to_date_key(result.date)
             self.rss_title_list.append(result.title)
             if date_key in self.rss_results_dict:
                 self.rss_results_dict[date_key].append([result.title, plaintext(result.text)])
             else:
                 self.rss_results_dict[date_key] = [[result.title, plaintext(result.text)]]
     print 'done'
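A self-contained sketch of the same grouping idea for a single feed, again assuming pattern.web. The original's convert_date_str_to_date_key helper is not shown, so the raw result.date string is used as the dictionary key, and dict.setdefault replaces the Python-2-only has_key check.

from pattern.web import Newsfeed, plaintext

def parse_rss_site(rss_site_url, num_feeds=10):
    """Return {date_string: [[title, text], ...]} for one RSS feed."""
    results_by_date = {}
    for result in Newsfeed().search(rss_site_url)[:num_feeds]:
        entry = [result.title, plaintext(result.text)]
        results_by_date.setdefault(result.date, []).append(entry)
    return results_by_date

grouped = parse_rss_site('http://business.asiaone.com/rss.xml')  # feed URL reused from Code Example #16 below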
Code Example #12
File: scraper.py  Project: gtank/storylinenews
def start(configfile):
    config = open(configfile, 'r')
    feedlist = json.loads(config.read())
    config.close()

    HOUR = 3600  # one hour, in seconds
    DAY = 24     # hours per day (unused in this snippet)

    datapath = 'data/'
    logdir = 'logs/'

    while True:
        timestr = str(time.time())
        logpath = os.path.join(datapath, logdir)
        if not os.path.exists(logpath):
            os.makedirs(logpath)
        logname = os.path.join(logpath, 'news-' + timestr + '.json')
        log = open(logname, 'w')

        print timestr + ' starting a new day'
        for i in range(2):
            cache.clear()

            topics = feed.extract_topics(feedlist)
            topics = filter(metrics.isnews, topics)
            topics = map(lambda x: (x, metrics.gnews_polarity(x)), topics)

            data = (time.time(), topics)
            datastring = json.dumps(data)

            log.write(datastring + "\n")
            log.flush()
            print datastring

            time.sleep(12 * HOUR)
        log.close()
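The loop above polls the feeds twice per "day" and appends each snapshot as one JSON line to a timestamped log. Since feed and metrics are project-specific modules, here is a standard-library-only sketch of just that append-a-JSON-line logging step, under the assumption that a snapshot is any JSON-serialisable value.

import json
import os
import time

def append_snapshot(snapshot, datapath='data/', logdir='logs/'):
    """Append one (timestamp, snapshot) record as a JSON line to today's log file."""
    logpath = os.path.join(datapath, logdir)
    if not os.path.exists(logpath):
        os.makedirs(logpath)
    logname = os.path.join(logpath, 'news-' + time.strftime('%Y%m%d') + '.json')
    with open(logname, 'a') as log:                 # append so repeated runs share the file
        log.write(json.dumps((time.time(), snapshot)) + "\n")

append_snapshot([('example topic', 0.0)])  # placeholder snapshot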
Code Example #14
        self.set_stock_retrieval_type('all')  # either 'all' or 'watcher'
        self.load_stock_symbol_fr_file()


if __name__ == '__main__':

    print "start processing"

    choice = 3

    if choice == 1:
        """try the download format of  YQL"""
        url_address = 'https://query.yahooapis.com/v1/public/yql?q=SELECT%20*%20FROM%20yahoo.finance.keystats%20WHERE%20symbol%3D%27BN4.SI%27&format=json&diagnostics=true&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys&callback='
        savefile = r'c:\data\temptryyql.json'

        cache.clear()
        url = URL(url_address)
        f = open(savefile, 'wb')  # write the raw YQL response to savefile
        f.write(url.download())
        f.close()

    if choice == 2:
        """ Handling Json file
            how to include the multiple keys per --> use  w['query']['results']['stats'].keys()
        """

        savefile = r'c:\data\temptryyql.json'
        w = json.load(open(r'c:\data\temptryyql.json', 'r'))
        com_data_stock_list = list()
        for individual_set in w['query']['results']['stats']:
            temp_dict_data = {}
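The snippet above is cut off inside the loop. Below is a hedged sketch of how the parsed YQL response might be collected, assuming only the w['query']['results']['stats'] layout named in the docstring; the exact field names of each record are not given in the original.

import json

def collect_yql_stats(json_path):
    """Return a list of per-record dicts from a saved YQL keystats response."""
    with open(json_path, 'r') as f:
        w = json.load(f)
    com_data_stock_list = []
    for individual_set in w['query']['results']['stats']:
        temp_dict_data = dict(individual_set)  # assumes each record is a dict-like mapping
        com_data_stock_list.append(temp_dict_data)
    return com_data_stock_list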
Code Example #15
    enrolTwitter(topic, location2)
    print "Individual sentiment and modality committed."
    print
    print
    print "Calculating local and global sentiments and certainty.."
    print
    avgPol1, avgSub1, avgMode1 = localSentiment(location1, topic)
    print
    print location1, "statistics-\n Polarity:", avgPol1, "\n Subjectivity: ", avgSub1, "\n Modality:", avgMode1
    print
    avgPol2, avgSub2, avgMode2 = localSentiment(location2, topic)
    print
    print location2, "statistics-\n Polarity:", avgPol2, "\n Subjectivity: ", avgSub2, "\n Modality:", avgMode2
    print
    globalPol, globalSub, globalMode = globalSentiment(topic)
    print
    print "Global statistics -\n Polarity:", globalPol, "\n Subjectivity: ", globalSub, "\n Modality:", globalMode
    print
    time.sleep(1)
    # explanatory function to communicate the idea behind the statistics acquired
    forecast(topic, location1, location2, avgPol1, avgSub1, avgMode1, avgPol2,
             avgSub2, avgMode2)
    # PLUS : explanatory graph to communicate the scale of opinion
    print
    print "Creating plots and subplots.."
    print
    time.sleep(2)
    plotting(topic, location1, location2, avgPol1, avgSub1, avgMode1, avgPol2,
             avgSub2, avgMode2, globalPol, globalSub, globalMode)
    cache.clear()  # empties the local search bin before terminating
Code Example #16
            Using textblob and a positive classifier.
            Need to create a way to take in a file as the training set...
            see whether it can be done with a different label.

        """
        filename = r"C:\Users\Tan Kok Hua\Dropbox\Notes\stock_notes\relevency_classifier2.csv"
        data = get_strlist_fr_file(filename)

    if choice == 1:
        rss_sites = [
                        'http://business.asiaone.com/rss.xml',
                    ]

        cache.clear()
        print
        for rss_site_url in rss_sites:
            for result in Newsfeed().search(rss_site_url)[:10]:
                print rss_site_url
                print result.date
                print result.title
                print plaintext(result.text)

                print
            print '-'*18
            print

    if choice == 4:
        
        #f = FeedsReader()