Code example #1
class SourceMetricsCalculator(object):

    def __init__(self, output_dir):
        """
        Constructor
        :param output_dir: absolute filepath to output directory
        :return:
        """
        self.output_dir = output_dir
        self.text_writer = TextWriter(output_dir)
        # Set input data indices
        self.source_sent_pos = {'fb_post': 18, 'fb_comment': 19, 'yahoo': 20, 'twitter': 21}
        self.price_dir_indices = {-1: 14, 1: 15, 2: 16, 3: 17}
        self.day_delays = [-1, 1, 2, 3]

    def calculate_metrics_by_source(self, company_id, total_data, file_name, price_type, write_header=False):
        """
        Calculate metrics for one company (from all available days).
        :param company_id: int
        :param total_data: list of day rows produced by DocumentsAnalyzer
        :param file_name: base name of the output file
        :param price_type: string identifying the used stock price type
        :param write_header: bool: prepend the header row and open the file in 'w' mode
        :return: None; the formatted metrics are written to the output file
        """
        # For every source, get all delays and calculate metrics.
        company_stats = self._prepare_matrices_for_sources()
        company_stats = self._fill_result_matrices(company_stats, total_data)
        metrics = self._calc_metrics_from_matrices(company_stats)
        m_list = self._format_source_metrics_to_list(company_id, metrics, price_type)
        # Write to file
        if write_header:
            m_list.insert(0, self.get_source_metrics_header())
            self.text_writer.write_econometric_file(file_name, m_list, 'w')
            del m_list[0]
        else:
            self.text_writer.write_econometric_file(file_name, m_list, 'a')
        # the end

    def _calc_metrics_from_matrices(self, company_stats):
        metrics = {}
        # For every source
        for source in company_stats:
            metrics[source] = {}
            # For every delay
            for delay, matrix in company_stats[source].items():
                d_stats = {}
                # Accuracy - only one for the whole matrix.
                total_values_count = sum(matrix.values())
                total_correct_count = matrix['pos_up'] + matrix['neg_down'] + matrix['neu_const']
                if total_values_count == 0:
                    accuracy = None
                else:
                    accuracy = total_correct_count / total_values_count
                d_stats['accuracy'] = accuracy
                # Precision
                pp = matrix['pos_up'] + matrix['pos_down'] + matrix['pos_const']
                pn = matrix['neg_up'] + matrix['neg_down'] + matrix['neg_const']
                pc = matrix['neu_up'] + matrix['neu_down'] + matrix['neu_const']
                d_stats['precision_pos'] = None if pp == 0 else matrix['pos_up'] / pp
                d_stats['precision_neg'] = None if pn == 0 else matrix['neg_down'] / pn
                d_stats['precision_neu'] = None if pc == 0 else matrix['neu_const'] / pc
                # Precision average
                #weight_by = 3 if total_correct_count == 0 else total_correct_count
                #weights = (matrix['pos_up'], matrix['neg_down'], matrix['neu_const'])
                weight_by = 3
                weights = (1, 1, 1)
                d_stats['precision_avg'] = (float(d_stats['precision_pos'] or 0) * weights[0] +
                                            float(d_stats['precision_neg'] or 0) * weights[1] +
                                            float(d_stats['precision_neu'] or 0) * weights[2]) / weight_by
                # Recall
                rp = matrix['pos_up'] + matrix['neg_up'] + matrix['neu_up']
                rn = matrix['pos_down'] + matrix['neg_down'] + matrix['neu_down']
                rc = matrix['pos_const'] + matrix['neg_const'] + matrix['neu_const']
                d_stats['recall_pos'] = None if rp == 0 else matrix['pos_up'] / rp
                d_stats['recall_neg'] = None if rn == 0 else matrix['neg_down'] / rn
                d_stats['recall_neu'] = None if rc == 0 else matrix['neu_const'] / rc
                # Recall average
                #weight_by = 3 if total_correct_count == 0 else total_correct_count
                #weights = (matrix['pos_up'], matrix['neg_down'], matrix['neu_const'])
                weight_by = 3
                weights = (1, 1, 1)
                d_stats['recall_avg'] = (float(d_stats['recall_pos'] or 0) * weights[0] +
                                         float(d_stats['recall_neg'] or 0) * weights[1] +
                                         float(d_stats['recall_neu'] or 0) * weights[2]) / weight_by
                # Save to total data
                metrics[source][delay] = d_stats
        # result
        return metrics

    def _prepare_matrices_for_sources(self):
        # Main directory
        company_stats = {}
        # For every source
        for source in self.source_sent_pos:
            company_stats[source] = {}
            # For every delay create a confusion matrix.
            for i in self.day_delays:
                company_stats[source][i] = {
                    'pos_up': 0.0, 'pos_down': 0.0, 'pos_const': 0.0,
                    'neg_up': 0.0, 'neg_down': 0.0, 'neg_const': 0.0,
                    'neu_up': 0.0, 'neu_down': 0.0, 'neu_const': 0.0,
                }
        # result
        return company_stats

    def _fill_result_matrices(self, company_stats, total_data):
        # Process all days
        for day in total_data:
            # For every source
            for source in self.source_sent_pos:
                # For every delay update the confusion matrix.
                for i in self.day_delays:
                    price_mov = day[self.price_dir_indices[i]]
                    # Skip FALSE price movements.
                    if not price_mov:
                        continue
                    company_stats[source][i][day[self.source_sent_pos[source]] + '_' + price_mov] += 1
        # result
        return company_stats

    def _format_source_metrics_to_list(self, company_id, metrics, price_type):
        """
        Format metrics to one list (one line).
        :param metrics: dictionary of sources
        :return:
        """
        company_rows = []
        # For every source
        for source in sorted(metrics.keys()):
            # For every delay
            for delay in sorted(metrics[source].keys()):
                row_data = [company_id, source, price_type, delay]
                d_data = metrics[source][delay]
                row_data.extend([
                    d_data['accuracy'], d_data['precision_avg'], d_data['recall_avg'],
                    d_data['precision_pos'], d_data['precision_neg'], d_data['precision_neu'],
                    d_data['recall_pos'], d_data['recall_neg'], d_data['recall_neu'],
                ])
                company_rows.append(row_data)
        # Result
        return company_rows

    def get_source_metrics_header(self):
        """
        One row ... one source and one delay.
        """
        header = [
            'company_id', 'source', 'price_type', 'delay',
            'accuracy', 'precision_avg', 'recall_avg',
            'precision_pos', 'precision_neg', 'precision_neu',
            'recall_pos', 'recall_neg', 'recall_neu',
        ]
        # result
        return header
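For orientation, here is a minimal, self-contained sketch of the same confusion-matrix arithmetic on a hand-built toy matrix (the counts are invented for illustration; no TextWriter or database is involved):

# Toy 3x3 confusion matrix: keys combine predicted sentiment (pos/neg/neu)
# with observed price movement (up/down/const); all counts are invented.
matrix = {
    'pos_up': 4.0, 'pos_down': 1.0, 'pos_const': 1.0,
    'neg_up': 0.0, 'neg_down': 3.0, 'neg_const': 1.0,
    'neu_up': 1.0, 'neu_down': 1.0, 'neu_const': 2.0,
}
total_values_count = sum(matrix.values())                                          # 14
total_correct_count = matrix['pos_up'] + matrix['neg_down'] + matrix['neu_const']  # 9
accuracy = total_correct_count / total_values_count                                # ~0.643
# Precision for 'pos': of all days predicted positive, how many moved up?
pp = matrix['pos_up'] + matrix['pos_down'] + matrix['pos_const']
precision_pos = matrix['pos_up'] / pp                                              # 4/6 ~ 0.667
# Recall for 'pos': of all days that moved up, how many were predicted positive?
rp = matrix['pos_up'] + matrix['neg_up'] + matrix['neu_up']
recall_pos = matrix['pos_up'] / rp                                                 # 4/5 = 0.8
print(accuracy, precision_pos, recall_pos)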
Code example #2
class DocumentsAnalyzer(object):

    def __init__(self, output_dir, verbose=False):
        """
        :param output_dir: Absolute filepath to output directory.
        :param verbose: boolean: Write info to console.
        :return:
        """
        self.dbmodel = BasicDbModel()
        self.s_analyzer = LexiconSentimentAnalyzer()
        self.text_writer = TextWriter(output_dir)   # writing CSV files
        self.verbose = verbose  # verbose output
        self.stock_processor = StockPriceProcessor()    # Object for price movements
        self.source_metrics_calculator = SourceMetricsCalculator(output_dir)
        self.total_metrics_calculator = TotalMetricsCalculator(output_dir)
        self.source_metrics_calculator_2_classes = SourceMetricsCalculator2classes(output_dir)

    ## Analyze output file

    def analyze_all_companies(self, from_date, to_date, file_name, price_type, const_boundaries, used_dict_name='vader', classes_count=3):
        """
        Analyze all documents for all companies.

        :param from_date: datetime.date: first analyzed day
        :param to_date: datetime.date: last analyzed day
        :param file_name: base name of the output files
        :param price_type: string identifying the used stock price type
        :param const_boundaries: boundaries for classifying a price movement as constant
        :param used_dict_name: name of the sentiment lexicon to use
        :param classes_count: number of sentiment classes (2 or 3)
        :return:
        """
        # Reset files.
        self.text_writer.write_econometric_file(file_name, [self._get_days_stats_header()], 'w')
        total_m_header = self.total_metrics_calculator.get_total_metrics_header()
        self.text_writer.write_econometric_file(file_name + '_total-metrics', [total_m_header], 'w')
        source_m_header = self.source_metrics_calculator.get_source_metrics_header()
        self.text_writer.write_econometric_file(file_name + '_source-metrics', [source_m_header], 'w')
        # Process companies
        companies = self.dbmodel.get_companies_order_by_total_documents(from_date, to_date)
        for comp in companies:
            print("<<<<<Company %d>>>>>") % comp['id']
            if not self.verbose:
                with FaCommon.Helpers.suppress_stdout():
                    self.analyze_company(comp['id'], from_date, to_date, file_name, price_type, const_boundaries, used_dict_name, False, classes_count)
            else:
                self.analyze_company(comp['id'], from_date, to_date, file_name, price_type, const_boundaries, used_dict_name, False, classes_count)
        print('>>>All stuff saved.')

    def analyze_company(self, company_id, from_date, to_date, file_name, price_type, const_boundaries, used_dict_name, write_header=False, classes_count=3):
        """
        Analyze documents about company (from_date -> to_date).

        :return: None; one row per day (document counts, price movements, sentiment) is written to the output files.
        """
        # Prepare variables.
        examined_date = from_date
        last_date = to_date
        total_data = []
        max_sent = float('-inf')

        # Set stock prices for this company ID.
        self.stock_processor.set_stock_prices(company_id, examined_date, price_type)
        #exit(self.stock_processor.get_price_movement_with_delay(examined_date, 2))

        # Prepare list for writing to a file.
        # For every day (from "from_date" to "to_date"), query the DB for documents created on the day.
        while examined_date <= last_date:
            print("===%s===") % examined_date
            # For every document type, process all documents and count number of neutral, positive, negative documents.
            yahoo_values = self._process_yahoo(company_id, examined_date, used_dict_name)
            fb_p_values = self._process_fb_posts(company_id, examined_date, used_dict_name)
            fb_c_values = self._process_fb_comments(company_id, examined_date, used_dict_name)
            tw_values = self._process_tweets(company_id, examined_date, used_dict_name)
            # Save acquired data
            day_data = [
                company_id,
                examined_date.strftime('%d.%m.%Y'),
                fb_p_values['neu'], fb_p_values['pos'], fb_p_values['neg'],
                fb_c_values['neu'], fb_c_values['pos'], fb_c_values['neg'],
                yahoo_values['neu'], yahoo_values['pos'], yahoo_values['neg'],
                tw_values['neu'], tw_values['pos'], tw_values['neg'],
            ]
            # Get stock price movement direction for 1,2,3 days from examined date. Also for previous day.
            day_data.append(self.stock_processor.get_price_movement_with_delay(examined_date, -1, const_boundaries))
            day_data.append(self.stock_processor.get_price_movement_with_delay(examined_date, 1, const_boundaries))
            day_data.append(self.stock_processor.get_price_movement_with_delay(examined_date, 2, const_boundaries))
            day_data.append(self.stock_processor.get_price_movement_with_delay(examined_date, 3, const_boundaries))
            # Calculate simple sentiment for all sources.
            fb_post_s = self._calc_source_sentiment(fb_p_values)
            fb_comment_s = self._calc_source_sentiment(fb_c_values)
            yahoo_s = self._calc_source_sentiment(yahoo_values)
            twitter_s = self._calc_source_sentiment(tw_values)
            day_data.extend([fb_post_s, fb_comment_s, yahoo_s, twitter_s])
            # Calculate overall sentiment for the day.
            (max_sent, day_sent) = self._calc_overall_sentiment_for_day(max_sent, fb_p_values, fb_c_values, yahoo_values, tw_values)
            day_data.append(day_sent)
            # Save day data to total data.
            total_data.append(day_data)
            # Increment examined date.
            examined_date = examined_date + datetime.timedelta(days=1)

        # Normalize sentiment values.
        for i, day_data in enumerate(total_data):
            norm_sent = self._normalize_sentiment(total_data[i][-1], max_sent)
            string_sent = self._format_sentiment(norm_sent)
            total_data[i][-1] = string_sent

        # Write results to file.
        if write_header:
            total_data.insert(0, self._get_days_stats_header())
            self.text_writer.write_econometric_file(file_name, total_data, 'w')
            del total_data[0]
        else:
            self.text_writer.write_econometric_file(file_name, total_data, 'a')

        # Calculate metrics by source.
        m_filename = file_name + '_source-metrics'
        if classes_count == 3:
            self.source_metrics_calculator.calculate_metrics_by_source(company_id, total_data, m_filename, price_type, write_header)
        else:
            self.source_metrics_calculator_2_classes.calculate_metrics_by_source(company_id, total_data, m_filename, price_type, write_header)

        # Calculate total metrics.
        m_filename = file_name + '_total-metrics'
        self.total_metrics_calculator.calculate_total_metrics(company_id, total_data, m_filename, price_type, write_header)



    #### PRIVATE methods for processing documents

    def _process_fb_posts(self, company_id, examined_date, used_dict_name='vader'):
        # Select all FB posts for given company created on given date.
        posts = self.dbmodel.get_daily_fb_posts(company_id, examined_date)
        counter = {'pos': 0, 'neu': 0, 'neg': 0}
        # Calculate sentiment for all posts
        for post in posts:
            #print("FB post: %s") % post['id'],
            post_text = TextProcessing.process_facebook_text(post['text'])
            if len(post_text) == 0:
                continue    # skip empty posts
            sent_value = self.s_analyzer.calculate_vader_sentiment(used_dict_name, post_text, False)
            polarity = self.s_analyzer.format_sentiment_value(sent_value)
            counter[polarity] += 1
            #print("| %s ... %s") % (str(round(sent_value, 4)), polarity)
        # result
        return counter

    def _process_fb_comments(self, company_id, examined_date, used_dict_name='vader'):
        # Select all FB comments.
        comments = self.dbmodel.get_daily_fb_comments(company_id, examined_date)
        counter = {'pos': 0, 'neu': 0, 'neg': 0}
        # Calculate sentiment for all comments
        for com in comments:
            #print("FB comment: %s") % com['id'],
            com_text = TextProcessing.process_facebook_text(com['text'])
            if len(com_text) == 0:
                continue    # skip empty comments
            sent_value = self.s_analyzer.calculate_vader_sentiment(used_dict_name, com_text, False)
            polarity = self.s_analyzer.format_sentiment_value(sent_value)
            counter[polarity] += 1
            #print("| %s ... %s") % (str(round(sent_value, 4)), polarity)
        # result
        return counter

    def _process_yahoo(self, company_id, examined_date, used_dict_name='vader'):
        # Select all Yahoo Finance articles.
        articles = self.dbmodel.get_daily_articles(company_id, examined_date)
        counter = {'pos': 0, 'neu': 0, 'neg': 0}
        # Calculate sentiment for all articles
        for art in articles:
            #print("Yahoo article: %s") % art['id'],
            art_text = TextProcessing.process_article_text(art['text'])
            if len(art_text) == 0:
                continue    # skip empty articles
            sent_value = self.s_analyzer.calculate_vader_sentiment(used_dict_name, art_text, True)
            polarity = self.s_analyzer.format_sentiment_value(sent_value)
            counter[polarity] += 1
            #print("| %s ... %s") % (str(round(sent_value, 4)), polarity)
        # result
        return counter

    def _process_tweets(self, company_id, examined_date, used_dict_name='vader'):
        # Select all tweets.
        tweets = self.dbmodel.get_daily_tweets(company_id, examined_date)
        counter = {'pos': 0, 'neu': 0, 'neg': 0}
        # Calculate sentiment for all tweets.
        for tw in tweets:
            #print("Tweet: %s") % tw['tw_id'],
            tw_text = TextProcessing.process_facebook_text(tw['text'])
            if len(tw_text) == 0:
                continue    # skip empty tweets
            sent_value = self.s_analyzer.calculate_vader_sentiment(used_dict_name, tw_text, False)
            polarity = self.s_analyzer.format_sentiment_value(sent_value)
            counter[polarity] += 1
            #print("| %s ... %s") % (str(round(sent_value, 4)), polarity)
        # result
        return counter

    ## PRIVATE methods for determining sentiment of the whole day

    def _calc_source_sentiment(self, s_dict):
        """
        Calculate sentiment for given source dictionary.

        :param s_dict: dictionary (sentiment -> number of documents)
        :return: string (pos, neg, neu)
        """
        max_s = max(s_dict.keys(), key=lambda k: s_dict[k])
        # If neutral value is also the biggest one, choose it.
        if s_dict['neu'] == s_dict[max_s]:
            return 'neu'
        return max_s


    @staticmethod
    def _calc_overall_sentiment_for_day(max_sent, fb_p_values, fb_c_values, yahoo_values, tw_values):
        # Calculate numeric sentiment
        fb_p_sent = fb_p_values['pos'] - fb_p_values['neg']
        fb_c_sent = fb_c_values['pos'] - fb_c_values['neg']
        yahoo_sent = yahoo_values['pos'] - yahoo_values['neg']
        tw_sent = tw_values['pos'] - tw_values['neg']
        overall_sent = fb_p_sent + fb_c_sent + yahoo_sent + tw_sent
        #print fb_p_sent,fb_c_sent,yahoo_sent,tw_sent
        # Is the new sentiment larger than current largest one?
        if overall_sent > max_sent:
            max_sent = overall_sent
        return max_sent, overall_sent

    @staticmethod
    def _normalize_sentiment(score, alpha=100):
        """
        Normalize the score to be between -1 and 1 using an alpha that approximates the max expected value.
        """
        try:
            norm_score = score/math.sqrt((score*score) + alpha)
        except ZeroDivisionError:
            norm_score = score
        return norm_score

    @staticmethod
    def _format_sentiment(norm_score):
        if -0.1 < norm_score < 0.1:
            return 'neu'
        elif norm_score > 0:
            return 'pos'
        elif norm_score < 0:
            return 'neg'

    @staticmethod
    def _get_days_stats_header():
        header_days = [
            'company_id', 'date',
            'fb_post_neutral', 'fb_post_positive', 'fb_post_negative',
            'fb_comment_neutral', 'fb_comment_positive', 'fb_comment_negative',
            'yahoo_neutral', 'yahoo_positive', 'yahoo_negative',
            'twitter_neutral', 'twitter_positive', 'twitter_negative',
            'stock_dir_-1', 'stock_dir_1', 'stock_dir_2', 'stock_dir_3',
            'sentiment_fb_post', 'sentiment_fb_comment', 'sentiment_yahoo', 'sentiment_twitter',
            'overall_sentiment',
        ]
        return header_days
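A hypothetical driver for the class above might look as follows; the output directory, date range, price_type label and const_boundaries value are illustrative assumptions, not values taken from the original project:

import datetime

# Illustrative run over an assumed Q1 2015 window; verbose=True keeps
# per-document console output enabled.
analyzer = DocumentsAnalyzer('/tmp/fa_output', verbose=True)
analyzer.analyze_all_companies(
    from_date=datetime.date(2015, 1, 1),
    to_date=datetime.date(2015, 3, 31),
    file_name='days_stats',
    price_type='close',      # assumed label understood by StockPriceProcessor
    const_boundaries=0.5,    # assumed boundary for the "const" movement class
    used_dict_name='vader',
    classes_count=3,
)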
Code example #3
class SimpleExporter(object):

    def __init__(self, output_dir, verbose=False):
        self.dbmodel = BasicDbModel()
        self.s_analyzer = LexiconSentimentAnalyzer()
        self.text_writer = TextWriter(os.path.abspath(output_dir))
        self.verbose = verbose

    def analyze_company(self, company_id, from_date, to_date):
        """
        Analyze documents of the given company - just write VADER scores of every document.
        :param company_id: int
        :param from_date: datetime.date: first analyzed day
        :param to_date: datetime.date: last analyzed day
        :return:
        """
        print('==Company %d==' % company_id)
        # For every source, create a standalone file.
        for source in ['articles', 'fb_posts', 'fb_comments', 'tweets']:
            self._analyze_documents_by_source(company_id, source, from_date, to_date)
        # end

    def analyze_all_companies(self, from_date, to_date):
        companies = self.dbmodel.get_companies()
        # For every source, create a standalone file.
        for source in ['articles', 'fb_posts', 'fb_comments', 'tweets']:
            print('====Processing %s====' % source)
            # Prepare file.
            header_line = ['company_id', 'date', 'sentiment_number', 'sentiment_polarity']
            self.text_writer.write_file([header_line], source, 'csv', ',', 'w')
            # Browse all companies.
            for comp in companies:
                print('==Company %d==' % comp['id'])
                self._analyze_company_source(comp['id'], source, from_date, to_date, source)
                #break


    def _analyze_company_source(self, company_id, source_type, from_date, to_date, write_to_filename):
        # Get documents.
        documents = getattr(self.dbmodel, 'get_'+source_type)(company_id, from_date, to_date)
        # Create a final list.
        docs_list = []
        for doc in documents:
            #print doc['id'],
            # Process text.
            if source_type == 'articles':
                text = TextProcessing.process_article_text(doc['text'])
            else:
                text = TextProcessing.process_facebook_text(doc['text'])
            # Skip empty documents.
            if len(text) == 0:
                continue
            # Get sentiment values of the text.
            if source_type == 'articles':
                sentiment_number = self.s_analyzer.calculate_vader_sentiment('custom_dict_orig', text, True)
            else:
                sentiment_number = self.s_analyzer.calculate_vader_sentiment('custom_dict_orig', text, False)
            sentiment_polarity = self.s_analyzer.format_sentiment_value(sentiment_number)
            # Save data.
            doc_date = self._get_doc_date(source_type, doc)
            docs_list.append([company_id, doc_date, sentiment_number, sentiment_polarity])
        # Write to file.
        self.text_writer.write_file(docs_list, write_to_filename, 'csv', ',', 'a')


    def _analyze_documents_by_source(self, company_id, source_type, from_date, to_date, write_to_filename=False):
        # Get documents
        documents = getattr(self.dbmodel, 'get_'+source_type)(company_id, from_date, to_date)
        # Create a final list
        docs_list = []
        for doc in documents:
            # Process text
            if source_type == 'articles':
                text = TextProcessing.process_article_text(doc['text'])
            else:
                text = TextProcessing.process_facebook_text(doc['text'])
            # Skip empty documents
            if len(text) == 0:
                continue
            # Get sentiment values of the text
            sent_sum, sent_division = self.s_analyzer.calculate_vader_sentiment_values('vader', text)
            # Add this to list
            docs_list.append([sent_sum, sent_division, text])
        # Prepare header
        header = ['sentiment_sum', 'sentiment_division', 'text']
        docs_list.insert(0, header)
        file_name = '%d_%s' % (company_id, source_type)
        self.text_writer.write_file(docs_list, file_name, 'csv', '\t', 'w')


    def _get_doc_date(self, source_type, doc):
        if source_type == 'articles':
            date_obj = doc['published_date']
        elif source_type in ['fb_posts', 'fb_comments']:
            date_obj = self.dbmodel.from_timestamp_to_date(doc['created_timestamp'])
        elif source_type == 'tweets':
            date_obj = doc['created_at']
        return date_obj.strftime('%Y-%m-%d')
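Usage is analogous to the analyzer above; a minimal sketch, assuming the same hypothetical output directory and date range:

import datetime

# Writes one CSV per source ('articles', 'fb_posts', 'fb_comments', 'tweets'),
# each row holding company_id, date, VADER score and polarity of one document.
exporter = SimpleExporter('/tmp/fa_output', verbose=False)
exporter.analyze_all_companies(datetime.date(2015, 1, 1), datetime.date(2015, 3, 31))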
Code example #4
class TotalMetricsCalculator(object):

    def __init__(self, output_dir):
        """
        Constructor
        :param output_dir: absolute filepath to output directory
        :return:
        """
        self.output_dir = output_dir
        self.text_writer = TextWriter(output_dir)
        # Set input data indices.
        self.source_sent_pos = {'fb_post': 18, 'fb_comment': 19, 'yahoo': 20, 'twitter': 21}
        self.price_dir_indices = {-1: 14, 1: 15, 2: 16, 3: 17}
        self.day_delays = [-1, 1, 2, 3]

    def calculate_total_metrics(self, company_id, total_data, file_name, price_type, write_header=False):
        # Calculate metrics
        results = self._evaluate_results_for_company(total_data)
        metrics = self._calc_metrics_from_results(results)
        # Save to file
        m_list = self._format_total_metrics_to_list(company_id, price_type, metrics)
        if write_header:
            w_list = [self.get_total_metrics_header()]
            w_list.extend(m_list)
            self.text_writer.write_econometric_file(file_name, w_list, 'w')
        else:
            self.text_writer.write_econometric_file(file_name, m_list, 'a')

    def _evaluate_results_for_company(self, total_data):
        """For every day and every delay evaluate relation between sentiment value and stock price movement."""
        # Prepare variables
        stats = {}
        for i in self.day_delays:
            stats[i] = {
                'pos_up': 0.0, 'pos_down': 0.0, 'pos_const': 0.0,
                'neg_up': 0.0, 'neg_down': 0.0, 'neg_const': 0.0,
                'neu_up': 0.0, 'neu_down': 0.0, 'neu_const': 0.0,
            }
        # Process all days
        for day in total_data:
            for i in self.day_delays:
                price_mov = day[self.price_dir_indices[i]]
                # Skip FALSE price movements.
                if not price_mov:
                    continue
                stats[i][day[-1] + '_' + price_mov] += 1
        # result
        return stats

    def _calc_metrics_from_results(self, results):
        """Calculate some metrics from evaluated results (confusion matrix)."""
        # Count total number of values in one delay data.
        total_values_count = sum(next(iter(results.values())).values())
        # Process all delays and calculate metrics.
        metrics = {}
        for delay, data in results.items():
            if total_values_count == 0:
                accuracy = None
            else:
                accuracy = (data['pos_up'] + data['neg_down'] + data['neu_const']) / total_values_count
            try:
                precision = data['pos_up'] / (data['pos_up'] + data['pos_down'])
            except ZeroDivisionError:
                precision = None
            try:
                recall = data['pos_up'] / (data['pos_up'] + data['neg_up'])
            except ZeroDivisionError:
                recall = None
            metrics[delay] = {'accuracy': accuracy, 'precision': precision, 'recall': recall}
        # Result
        return metrics

    def _format_total_metrics_to_list(self, company_id, price_type, metrics):
        ordered_keys = sorted(metrics.keys())
        lines = []
        for delay in ordered_keys:
            d_line = [company_id, price_type, delay]
            d_line.extend([metrics[delay]['accuracy'], metrics[delay]['precision'], metrics[delay]['recall']])
            lines.append(d_line)
        # Result
        return lines

    def get_total_metrics_header(self):
        header = [
            'company_id', 'price_type', 'delay',
            'accuracy', 'precision', 'recall',
        ]
        return header
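As with the per-source metrics, the arithmetic is easiest to see on a hand-built matrix; a minimal sketch with invented counts for a single delay (note that here precision and recall are computed for the positive class only, with narrower denominators than in the per-source calculator):

# Confusion matrix for delay 1 only; all counts are invented for illustration.
results = {1: {
    'pos_up': 3.0, 'pos_down': 1.0, 'pos_const': 0.0,
    'neg_up': 1.0, 'neg_down': 2.0, 'neg_const': 0.0,
    'neu_up': 0.0, 'neu_down': 0.0, 'neu_const': 1.0,
}}
data = results[1]
total_values_count = sum(next(iter(results.values())).values())                           # 8
accuracy = (data['pos_up'] + data['neg_down'] + data['neu_const']) / total_values_count   # 6/8 = 0.75
precision = data['pos_up'] / (data['pos_up'] + data['pos_down'])                          # 3/4 = 0.75
recall = data['pos_up'] / (data['pos_up'] + data['neg_up'])                               # 3/4 = 0.75
print(accuracy, precision, recall)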