def get_density(self, x):
        """
        Estimates the density at ``x`` from the nearest point of the distribution.

        The stored point closest to ``x`` (according to ``MathUtils.get_distance``)
        is looked up first. When that point lies strictly within half of the minimum
        inter-point distance, its probability mass is divided by the volume returned
        by ``MathUtils.get_volume`` for that radius and the dimensionality of the
        distribution. Otherwise the density is 0.

        :param x: the point (a bare float is wrapped into a one-element array)
        :return: the density at the point
        """
        if isinstance(x, float):
            x = np.array([x])

        nearest, nearest_dist = [], np.inf
        for candidate in self._points:
            candidate_dist = MathUtils.get_distance(candidate, x)
            if candidate_dist < nearest_dist:
                nearest, nearest_dist = candidate, candidate_dist

        # radius of the ball around each stored point
        radius = self._min_distance / 2
        if not nearest_dist < radius:
            return 0
        return self._points[nearest] / MathUtils.get_volume(radius, self.get_dimensions())
 def __init__(self):
     """
     Initialises the source root path, the list of keys to ignore, the sheet names
     and the math/file helper objects.
     """
     home_dir = expanduser("~")
     self.root_path = join(home_dir, "PycharmProjects", "python-vistalytics", "source")
     self.ignore_keys = [
         'Margins % of Sales',
         'Profitability',
         'Cash Flow Ratios',
         'Balance Sheet Items (in %)',
         'Liquidity/Financial Health',
         'Efficiency',
     ]
     self.sheet_list = [
         'Income_Statement',
         'Balance_Sheet',
         'Cash_Flow_Statement',
         'Key_Ratios',
     ]
     self.math_utils = MathUtils()
     self.file_utils = FileUtils()
 def __init__(self):
     """
     Initialises the tree structure (and a shortcut to its tree), the default report
     choice, the file/math helper objects and the source root path.
     """
     structure = TreeStructure()
     self.tree_str = structure
     self.tree = structure.tree
     # income statements are the default report
     self.report_choice = con.INCOME_STATEMENTS
     self.file_utils = FileUtils()
     self.math_utils = MathUtils()
     home_dir = expanduser('~')
     self.root_path = join(home_dir, 'PycharmProjects', 'python-vistalytics', 'source')
 def test_maths(self):
     """
     Checks ``MathUtils.get_volume`` against the expected n-ball volumes for
     dimensions 1 to 4, with an absolute tolerance of 0.001.
     """
     tolerance = 0.001
     cases = [
         # (radius, dimension, expected volume)
         (2.0, 1, 4.0),
         (2.0, 2, math.pi * 4),
         (2.0, 3, 4.0 / 3.0 * math.pi * 8),
         (3.0, 4, math.pow(math.pi, 2) / 2 * 81),
     ]
     for radius, dimension, expected in cases:
         assert MathUtils.get_volume(radius, dimension) == pytest.approx(expected, abs=tolerance)
Exemple #5
0
class QuarterlyAverageChangeAnalysis(object):
    """
    Builds, for every file of the ``in/quarter`` folder, a CSV report with the change
    between the fifth-from-last and the last column of each row.
    """

    def __init__(self):
        """
        Initialises the source root path and the math/file helper objects.
        """
        self.root_path = join(expanduser('~'), "PycharmProjects",
                              "python-vistalytics", "source")
        self.math_utils = MathUtils()
        self.file_utils = FileUtils()

    def run(self):
        """
        Reads every file of ``<root_path>/in/quarter`` and writes one
        ``<name>_Quarterly_Report.csv`` per input file into ``<root_path>/out``.

        A ``TTM`` column, when present, is dropped before the changes are computed.
        Each report has the columns ``Keys``, ``Year Over Year Change`` and
        ``Year Over Year Change (%)``.
        """
        path = join(self.root_path, "in", 'quarter')
        for f in self.file_utils.get_files(path):
            data = self.file_utils.read_csv(path, f)

            if 'TTM' in data:
                # ``axis`` has to be a keyword: pandas >= 2.0 rejects the positional form
                data = data.drop('TTM', axis=1)

            key_list = []
            change = []
            percent_change = []
            for i, v in data.iterrows():
                size_val = len(v)
                # ``iloc`` keeps the look-up positional whatever the column labels are.
                # NOTE(review): rows with fewer than five values wrap around to a
                # negative position, exactly as the previous plain indexing did.
                previous = v.iloc[size_val - 5]
                latest = v.iloc[size_val - 1]

                key_list.append(i)
                change.append(self.math_utils.get_change(previous, latest))
                percent_change.append(
                    self.math_utils.percentage_change(previous, latest))

            # Write csv.
            columns = ['Keys', 'Year Over Year Change', 'Year Over Year Change (%)']
            output_data = pd.DataFrame(
                {
                    'Keys': key_list,
                    'Year Over Year Change': change,
                    'Year Over Year Change (%)': percent_change,
                },
                columns=columns)

            output_dir = join(
                self.root_path, 'out',
                splitext(f)[0].replace(" ", "_") + "_Quarterly_Report.csv")
            self.file_utils.write_csv(output_dir, output_data)
    def __init__(self, points=None):
        """
        Creates a new discrete density function, given the set of points.

        :param points: a dict mapping each point to its probability
        :raises NotImplementedError: if ``points`` is not a dict
        """
        if not isinstance(points, dict):
            raise NotImplementedError()

        # the set of points for the density function
        self._points = dict(points)  # {point: probability}

        # minimum distance between points
        points_keys = np.array([np.array(point) for point in points])
        self._min_distance = MathUtils.get_min_euclidian_distance(points_keys)
        # the volume employed for the normalisation
        self._volume = MathUtils.get_volume(self._min_distance / 2., self.get_dimensions())
 def _get_node_list(self, sub_tree):
     """
     Un-tags every node of ``sub_tree`` in the main tree and collects the identifiers
     of the non-root nodes whose slope is reported by ``MathUtils.compare`` as
     matching the slope of the sub-tree root.

     :param sub_tree: the sub-tree to inspect
     :return: the list of matching node identifiers
     """
     subtree_root = sub_tree.get_node(sub_tree.root)
     matching_ids = []
     for member in sub_tree.all_nodes():
         # the tag is reset on the node of the main tree, not on the sub-tree node
         self.tree.get_node(member.identifier).data[con.TAGGED] = False
         if member.is_root():
             continue
         same_slope = MathUtils().compare(member.data[con.SLOPE_VALUE],
                                          subtree_root.data[con.SLOPE_VALUE])
         if same_slope:
             matching_ids.append(member.identifier)
     return matching_ids
Exemple #8
0
class YearlyAverageChangeAnalysis(object):
    """
    Builds, for every file of the ``in/annual`` folder, a CSV report with the average
    and percentage change of each row over its last three and last five columns.
    """

    def __init__(self):
        """
        Initialises the source root path and the file/math helper objects.
        """
        self.root_path = join(expanduser("~"), "PycharmProjects", "python-vistalytics", "source")
        self.file_utils = FileUtils()
        self.math_utils = MathUtils()

    def run(self):
        """
        Reads every file of ``<root_path>/in/annual`` and writes one
        ``<name>_Annual_Report.csv`` per input file into ``<root_path>/out``.

        A ``TTM`` column, when present, is dropped before the changes are computed.
        """
        src_dir_path = join(self.root_path, "in", "annual")

        for f in self.file_utils.get_files(src_dir_path):
            data = self.file_utils.read_csv(src_dir_path, f)
            if 'TTM' in data:
                # ``axis`` has to be a keyword: pandas >= 2.0 rejects the positional form
                data = data.drop('TTM', axis=1)

            key_list = []
            avg_change_3_years_list = []
            percentage_change_3_years_list = []
            avg_change_5_years_list = []
            percentage_change_5_years_list = []
            for i, v in data.iterrows():
                # Prepare columns.
                size_val = len(v)
                # ``iloc`` keeps every look-up positional whatever the column labels are.
                # NOTE(review): rows shorter than the window wrap around to a negative
                # position, exactly as the previous plain indexing did.
                latest = v.iloc[size_val - 1]

                key_list.append(i)
                avg_change_3_years_list.append(
                    self.math_utils.average_change(v.iloc[size_val - 3:]))
                percentage_change_3_years_list.append(
                    self.math_utils.percentage_change(v.iloc[size_val - 3], latest))
                avg_change_5_years_list.append(
                    self.math_utils.average_change(v.iloc[size_val - 5:]))
                percentage_change_5_years_list.append(
                    self.math_utils.percentage_change(v.iloc[size_val - 5], latest))

            # Write to csv.
            output_data = {'Keys': key_list,
                           'Average Change Over Last 3 Years': avg_change_3_years_list,
                           'Average Change Over Last 3 Years (%)': percentage_change_3_years_list,
                           'Average Change Over Last 5 Years': avg_change_5_years_list,
                           'Average Change Over Last 5 Years (%)': percentage_change_5_years_list}
            output_data_frame = pd.DataFrame(output_data, columns=list(output_data))
            output_dir_path = join(self.root_path, "out",
                                   splitext(f)[0].replace(" ", "_") + "_Annual_Report.csv")
            self.file_utils.write_csv(output_dir_path, output_data_frame)
 def _filter_nodes(self):
     """
     Updates the tag of every non-root node of the tree and returns the identifiers
     of the nodes that end up tagged.

     A node is tagged when ``MathUtils.compare`` reports no match between its slope
     and the slope of its parent, and the parent itself is not tagged. Every other
     non-root node is un-tagged. The root node is left untouched.

     :return: the list of identifiers of the tagged nodes
     """
     tagged_ids = []
     for current in self.tree.all_nodes():
         if current.is_root():
             continue
         parent = self.tree.parent(current.identifier)
         slopes_match = MathUtils().compare(current.data[con.SLOPE_VALUE],
                                            parent.data[con.SLOPE_VALUE])
         # the parent's tag is only consulted when the slopes do not match
         should_tag = not slopes_match and not parent.data[con.TAGGED]
         current.data[con.TAGGED] = should_tag
         if should_tag:
             tagged_ids.append(current.identifier)
     return tagged_ids
    def _process_tree(self):
        """
        Fills in the derived figures of every node of the tree.

        The values of the root are printed first. If reading them raises a
        ``TypeError``, the root data is rebuilt as the element-wise sum of the values
        of its first three children, with the tag set to False. Every node then
        receives its average change, its average percentage change (computed from
        ``MathUtils.merge_list_percentage`` against the root values), and the slope
        and constant returned by ``MathUtils.get_linear_function_properties``.
        """
        root = self.tree.get_node(self.tree.root)
        try:
            print(root.data[con.VALUES])
        except TypeError:
            kids = self.tree.children(root.identifier)
            summed = [
                first + second + third
                for first, second, third in zip(kids[0].data[con.VALUES],
                                                kids[1].data[con.VALUES],
                                                kids[2].data[con.VALUES])
            ]
            root.data = {con.VALUES: summed, con.TAGGED: False}

        for node in self.tree.all_nodes():
            node_data = node.data
            node_data[con.AVERAGE_CHANGE] = MathUtils().average_change(
                node_data[con.VALUES])
            slope, const = MathUtils().get_linear_function_properties(
                node_data[con.VALUES])
            node_data[con.AVERAGE_PERCENTAGE_CHANGE] = MathUtils().average_change(
                MathUtils().merge_list_percentage(node_data[con.VALUES],
                                                  root.data[con.VALUES]))
            node_data[con.SLOPE_VALUE] = slope
            node_data[con.CONSTANT_VALUE] = const
    def get_cdf(self, x):
        """
        Returns the cumulative probability distribution for the KDE.

        The value is the fraction of the stored points for which
        ``MathUtils.is_lower(point, x)`` holds.

        :param x: the point (a bare float is wrapped into a one-element array)
        :return: the cumulative probability up to x.
        :raises ValueError: if the length of x differs from the dimensionality
        """
        if isinstance(x, float):
            x = np.array([x])
        if len(x) != self.get_dimensions():
            # arrays have no ``length`` attribute, so len() is used to build the message
            raise ValueError("Illegal dimensionality: %d != %d" % (len(x), self.get_dimensions()))

        # each row of the data array is one stored point
        nb_lower_points = sum(1 for point in self._points if MathUtils.is_lower(point, x))

        return nb_lower_points / len(self._points)
    def get_cdf(self, x):
        """
        Returns the cumulative distribution at ``x``: the sum of the probability
        masses of all the points for which ``MathUtils.is_lower(point, x)`` holds.

        :param x: the point (a bare float is wrapped into a one-element array)
        :return: the cumulative density function up to the point
        :raises ValueError: if the length of x differs from the dimensionality
        """
        query = np.array([x]) if isinstance(x, float) else x
        if len(query) != self.get_dimensions():
            raise ValueError("Illegal dimensionality: %d != %d" % (len(query), self.get_dimensions()))

        lower_masses = (
            mass
            for point, mass in self._points.items()
            if MathUtils.is_lower(np.array(point), query)
        )
        # the float start value keeps the result a float when no point is lower
        return sum(lower_masses, 0.)
    def get_prob(self, value):
        """
        Returns the probability P(val).

        A value stored in the table yields its own probability. For a ``DoubleVal``
        or an ``ArrayVal`` on a continuous distribution, the probability of the
        closest table entry is returned instead (``NoneVal`` entries are skipped in
        the array case). Everything else has probability 0.

        :param value: the value
        :return: the associated probability, if one exists.
        """
        if value in self._table:
            return self._table[value]

        if isinstance(value, DoubleVal) and self._is_continuous():
            # continuous scalar values: fall back to the nearest table entry
            target = value.get_double()
            nearest, smallest_gap = None, math.inf
            for candidate in self._table.keys():
                gap = abs(candidate.get_double() - target)
                if gap < smallest_gap:
                    nearest, smallest_gap = candidate, gap
            return self.get_prob(nearest)

        if isinstance(value, ArrayVal) and self._is_continuous():
            # continuous array values: same search with the MathUtils distance
            target = value.get_array()
            nearest, smallest_gap = None, math.inf
            for candidate in self._table.keys():
                if not isinstance(candidate, NoneVal):
                    gap = MathUtils.get_distance(candidate.get_array(), target)
                    if gap < smallest_gap:
                        nearest, smallest_gap = candidate, gap
            return self.get_prob(nearest)

        return 0.
Exemple #14
0
 def __init__(self):
     """
     Initialises the source root path and the math/file helper objects.
     """
     home_dir = expanduser('~')
     self.root_path = join(home_dir, "PycharmProjects", "python-vistalytics", "source")
     self.math_utils = MathUtils()
     self.file_utils = FileUtils()
class ReportOutlierDetection(object):
    """
    Extends each of the four sheets of the workbooks found in ``in/combined`` with
    outlier and change columns and writes the result to ``out``.
    """

    def __init__(self):
        """
        Initialises the source root path, the row keys to ignore, the output sheet
        names and the math/file helper objects.
        """
        self.root_path = join(expanduser("~"), "PycharmProjects", "python-vistalytics", "source")
        self.ignore_keys = ['Margins % of Sales', 'Profitability', 'Cash Flow Ratios', 'Balance Sheet Items (in %)',
                            'Liquidity/Financial Health', 'Efficiency']
        self.sheet_list = ['Income_Statement', 'Balance_Sheet', 'Cash_Flow_Statement', 'Key_Ratios']
        self.math_utils = MathUtils()
        self.file_utils = FileUtils()

    def _process_data(self, values, v):
        """
        Tests whether ``v`` is an outlier with respect to ``values``.

        :param values: the reference values
        :param v: the value to test
        :return: ``(std_dev, mean)`` of ``values`` when the deviation is positive and
                 ``v`` is more than three deviations away from the mean, else ``(0, 0)``
        """
        std_dev = self.math_utils.get_std_dev(values)
        mean_val = self.math_utils.get_mean(values)
        if 0 < 3 * std_dev < abs(v - mean_val):
            return std_dev, mean_val
        return 0, 0

    def run(self):
        """
        Processes every workbook of ``<root_path>/in/combined``.

        For each of the sheets 0 to 3, a ``TTM`` column is dropped when present,
        missing cells are replaced by 0 and ``%`` characters are stripped. The
        columns ``Deviation``, ``Mean`` and the 3/5 year average and percentage
        changes are then appended. Rows whose label is null or listed in
        ``ignore_keys`` get 0 in all of them. Each sheet is written to
        ``<root_path>/out/<name>_Combined_Report.xlsx`` under the matching name of
        ``sheet_list``.
        """
        src_dir_path = join(self.root_path, "in", "combined")
        for f in self.file_utils.get_files(src_dir_path):
            data = self.file_utils.read_excel(src_dir_path, f, [0, 1, 2, 3])
            for j in range(0, 4):
                avg_change_3_years_list = []
                percentage_change_3_years_list = []
                avg_change_5_years_list = []
                percentage_change_5_years_list = []
                dev_list = []
                mean_list = []
                temp = data.get(j)
                if 'TTM' in temp:
                    # ``axis`` has to be a keyword: pandas >= 2.0 rejects the positional form
                    temp = temp.drop('TTM', axis=1)
                temp.fillna(0, inplace=True)
                temp.replace('(%)', '', inplace=True, regex=True)

                for i, v in temp.iterrows():
                    size_val = len(v)
                    if pd.isnull(i) or i in self.ignore_keys or size_val == 0:
                        avg_change_3_years_list.append(0)
                        percentage_change_3_years_list.append(0)
                        avg_change_5_years_list.append(0)
                        percentage_change_5_years_list.append(0)
                        dev_list.append(0)
                        mean_list.append(0)
                        continue

                    v = [float(x) for x in v]
                    latest = v[size_val - 1]
                    # Get mean and deviation
                    # NOTE(review): the reference slice stops two values before the
                    # end, so the second-to-last value is left out - confirm intended.
                    std_dev, mean_val = self._process_data(v[0:size_val - 2], latest)
                    # _process_data signals "no outlier" with a zero deviation
                    is_outlier = std_dev != 0
                    dev_list.append(std_dev)
                    mean_list.append(mean_val)

                    # Get average and percentage for 3 and 5 years.
                    val1 = self.math_utils.percentage_change(v[size_val - 3] if is_outlier else 0, latest)
                    val2 = self.math_utils.percentage_change(v[size_val - 5] if is_outlier else 0, latest)
                    avg_change_3_years_list.append(
                        0 if abs(val1) < 5 else self.math_utils.average_change(v[size_val - 3:]))
                    percentage_change_3_years_list.append(val1)
                    avg_change_5_years_list.append(
                        0 if abs(val2) < 5 else self.math_utils.average_change(v[size_val - 5:]))
                    percentage_change_5_years_list.append(val2)

                    if is_outlier:
                        # only rows actually detected as outliers are reported
                        print("################################")
                        print("For Index: " + str(i) + " Last change " + str(latest) + " is outlier.")

                temp.insert(len(temp.columns), 'Deviation', dev_list)
                temp.insert(len(temp.columns), 'Mean', mean_list)
                temp.insert(len(temp.columns), 'Average Change Over Last 3 Years', avg_change_3_years_list)
                temp.insert(len(temp.columns), 'Average Change Over Last 3 Years (%)',
                            percentage_change_3_years_list)
                temp.insert(len(temp.columns), 'Average Change Over Last 5 Years',
                            avg_change_5_years_list)
                temp.insert(len(temp.columns), 'Average Change Over Last 5 Years (%)',
                            percentage_change_5_years_list)

                out_src_dir = join(self.root_path, 'out', splitext(f)[0].replace(" ", "_") + "_Combined_Report.xlsx")
                self.file_utils.write_excel(out_src_dir, temp, self.sheet_list[j])
class ReportChangeComparision:
    """
    Loads the CSV files of the chosen report into a tree of indexes, derives the
    change figures of every node and prints a textual comparison of each node with
    the root index.
    """

    def __init__(self):
        """
        Initialises the tree structure (and a shortcut to its tree), the default
        report choice, the file/math helper objects and the source root path.
        """
        self.tree_str = TreeStructure()
        self.tree = self.tree_str.tree
        self.report_choice = con.INCOME_STATEMENTS
        self.file_utils = FileUtils()
        self.math_utils = MathUtils()
        self.root_path = join(expanduser('~'), 'PycharmProjects',
                              'python-vistalytics', 'source')

    def _read_csv(self, resource_dir):
        """
        Reads every file of ``<root_path>/in/csv/<resource_dir>``.

        :param resource_dir: the name of the sub-folder to read
        :return: the list of the objects returned by ``FileUtils.read_csv``
        """
        src_dir_path = join(self.root_path, 'in', 'csv', resource_dir)
        return [
            self.file_utils.read_csv(src_dir_path, f)
            for f in self.file_utils.get_files(src_dir_path)
        ]

    def _process_data(self, data_list):
        """
        Copies the rows of the given tables into the matching nodes of the tree.

        A ``con.TTM`` column is dropped (in place) when present. A row is stored,
        as a list under ``con.VALUES``, in the node whose identifier is
        ``tree_str.get_string(row label)``. Rows without a matching node are ignored.

        :param data_list: the tables to process
        """
        for data in data_list:
            if con.TTM in data.columns:
                # ``axis`` has to be a keyword: pandas >= 2.0 rejects the positional form
                data.drop(con.TTM, axis=1, inplace=True)
            for i, v in data.iterrows():
                node_id = self.tree_str.get_string(i)
                if node_id in self.tree:
                    self.tree.get_node(node_id).data = {con.VALUES: v.tolist()}

    def _process_tree(self):
        """
        Fills in the derived figures of every node of the tree.

        If reading the values of the root raises a ``TypeError``, the root data is
        rebuilt as the element-wise sum of the values of its first three children.
        Every node then receives its average change, its average percentage change
        (computed from ``merge_list_percentage`` against the root values), and the
        slope and constant returned by ``get_linear_function_properties``.
        """
        root_node = self.tree.get_node(self.tree.root)
        try:
            root_node.data[con.VALUES]  # probe only: is the root data subscriptable?
        except TypeError:
            dummy_list = self.tree.children(root_node.identifier)
            root_node.data = {
                con.VALUES: [(x + y + z) for x, y, z in zip(
                    dummy_list[0].data[con.VALUES],
                    dummy_list[1].data[con.VALUES],
                    dummy_list[2].data[con.VALUES])]
            }

        for i in self.tree.all_nodes():
            i.data[con.AVERAGE_CHANGE] = self.math_utils.average_change(
                i.data[con.VALUES])
            slope, const = self.math_utils.get_linear_function_properties(
                i.data[con.VALUES])
            i.data[con.AVERAGE_PERCENTAGE_CHANGE] = self.math_utils.average_change(
                self.math_utils.merge_list_percentage(
                    i.data[con.VALUES], root_node.data[con.VALUES]))
            i.data[con.SLOPE_VALUE] = slope
            i.data[con.CONSTANT_VALUE] = const

    def _print_text(self):
        """
        Prints, for every node of the tree, its average change and - for non-root
        nodes - its average percentage change and its slope compared with the root.
        """
        root_node = self.tree.get_node(self.tree.root)
        # The root summary is built up front so that the non-root branch never
        # depends on the root being the first node returned by all_nodes().
        root_str = {
            con.KEY: root_node.identifier.upper(),
            con.AVERAGE_CHANGE: root_node.data[con.AVERAGE_CHANGE],
            con.INDICATOR: con.RAISED if root_node.data[con.AVERAGE_CHANGE] > 0 else con.DROPPED
        }

        print('##################################################')
        print('For time period 2012-2016 analysed: \n \n')
        for i in self.tree.all_nodes():
            indicator_str1 = con.RAISED if i.data[
                con.AVERAGE_CHANGE] > 0 else con.DROPPED
            if i.is_root():
                print('Index ' + i.identifier.upper() + ' has ' +
                      indicator_str1 + ' by ' +
                      str(i.data[con.AVERAGE_CHANGE]) + '\n \n')
                continue

            indicator_str2 = con.INCREASED if i.data[
                con.AVERAGE_PERCENTAGE_CHANGE] > 0 else con.REDUCED
            print('Index ' + i.identifier.upper() + ' has ' +
                  indicator_str1 + ' by ' +
                  str(i.data[con.AVERAGE_CHANGE]))
            print('While total ' + str(root_str.get(con.KEY)) + ' has ' +
                  str(root_str.get(con.INDICATOR)) + ' ' +
                  i.identifier.upper() + ' has ' + indicator_str2 +
                  ' by ' + str(i.data[con.AVERAGE_PERCENTAGE_CHANGE]) +
                  '%')

            # Trend comparison with root node
            if i.data[con.SLOPE_VALUE] < 0:
                indicator_str3 = 'in ' + con.OPPOSITE_DIRECTION
            elif i.data[con.SLOPE_VALUE] > root_node.data[con.SLOPE_VALUE]:
                indicator_str3 = con.RAPIDLY
            else:
                indicator_str3 = con.SLOWLY
            print('Index ' + i.identifier.upper() + ' is growing ' +
                  indicator_str3 + ' with rate ' +
                  str(i.data[con.SLOPE_VALUE]) +
                  ' as compared to base index ' +
                  root_node.identifier.upper() + ' where growth rate is ' +
                  str(root_node.data[con.SLOPE_VALUE]))
            print('\n \n')

    def run(self):
        """
        Asks for the report to analyse on standard input, builds the matching tree,
        loads the CSV data into it and prints the analysis.

        An input that is not an integer, or that is not a key of
        ``tree_str.report_choices``, selects the income statements.
        """
        print(
            "Choices :\n\n\tDefault: Income Statement\n\t1: Balance Statement\n\t2: Cash Flow Statement"
        )
        c = input()
        try:
            self.report_choice = self.tree_str.report_choices[int(c)]
            # compared by value: identity of the constant objects is not guaranteed
            if self.report_choice == con.BALANCE_STATEMENTS:
                self.tree_str.get_balance_tree()
            else:
                self.tree_str.get_cash_flow_tree()
        except (KeyError, ValueError):
            self.report_choice = con.INCOME_STATEMENTS
            self.tree_str.get_income_tree()

        self.tree = self.tree_str.tree
        print(self.report_choice)
        print(self.tree)
        data_list = self._read_csv(self.report_choice)
        self._process_data(data_list)
        self._process_tree()
        self._print_text()