def __repr__(self):
    """
    Emits a brief summary of all the statistics as a string.

    The summary is assembled best-effort from up to five sections:

    * a table of scalar statistics (one row per accessor that succeeds),
    * the ten most frequent items,
    * a table of quantiles,
    * dictionary key / value summaries,
    * an element summary.

    Any optional section whose accessor raises is left out instead of
    failing the whole ``repr``. Only ``Exception`` subclasses are
    suppressed, so ``KeyboardInterrupt`` and ``SystemExit`` still
    propagate.

    Returns
    -------
    out : str
        The formatted summary with tabs expanded to 8 columns.
    """
    # Each entry: [accessor method name, display label, 'is exact' flag].
    fields = [
        ['size', 'Length', 'Yes'],
        ['min', 'Min', 'Yes'],
        ['max', 'Max', 'Yes'],
        ['mean', 'Mean', 'Yes'],
        ['sum', 'Sum', 'Yes'],
        ['var', 'Variance', 'Yes'],
        ['std', 'Standard Deviation', 'Yes'],
        ['num_undefined', '# Missing Values', 'Yes'],
        ['num_unique', '# unique values', 'No'],
    ]

    s = '\n'
    result = []
    for method_name, label, is_exact in fields:
        try:
            method_to_call = getattr(self, method_name)
            result.append([label, str(method_to_call()), is_exact])
        except Exception:
            # Best-effort: a statistic that cannot be computed is simply
            # left out of the table.
            pass
    sf = SArray(result).unpack(column_name_prefix="")
    # NOTE(review): this relies on rename() (and add_column() below)
    # mutating the SFrame in place -- confirm against the SFrame version
    # in use.
    sf.rename({'0': 'item', '1': 'value', '2': 'is exact'})
    s += sf.__str__(footer=False)
    s += "\n"

    s += "\nMost frequent items:\n"
    frequent = self.frequent_items()
    # .items() works on both Python 2 and Python 3 mappings, whereas
    # .iteritems() only exists on Python 2.
    sorted_freq = sorted(frequent.items(),
                         key=operator.itemgetter(1),
                         reverse=True)
    if not sorted_freq:
        s += " -- All elements appear with less than 0.01% frequency -- \n"
    else:
        # Keep only the ten entries with the largest values.
        sorted_freq = sorted_freq[:10]
        sf = SFrame()
        sf.add_column(SArray(['count']), 'value')
        for elem in sorted_freq:
            sf.add_column(SArray([elem[1]]), str(elem[0]))
        s += sf.__str__(footer=False) + "\n"
    s += "\n"

    try:
        # Probe before writing the header so that a failure here does not
        # leave a dangling "Quantiles" heading in the output.
        self.quantile(0)
        s += "Quantiles: \n"
        sf = SFrame()
        for q in [0.0, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 1.00]:
            sf.add_column(SArray([self.quantile(q)]),
                          str(int(q * 100)) + '%')
        s += sf.__str__(footer=False) + "\n"
    except Exception:
        pass

    try:
        # Fetch both summaries first: if either call raises, neither
        # header is emitted.
        t_k = self.dict_key_summary()
        t_v = self.dict_value_summary()
        s += "\n******** Dictionary Element Key Summary ********\n"
        s += repr(t_k)
        s += "\n******** Dictionary Element Value Summary ********\n"
        s += repr(t_v) + '\n'
    except Exception:
        pass

    try:
        t_k = self.element_summary()
        s += "\n******** Element Summary ********\n"
        s += repr(t_k) + '\n'
    except Exception:
        pass

    return s.expandtabs(8)
Esempio n. 2
0
    def __repr__(self):
        """
        Emits a brief summary of all the statistics as a string.

        Sections are added best-effort, in this order: a table of scalar
        statistics, the ten most frequent items, a table of quantiles,
        the dictionary key / value summaries and the element summary.
        An optional section whose accessor raises is omitted rather than
        aborting the ``repr``. Only ``Exception`` subclasses are
        suppressed, so ``KeyboardInterrupt`` and ``SystemExit`` still
        propagate.

        Returns
        -------
        out : str
            The formatted summary with tabs expanded to 8 columns.
        """
        # Each entry: [accessor method name, display label, 'is exact' flag].
        fields = [
            ['size', 'Length', 'Yes'],
            ['min', 'Min', 'Yes'],
            ['max', 'Max', 'Yes'],
            ['mean', 'Mean', 'Yes'],
            ['sum', 'Sum', 'Yes'],
            ['var', 'Variance', 'Yes'],
            ['std', 'Standard Deviation', 'Yes'],
            ['num_undefined', '# Missing Values', 'Yes'],
            ['num_unique', '# unique values', 'No'],
        ]

        s = '\n'
        result = []
        for method_name, label, is_exact in fields:
            try:
                method_to_call = getattr(self, method_name)
                result.append([label, str(method_to_call()), is_exact])
            except Exception:
                # Best-effort: skip any statistic that cannot be computed.
                pass
        sf = SArray(result).unpack(column_name_prefix="")
        # NOTE(review): this relies on rename() (and add_column() below)
        # mutating the SFrame in place -- confirm against the SFrame
        # version in use.
        sf.rename({'0': 'item', '1': 'value', '2': 'is exact'})
        s += sf.__str__(footer=False)
        s += "\n"

        s += "\nMost frequent items:\n"
        frequent = self.frequent_items()
        # .items() is available on Python 2 and Python 3 mappings alike;
        # .iteritems() is Python 2 only.
        sorted_freq = sorted(frequent.items(),
                             key=operator.itemgetter(1),
                             reverse=True)
        if not sorted_freq:
            s += " -- All elements appear with less than 0.01% frequency -- \n"
        else:
            # Keep only the ten entries with the largest values.
            sorted_freq = sorted_freq[:10]
            sf = SFrame()
            sf.add_column(SArray(['count']), 'value')
            for elem in sorted_freq:
                sf.add_column(SArray([elem[1]]), str(elem[0]))
            s += sf.__str__(footer=False) + "\n"
        s += "\n"

        try:
            # Probe before writing the header so that a failure here does
            # not leave a dangling "Quantiles" heading in the output.
            self.quantile(0)
            s += "Quantiles: \n"
            sf = SFrame()
            for q in [0.0, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 1.00]:
                sf.add_column(SArray([self.quantile(q)]),
                              str(int(q * 100)) + '%')
            s += sf.__str__(footer=False) + "\n"
        except Exception:
            pass

        try:
            # Fetch both summaries first: if either call raises, neither
            # header is emitted.
            t_k = self.dict_key_summary()
            t_v = self.dict_value_summary()
            s += "\n******** Dictionary Element Key Summary ********\n"
            s += repr(t_k)
            s += "\n******** Dictionary Element Value Summary ********\n"
            s += repr(t_v) + '\n'
        except Exception:
            pass

        try:
            t_k = self.element_summary()
            s += "\n******** Element Summary ********\n"
            s += repr(t_k) + '\n'
        except Exception:
            pass

        return s.expandtabs(8)