Exemple #1
0
 def test_missing_value_vids(self):
     """Check that ``None`` ids in vertex or edge data are rejected.

     Each scenario builds a graph from data whose id column contains a
     ``None`` entry and then calls ``summary()``; the whole expression is
     expected to raise ``RuntimeError``.
     """
     vertex_data = SFrame()
     vertex_data['vid'] = [1, 2, 3, None]
     edge_data = SFrame()
     edge_data['src'] = [1, 2, 3, None]
     edge_data['dst'] = [4, 4, 4, 4]

     # Missing value in the vertex id column.
     with self.assertRaises(RuntimeError):
         SGraph().add_vertices(vertex_data, 'vid').summary()

     # Missing value in the column used as the edge source.
     with self.assertRaises(RuntimeError):
         SGraph().add_edges(edge_data, 'src', 'dst').summary()

     # Same data with the roles swapped, so the missing value sits in the
     # column used as the edge destination.
     with self.assertRaises(RuntimeError):
         SGraph().add_edges(edge_data, 'dst', 'src').summary()
Exemple #2
0
 def test_select_query_with_same_vertex_edge_field(self):
     """Check field listing and selection when a name exists on both sides.

     The field ``weight`` is added to both the vertices and the edges, so
     it is expected to show up twice in ``get_fields()``, both before and
     after ``select_fields('weight')``.
     """
     g = SGraph(
         SFrame({'__id': range(10)}),
         SFrame({'__src_id': range(10), '__dst_id': range(1, 11)}))

     # 'weight' is shared; 'v' and 'e' are unique to vertices and edges.
     for vertex_field in ('weight', 'v'):
         g.vertices[vertex_field] = 0
     for edge_field in ('weight', 'e'):
         g.edges[edge_field] = 0

     all_fields = [
         'v', 'e', 'weight', 'weight', '__id', '__src_id', '__dst_id']
     self.assertItemsEqual(g.get_fields(), all_fields)

     # Selecting 'weight' keeps both copies plus the id fields.
     selected = g.select_fields('weight')
     kept_fields = ['weight', 'weight', '__id', '__src_id', '__dst_id']
     self.assertItemsEqual(selected.get_fields(), kept_fields)
Exemple #3
0
    def test_sframe_le_append_skip_row_bug_is_fixed(self):
        """
        Regression test for an SFrame lazy-evaluation bug.

        It lives with the SGraph tests because the only known repro goes
        through SGraph.

        The bug shows up when the SFrame has lazy_append: skip_rows is not
        applied correctly when passing through the logical filter, which
        leaves the un-materialized edge SFrame in a bad state.

        Keep this test until a cleaner repro is found.
        """
        n = 12  # smallest n to repro the le_append bug

        # A graph with edge i -> i + 1
        empty_graph = SGraph()
        edge_list = SFrame({
            'src': range(n),
            'dst': range(1, n + 1)
        })
        g = empty_graph.add_edges(edge_list, 'src', 'dst')

        # Two views of the same edges: one left lazy, one forced to
        # materialize before filtering.
        lazy_edges = g.get_edges()
        eager_edges = g.get_edges()
        eager_edges.__materialize__()

        # Filtering on the last destination id must give the same rows
        # whether or not the frame was materialized first.
        lazy_rows = lazy_edges[lazy_edges['__dst_id'] == n].to_dataframe()
        eager_rows = eager_edges[eager_edges['__dst_id'] == n].to_dataframe()
        assert_frame_equal(lazy_rows, eager_rows)
Exemple #4
0
 def test_graph_constructor(self):
     """Exercise the ``SGraph`` constructor with valid and invalid input.

     A graph rebuilt from another graph's ``vertices`` and ``edges`` must
     hold the same data, and constructing a graph from the raw fixture
     frames with missing or mismatched id field names must raise
     ``ValueError``.
     """
     def as_sorted_frame(sf, keys):
         # Row order is not compared; sort and reindex before diffing.
         return sf.to_dataframe().sort(keys).reset_index(drop=True)

     g = SGraph().add_vertices(self.vertices, 'vid')
     g = g.add_edges(self.edges, 'src_id', 'dst_id')
     g2 = SGraph(g.vertices, g.edges)
     # Flip around src and dst. Only construction is exercised here; the
     # result is not asserted on.
     g3 = SGraph(g.vertices,
                 g.edges,
                 src_field="__dst_id",
                 dst_field="__src_id")

     assert_frame_equal(
         as_sorted_frame(g.vertices, '__id'),
         as_sorted_frame(g2.vertices, '__id'))
     edge_keys = ['__src_id', '__dst_id']
     assert_frame_equal(
         as_sorted_frame(g.edges, edge_keys),
         as_sorted_frame(g2.edges, edge_keys))

     # Default field names with the raw fixture frames.
     with self.assertRaises(ValueError):
         SGraph(SFrame(self.vertices), SFrame(self.edges))

     # Positional field names passed with the raw fixture frames.
     with self.assertRaises(ValueError):
         SGraph(SFrame(self.vertices), SFrame(self.edges),
                'vid', '__src_id', '__dst_id')

     # Explicit ``vid_field=None``.
     with self.assertRaises(ValueError):
         SGraph(SFrame(self.vertices),
                SFrame(self.edges),
                vid_field=None,
                src_field='src_id',
                dst_field='dst_id')
    def get(self, field):
        """
        Look up the value stored under a queryable field.

        The field must be one of the names reported by ``list_fields()``.
        Item access and ``get`` are interchangeable:

        >>> out = m['graph']      # m is a trained graph analytics model
        >>> out = m.get('graph')  # equivalent to previous line

        Parameters
        ----------
        field : string
            Name of the field to be retrieved.

        Returns
        -------
        out : value
            The current value of the requested field. A value whose type
            is ``UnityGraphProxy`` is wrapped in an ``SGraph`` and one whose
            type is ``UnitySFrameProxy`` is wrapped in an ``SFrame``; any
            other value is returned unchanged.

        Raises
        ------
        KeyError
            If ``field`` is not among ``list_fields()``.

        See Also
        --------
        list_fields

        Examples
        --------
        >>> g = m.get('graph')
        """
        _mt._get_metric_tracker().track('toolkit.graph_analytics.get')

        # Reject unknown names first, listing the valid ones in the error.
        if field not in self.list_fields():
            raise KeyError(
                'Key \"%s\" not in model. Available fields are %s.' %
                (field, ', '.join(self.list_fields())))

        value = self.__proxy__.get(field)
        value_type = type(value)
        # Proxy objects are wrapped in their user-facing classes.
        if value_type == UnityGraphProxy:
            return SGraph(_proxy=value)
        if value_type == UnitySFrameProxy:
            return SFrame(_proxy=value)
        return value
Exemple #6
0
 def set_edges_empty(g):
     """Assign a newly constructed, empty ``SFrame`` to ``g.edges``."""
     empty_edges = SFrame()
     g.edges = empty_edges
Exemple #7
0
 def set_vertices_empty(g):
     """Assign a newly constructed, empty ``SFrame`` to ``g.vertices``."""
     empty_vertices = SFrame()
     g.vertices = empty_vertices
Exemple #8
0
    def __repr__(self):
        """
        Emits a brief summary of all the statistics as a string.

        The summary is assembled from several sections: a table of scalar
        statistics, the most frequent items, quantiles, and (when the
        corresponding calls succeed) dictionary key/value summaries and an
        element summary. Every section after the frequent items is
        best-effort: if the underlying call raises an ``Exception`` the
        section is silently left out.

        Returns
        -------
        out : str
            The formatted summary, with tabs expanded to 8 columns.
        """
        # (method name on self, display label, value of the 'is exact' column)
        fields = [['size', 'Length', 'Yes'], ['min', 'Min', 'Yes'],
                  ['max', 'Max', 'Yes'], ['mean', 'Mean', 'Yes'],
                  ['sum', 'Sum', 'Yes'], ['var', 'Variance', 'Yes'],
                  ['std', 'Standard Deviation', 'Yes'],
                  ['num_undefined', '# Missing Values', 'Yes'],
                  ['num_unique', '# unique values', 'No']]

        s = '\n'
        result = []
        for method_name, label, is_exact in fields:
            try:
                method_to_call = getattr(self, method_name)
                result.append([label, str(method_to_call()), is_exact])
            except Exception:
                # Statistic unavailable: leave its row out. KeyboardInterrupt
                # and SystemExit are deliberately allowed to propagate.
                pass
        sf = SArray(result).unpack(column_name_prefix="")
        # NOTE(review): this relies on rename() modifying sf in place;
        # confirm against the SFrame version in use.
        sf.rename({'0': 'item', '1': 'value', '2': 'is exact'})
        s += sf.__str__(footer=False)
        s += "\n"

        s += "\nMost frequent items:\n"
        frequent = self.frequent_items()
        # items() rather than iteritems() so this runs on Python 2 and 3.
        sorted_freq = sorted(frequent.items(),
                             key=operator.itemgetter(1),
                             reverse=True)
        if not sorted_freq:
            s += " -- All elements appear with less than 0.01% frequency -- \n"
        else:
            # Show at most the ten most frequent items, one column per item.
            sf = SFrame()
            sf.add_column(SArray(['count']), 'value')
            for item, count in sorted_freq[:10]:
                sf.add_column(SArray([count]), str(item))
            s += sf.__str__(footer=False) + "\n"
        s += "\n"

        try:
            # Probe call: if quantiles are unavailable this raises before
            # the section header is written.
            self.quantile(0)
            s += "Quantiles: \n"
            sf = SFrame()
            for q in [0.0, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 1.00]:
                sf.add_column(SArray([self.quantile(q)]),
                              str(int(q * 100)) + '%')
            s += sf.__str__(footer=False) + "\n"
        except Exception:
            pass

        try:
            # Fetch both summaries before writing anything so a failure in
            # either leaves the output without a partial section.
            t_k = self.dict_key_summary()
            t_v = self.dict_value_summary()
            s += "\n******** Dictionary Element Key Summary ********\n"
            s += t_k.__repr__()
            s += "\n******** Dictionary Element Value Summary ********\n"
            s += t_v.__repr__() + '\n'
        except Exception:
            pass

        try:
            t_k = self.element_summary()
            s += "\n******** Element Summary ********\n"
            s += t_k.__repr__() + '\n'
        except Exception:
            pass

        return s.expandtabs(8)