def test_save(self):
    """
    Saving an XArray without a format writes a '_SUCCESS' marker under the path.
    """
    t = XArray([1, 2, 3])
    path = '{}/tmp/array-csv'.format(hdfs_prefix)
    t.save(path)
    try:
        success_path = os.path.join(path, '_SUCCESS')
        self.assertTrue(fileio.is_file(success_path))
    finally:
        # Always remove the output, even when the assertion fails: the same
        # path is used by test_save_format, so leftovers would leak into it.
        fileio.delete(path)
def test_save_format(self):
    """
    Saving an XArray with format='csv' writes one element per line, in order.
    """
    t = XArray([1, 2, 3])
    path = '{}/tmp/array-csv'.format(hdfs_prefix)
    t.save(path, format='csv')
    try:
        with fileio.open_file(path) as f:
            for expected in ('1', '2', '3'):
                self.assertEqual(expected, f.readline().strip())
    finally:
        # Always remove the output, even when an assertion fails: the same
        # path is used by test_save, so leftovers would leak into it.
        fileio.delete(path)
def user_features(self):
    """
    Return the user features of the underlying model.

    The result of the model's userFeatures() call is wrapped in an XArray
    (created with `list` passed as the type argument).
    """
    features_rdd = self.model.userFeatures()
    return XArray.from_rdd(features_rdd, list)
def item_features(self):
    """
    Return the item features of the underlying model.

    The result of the model's productFeatures() call is wrapped in an XArray
    (created with `list` passed as the type argument).
    """
    features_rdd = self.model.productFeatures()
    return XArray.from_rdd(features_rdd, list)
def _base_predict(self, data):
    """
    Run the underlying model's predict on features built from `data`.

    `data` is first converted with self.make_features().  The outcome
    depends on what that conversion produces:

    * a DenseVector: the model's prediction for it is returned directly;
    * an XArray whose dtype is a DenseVector subclass: the model predicts
      on the array's Spark RDD and the result is returned as an XArray
      created with `float` as the type argument.

    Raises
    ------
    TypeError
        If the features are neither of the above.
    """
    feats = self.make_features(data)

    # Single item.
    if isinstance(feats, DenseVector):
        return self.model.predict(feats)

    # Collection of items: anything else is rejected.
    is_vector_array = (isinstance(feats, XArray) and
                       issubclass(feats.dtype(), DenseVector))
    if not is_vector_array:
        raise TypeError('must pass a DenseVector or XArray of DenseVector')

    predictions = self.model.predict(feats.to_spark_rdd())
    return XArray.from_rdd(predictions, float)
def crossproduct(d):
    """
    Build the crossproduct of all provided options.

    Parameters
    ----------
    d : dict
        Each key is the name of an option, and each value is a list
        of the possible values for that option.

    Returns
    -------
    out
        The result of unpacking an XArray of dicts (one dict per unique
        combination of values) with an empty column name prefix: one
        column for each key in the provided dictionary, and one row for
        each combination.

    Example
    -------
    settings = {'argument_1':[0, 1], 'argument_2':['a', 'b', 'c']}
    print crossproduct(settings)
    +------------+------------+
    | argument_2 | argument_1 |
    +------------+------------+
    |     a      |     0      |
    |     a      |     1      |
    |     b      |     0      |
    |     b      |     1      |
    |     c      |     0      |
    |     c      |     1      |
    +------------+------------+
    [6 rows x 2 columns]
    """
    from xframes import XArray

    option_names = d.keys()
    # One tuple per combination, values ordered like the dict's keys.
    combinations = itertools.product(*d.values())
    rows = [dict(zip(option_names, combo)) for combo in combinations]
    return XArray(rows).unpack(column_name_prefix='')
def test_construct_local_file_dict(self):
    """
    Construct an XArray from the dict test file; check length, dtype and first element.
    """
    path = '{}/user/xpatterns/files/test-array-dict'.format(hdfs_prefix)
    arr = XArray(path)
    self.assertEqualLen(4, arr)
    self.assertIs(dict, arr.dtype())
    first = arr[0]
    self.assertDictEqual({1: 'a', 2: 'b'}, first)
def test_construct_local_file_list(self):
    """
    Construct an XArray from the list test file; check length, dtype and first element.
    """
    path = '{}/user/xpatterns/files/test-array-list'.format(hdfs_prefix)
    arr = XArray(path)
    self.assertEqualLen(4, arr)
    self.assertIs(list, arr.dtype())
    first = arr[0]
    self.assertListEqual([1, 2], first)
def test_construct_local_file_str(self):
    """
    Construct an XArray from the str test file; check length, dtype and first element.
    """
    path = '{}/user/xpatterns/files/test-array-str'.format(hdfs_prefix)
    arr = XArray(path)
    self.assertEqualLen(4, arr)
    self.assertIs(str, arr.dtype())
    first = arr[0]
    self.assertEqual('a', first)
def test_construct_local_file_float(self):
    """
    Construct an XArray from the float test file; check length, dtype and first element.
    """
    path = '{}/user/xpatterns/files/test-array-float'.format(hdfs_prefix)
    arr = XArray(path)
    self.assertEqualLen(4, arr)
    self.assertIs(float, arr.dtype())
    first = arr[0]
    self.assertEqual(1.0, first)
def test_construct_file_int(self):
    """
    Construct an XArray from the int test file; check length, dtype and first element.
    """
    arr = XArray('{}/user/xpatterns/files/test-array-int'.format(hdfs_prefix))
    self.assertEqualLen(4, arr)
    self.assertIs(int, arr.dtype())
    first = arr[0]
    self.assertEqual(1, first)