Example #1
 def test_set_specification(self):
     """Test if the set-specs subcommand of the CLI worls properly."""
     org_specs = pr.get_schema_specs("pysemantic")
     cmd = ['semantic', 'set-specs', 'pysemantic', '--dataset', 'iris',
            '--dlm', '|']
     try:
         subprocess.check_call(cmd, env=self.testenv)
         new_specs = pr.get_schema_specs("pysemantic", "iris")
         self.assertEqual(new_specs['delimiter'], '|')
     finally:
         for dataset_name, specs in org_specs.iteritems():
             pr.set_schema_specs("pysemantic", dataset_name, **specs)
 def test_random_row_selection(self):
     """Check if a random selection of rows can be loaded from the dataset."""
     iris_specs = pr.get_schema_specs("pysemantic", "iris")
     iris_specs['nrows'] = dict(random=True, count=50)
     project = pr.Project(schema={'iris': iris_specs})
     loaded = project.load_dataset('iris')
     self.assertEqual(loaded.shape[0], 50)
     ideal_ix = np.arange(50)
     self.assertFalse(np.all(loaded.index.values == ideal_ix))
Example #5
 def test_random_row_selection_within_range(self):
     """Check if randomly selecting rows within a range works."""
     iris_specs = pr.get_schema_specs("pysemantic", "iris")
     iris_specs['nrows'] = {'range': [25, 75], 'count': 10, 'random': True}
     project = pr.Project(schema={'iris': iris_specs})
     loaded = project.load_dataset('iris')
     self.assertEqual(loaded.shape[0], 10)
     ix = loaded.index.values
     self.assertTrue(ix.max() <= 50)
Example #6
 def test_row_selection_random_range(self):
     """Check if a range of rows can be selected from the dataset."""
     iris_specs = pr.get_schema_specs("pysemantic", "iris")
     iris_specs['nrows'] = {'range': [25, 75], 'random': True}
     project = pr.Project(schema={'iris': iris_specs})
     loaded = project.load_dataset('iris')
     self.assertEqual(loaded.shape[0], 50)
     ideal_ix = np.arange(50)
     self.assertFalse(np.all(loaded.index.values == ideal_ix))
 def test_nrows_callable(self):
     """Check if specifying the nrows argument as a callable works."""
     nrows = lambda x: np.remainder(x, 2) == 0
     iris_specs = pr.get_schema_specs("pysemantic", "iris")
     iris_specs['nrows'] = nrows
     project = pr.Project(schema={'iris': iris_specs})
     loaded = project.load_dataset('iris')
     self.assertEqual(loaded.shape[0], 75)
     ideal_ix = np.arange(150, step=2)
     np.testing.assert_allclose(ideal_ix, loaded.index.values)
 def test_random_row_selection_within_range(self):
     """Check if randomly selecting rows within a range works."""
     iris_specs = pr.get_schema_specs("pysemantic", "iris")
     iris_specs['nrows'] = {'range': [25, 75], 'count': 10, 'random': True}
     iris_specs['header'] = 0
     del iris_specs['dtypes']
     iris_specs['column_names'] = colnames(iris_specs['path'])
     project = pr.Project(schema={'iris': iris_specs})
     loaded = project.load_dataset('iris')
     self.assertEqual(loaded.shape[0], 10)
     ix = loaded.index.values
     self.assertTrue(ix.max() <= 50)
 def test_row_selection_range(self):
     """Check if a range of rows can be selected from the dataset."""
     iris_specs = pr.get_schema_specs("pysemantic", "iris")
     iris_specs['nrows'] = {'range': [25, 75]}
     iris_specs['header'] = 0
     del iris_specs['dtypes']
     iris_specs['column_names'] = colnames(iris_specs['path'])
     project = pr.Project(schema={'iris': iris_specs})
     loaded = project.load_dataset('iris')
     self.assertEqual(loaded.shape[0], 50)
     ideal_ix = np.arange(50)
     self.assertTrue(np.allclose(loaded.index.values, ideal_ix))
Example #11
 def test_row_selection_random_range(self):
     """Check if a range of rows can be selected from the dataset."""
     iris_specs = pr.get_schema_specs("pysemantic", "iris")
     iris_specs['nrows'] = {'range': [25, 75], 'random': True}
     iris_specs['header'] = 0
     del iris_specs['dtypes']
     iris_specs['column_names'] = colnames(iris_specs['path'])
     project = pr.Project(schema={'iris': iris_specs})
     loaded = project.load_dataset('iris')
     self.assertEqual(loaded.shape[0], 50)
     ideal_ix = np.arange(50)
     self.assertFalse(np.all(loaded.index.values == ideal_ix))
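A note on the schemas exercised above: across these examples the `nrows` entry of a dataset's schema takes several forms, namely a dict with `random` and `count`, a dict with a `range` (optionally combined with `random` and `count`), or a callable used as a row filter. Below is a minimal sketch of supplying such specs outside the test harness; it assumes the module-level helpers live in `pysemantic.project` (as the tests' `pr` alias suggests) and that a project named "pysemantic" with an "iris" dataset is already registered, as in the test fixtures.

    import pysemantic.project as pr

    # Start from the specs already registered for the iris dataset.
    iris_specs = pr.get_schema_specs("pysemantic", "iris")

    # 1. Fifty rows picked at random from anywhere in the file.
    iris_specs['nrows'] = {'random': True, 'count': 50}

    # 2. Ten rows picked at random, restricted to rows 25-75 of the file.
    # iris_specs['nrows'] = {'range': [25, 75], 'count': 10, 'random': True}

    # 3. Keep only rows whose index satisfies a callable predicate.
    # iris_specs['nrows'] = lambda ix: ix % 2 == 0

    # Build a Project from the modified schema and load the dataset.
    project = pr.Project(schema={'iris': iris_specs})
    loaded = project.load_dataset('iris')
    print(loaded.shape)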
Example #15
 def test_add_dataset(self):
     """Test if the add-dataset subcommand adds datasets to projects."""
     tempdir = tempfile.mkdtemp()
     outfile = op.join(tempdir, "testdata.csv")
     dframe = pd.DataFrame(np.random.random((10, 2)), columns=['a', 'b'])
     dframe.to_csv(outfile, index=False)
     cmd = ("semantic add-dataset testdata --project pysemantic --path {}"
            " --dlm ,")
     cmd = cmd.format(outfile).split(" ")
     try:
         subprocess.check_call(cmd, env=self.testenv)
         _pr = pr.Project("pysemantic")
         self.assertIn("testdata", _pr.datasets)
         specs = dict(path=outfile, delimiter=',')
         actual = pr.get_schema_specs("pysemantic", "testdata")
         self.assertKwargsEqual(specs, actual)
     finally:
         pr.remove_dataset("pysemantic", "testdata")
         shutil.rmtree(tempdir)
Example #16
 def test_init_project_yaml_dump(self):
     """Test initialization of Project class with the raw yaml dump."""
     project_specs = pr.get_schema_specs('pysemantic')
     project = pr.Project(schema=project_specs)
     loaded = project.load_datasets()
     self.assertItemsEqual(loaded.keys(),
                           ('iris', 'person_activity', 'multi_iris',
                            'bad_iris', 'random_row_iris'))
     dframe = pd.read_csv(**self.expected_specs['iris'])
     self.assertDataFrameEqual(loaded['iris'], dframe)
     dframe = pd.read_csv(**self.expected_specs['person_activity'])
     self.assertDataFrameEqual(loaded['person_activity'], dframe)
     dframes = [
         pd.read_csv(**args) for args in self.expected_specs['multi_iris']
     ]
     dframes = [x.drop_duplicates() for x in dframes]
     dframe = pd.concat(dframes)
     dframe.set_index(np.arange(dframe.shape[0]), inplace=True)
     self.assertDataFrameEqual(loaded['multi_iris'], dframe)
Example #17
 def test_add_dataset(self):
     """Test if adding datasets programmatically works fine."""
     tempdir = tempfile.mkdtemp()
     outpath = op.join(tempdir, "foo.csv")
     dframe = pd.DataFrame(np.random.random((10, 10)))
     dframe.to_csv(outpath, index=False)
     specs = dict(path=outpath, delimiter=',', nrows=10)
     try:
         pr.add_dataset("pysemantic", "sample_dataset", specs)
         parsed_specs = pr.get_schema_specs("pysemantic", "sample_dataset")
         self.assertKwargsEqual(specs, parsed_specs)
     finally:
         shutil.rmtree(tempdir)
         with open(TEST_DATA_DICT, "r") as fileobj:
             test_specs = yaml.load(fileobj, Loader=Loader)
         del test_specs['sample_dataset']
         with open(TEST_DATA_DICT, "w") as fileobj:
             yaml.dump(test_specs,
                       fileobj,
                       Dumper=Dumper,
                       default_flow_style=False)
Example #18
 def test_get_schema_spec(self):
     """Test the module level function to get schema specifications."""
     specs = pr.get_schema_specs("pysemantic")
     self.assertKwargsEqual(specs, self.data_specs)
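A consolidated sketch of the add/inspect/load/remove cycle that Examples #15 through #18 test, using only calls that appear in the snippets above. The path and dataset name here are illustrative, and the module-level helpers are again assumed to come from `pysemantic.project`.

    import os.path as op
    import tempfile

    import numpy as np
    import pandas as pd
    import pysemantic.project as pr

    # Write a small CSV that will be registered as a new dataset.
    tempdir = tempfile.mkdtemp()
    outpath = op.join(tempdir, "sample.csv")
    dframe = pd.DataFrame(np.random.random((10, 2)), columns=['a', 'b'])
    dframe.to_csv(outpath, index=False)

    # Register it with an existing project, then read the stored specs back.
    pr.add_dataset("pysemantic", "sample_dataset",
                   {'path': outpath, 'delimiter': ','})
    print(pr.get_schema_specs("pysemantic", "sample_dataset"))

    # Load the dataset through the Project interface and clean up afterwards.
    project = pr.Project("pysemantic")
    loaded = project.load_dataset("sample_dataset")
    print(loaded.shape)
    pr.remove_dataset("pysemantic", "sample_dataset")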