class QueryTest(unittest.TestCase):
    """Exercise QueryCondition against the census fixture table."""

    def setUp(self):
        # Build a fresh 'census' table from the CSV data and its JSON schema.
        self.df = pd.read_csv(RSC_DIR + '/census.csv')
        with open(RSC_DIR + '/schema_census') as schema_file:
            raw_schema = json.load(schema_file)
        self.schema = OrderedDict(attributes=raw_schema['attributes'],
                                  index=raw_schema['index'])
        census = Table('census', self.schema)
        self.table = census.data(self.df)

    def tearDown(self):
        # NOTE(review): presumably `names` tracks registered object names and
        # has to be emptied so the next test can reuse 'census' -- confirm.
        names.clear()

    def testCreation(self):
        # Without a query the condition is empty and includes every item.
        unrestricted = QueryCondition(data=self.table)
        self.assertEqual(unrestricted.query, {})
        self.assertEqual(set(unrestricted.included_items()),
                         set(self.table.index_items()))

        # A query on a single state includes only that state.
        only_ny = QueryCondition(data=self.table, query={'State': 'NY'})
        self.assertEqual(set(only_ny.included_items()), {'NY'})

    def testIncludedItems(self):
        unrestricted = QueryCondition(data=self.table)
        included = set(unrestricted.included_items())
        self.assertEqual(included, set(self.table.index_items()))

    def testExcludedItems(self):
        # Nothing is excluded when there is no query.
        unrestricted = QueryCondition(data=self.table)
        self.assertItemsEqual(unrestricted.excluded_items(), [])

        # With a query on NY, everything except NY is excluded.
        only_ny = QueryCondition(data=self.table, query={'State': 'NY'})
        expected = set(unrestricted.included_items()).difference(['NY'])
        self.assertEqual(set(only_ny.excluded_items()), expected)
def testRowCount(self):
    # Row counts shrink as finds are chained: 51 -> 2 -> 1.
    census = Table('census', self.schema).data(self.df)
    self.assertEqual(census.row_count(), 51)

    ny_or_dc = {'$or': [{'State': 'NY'}, {'State': 'DC'}]}
    two_states = census.find(ny_or_dc)
    self.assertEqual(two_states.row_count(), 2)

    just_ny = two_states.find({'State': 'NY'})
    self.assertEqual(just_ny.row_count(), 1)
def testColumnNames(self):
    # The full table exposes 22 columns; projections narrow the list.
    census = Table('census', self.schema).data(self.df)
    all_columns = census.column_names()
    self.assertEqual(len(all_columns), 22)

    two_columns = census.find({}, {'Information': True, 'State': True})
    self.assertEqual(two_columns.column_names(), ['Information', 'State'])

    one_column = two_columns.find({}, {'State': True})
    self.assertEqual(one_column.column_names(), ['State'])
def testColumnCount(self):
    # Column counts follow the projection: 22 -> 2 -> 1.
    census = Table('census', self.schema).data(self.df)
    self.assertEqual(census.column_count(), 22)

    two_columns = census.find({}, {'Information': True, 'State': True})
    self.assertEqual(two_columns.column_count(), 2)

    one_column = two_columns.find({}, {'State': True})
    self.assertEqual(one_column.column_count(), 1)
def testRemoveEvent(self):
    # Removing rows must trigger the one-shot 'remove' subscription.
    # NOTE(review): relies on self.callback (defined outside this block)
    # setting self.callback_executed to True -- confirm.
    census = Table('census', self.schema).data(self.df)
    dc_rows = {'State': 'DC'}
    census.subscribe_once('remove', self.callback)
    self.callback_executed = False

    census.remove(dc_rows)

    self.assertTrue(self.callback_executed)
def testIndexItems(self):
    # A second 'DC' row is inserted; index_items() must still report
    # only two distinct index values for the NY/DC view.
    census = Table('census', self.schema).data(self.df)
    census.insert({'State': 'DC', 'life_meaning': 42})

    ny_or_dc = census.find({'$or': [{'State': 'NY'}, {'State': 'DC'}]})
    self.assertIsInstance(ny_or_dc, TableView)

    index_values = ny_or_dc.index_items()
    self.assertEqual(len(index_values), 2)
    allowed = ['DC', 'NY']
    for state in index_values:
        self.assertIn(state, allowed)
class TestItemExplicitSieve(unittest.TestCase):
    """Exercise ItemExplicitSieve, whose index is derived from a query."""

    def setUp(self):
        # Build a fresh 'census' table from the CSV data and its JSON schema.
        self.df = pd.read_csv(RSC_DIR+'/census.csv')
        with open(RSC_DIR+'/schema_census') as schema_file:
            raw_schema = json.load(schema_file)
        self.schema = OrderedDict(attributes=raw_schema['attributes'],
                                  index=raw_schema['index'])
        census = Table('census', self.schema)
        self.table = census.data(self.df)

    def tearDown(self):
        # NOTE(review): presumably `names` tracks registered object names and
        # has to be emptied so the next test can reuse 'census' -- confirm.
        names.clear()

    def testCreate(self):
        sieve = ItemExplicitSieve(self.table, {})
        self.assertEqual(sieve.query, {})

    def testDomain(self):
        sieve = ItemExplicitSieve(self.table, {})
        self.assertSetEqual(sieve.domain, set(self.table.index_items()))

    def testIndex(self):
        # An empty query selects everything; assigning a query narrows it.
        sieve = ItemExplicitSieve(self.table, {})
        self.assertEqual(sieve.index, set(self.table.index_items()))

        sieve.query = {'Information': {'$gt': 200000}}
        self.assertEqual(sieve.index, {'CA', 'TX', 'NY'})

    def testQuery(self):
        sieve = ItemExplicitSieve(self.table, {'Information': {'$gt': 200000}})
        self.assertEqual(sieve.query, {'Information': {'$gt': 200000}})

    def testUnion(self):
        # union() combines the current and the new query under '$or'.
        sieve = ItemExplicitSieve(self.table, {'Information': {'$gt': 200000}})
        sieve.union({'State': {'$in': ['NY', 'DC']}})

        expected_query = {
            '$or': [
                {'Information': {'$gt': 200000}},
                {'State': {'$in': ['NY', 'DC']}},
            ]
        }
        self.assertEqual(sieve.query, expected_query)
        self.assertEqual(sieve.index, {'CA', 'TX', 'NY', 'DC'})

    def testSubstract(self):
        # substract() keeps the current query and negates the new one.
        sieve = ItemExplicitSieve(self.table, {'Information': {'$gt': 200000}})
        sieve.substract({'State': {'$in': ['NY', 'DC']}})

        expected_query = {
            '$and': [
                {'Information': {'$gt': 200000}},
                {'$nor': [{'State': {'$in': ['NY', 'DC']}}]},
            ]
        }
        self.assertEqual(sieve.query, expected_query)
        self.assertEqual(sieve.index, {'CA', 'TX'})

    def testToggle(self):
        # toggle() wraps the current query in '$nor'.
        sieve = ItemExplicitSieve(self.table, {'Information': {'$lt': 200000}})
        sieve.toggle()

        self.assertEqual(sieve.query,
                         {'$nor': [{'Information': {'$lt': 200000}}]})
        self.assertEqual(sieve.index, {'CA', 'TX', 'NY'})

    def testIntersect(self):
        # intersect() combines the current and the new query under '$and'.
        sieve = ItemExplicitSieve(self.table, {'Information': {'$gt': 200000}})
        sieve.intersect({'State': {'$in': ['NY', 'DC']}})

        self.assertEqual(sieve.index, {'NY'})
        expected_query = {
            '$and': [
                {'Information': {'$gt': 200000}},
                {'State': {'$in': ['NY', 'DC']}},
            ]
        }
        self.assertEqual(sieve.query, expected_query)
def read_csv(table_name, filepath, schema=None, fillna="NaN", *args, **kwargs):
    '''
    Create a table with the data from a CSV file.

    :param str table_name: The name you want to give to the new table
    :param str filepath: The path where the csv file is located. The string
        could be a URL. Valid URL schemes include http, ftp, s3, and file.
        For file URLs, a host is expected. For instance, a local file could
        be file://localhost/path/to/table.csv
    :param schema: The schema to use in the creation of the table. It is
        handed to ``Table`` as given when it is None or already a dict
        (e.g. an OrderedDict). Otherwise it must be a string holding either
        the local filepath of a json file that contains the schema
        information or the schema's json representation.
    :param fillna: Value used to replace the missing (NA/NaN) cells of the
        loaded data. Defaults to the string "NaN".
    :return: Table

    This function is a simple wrapper for the `pandas.read_csv` function, and
    so any extra positional or keyword arguments are passed through to
    `pandas.read_csv`. Common parameters are:

    sep : string, default ','
        Delimiter to use. If sep is None, will try to automatically
        determine this. Regular expressions are accepted.
    names : array-like
        List of column names to use. If file contains no header row, then
        you should explicitly pass header=None
    prefix : string or None (default)
        Prefix to add to column numbers when no header, e.g 'X' for
        X0, X1, ...
    na_values : list-like or dict, default None
        Additional strings to recognize as NA/NaN. If dict passed, specific
        per-column NA values
    true_values : list
        Values to consider as True
    false_values : list
        Values to consider as False
    keep_default_na : bool, default True
        If na_values are specified and keep_default_na is False the default
        NaN values are overridden, otherwise they're appended to
    '''
    # An already-parsed schema needs no decoding; only strings are resolved.
    if schema is not None and not isinstance(schema, dict):
        if os.path.exists(schema):
            with open(schema) as f:
                schema = json.load(f, object_pairs_hook=collections.OrderedDict)
        else:
            # Assume the string is the json representation
            schema = json.loads(schema,
                                object_pairs_hook=collections.OrderedDict)

    df = pd.read_csv(filepath, *args, **kwargs)
    df.fillna(fillna, inplace=True)

    table = Table(name=table_name, schema=schema)
    table.data(df)
    return table
def testMultiFind(self):
    # A find on a view must only return rows already present in that view.
    census = Table('census', self.schema).data(self.df)
    ny_or_dc = census.find({'$or': [{'State': 'NY'}, {'State': 'DC'}]})
    over_200k = ny_or_dc.find({'Information': {'$gt': 200000}})
    self.assertIsInstance(ny_or_dc, TableView)

    allowed = ['DC', 'NY']
    for row in ny_or_dc.get_data():
        self.assertIn(row['State'], allowed)

    # DC does not pass the 'Information' > 200000 filter.
    for row in over_200k.get_data():
        self.assertNotIn(row['State'], ['DC'])
def setUp(self):
    # Build a fresh 'census' table from the CSV data and its JSON schema.
    self.df = pd.read_csv(RSC_DIR + '/census.csv')
    with open(RSC_DIR + '/schema_census') as schema_file:
        raw_schema = json.load(schema_file)
    self.schema = OrderedDict(attributes=raw_schema['attributes'],
                              index=raw_schema['index'])
    census = Table('census', self.schema)
    self.table = census.data(self.df)
def testDistinct(self):
    # A duplicated 'DC' row is inserted so distinct() has something to fold.
    census = Table('census', self.schema).data(self.df)
    census.insert({'State': 'DC', 'life_meaning': 42})
    three_states = census.find(
        {'$or': [{'State': 'NY'}, {'State': 'DC'}, {'State': 'CA'}]})

    # Plain call: the distinct values themselves.
    state_values = three_states.distinct('State')
    self.assertEqual(len(state_values), 3)
    allowed = ['DC', 'NY', 'CA']
    for state in state_values:
        self.assertIn(state, allowed)

    # as_view=True: the distinct values wrapped in a TableView.
    states_view = three_states.distinct('State', as_view=True)
    self.assertIsInstance(states_view, TableView)
    columns = states_view.get_data(outtype='c_list')
    self.assertEqual(columns, {'State': ['NY', 'DC', 'CA']})

    # distinct() on a further-filtered view.
    over_200k = three_states.find({'Information': {'$gt': 200000}})
    filtered_states = over_200k.distinct('State')
    self.assertEqual(filtered_states, ['NY', 'CA'])
class TestItemImplicitSieve(unittest.TestCase):
    """Exercise ItemImplicitSieve, whose index is an explicit item list."""

    def setUp(self):
        # Build a fresh 'census' table from the CSV data and its JSON schema.
        self.df = pd.read_csv(RSC_DIR+'/census.csv')
        with open(RSC_DIR+'/schema_census') as schema_file:
            raw_schema = json.load(schema_file)
        self.schema = OrderedDict(attributes=raw_schema['attributes'],
                                  index=raw_schema['index'])
        census = Table('census', self.schema)
        self.table = census.data(self.df)

    def tearDown(self):
        # NOTE(review): presumably `names` tracks registered object names and
        # has to be emptied so the next test can reuse 'census' -- confirm.
        names.clear()

    def testCreate(self):
        sieve = ItemImplicitSieve(self.table, [])
        self.assertEqual(sieve.index, set())

    def testDomain(self):
        sieve = ItemImplicitSieve(self.table, [])
        self.assertSetEqual(sieve.domain, set(self.table.index_items()))

    def testIndex(self):
        # The index starts empty and reflects whatever is assigned to it.
        sieve = ItemImplicitSieve(self.table, [])
        self.assertEqual(sieve.index, set())

        sieve.index = ['DC', 'NY']
        self.assertEqual(sieve.index, {'DC', 'NY'})

    def testQuery(self):
        sieve = ItemImplicitSieve(self.table, ['DC', 'NY'])
        expected = {self.table.index: {'$in': ['NY', 'DC']}}
        self.assertEqual(sieve.query, expected)

    def testUnion(self):
        # Adding items one at a time, then again together, must not
        # change the resulting query.
        sieve = ItemImplicitSieve(self.table, [])
        sieve.union(['DC'])
        sieve.union(['NY'])
        self.assertEqual(sieve.query,
                         {self.table.index: {'$in': ['NY', 'DC']}})

        sieve.union(['DC', 'NY'])
        self.assertEqual(sieve.query,
                         {self.table.index: {'$in': ['NY', 'DC']}})

    def testSubstract(self):
        # Subtracting from an empty index leaves it empty.
        sieve = ItemImplicitSieve(self.table, [])
        sieve.substract(['DC'])
        self.assertEqual(sieve.index, set())

        sieve.index = ['DC', 'NY']
        sieve.substract(['DC'])
        self.assertEqual(sieve.index, {'NY'})

    def testToggle(self):
        # Start with everything but NY and DC; toggling leaves just those.
        sieve = ItemImplicitSieve(self.table, [])
        sieve.index = sieve.domain.difference(['NY', 'DC'])
        sieve.toggle()
        self.assertEqual(sieve.index, {'NY', 'DC'})

    def testIntersect(self):
        sieve = ItemImplicitSieve(self.table, ['NY', 'DC', 'WA'])
        sieve.intersect(['NY', 'DC'])
        self.assertEqual(sieve.index, {'NY', 'DC'})
def setUp(self):
    # Build a fresh 'census' table from the CSV data and its JSON schema.
    self.df = pd.read_csv(RSC_DIR+'/census.csv')
    with open(RSC_DIR+'/schema_census') as schema_file:
        raw_schema = json.load(schema_file)
    self.schema = OrderedDict(attributes=raw_schema['attributes'],
                              index=raw_schema['index'])
    census = Table('census', self.schema)
    self.table = census.data(self.df)

    # Add a categorical column and assign C1..C4 to the index items in
    # round-robin order.
    self.table.add_column('fake_cat', 'CATEGORICAL')
    states = self.table.index_items()
    categories = ['C1', 'C2', 'C3', 'C4']
    for position, state in enumerate(states):
        label = categories[position % len(categories)]
        self.table.update({'State': state}, {'$set': {'fake_cat': label}})
class TestAttributeImplicitSieve(unittest.TestCase):
    """Exercise AttributeImplicitSieve, a sieve over the table's columns."""

    def setUp(self):
        # Build a fresh 'census' table from the CSV data and its JSON schema.
        self.df = pd.read_csv(RSC_DIR+'/census.csv')
        with open(RSC_DIR+'/schema_census') as schema_file:
            raw_schema = json.load(schema_file)
        self.schema = OrderedDict(attributes=raw_schema['attributes'],
                                  index=raw_schema['index'])
        census = Table('census', self.schema)
        self.table = census.data(self.df)

    def tearDown(self):
        # NOTE(review): presumably `names` tracks registered object names and
        # has to be emptied so the next test can reuse 'census' -- confirm.
        names.clear()

    def testDomain(self):
        # The domain of an attribute sieve is the set of column names.
        sieve = AttributeImplicitSieve(self.table, [])
        self.assertSetEqual(sieve.domain, set(self.table.column_names()))

    def testProjection(self):
        # An empty index yields an empty projection; selected attributes
        # show up in the projection mapped to True.
        sieve = AttributeImplicitSieve(self.table, [])
        self.assertEqual(sieve.projection, {})

        sieve.index = ['Information', 'State']
        expected = {u'Information': True, u'State': True}
        self.assertEqual(sieve.projection, expected)
class CategoricalTest(unittest.TestCase):
    """Exercise CategoricalCondition on a synthetic 'fake_cat' column."""

    def setUp(self):
        # Build a fresh 'census' table from the CSV data and its JSON schema.
        self.df = pd.read_csv(RSC_DIR+'/census.csv')
        with open(RSC_DIR+'/schema_census') as schema_file:
            raw_schema = json.load(schema_file)
        self.schema = OrderedDict(attributes=raw_schema['attributes'],
                                  index=raw_schema['index'])
        census = Table('census', self.schema)
        self.table = census.data(self.df)

        # Add a categorical column and assign C1..C4 to the index items in
        # round-robin order.
        self.table.add_column('fake_cat', 'CATEGORICAL')
        states = self.table.index_items()
        categories = ['C1', 'C2', 'C3', 'C4']
        for position, state in enumerate(states):
            label = categories[position % len(categories)]
            self.table.update({'State': state}, {'$set': {'fake_cat': label}})

    def tearDown(self):
        # NOTE(review): presumably `names` tracks registered object names and
        # has to be emptied so the next test can reuse 'census' -- confirm.
        names.clear()

    def testCreation(self):
        # A new condition starts with no categories included.
        condition = CategoricalCondition(data=self.table, attr='fake_cat')
        self.assertEqual(condition.included_categories(), [])

        # Building the condition on 'Information' is rejected.
        with self.assertRaises(NotImplementedError):
            condition = CategoricalCondition(data=self.table,
                                             attr='Information')

    def testAdd(self):
        # add_category accepts a single name or a list; repeats are harmless.
        condition = CategoricalCondition(data=self.table, attr='fake_cat')
        condition.add_category('C1')
        self.assertEqual(set(condition.included_categories()), {'C1'})

        condition.add_category(['C1', 'C3'])
        self.assertEqual(set(condition.included_categories()), {'C1', 'C3'})

    def testRemove(self):
        condition = CategoricalCondition(data=self.table, attr='fake_cat')
        condition.add_category(['C1', 'C3'])
        condition.remove_category('C1')
        self.assertEqual(set(condition.included_categories()), {'C3'})

    def testToggleItem(self):
        # toggle_category flips each named category individually.
        condition = CategoricalCondition(data=self.table, attr='fake_cat')
        condition.add_category(['C1', 'C3'])
        condition.toggle_category('C1')
        condition.toggle_category('C2')
        self.assertEqual(set(condition.included_categories()), {'C3', 'C2'})

        condition.toggle_category(['C2', 'C4'])
        self.assertEqual(set(condition.included_categories()), {'C3', 'C4'})

    def testIncludeAll(self):
        condition = CategoricalCondition(data=self.table, attr='fake_cat')
        condition.include_all()
        self.assertSetEqual(set(condition.included_categories()),
                            set(self.table.distinct('fake_cat')))

    def testExcludeAll(self):
        condition = CategoricalCondition(data=self.table, attr='fake_cat')
        condition.add_category('C1')
        condition.exclude_all()
        self.assertEqual(condition.included_categories(), [])

    def testToggle(self):
        # toggle() inverts the whole selection.
        condition = CategoricalCondition(data=self.table, attr='fake_cat')
        condition.add_category('C1')
        condition.toggle()
        self.assertEqual(set(condition.included_categories()),
                         {'C2', 'C3', 'C4'})

    def testIncludedItems(self):
        condition = CategoricalCondition(data=self.table, attr='fake_cat')
        condition.add_category('C1')
        c1_states = {'AK', 'CA', 'DE', 'IA', 'KS', 'MD', 'MO',
                     'ND', 'NM', 'OK', 'SC', 'UT', 'WI'}
        self.assertSetEqual(set(condition.included_items()), c1_states)

        # Once every category is included, every state is included.
        condition.add_category(['C2', 'C3', 'C4'])
        self.assertSetEqual(set(condition.included_items()),
                            set(self.table.distinct('State')))

    def testExcludedItems(self):
        # With only C1 left out, the excluded items are the C1 states.
        condition = CategoricalCondition(data=self.table, attr='fake_cat')
        condition.add_category(['C2', 'C3', 'C4'])
        c1_states = {'AK', 'CA', 'DE', 'IA', 'KS', 'MD', 'MO',
                     'ND', 'NM', 'OK', 'SC', 'UT', 'WI'}
        self.assertSetEqual(set(condition.excluded_items()), c1_states)

        condition.add_category('C1')
        self.assertSetEqual(set(condition.excluded_items()), set())
class RangeTest(unittest.TestCase):
    """Exercise RangeCondition on the 'Information' column."""

    def setUp(self):
        # Build a fresh 'census' table from the CSV data and its JSON schema.
        self.df = pd.read_csv(RSC_DIR+'/census.csv')
        with open(RSC_DIR+'/schema_census') as schema_file:
            raw_schema = json.load(schema_file)
        self.schema = OrderedDict(attributes=raw_schema['attributes'],
                                  index=raw_schema['index'])
        census = Table('census', self.schema)
        self.table = census.data(self.df)

    def tearDown(self):
        # NOTE(review): presumably `names` tracks registered object names and
        # has to be emptied so the next test can reuse 'census' -- confirm.
        names.clear()

    def testCreation(self):
        # Default construction: the range spans the whole domain.
        full = RangeCondition(data=self.table, attr='Information')
        self.assertEqual(full.domain['min'], full.range['min'])
        self.assertEqual(full.domain['max'], full.range['max'])
        self.assertEqual(full.range['relative_max'], 1)
        self.assertEqual(full.range['relative_min'], 0)
        self.assertEqual(set(full.included_items()),
                         set(self.table.index_items()))

        # Explicit range: the requested minimum is kept, while the maximum
        # ends up equal to the domain maximum.
        ranged = RangeCondition(data=self.table, attr='Information',
                                range=dict(min=250000, max=500000))
        self.assertEqual(ranged.domain['max'], ranged.range['max'])
        self.assertEqual(250000, ranged.range['min'])
        self.assertEqual(ranged.range['relative_max'], 1)
        self.assertNotEqual(ranged.range['relative_min'], 0)
        self.assertEqual(set(ranged.included_items()), {'CA', 'NY'})

        # Explicit domain: the range spans that restricted domain.
        bounded = RangeCondition(data=self.table, attr='Information',
                                 domain=dict(min=250000, max=500000))
        self.assertEqual(bounded.domain['min'], bounded.range['min'])
        self.assertEqual(bounded.domain['max'], bounded.range['max'])
        self.assertEqual(bounded.range['relative_max'], 1)
        self.assertEqual(bounded.range['relative_min'], 0)
        self.assertEqual(set(bounded.included_items()), {'CA', 'NY'})

    def testIncludeAll(self):
        condition = RangeCondition(data=self.table, attr='Information',
                                   range=dict(min=250000, max=500000))
        self.assertEqual(set(condition.included_items()), {'CA', 'NY'})

        condition.include_all()
        self.assertEqual(set(condition.included_items()),
                         set(self.table.index_items()))

    def testIncludedItems(self):
        condition = RangeCondition(data=self.table, attr='Information')
        included = set(condition.included_items())
        self.assertEqual(included, set(self.table.index_items()))

    def testExcludedItems(self):
        # Nothing is excluded while the range spans the whole domain.
        full = RangeCondition(data=self.table, attr='Information')
        self.assertItemsEqual(full.excluded_items(), [])

        ranged = RangeCondition(data=self.table, attr='Information',
                                range=dict(min=250000, max=500000))
        expected = set(full.included_items()).difference(['CA', 'NY'])
        self.assertEqual(set(ranged.excluded_items()), expected)

    def testSetRange(self):
        condition = RangeCondition(data=self.table, attr='Information')
        self.assertItemsEqual(condition.included_items(),
                              self.table.index_items())

        # Raising the minimum leaves only CA and NY.
        condition.set_range(min=250000)
        self.assertEqual(set(condition.included_items()), {'CA', 'NY'})

        # Lowering the maximum to the same value empties the selection and
        # reports which items were dropped.
        change = condition.set_range(max=250000)
        self.assertItemsEqual(condition.included_items(), [])
        self.assertEqual(change,
                         {'included': [], 'excluded': [u'NY', u'CA']})

        # Calling set_range without bounds is an error.
        with self.assertRaises(ValueError):
            condition.set_range()

        # Relative bounds 0..1 restore the whole domain.
        change = condition.set_range(0, 1, relative=True)
        self.assertItemsEqual(condition.included_items(),
                              self.table.index_items())
        self.assertItemsEqual(change['included'], condition.included_items())
        self.assertItemsEqual(change['excluded'], [])

        # A relative minimum of 0.5 maps to an absolute value.
        condition.set_range(0.5, relative=True)
        self.assertEqual(set(condition.included_items()), {'CA', 'NY'})
        expected_range = {
            'max': 492737.0,
            'min': 248347.0,
            'relative_max': 1.0,
            'relative_min': 0.5,
        }
        self.assertEqual(condition.range, expected_range)
def testInsert(self):
    census = Table('census', self.schema).data(self.df)

    # Inserting a single document adds one row.
    rows_before = census.row_count()
    census.insert({'State': 'ES', 'life_meaning': 42})
    self.assertEqual(census.row_count() - rows_before, 1)

    # Inserting a list of two documents adds two rows.
    rows_before = census.row_count()
    new_rows = [
        {'State': 'ES', 'life_meaning': 42},
        {'State': 'ES2', 'life_meaning': 42},
    ]
    census.insert(new_rows)
    self.assertEqual(census.row_count() - rows_before, 2)

    # All three inserted rows carry the new 'life_meaning' attribute.
    with_meaning = census.find({'life_meaning': {'$exists': True}})
    self.assertEqual(with_meaning.row_count(), 3)
    first_match = census.find_one({'life_meaning': {'$exists': True}})
    self.assertEqual(first_match['life_meaning'], 42)
class ConditionSetTest(unittest.TestCase):
    """Exercise ConditionSet: combining, removing and enabling conditions."""

    def setUp(self):
        # Build a fresh 'census' table from the CSV data and its JSON schema.
        self.df = pd.read_csv(RSC_DIR + '/census.csv')
        with open(RSC_DIR + '/schema_census') as f:
            schema = json.loads(f.read())
        self.schema = OrderedDict(attributes=schema['attributes'],
                                  index=schema['index'])
        self.table = Table('census', self.schema).data(self.df)

        # Add a categorical column, assign C1..C4 to the index items in
        # round-robin order and remember which items got which category.
        self.table.add_column('fake_cat', 'CATEGORICAL')
        items = self.table.index_items()
        fake_cat = ['C1', 'C2', 'C3', 'C4']
        self.fake_sets = {category: [] for category in fake_cat}
        for i, item in enumerate(items):
            category = fake_cat[i % 4]
            self.table.update({'State': item},
                              {'$set': {'fake_cat': category}})
            self.fake_sets[category].append(item)

    def tearDown(self):
        # NOTE(review): presumably `names` tracks registered object names and
        # has to be emptied so the next test can reuse them -- confirm.
        names.clear()

    def testCreation(self):
        # A new set is empty: no reference items and no projection.
        cs = ConditionSet(name='condition_set', data=self.table, setop='AND')
        self.assertEqual(cs.is_empty(), True)
        self.assertEqual(cs.reference, [])
        self.assertEqual(cs.query, {'State': {'$in': []}})
        self.assertEqual(cs.projection, {})

    def testAddCondition(self):
        # The set's reference follows later changes to an added condition.
        cs = ConditionSet(name='condition_set', data=self.table, setop='AND')
        cc = CategoricalCondition(data=self.table, attr='fake_cat')
        cs.add_condition(cc)

        cc.add_category('C1')
        self.assertItemsEqual(cs.reference, self.fake_sets['C1'])

        cc.add_category(['C1', 'C3'])
        c1_and_c3 = self.fake_sets['C1'] + self.fake_sets['C3']
        self.assertItemsEqual(cs.reference, c1_and_c3)

    def testAddSeveralConditions(self):
        # With setop='AND' the reference is the intersection of conditions.
        cs = ConditionSet(name='condition_set', data=self.table, setop='AND')
        cc = CategoricalCondition(data=self.table, attr='fake_cat')
        rc = RangeCondition(data=self.table, attr='Information')
        rc.set_range(0.5, relative=True)

        cs.add_condition(cc)
        cc.add_category('C1')
        self.assertItemsEqual(cs.reference, self.fake_sets['C1'])

        cs.add_condition(rc)
        self.assertItemsEqual(cs.reference, ['CA'])

        cc.toggle_category(['C1', 'C3'])
        self.assertItemsEqual(cs.reference, ['NY'])

        rc.set_range(0, 1, relative=True)
        self.assertItemsEqual(cs.reference, self.fake_sets['C3'])

    def testRemoveCondition(self):
        cs = ConditionSet(name='condition_set', data=self.table, setop='AND')
        cc = CategoricalCondition(data=self.table, attr='fake_cat')
        cs.add_condition(cc)
        cc.add_category('C1')
        self.assertItemsEqual(cs.reference, self.fake_sets['C1'])

        cs.remove_condition(cc)
        self.assertEqual(cs.reference, [])

    def testEnablingChanges(self):
        # Disabled conditions must not contribute to the reference.
        cs = ConditionSet(name='condition_set', data=self.table, setop='AND')
        cc = CategoricalCondition(data=self.table, attr='fake_cat')
        rc = RangeCondition(data=self.table, attr='Information')
        rc.set_range(0.5, relative=True)

        cs.add_condition(cc)
        cc.add_category('C1')
        self.assertItemsEqual(cs.reference, self.fake_sets['C1'])

        cc.disable()
        self.assertItemsEqual(cs.reference, [])

        cs.add_condition(rc)
        self.assertItemsEqual(cs.reference, ['CA', 'NY'])

        # Swap which condition is active.
        rc.enable(False)
        cc.enable()
        self.assertItemsEqual(cs.reference, self.fake_sets['C1'])

        cc.toggle_category(['C1', 'C3'])
        rc.enable()
        self.assertItemsEqual(cs.reference, ['NY'])

        rc.set_range(0, 1, relative=True)
        self.assertItemsEqual(cs.reference, self.fake_sets['C3'])

    def testGrammar(self):
        cs = ConditionSet(name='condition_set', data=self.table, setop='AND')
        cc = CategoricalCondition(data=self.table, attr='fake_cat',
                                  name='catc')
        rc = RangeCondition(data=self.table, attr='Information',
                            name='rangec')
        ac = AttributeCondition(data=self.table, name='attrc')
        cs.add_condition(cc)
        cs.add_condition(rc)
        cs.add_condition(ac)

        # Show the complete dict diff if the grammar does not match.
        self.maxDiff = None
        self.assertDictEqual(cs.grammar,
                             {'setop': 'AND',
                              'conditions': ['rangec', 'catc', 'attrc'],
                              'data': 'census',
                              'name': 'condition_set'})
def testFindOne(self):
    # find_one returns a single row as a plain dict.
    census = Table('census', self.schema).data(self.df)
    ny_or_dc = {'$or': [{'State': 'NY'}, {'State': 'DC'}]}
    row = census.find_one(ny_or_dc)
    self.assertIsInstance(row, dict)
    self.assertIn(row['State'], ['DC', 'NY'])
def testUpdate(self):
    # Lower DC's 'Information' value by 2000 and read it back.
    census = Table('census', self.schema).data(self.df)
    current = census.find_one({'State': 'DC'},
                              {'Information': True})['Information']
    lowered = current - 2000
    census.update({'State': 'DC'}, {'$set': {'Information': lowered}})

    stored = census.find_one({'State': 'DC'},
                             {'Information': True})['Information']
    self.assertEqual(stored, lowered)
def testAddEvent(self):
    # Inserting a row must trigger the one-shot 'add' subscription.
    # NOTE(review): relies on self.callback (defined outside this block)
    # setting self.callback_executed to True -- confirm.
    census = Table('census', self.schema).data(self.df)
    census.subscribe_once('add', self.callback)
    self.callback_executed = False

    new_row = {'State': 'ES', 'life_meaning': 42}
    census.insert(new_row)

    self.assertTrue(self.callback_executed)
def testUpdateEvent(self):
    # Updating a row must trigger the one-shot 'update' subscription.
    # NOTE(review): relies on self.callback (defined outside this block)
    # setting self.callback_executed to True -- confirm.
    census = Table('census', self.schema).data(self.df)
    census.subscribe_once('update', self.callback)
    self.callback_executed = False

    change = {'$set': {'Information': 2000}}
    census.update({'State': 'DC'}, change)

    self.assertTrue(self.callback_executed)
def testRemove(self):
    # After remove() fewer rows match the same query than before.
    census = Table('census', self.schema).data(self.df)
    dc_rows = {'State': 'DC'}
    matches_before = census.find(dc_rows).row_count()

    census.remove(dc_rows)

    matches_after = census.find(dc_rows).row_count()
    self.assertGreater(matches_before, matches_after)
def testFind(self):
    # find() returns a TableView containing only the rows that match.
    census = Table('census', self.schema).data(self.df)
    ny_or_dc = census.find({'$or': [{'State': 'NY'}, {'State': 'DC'}]})
    self.assertIsInstance(ny_or_dc, TableView)

    allowed = ['DC', 'NY']
    for row in ny_or_dc.get_data():
        self.assertIn(row['State'], allowed)
def testCheckIndex(self):
    census = Table('census', self.schema).data(self.df)

    # A row without the 'State' key is rejected.
    row_without_state = {'life_meaning': 42}
    with self.assertRaises(ValueError):
        census._check_index(row_without_state)

    # A row that includes 'State' must pass without raising.
    row_with_state = {'State': 'ES', 'life_meaning': 42}
    census._check_index(row_with_state)
class RootTest(unittest.TestCase):
    """Check that a Root's grammar can be used to rebuild its objects."""

    def setUp(self):
        # Only self.root is populated by the create* helpers below.
        self.root = Root("root")
        self.root2 = Root("root2")
        self.createTable()
        self.createConditions()
        self.createDynamics()
        self.createSharedObject()

    def tearDown(self):
        # NOTE(review): presumably `names` tracks registered object names and
        # has to be emptied so the next test can reuse them -- confirm.
        names.clear()

    def createTable(self):
        # Census table plus a categorical column whose values C1..C4 are
        # assigned to the index items in round-robin order.
        self.df = pd.read_csv(RSC_DIR + '/census.csv')
        with open(RSC_DIR + '/schema_census') as f:
            schema = json.loads(f.read())
        self.schema = OrderedDict(attributes=schema['attributes'],
                                  index=schema['index'])
        self.table = Table('census', self.schema).data(self.df)
        self.table.add_column('fake_cat', 'CATEGORICAL')
        items = self.table.index_items()
        fake_cat = ['C1', 'C2', 'C3', 'C4']
        for i, item in enumerate(items):
            self.table.update({'State': item},
                              {'$set': {'fake_cat': fake_cat[i % 4]}})
        self.root.add_dataset(self.table)

    def createConditions(self):
        # One condition of each kind, grouped in a condition set and
        # registered individually on the root.
        self.categorical_condition = CategoricalCondition(
            data=self.table, attr='fake_cat')
        self.range_condition = RangeCondition(
            data=self.table, attr='Information',
            range=dict(min=250000, max=500000))
        self.query_condition = QueryCondition(
            data=self.table, query={'State': 'NY'})
        self.attribute_condition = AttributeCondition(
            data=self.table, attributes=['Information'])
        conditions = (self.categorical_condition,
                      self.range_condition,
                      self.query_condition,
                      self.attribute_condition)

        self.consition_set = ConditionSet(name='condition_set',
                                          data=self.table, setop='AND')
        for condition in conditions:
            self.consition_set.add_condition(condition)
        for condition in conditions:
            self.root.add_condition(condition)

    def createDynamics(self):
        # Two filters and one selection, each fed by the conditions above.
        self.filter1 = DynFilter("filer1", self.table)
        self.filter1.add_condition(self.range_condition)

        self.filter2 = DynFilter("filer2", self.table)
        self.filter2.add_condition(self.query_condition)
        self.filter2.add_condition(self.attribute_condition)

        self.selection = DynSelect("selection", self.table)
        self.selection.add_condition(self.categorical_condition)

        self.root.add_dynamic(self.filter1)
        self.root.add_dynamic(self.filter2)
        self.root.add_dynamic(self.selection)

    def createSharedObject(self):
        self.shared_object = SharedObject("shared_object",
                                          {'a': 42, 'b': [1, 2, 3, 4]})
        self.root.add_dataset(self.shared_object)

    def testCreation(self):
        grammar = self.root.grammar
        # Clear `names` before rebuilding from the grammar.
        # NOTE(review): presumably this frees the names already taken by the
        # fixtures so Root.build can create objects with them -- confirm.
        names.clear()
        objects = Root.build(grammar)

        # Every fixture object has a same-named counterpart in the build.
        self.assertIn(self.table.name, objects)
        self.assertIn(self.attribute_condition.name, objects)
        self.assertIn(self.categorical_condition.name, objects)
        self.assertIn(self.range_condition.name, objects)
        self.assertIn(self.query_condition.name, objects)
        self.assertIn(self.filter1.name, objects)
        self.assertIn(self.filter2.name, objects)
        self.assertIn(self.selection.name, objects)
        self.assertIn(self.shared_object.name, objects)

        # NOTE(review): these checks look at the original fixtures, not at
        # the rebuilt entries of `objects`, so they cannot fail because of
        # Root.build. They probably should inspect the rebuilt objects, but
        # the shape of Root.build's return value is not visible here --
        # confirm before changing.
        self.assertIsInstance(self.table, Table)
        self.assertIsInstance(self.attribute_condition, AttributeCondition)
        self.assertIsInstance(self.categorical_condition,
                              CategoricalCondition)
        self.assertIsInstance(self.range_condition, RangeCondition)
        self.assertIsInstance(self.query_condition, QueryCondition)
        self.assertIsInstance(self.filter1, DynFilter)
        self.assertIsInstance(self.filter2, DynFilter)
        self.assertIsInstance(self.selection, DynSelect)
        self.assertIsInstance(self.shared_object, SharedObject)
def testGetData(self):
    # get_data() yields as many entries as the source data frame has rows.
    census = Table('census', self.schema).data(self.df)
    rows = census.get_data()
    self.assertEqual(len(rows), len(self.df))