def __update_joined_datasets(dataset, update):
    """Update any joined datasets."""
    if 'add' in update:
        new_dframe = update['add']

    for direction, other_dataset, on, j_dataset in dataset.joined_datasets:
        if 'add' in update:
            if direction == 'left':
                # only proceed if on in new dframe
                if on in new_dframe.columns:
                    left_dframe = other_dataset.dframe(padded=True)

                    # only proceed if new on value is in on column in lhs
                    if len(set(new_dframe[on]).intersection(
                            set(left_dframe[on]))):
                        merged_dframe = join_dataset(left_dframe, dataset, on)
                        j_dataset.replace_observations(merged_dframe)

                        # TODO is it OK not to propagate the join here?
            else:
                # if on in new data join with existing data
                if on in new_dframe:
                    new_dframe = join_dataset(new_dframe, other_dataset, on)

                calculate_updates(j_dataset, df_to_jsondict(new_dframe),
                                  parent_dataset_id=dataset.dataset_id)
        elif 'delete' in update:
            j_dataset.delete_observation(update['delete'])
        elif 'edit' in update:
            j_dataset.update_observation(*update['edit'])
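# `join_dataset` is defined elsewhere; below is a minimal sketch of the merge
# it is assumed to perform, operating on plain DataFrames rather than Dataset
# objects. This is an assumption for illustration, not the real helper.
import pandas as pd


def join_dataset_sketch(left_dframe, right_dframe, on):
    """Left-join `right_dframe` onto `left_dframe` on the column `on`.

    >>> lhs = pd.DataFrame({'id': [1, 2], 'a': ['x', 'y']})
    >>> rhs = pd.DataFrame({'id': [1], 'b': [10]})
    >>> join_dataset_sketch(lhs, rhs, 'id')['b'].tolist()
    [10.0, nan]
    """
    return left_dframe.merge(right_dframe, on=on, how='left')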
def __update_aggregate_dataset(dataset, formula, new_dframe, name, groups,
                               a_dataset, reducible):
    """Update the aggregated dataset built for `dataset` with `formula`.

    Proceed with the following steps:

        - delete the rows in this dataset from the parent
        - recalculate aggregated dataframe from aggregation
        - update aggregated dataset with new dataframe and add parent id
        - recur on all merged datasets descending from the aggregated dataset

    :param dataset: The dataset to update.
    :param formula: The formula to execute.
    :param new_dframe: The DataFrame to aggregate on.
    :param name: The name of the aggregation.
    :param groups: A column or columns to group on.
    :type groups: String, list of strings, or None.
    :param a_dataset: The DataSet to store the aggregation in.
    :param reducible: Whether the aggregation can be updated incrementally.
    """
    # parse aggregation and build column arguments
    aggregator = __create_aggregator(
        dataset, formula, name, groups, dframe=new_dframe)
    new_agg_dframe = aggregator.update(dataset, a_dataset, formula, reducible)

    # jsondict from the new dframe
    new_data = df_to_jsondict(new_agg_dframe)

    for merged_dataset in a_dataset.merged_datasets:
        # remove rows in the child from this merged dataset
        merged_dataset.remove_parent_observations(a_dataset.dataset_id)

        # calculate updates for the child
        calculate_updates(merged_dataset, new_data,
                          parent_dataset_id=a_dataset.dataset_id)
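# `df_to_jsondict` appears throughout these snippets; a minimal sketch under
# the assumption that it yields one dict per row, keyed by column name
# (pandas' 'records' orientation). The real helper may also normalize values
# for JSON serialization.
import pandas as pd


def df_to_jsondict_sketch(dframe):
    """Convert a DataFrame into a list of row dicts.

    >>> df_to_jsondict_sketch(pd.DataFrame({'a': [1, 2]}))
    [{'a': 1}, {'a': 2}]
    """
    return dframe.to_dict(orient='records')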
def test_dataset_update_unicode(self):
    num_rows_before_update = 1
    data = [
        {u'\u03c7': u'\u03b1', u'\u03c8': u'\u03b2'},
        {u'\u03c7': u'\u03b3', u'\u03c8': u'\u03b4'},
    ]
    self.dataset_id = self._post_file('unicode.csv')
    self._put_row_updates(file_name='unicode.json')
    results = json.loads(self.controller.show(self.dataset_id))
    num_rows_after_update = len(results)

    self.assertEqual(num_rows_after_update, num_rows_before_update + 1)
    self._check_schema(results)

    dataset = Dataset.find_one(self.dataset_id)
    self.assertEqual(data, df_to_jsondict(dataset.dframe()))
def __update_merged_datasets(dataset, update):
    """Update any merged datasets descending from `dataset`."""
    if 'add' in update:
        data = df_to_jsondict(update['add'])

        # store slugs as labels for child datasets
        data = __slugify_data(data, dataset.schema.labels_to_slugs)

    # update the merged datasets with the new data
    for mapping, merged_dataset in dataset.merged_datasets_with_map:
        if 'add' in update:
            mapped_data = __remapped_data(dataset.dataset_id, mapping, data)
            calculate_updates(merged_dataset, mapped_data,
                              parent_dataset_id=dataset.dataset_id)
        elif 'delete' in update:
            offset = __find_merge_offset(dataset, merged_dataset)
            merged_dataset.delete_observation(update['delete'] + offset)
        elif 'edit' in update:
            offset = __find_merge_offset(dataset, merged_dataset)
            index, data = update['edit']
            merged_dataset.update_observation(index + offset, data)
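# `__slugify_data` and `__remapped_data` are private helpers not shown here.
# A hedged sketch of the key rewrite `__slugify_data` is assumed to perform,
# mapping human-readable labels to column slugs; the real helper may differ.
def slugify_data_sketch(data, labels_to_slugs):
    """Rewrite each row's keys from labels to slugs, keeping unmapped keys.

    >>> slugify_data_sketch([{'Total Amount': 5}],
    ...                     {'Total Amount': 'total_amount'})
    [{'total_amount': 5}]
    """
    return [{labels_to_slugs.get(key, key): value
             for key, value in row.items()} for row in data]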
def test_create_from_csv_unicode(self):
    dframe_length = 1
    dframe_data = [{u'\u03c7': u'\u03b1', u'\u03c8': u'\u03b2'}]
    _file_name = 'unicode.csv'
    self._file_path = self._file_path.replace(self._file_name, _file_name)
    result = self.__upload_mocked_file()
    self.assertTrue(isinstance(result, dict))
    self.assertTrue(Dataset.ID in result)

    dataset = Dataset.find_one(result[Dataset.ID])
    self.assertEqual(Dataset.STATE_READY, dataset.state)

    dframe = dataset.dframe()
    self.assertEqual(dframe_length, len(dframe))
    self.assertEqual(dframe_data, df_to_jsondict(dframe))

    self._test_summary_built(result)
def test_to_jsondict(self):
    jsondict = df_to_jsondict(self.dframe)
    self.assertEqual(len(jsondict), len(self.dframe))

    # each element of the jsondict is a row dict with one entry per column
    for row in jsondict:
        self.assertEqual(len(row), len(self.dframe.columns))
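# A hedged demonstration of the shape invariant this test asserts, using a
# plain pandas frame and the 'records' orientation that `df_to_jsondict` is
# assumed to mirror: one dict per row, one entry per column.
import pandas as pd


def jsondict_shape_demo():
    """Check len(rows) == len(dframe) and one key per column per row.

    >>> jsondict_shape_demo()
    True
    """
    dframe = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})
    rows = dframe.to_dict(orient='records')
    return (len(rows) == len(dframe) and
            all(len(row) == len(dframe.columns) for row in rows))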
def __dataframe_as_content_type(self, content_type, dframe):
    if content_type == self.CSV:
        return df_to_csv_string(dframe)
    else:
        return df_to_jsondict(dframe)
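# `df_to_csv_string` is the CSV counterpart of `df_to_jsondict`; a minimal
# sketch, assuming it serializes without the index. This is an assumption
# about the real helper, not its actual implementation.
import pandas as pd


def df_to_csv_string_sketch(dframe):
    """Serialize a DataFrame to a CSV string, omitting the index.

    >>> df_to_csv_string_sketch(pd.DataFrame({'a': [1, 2]}))
    'a\\n1\\n2\\n'
    """
    return dframe.to_csv(index=False)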
def comparable(dframe):
    return [reduce_precision(r) for r in df_to_jsondict(dframe)]
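# `reduce_precision` is defined elsewhere; a hedged sketch, assuming it
# rounds float values in a row dict so two frames can be compared without
# floating-point noise. The real helper's rounding rule may differ.
def reduce_precision_sketch(row, places=10):
    """Round each float value in `row` to `places` decimal places.

    >>> reduce_precision_sketch({'x': 0.1 + 0.2})
    {'x': 0.3}
    """
    return {key: round(value, places) if isinstance(value, float) else value
            for key, value in row.items()}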