Example #1
def __update_joined_datasets(dataset, update):
    """Update any joined datasets."""
    if 'add' in update:
        new_dframe = update['add']

    for direction, other_dataset, on, j_dataset in dataset.joined_datasets:
        if 'add' in update:
            if direction == 'left':
                # only proceed if the on column is present in the new dframe
                if on in new_dframe.columns:
                    left_dframe = other_dataset.dframe(padded=True)

                    # only proceed if a new on value also appears in the
                    # lhs on column
                    if set(new_dframe[on]) & set(left_dframe[on]):
                        merged_dframe = join_dataset(left_dframe, dataset, on)
                        j_dataset.replace_observations(merged_dframe)

                        # TODO is it OK not to propagate the join here?
            else:
                # if on is in the new data, join it with the existing data
                if on in new_dframe.columns:
                    new_dframe = join_dataset(new_dframe, other_dataset, on)

                calculate_updates(j_dataset, df_to_jsondict(new_dframe),
                                  parent_dataset_id=dataset.dataset_id)
        elif 'delete' in update:
            j_dataset.delete_observation(update['delete'])
        elif 'edit' in update:
            j_dataset.update_observation(*update['edit'])
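Every example here leans on df_to_jsondict, which turns a DataFrame into a list of row dictionaries. A minimal sketch of what such a helper could look like, assuming pandas and a simple NaN-dropping policy (the name df_to_jsondict_sketch and the NaN handling are assumptions, not bamboo's actual implementation):

import pandas as pd


def df_to_jsondict_sketch(dframe):
    """Convert a DataFrame into a list of row dicts, one dict per row.

    Hypothetical stand-in for bamboo's df_to_jsondict; NaN values are
    dropped so each row serializes cleanly to JSON.
    """
    return [
        {column: value for column, value in row.items() if pd.notna(value)}
        for row in dframe.to_dict(orient='records')
    ]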
Example #2
def __update_aggregate_dataset(dataset, formula, new_dframe, name, groups,
                               a_dataset, reducible):
    """Update the aggregated dataset built for `dataset` with `calculation`.

    Proceed with the following steps:

        - delete the rows in this dataset from the parent
        - recalculate aggregated dataframe from aggregation
        - update aggregated dataset with new dataframe and add parent id
        - recur on all merged datasets descending from the aggregated
          dataset

    :param formula: The formula to execute.
    :param new_dframe: The DataFrame to aggregate on.
    :param name: The name of the aggregation.
    :param groups: A column or columns to group on.
    :type groups: String, list of strings, or None.
    :param a_dataset: The DataSet to store the aggregation in.
    :param reducible: Whether the aggregation can be updated incrementally.
    """
    # parse aggregation and build column arguments
    aggregator = __create_aggregator(
        dataset, formula, name, groups, dframe=new_dframe)
    new_agg_dframe = aggregator.update(dataset, a_dataset, formula, reducible)

    # jsondict from new dframe
    new_data = df_to_jsondict(new_agg_dframe)

    for merged_dataset in a_dataset.merged_datasets:
        # remove rows in child from this merged dataset
        merged_dataset.remove_parent_observations(a_dataset.dataset_id)

        # calculate updates for the child
        calculate_updates(merged_dataset, new_data,
                          parent_dataset_id=a_dataset.dataset_id)
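The recalculation step above boils down to an ordinary pandas aggregation. A toy illustration of grouping, aggregating, and converting the result to jsondict rows; the data and the sum() choice are made up for the example, and this is not bamboo's aggregator API:

import pandas as pd

# hypothetical stand-in data; bamboo derives this from the dataset's dframe
dframe = pd.DataFrame({'group': ['a', 'a', 'b'], 'amount': [1, 2, 3]})

# group and aggregate, roughly what an aggregator produces for a sum()
agg_dframe = dframe.groupby('group', as_index=False)['amount'].sum()

# one dict per row, the shape a calculate_updates-style propagation consumes
new_data = agg_dframe.to_dict(orient='records')
# => [{'group': 'a', 'amount': 3}, {'group': 'b', 'amount': 3}]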
Example #3
    def test_dataset_update_unicode(self):
        num_rows_before_update = 1
        data = [
            {u'\u03c7': u'\u03b1', u'\u03c8': u'\u03b2'},
            {u'\u03c7': u'\u03b3', u'\u03c8': u'\u03b4'},
        ]
        self.dataset_id = self._post_file('unicode.csv')
        self._put_row_updates(file_name='unicode.json')
        results = json.loads(self.controller.show(self.dataset_id))
        num_rows_after_update = len(results)

        self.assertEqual(num_rows_after_update, num_rows_before_update + 1)
        self._check_schema(results)

        dataset = Dataset.find_one(self.dataset_id)
        self.assertEqual(data, df_to_jsondict(dataset.dframe()))
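The test implies the shape of its fixtures: unicode.csv seeds one row and unicode.json appends a second. A reconstruction of the assumed contents from the expected `data` (the real fixture files are not shown in these examples):

# Assumed fixture contents, reconstructed from the expected `data` above;
# the actual files shipped with the test suite are not shown here.
UNICODE_CSV = u'\u03c7,\u03c8\n\u03b1,\u03b2\n'             # header row plus one data row
UNICODE_JSON = u'{"\u03c7": "\u03b3", "\u03c8": "\u03b4"}'  # the single row the update appends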
Example #4
def __update_merged_datasets(dataset, update):
    """Update any merged datasets."""
    if 'add' in update:
        data = df_to_jsondict(update['add'])

        # store slugs as labels for child datasets
        data = __slugify_data(data, dataset.schema.labels_to_slugs)

    # update the merged datasets with the new data
    for mapping, merged_dataset in dataset.merged_datasets_with_map:
        if 'add' in update:
            mapped_data = __remapped_data(dataset.dataset_id, mapping, data)
            calculate_updates(merged_dataset, mapped_data,
                              parent_dataset_id=dataset.dataset_id)
        elif 'delete' in update:
            offset = __find_merge_offset(dataset, merged_dataset)
            merged_dataset.delete_observation(update['delete'] + offset)
        elif 'edit' in update:
            offset = __find_merge_offset(dataset, merged_dataset)
            index, data = update['edit']
            merged_dataset.update_observation(index + offset, data)
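__slugify_data and the labels_to_slugs schema mapping are bamboo internals not shown here. A minimal sketch of the label-to-slug rewrite they imply, assuming a plain dict mapping (the helper name and example schema are hypothetical):

def slugify_data_sketch(data, labels_to_slugs):
    """Rewrite each row's keys from display labels to column slugs.

    Hypothetical stand-in for __slugify_data; keys with no mapping are
    kept as-is.
    """
    return [
        {labels_to_slugs.get(label, label): value
         for label, value in row.items()}
        for row in data
    ]


# e.g. a schema mapping the label 'Amount Spent' to the slug 'amount_spent'
rows = [{'Amount Spent': 5}]
assert slugify_data_sketch(rows, {'Amount Spent': 'amount_spent'}) == [
    {'amount_spent': 5}]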
Example #5
    def test_create_from_csv_unicode(self):
        dframe_length = 1
        dframe_data = [{u'\u03c7': u'\u03b1', u'\u03c8': u'\u03b2'}]

        _file_name = 'unicode.csv'
        self._file_path = self._file_path.replace(self._file_name, _file_name)
        result = self.__upload_mocked_file()

        self.assertTrue(isinstance(result, dict))
        self.assertTrue(Dataset.ID in result)

        dataset = Dataset.find_one(result[Dataset.ID])

        self.assertEqual(Dataset.STATE_READY, dataset.state)

        dframe = dataset.dframe()

        self.assertEqual(dframe_length, len(dframe))
        self.assertEqual(dframe_data, df_to_jsondict(dframe))

        self._test_summary_built(result)
Example #8
    def test_to_jsondict(self):
        jsondict = df_to_jsondict(self.dframe)
        self.assertEqual(len(jsondict), len(self.dframe))

        for row in jsondict:
            self.assertEqual(len(row), len(self.dframe.columns))
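The two assertions pin down the shape contract of df_to_jsondict: one dict per row, one key per column. The same check as a standalone snippet over a small pandas frame, using to_dict(orient='records') as a stand-in:

import pandas as pd

dframe = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
jsondict = dframe.to_dict(orient='records')

assert len(jsondict) == len(dframe)          # one dict per row
assert all(len(row) == len(dframe.columns)   # one key per column
           for row in jsondict)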
Example #9
    def __dataframe_as_content_type(self, content_type, dframe):
        if content_type == self.CSV:
            return df_to_csv_string(dframe)
        else:
            return df_to_jsondict(dframe)
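Both branches serialize the same frame. A quick plain-pandas illustration of the two shapes; df_to_csv_string presumably wraps something like DataFrame.to_csv, though that is an assumption:

import pandas as pd

dframe = pd.DataFrame({'a': [1], 'b': [2]})

# CSV branch: roughly what df_to_csv_string is assumed to produce
csv_string = dframe.to_csv(index=False)      # 'a,b\n1,2\n'

# JSON branch: one dict per row, as df_to_jsondict returns
jsondict = dframe.to_dict(orient='records')  # [{'a': 1, 'b': 2}]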
Example #10
def comparable(dframe):
    return [reduce_precision(r) for r in df_to_jsondict(dframe)]
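reduce_precision is not shown in these examples. A plausible sketch, assuming it exists to round float cells so two frames can be compared without spurious precision mismatches (the name, signature, and digit count here are guesses):

def reduce_precision_sketch(row, digits=10):
    """Round float values in a row dict so frames compare stably.

    Hypothetical stand-in for reduce_precision; the digit count is an
    assumption.
    """
    return {
        key: round(value, digits) if isinstance(value, float) else value
        for key, value in row.items()
    }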