Example #1
    def test_delete(self):
        self._save_observations()
        records = Observation.find(self.dataset)
        self.assertNotEqual(records, [])
        Observation.delete_all(self.dataset)
        records = [x for x in Observation.find(self.dataset)]
        self.assertEqual(records, [])
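Taken together, the save/find/delete tests on this page exercise the full lifecycle of the Observation model. A minimal sketch of that flow, assuming `dataset` is a saved Dataset model and `dframe` is a pandas DataFrame of rows (names follow the surrounding examples):

# Sketch of the lifecycle exercised by the tests on this page.
# Assumes `dataset` is a saved Dataset model and `dframe` a pandas
# DataFrame whose columns match the dataset's schema.
Observation.save(dframe, dataset)      # persist the rows
records = Observation.find(dataset)    # returns a list of row dicts
assert records != []

Observation.delete_all(dataset)        # remove every row for the dataset
assert Observation.find(dataset) == []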
Example #2
    def remove_parent_observations(self, parent_id):
        """Remove obervations for this dataset with the passed `parent_id`.

        :param parent_id: Remove observations with this ID as their parent
            dataset ID.
        """
        Observation.delete_all(self, {PARENT_DATASET_ID: parent_id})
Example #3
        def action(dataset, data=data):
            data = safe_json_loads(data)
            Observation.update(dataset, int(index), data)

            return {
                self.SUCCESS: 'Updated row with index "%s".' % index,
                Dataset.ID: dataset_id}
Example #4
    def _save_records(self):
        Observation.save(self.get_data('good_eats.csv'),
                         self.dataset)
        records = Observation.find(self.dataset)
        self.assertTrue(isinstance(records, list))
        self.assertTrue(isinstance(records[0], dict))
        self.assertTrue('_id' in records[0].keys())
        return records
Example #5
    def test_delete_all(self):
        self.__save_records()
        records = Observation.find(self.dataset)
        self.assertNotEqual(records, [])
        Observation.delete_all(self.dataset)
        records = Observation.find(self.dataset)

        self.assertEqual(records, [])
Example #6
    def append_observations(self, dframe):
        Observation.append(dframe, self)
        self.update({self.NUM_ROWS: self.num_rows + len(dframe)})

        # to update cardinalities here we need to refetch the full DataFrame.
        dframe = self.dframe(keep_parent_ids=True)
        self.build_schema(dframe)
        self.update_stats(dframe)
Example #7
def delete_task(dataset, query=None):
    """Background task to delete dataset and its associated observations."""
    Observation.delete_all(dataset, query=query)

    if query is None:
        super(dataset.__class__,
              dataset).delete({DATASET_ID: dataset.dataset_id})
        Observation.delete_encoding(dataset)
Example #8
def delete_task(dataset, query=None):
    """Background task to delete dataset and its associated observations."""
    Observation.delete_all(dataset, query=query)

    if query is None:
        super(dataset.__class__, dataset).delete(
            {DATASET_ID: dataset.dataset_id})
        Observation.delete_encoding(dataset)
Example #9
    def append_observations(self, dframe):
        Observation.append(dframe, self)
        self.update({self.NUM_ROWS: self.num_rows + len(dframe)})

        # to update cardinalities here we need to refetch the full DataFrame.
        dframe = self.dframe(keep_parent_ids=True)
        self.build_schema(dframe)
        self.update_stats(dframe)
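append_observations (shown in Examples #6 and #9) is the incremental-update path: rows are appended, the stored row count is bumped, and the schema and stats are rebuilt from a refetched DataFrame. A hedged usage sketch, with `dataset` and `new_rows` as assumed names:

# Sketch: appending rows to an existing dataset via the method above.
# `dataset` is an existing Dataset model; `new_rows` is a pandas DataFrame
# whose columns match the dataset's schema (assumed setup).
dataset.append_observations(new_rows)
print(dataset.num_rows)  # now includes the appended rows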
Example #10
    def __save_records(self):
        Observation.save(self.get_data('good_eats.csv'),
                         self.dataset)
        records = Observation.find(self.dataset)
        self.assertTrue(isinstance(records, list))
        self.assertTrue(isinstance(records[0], dict))
        self.assertTrue('_id' in records[0].keys())

        return records
Example #11
    def remove_parent_observations(self, parent_id):
        """Remove obervations for this dataset with the passed `parent_id`.

        :param parent_id: Remove observations with this ID as their parent
            dataset ID.
        """
        Observation.delete_all(self, {PARENT_DATASET_ID: parent_id})
        # clear the cached dframe
        self.__dframe = None
Example #12
    def test_delete_encoding(self):
        self.__save_records()
        encoding = Observation.encoding(self.dataset)

        self.assertTrue(isinstance(encoding, dict))

        Observation.delete_encoding(self.dataset)
        encoding = Observation.encoding(self.dataset)

        self.assertEqual(encoding, None)
Example #13
    def delete_observation(self, index):
        """Delete observation at index.

        :param index: The index of an observation to delete.
        """
        Observation.delete(self, index)

        dframe = self.dframe()
        self.update({self.NUM_ROWS: len(dframe)})
        self.build_schema(dframe, overwrite=True)
Example #14
    def delete_observation(self, index):
        """Delete observation at index.

        :param index: The index of an observation to delete.
        """
        Observation.delete(self, index)

        dframe = self.dframe()
        self.update({self.NUM_ROWS: len(dframe)})
        self.build_schema(dframe, overwrite=True)
        call_async(propagate, self, update={'delete': index})
Example #15
    def test_delete_one(self):
        self.__save_records()
        records = Observation.find(self.dataset)
        self.assertNotEqual(records, [])

        row = self.__decode(records[0])

        Observation.delete(self.dataset, row[INDEX])
        new_records = Observation.find(self.dataset)

        # Dump to avoid problems with nan != nan.
        self.assertEqual(dump_mongo_json(records[1:]),
                         dump_mongo_json(new_records))
Example #16
    def replace_observations(self, dframe, overwrite=False,
                             set_num_columns=True):
        """Remove all rows for this dataset and save the rows in `dframe`.

        :param dframe: Replace rows in this dataset with this DataFrame's rows.

        :returns: BambooFrame equivalent to the passed in `dframe`.
        """
        self.build_schema(dframe, overwrite=overwrite,
                          set_num_columns=set_num_columns)
        dframe = self.add_id_column_to_dframe(dframe)
        Observation.delete_all(self)

        return self.save_observations(dframe)
Example #17
    def observations(self, query_args=None, as_cursor=False):
        """Return observations for this dataset.

        :param query_args: An optional QueryArgs to hold the query arguments.
        :param as_cursor: Return the observations as a cursor.
        """
        return Observation.find(self, query_args or QueryArgs(),
                                as_cursor=as_cursor)
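observations wraps Observation.find with a default QueryArgs. Combined with the select syntax used in the find-with-select tests elsewhere on this page, a query sketch might look like the following (`dataset` is an assumed Dataset instance; the 'rating' column comes from the good_eats test data):

# Sketch: limiting returned columns with QueryArgs, per the find tests.
query_args = QueryArgs(select={'rating': 1})
rows = dataset.observations(query_args)                    # list of dicts
cursor = dataset.observations(query_args, as_cursor=True)  # lazy cursor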
Example #18
    def replace_observations(self, dframe, overwrite=False,
                             set_num_columns=True):
        """Remove all rows for this dataset and save the rows in `dframe`.

        :param dframe: Replace rows in this dataset with this DataFrame's rows.
        :param overwrite: If true replace the schema, otherwise update it.
            Default False.
        :param set_num_columns: If true update the dataset stored number of
            columns.  Default True.

        :returns: DataFrame equivalent to the passed in `dframe`.
        """
        self.build_schema(dframe, overwrite=overwrite,
                          set_num_columns=set_num_columns)
        Observation.delete_all(self)

        return self.save_observations(dframe)
Example #19
    def test_find_with_select_and_query(self):
        self.__save_records()
        self.query_args.select = {"rating": 1}
        rows = Observation.find(self.dataset, self.query_args)
        self.assertTrue(isinstance(rows, list))

        row = self.__decode(rows[0])

        self.assertEquals(sorted(row.keys()), ['_id', 'rating'])
Example #20
    def observations(self, query_args=None, as_cursor=False):
        """Return observations for this dataset.

        :param query_args: An optional QueryArgs to hold the query arguments.
        :param as_cursor: Return the observations as a cursor.
        """
        return Observation.find(self,
                                query_args or QueryArgs(),
                                as_cursor=as_cursor)
Example #21
    def delete_columns(self, columns):
        """Delete column `column` from this dataset.

        :param column: The column to delete.
        """
        columns = set(self.schema.keys()).intersection(set(to_list(columns)))

        if not len(columns):
            raise ArgumentError("Columns: %s not in dataset." % columns)

        Observation.delete_columns(self, columns)
        new_schema = self.schema

        [new_schema.pop(c) for c in columns]

        self.set_schema(new_schema, set_num_columns=True)

        return columns
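A usage sketch for delete_columns above: per the code, the requested names are first intersected with the schema, an ArgumentError is raised only when none remain, and the set of columns actually removed is returned (`dataset` is an assumed Dataset instance):

# Sketch: dropping a column via the method above.
removed = dataset.delete_columns(['rating'])  # to_list also accepts one name
print(removed)  # set of columns actually deleted from schema and rows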
Example #22
    def delete_columns(self, columns):
        """Delete column `column` from this dataset.

        :param column: The column to delete.
        """
        columns = set(self.schema.keys()).intersection(set(to_list(columns)))

        if not len(columns):
            raise ArgumentError("Columns: %s not in dataset." % columns)

        Observation.delete_columns(self, columns)
        new_schema = self.schema

        [new_schema.pop(c) for c in columns]

        self.set_schema(new_schema, set_num_columns=True)

        return columns
Example #23
    def test_delete(self):
        for dataset_name in self.TEST_DATASETS:
            dataset = Dataset.create(self.test_dataset_ids[dataset_name])
            records = Dataset.find(self.test_dataset_ids[dataset_name])
            self.assertNotEqual(records, [])
            dataset.delete()
            records = Dataset.find(self.test_dataset_ids[dataset_name])

            self.assertEqual(records, [])
            self.assertEqual(Observation.encoding(dataset), None)
Example #24
    def test_delete(self):
        for dataset_name in self.TEST_DATASETS:
            dataset = Dataset.create(self.test_dataset_ids[dataset_name])
            records = Dataset.find(self.test_dataset_ids[dataset_name])
            self.assertNotEqual(records, [])
            dataset.delete()
            records = Dataset.find(self.test_dataset_ids[dataset_name])

            self.assertEqual(records, [])
            self.assertEqual(Observation.encoding(dataset), None)
Example #25
    def replace_observations(self,
                             dframe,
                             overwrite=False,
                             set_num_columns=True):
        """Remove all rows for this dataset and save the rows in `dframe`.

        :param dframe: Replace rows in this dataset with this DataFrame's rows.
        :param overwrite: If true replace the schema, otherwise update it.
            Default False.
        :param set_num_columns: If true update the dataset stored number of
            columns.  Default True.

        :returns: DataFrame equivalent to the passed in `dframe`.
        """
        self.build_schema(dframe,
                          overwrite=overwrite,
                          set_num_columns=set_num_columns)
        Observation.delete_all(self)

        return self.save_observations(dframe)
Example #26
    def __create_or_update(self, url=None, csv_file=None, json_file=None,
                           schema=None, na_values=[], perish=0,
                           dataset_id=None):
        result = None
        error = 'url, csv_file or schema required'

        try:
            if schema or url or csv_file or json_file:
                if dataset_id is None:
                    dataset = Dataset()
                    dataset.save()
                else:
                    dataset = Dataset.find_one(dataset_id)
                    Observation.delete_all(dataset)

                if schema:
                    dataset.import_schema(schema)

                na_values = safe_json_loads(na_values)

                if url:
                    dataset.import_from_url(url, na_values=na_values)
                elif csv_file:
                    dataset.import_from_csv(csv_file, na_values=na_values)
                elif json_file:
                    dataset.import_from_json(json_file)

                result = {Dataset.ID: dataset.dataset_id}

            perish = parse_int(perish)
            perish and dataset.delete(countdown=perish)
        except urllib2.URLError:
            error = 'could not load: %s' % url
        except IOError:
            error = 'could not get a filehandle for: %s' % csv_file
        except JSONError as e:
            error = e.__str__()

        self.set_response_params(result, success_status_code=201)

        return self._dump_or_error(result, error)
Example #27
    def __create_or_update(self, url=None, csv_file=None, json_file=None,
                           schema=None, na_values=[], perish=0,
                           dataset_id=None):
        result = None
        error = 'url, csv_file or schema required'

        try:
            if schema or url or csv_file or json_file:
                if dataset_id is None:
                    dataset = Dataset()
                    dataset.save()
                else:
                    dataset = Dataset.find_one(dataset_id)
                    Observation.delete_all(dataset)

                if schema:
                    dataset.import_schema(schema)

                na_values = safe_json_loads(na_values)

                if url:
                    dataset.import_from_url(url, na_values=na_values)
                elif csv_file:
                    dataset.import_from_csv(csv_file, na_values=na_values)
                elif json_file:
                    dataset.import_from_json(json_file)

                result = {Dataset.ID: dataset.dataset_id}

            perish = parse_int(perish)
            perish and dataset.delete(countdown=perish)
        except urllib2.URLError:
            error = 'could not load: %s' % url
        except IOError:
            error = 'could not get a filehandle for: %s' % csv_file
        except JSONError as e:
            error = e.__str__()

        self.set_response_params(result, success_status_code=201)

        return self._dump_or_error(result, error)
Example #28
    def test_encoding(self):
        self.__save_records()
        encoding = Observation.encoding(self.dataset)

        for column in self.dataset.dframe().columns:
            if column == MONGO_ID:
                column = MONGO_ID_ENCODED

            self.assertTrue(column in encoding.keys())

        for v in encoding.values():
            self.assertTrue(isinstance(int(v), int))
Example #29
    def observations(self, query=None, select=None, limit=0, order_by=None,
                     as_cursor=False):
        """Return observations for this dataset.

        :param query: Optional query for MongoDB to limit rows returned.
        :param select: Optional select for MongoDB to limit columns.
        :param limit: If greater than 0, limit number of observations returned
            to this maximum.
        :param order_by: Order the returned observations.
        """
        return Observation.find(self, query, select, limit=limit,
                                order_by=order_by, as_cursor=as_cursor)
Example #30
    def dframe(self,
               query_args=None,
               keep_parent_ids=False,
               padded=False,
               index=False,
               reload_=False,
               keep_mongo_keys=False):
        """Fetch the dframe for this dataset.

        :param query_args: An optional QueryArgs to hold the query arguments.
        :param keep_parent_ids: Do not remove parent IDs from the dframe,
            default False.
        :param padded: Used for joining, default False.
        :param index: Return the index with dframe, default False.
        :param reload_: Force refresh of data, default False.
        :param keep_mongo_keys: Used for updating documents, default False.

        :returns: Return DataFrame with contents based on query parameters
            passed to MongoDB. DataFrame will not have parent ids if
            `keep_parent_ids` is False.
        """
        # bypass cache if we need specific version
        cacheable = not (query_args or keep_parent_ids or padded)

        # use cached copy if we have already fetched it
        if cacheable and not reload_ and self.__is_cached:
            return self.__dframe

        query_args = query_args or QueryArgs()
        observations = self.observations(query_args, as_cursor=True)

        if query_args.distinct:
            return DataFrame(observations)

        dframe = Observation.batch_read_dframe_from_cursor(
            self, observations, query_args.distinct, query_args.limit)

        dframe = df_mongo_decode(dframe, keep_mongo_keys=keep_mongo_keys)

        excluded = [keep_parent_ids and PARENT_DATASET_ID, index and INDEX]
        dframe = remove_reserved_keys(dframe, filter(bool, excluded))

        if index:
            dframe.rename(columns={INDEX: 'index'}, inplace=True)

        dframe = self.__maybe_pad(dframe, padded)

        if cacheable:
            self.__dframe = dframe

        return dframe
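Note the caching rule at the top of dframe: a fetch is cacheable only when no query_args, keep_parent_ids, or padded options are passed, and reload_ forces a refetch. A sketch of the implied behaviour (`dataset` is an assumed Dataset instance):

# Sketch of the cache behaviour encoded in dframe above.
df1 = dataset.dframe()               # fetched from MongoDB, then cached
df2 = dataset.dframe()               # served from the cached copy
df3 = dataset.dframe(reload_=True)   # bypasses and refreshes the cache
df4 = dataset.dframe(query_args=QueryArgs(select={'rating': 1}))  # never cached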
Example #31
    def observations(self, query_args=QueryArgs(), as_cursor=False):
        """Return observations for this dataset.

        :param query_args: An optional QueryArgs to hold the query arguments.
        :param as_cursor: Return the observations as a cursor.
        """
        if query_args.distinct:
            as_cursor = True

        observations = Observation.find(self, query_args, as_cursor=as_cursor)

        if query_args.distinct:
            observations = observations.distinct(query_args.distinct)

        return observations
Example #32
    def get_value(self, period):
        value = None
        if self.dataset:
            fields = Observation.encoding(self.dataset)
            fields["dataset"] = self.dataset
            fields['dataset_id_field'] = fields[DATASET_ID]
            fields['period'] = period
            query = json.loads(Template(self.final_str).render(fields))
            form_meta_timeend = '%(form_meta_timeend)s' % fields
            query[0]['$match'][form_meta_timeend]['$gte'] = period.start
            query[0]['$match'][form_meta_timeend]['$lte'] = period.end
            aggregate_value = self._db.observations.aggregate(query)
            if not aggregate_value['result']:
                return 0
            value = aggregate_value['result'][0]['total_num']
        return value
Example #33
    def get_value(self, period):
        value = None
        if self.dataset:
            fields = Observation.encoding(self.dataset)
            fields["dataset"] = self.dataset
            fields['dataset_id_field'] = fields[DATASET_ID]
            fields['period'] = period
            query = json.loads(Template(self.final_str).render(fields))
            form_meta_timeend = '%(form_meta_timeend)s' % fields
            query[0]['$match'][form_meta_timeend]['$gte'] = period.start
            query[0]['$match'][form_meta_timeend]['$lte'] = period.end
            aggregate_value = self._db.observations.aggregate(query)
            if not aggregate_value['result']:
                return 0
            value = aggregate_value['result'][0]['total_num']
        return value
Example #34
    def test_dframe(self):
        dataset = Dataset.create(self.test_dataset_ids['good_eats.csv'])
        dataset.save_observations(
            recognize_dates(self.get_data('good_eats.csv')))
        records = [x for x in Observation.find(dataset)]
        dframe = dataset.dframe()

        self.assertTrue(isinstance(dframe, DataFrame))
        self.assertTrue(all(self.get_data('good_eats.csv').reindex(
                        columns=dframe.columns).eq(dframe)))
        columns = dframe.columns
        # ensure no reserved keys
        for key in MONGO_RESERVED_KEY_STRS:
            self.assertFalse(key in columns)
        # ensure date is converted
        self.assertTrue(isinstance(dframe.submit_date[0], datetime))
Example #35
    def dframe(self, query_args=None, keep_parent_ids=False, padded=False,
               index=False, reload_=False, keep_mongo_keys=False):
        """Fetch the dframe for this dataset.

        :param query_args: An optional QueryArgs to hold the query arguments.
        :param keep_parent_ids: Do not remove parent IDs from the dframe,
            default False.
        :param padded: Used for joining, default False.
        :param index: Return the index with dframe, default False.
        :param reload_: Force refresh of data, default False.
        :param keep_mongo_keys: Used for updating documents, default False.

        :returns: Return DataFrame with contents based on query parameters
            passed to MongoDB. DataFrame will not have parent ids if
            `keep_parent_ids` is False.
        """
        # bypass cache if we need specific version
        cacheable = not (query_args or keep_parent_ids or padded)

        # use cached copy if we have already fetched it
        if cacheable and not reload_ and self.__is_cached:
            return self.__dframe

        query_args = query_args or QueryArgs()
        observations = self.observations(query_args, as_cursor=True)

        if query_args.distinct:
            return DataFrame(observations)

        dframe = Observation.batch_read_dframe_from_cursor(
            self, observations, query_args.distinct, query_args.limit)

        dframe = df_mongo_decode(dframe, keep_mongo_keys=keep_mongo_keys)

        excluded = [keep_parent_ids and PARENT_DATASET_ID, index and INDEX]
        dframe = remove_reserved_keys(dframe, filter(bool, excluded))

        if index:
            dframe.rename(columns={INDEX: 'index'}, inplace=True)

        dframe = self.__maybe_pad(dframe, padded)

        if cacheable:
            self.__dframe = dframe

        return dframe
Example #36
    def test_edit_row(self):
        dataset_id = self._post_file()
        index = 0
        update = {'amount': 10, 'food_type': 'breakfast'}
        expected_dframe = Dataset.find_one(dataset_id).dframe()
        expected_row = expected_dframe.ix[0].to_dict()
        expected_row.update(update)
        expected_dframe.ix[0] = Series(expected_row)

        results = json.loads(self.controller.row_update(dataset_id, index,
                                                        json.dumps(update)))
        self.assertTrue(Datasets.SUCCESS in results.keys())

        dataset = Dataset.find_one(dataset_id)
        dframe = dataset.dframe()
        self.assertEqual(self.NUM_ROWS, len(dframe))
        self._check_dframes_are_equal(expected_dframe, dframe)

        # check that previous row exists
        all_observations = Observation.find(dataset, include_deleted=True)
        self.assertEqual(self.NUM_ROWS + 1, len(all_observations))
Example #37
    def get_value(self, period):
        value = None
        if self.dataset:
            fields = Observation.encoding(self.dataset)
            fields["dataset"] = self.dataset
            fields['dataset_id_field'] = fields[DATASET_ID]
            fields['period'] = period
            mapper = Code(Template(self.mapper_str).render(fields))
            reducer = Code(Template(self.reducer_str).render(fields))
            query = json.loads(Template(self.query_str).render(fields))
            query['%(form_meta_timeend)s' % fields]['$gte'] = period.start
            query['%(form_meta_timeend)s' % fields]['$lte'] = period.end
            aggregate = json.loads(Template(self.aggregate_str).render(fields))
            results = self._db.observations.map_reduce(
                mapper, reducer, 'myresults_malaria', query=query)
            value = None
            if results.count():
                aggregate_value = results.aggregate(aggregate)
                value = aggregate_value['result'][0]['total']
            self._db.myresults_malaria.drop()
        return value
Example #38
    def test_edit_row(self):
        dataset_id = self._post_file()
        index = 0
        update = {'amount': 10, 'food_type': 'breakfast'}
        expected_dframe = Dataset.find_one(dataset_id).dframe()
        expected_row = expected_dframe.ix[0].to_dict()
        expected_row.update(update)
        expected_dframe.ix[0] = Series(expected_row)

        results = json.loads(
            self.controller.row_update(dataset_id, index, json.dumps(update)))
        self.assertTrue(Datasets.SUCCESS in results.keys())

        dataset = Dataset.find_one(dataset_id)
        dframe = dataset.dframe()
        self.assertEqual(self.NUM_ROWS, len(dframe))
        self._check_dframes_are_equal(expected_dframe, dframe)

        # check that previous row exists
        all_observations = Observation.find(dataset, include_deleted=True)
        self.assertEqual(self.NUM_ROWS + 1, len(all_observations))
Example #39
    def get_value(self, period):
        value = None
        if self.dataset:
            fields = Observation.encoding(self.dataset)
            fields["dataset"] = self.dataset
            fields['dataset_id_field'] = fields[DATASET_ID]
            fields['period'] = period
            mapper = Code(Template(self.mapper_str).render(fields))
            reducer = Code(Template(self.reducer_str).render(fields))
            query = json.loads(Template(self.query_str).render(fields))
            query['%(form_meta_timeend)s' % fields]['$gte'] = period.start
            query['%(form_meta_timeend)s' % fields]['$lte'] = period.end
            aggregate = json.loads(Template(self.aggregate_str).render(fields))
            results = self._db.observations.map_reduce(mapper,
                                                       reducer,
                                                       'myresults_malaria',
                                                       query=query)
            value = None
            if results.count():
                aggregate_value = results.aggregate(aggregate)
                value = aggregate_value['result'][0]['total']
            self._db.myresults_malaria.drop()
        return value
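These get_value variants drive MongoDB's legacy map-reduce API through PyMongo, rendering the JavaScript mapper and reducer from templates that are not shown here. A stripped-down sketch of the same pattern, with hypothetical inline JavaScript and field names:

from bson.code import Code

# Sketch: the legacy PyMongo map-reduce pattern used above.
# `db` is an assumed pymongo Database; 'amount' is an illustrative field.
mapper = Code('function () { emit(0, this.amount); }')
reducer = Code('function (key, values) { return Array.sum(values); }')
results = db.observations.map_reduce(
    mapper, reducer, 'myresults_sketch', query={})
total_doc = results.find_one()  # one output document per emitted key
db.myresults_sketch.drop()      # drop the output collection, as above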
Example #40
    def test_delete_row(self):
        dataset_id = self._post_file()
        dataset = Dataset.find_one(dataset_id)
        index = 0
        expected_dframe = Dataset.find_one(
            dataset_id).dframe()[index + 1:].reset_index()
        del expected_dframe['index']

        results = json.loads(self.controller.row_delete(dataset_id, index))
        self.assertTrue(Datasets.SUCCESS in results.keys())

        dataset = Dataset.find_one(dataset_id)
        dframe = dataset.dframe()
        self.assertEqual(self.NUM_ROWS - 1, len(dframe))
        self._check_dframes_are_equal(expected_dframe, dframe)

        # check info updated
        info = dataset.info()
        self.assertEqual(self.NUM_ROWS - 1, info[Dataset.NUM_ROWS])

        # check that row is softly deleted
        all_observations = Observation.find(dataset, include_deleted=True)
        self.assertEqual(self.NUM_ROWS, len(all_observations))
Example #41
    def test_delete_row(self):
        dataset_id = self._post_file()
        dataset = Dataset.find_one(dataset_id)
        index = 0
        expected_dframe = Dataset.find_one(
            dataset_id).dframe()[index + 1:].reset_index()
        del expected_dframe['index']

        results = json.loads(self.controller.row_delete(dataset_id, index))
        self.assertTrue(Datasets.SUCCESS in results.keys())

        dataset = Dataset.find_one(dataset_id)
        dframe = dataset.dframe()
        self.assertEqual(self.NUM_ROWS - 1, len(dframe))
        self._check_dframes_are_equal(expected_dframe, dframe)

        # check info updated
        info = dataset.info()
        self.assertEqual(self.NUM_ROWS - 1, info[Dataset.NUM_ROWS])

        # check that row is softly deleted
        all_observations = Observation.find(dataset, include_deleted=True)
        self.assertEqual(self.NUM_ROWS, len(all_observations))
Example #42
    def update_observations(self, dframe):
        return Observation.update_from_dframe(dframe, self)
Example #43
    def update_observation(self, index, data):
        # check that update is valid
        dframe_from_update(self, [data])
        Observation.update(self, index, data)
        call_async(propagate, self, update={'edit': [index, data]})
Example #44
        def action(dataset):
            row = Observation.find_one(dataset, parse_int(index))

            if row:
                return row.clean_record
Example #45
    def test_find_with_select(self):
        self._save_observations()
        query_args = QueryArgs(select={"rating": 1})
        rows = Observation.find(self.dataset, query_args)
        self.assertTrue(isinstance(rows, list))
        self.assertEquals(sorted(rows[0].keys()), ['_id', 'rating'])
Example #46
    def test_find_with_query(self):
        self._save_observations()
        rows = Observation.find(self.dataset, self.query_args)
        self.assertTrue(isinstance(rows, list))
Example #47
    def test_find(self):
        self._save_observations()
        rows = Observation.find(self.dataset)
        self.assertTrue(isinstance(rows, list))
Example #48
    def test_save_over_bulk(self):
        Observation.save(self.get_data('good_eats_large.csv'),
                         self.dataset)
        records = Observation.find(self.dataset)
        self.assertEqual(len(records), 1001)
Example #49
    def __decode(self, row):
        return Observation.encode(row,
                                  encoding=Observation.decoding(self.dataset))
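__decode inverts the column-name encoding: Observation.encoding maps dataset column names to the short keys stored in MongoDB, and Observation.decoding supplies the reverse map, applied here through Observation.encode. A sketch of the decode step (`dataset` and `record` are assumed names):

# Sketch: decoding a stored row back to dataset column names.
decoding = Observation.decoding(dataset)      # stored key -> column name
readable = Observation.encode(record, encoding=decoding)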
Example #50
    def test_encode_no_dataset(self):
        records = self.__save_records()

        for record in records:
            encoded = Observation.encode(record)
            self.assertEqual(dump_mongo_json(encoded), dump_mongo_json(record))
Example #51
    def test_save_over_bulk(self):
        Observation.save(self.get_data('good_eats_large.csv'),
                         self.dataset)
        records = Observation.find(self.dataset)

        self.assertEqual(len(records), 1001)
Example #52
    def save_observations(self, dframe):
        """Save rows in `dframe` for this dataset.

        :param dframe: DataFrame to save rows from.
        """
        return Observation.save(dframe, self)
Example #53
    def test_find(self):
        self.__save_records()
        rows = Observation.find(self.dataset)

        self.assertTrue(isinstance(rows, list))
Example #54
    def test_find_with_query(self):
        self.__save_records()
        rows = Observation.find(self.dataset, self.query_args)

        self.assertTrue(isinstance(rows, list))
Example #55
{"{{dataset_id_field}}": "{{dataset.dataset_id}}",
"{{form_meta_timeend}}": {
              "$gte": "{{period.start}}", 
              "$lte": "{{period.end}}"
            }
}
"""
aggregate_str = """
{"$group": {"_id": 0, "total": {"$sum": "$value.{{num_using_fp}}"}}}
"""
dataset_id = "5791793ac29b4d77b20cf1a04d8e7161"
dataset = Dataset.find_one(dataset_id)
period = Period.month_period(2013, 3)

if dataset:
    fields = Observation.encoding(dataset)
    fields["dataset"] = dataset
    fields['dataset_id_field'] = fields[DATASET_ID]
    fields['period'] = Period.month_period(2013, 3)
    mapper = Code(Template(mapper_str).render(fields))
    reducer = Code(Template(reducer_str).render(fields))
    query = json.loads(Template(query_str).render(fields))
    query['%(form_meta_timeend)s' % fields]['$gte'] = period.start
    query['%(form_meta_timeend)s' % fields]['$lte'] = period.end
    aggregate = json.loads(Template(aggregate_str).render(fields))
    results = db.observations.map_reduce(mapper,
                                         reducer,
                                         'myresults_fp',
                                         query=query)
    if results.count():
        value = results.aggregate(aggregate)