Example 1
def calculate_updates(dataset, new_data, new_dframe_raw=None,
                      parent_dataset_id=None, update_id=None):
    """Update dataset with `new_data`.

    This can result in race-conditions when:

    - deleting ``controllers.Datasets.DELETE``
    - updating ``controllers.Datasets.POST([dataset_id])``

    Therefore, perform these actions asynchronously.

    :param new_data: Data to update this dataset with.
    :param new_dframe_raw: DataFrame to update this dataset with.
    :param parent_dataset_id: If passed, add this ID as the parent ID
        column; default is None.
    :param update_id: The ID of the pending update to mark complete.
    """
    __ensure_ready(dataset, update_id)

    if new_dframe_raw is None:
        new_dframe_raw = dframe_from_update(dataset, new_data)

    new_dframe = recognize_dates(new_dframe_raw, dataset.schema)

    new_dframe = __add_calculations(dataset, new_dframe)

    # set parent id if provided
    if parent_dataset_id:
        new_dframe = add_parent_column(new_dframe, parent_dataset_id)

    dataset.append_observations(new_dframe)
    dataset.clear_summary_stats()

    propagate(dataset, new_dframe=new_dframe, update={'add': new_dframe_raw})

    dataset.update_complete(update_id)
Example 2
def csv_file_reader(name, na_values=None, delete=False):
    # Avoid a mutable default argument; pandas treats None as "use the
    # default NA values", so the behaviour is unchanged.
    try:
        return recognize_dates(
            pd.read_csv(name, encoding='utf-8', na_values=na_values))
    finally:
        if delete:
            os.unlink(name)
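Every example on this page routes freshly loaded data through recognize_dates. Its implementation is not shown here; a minimal sketch of the idea, assuming a dict-like schema whose entries carry a 'simpletype' key (mirroring the SIMPLETYPE/DATETIME constants used in the tests below), might look like:

import pandas as pd

def recognize_dates(dframe, schema=None):
    # Sketch only: probe text columns (or the columns a schema marks as
    # datetime) and convert the ones pandas can parse as dates.
    for column in dframe.columns:
        if schema is not None:
            # 'simpletype'/'datetime' are assumed keys, mirroring the
            # SIMPLETYPE/DATETIME constants used in the tests below.
            if schema.get(column, {}).get('simpletype') != 'datetime':
                continue
        elif dframe[column].dtype != object:
            continue
        try:
            dframe[column] = pd.to_datetime(dframe[column])
        except (ValueError, TypeError):
            pass  # non-date text columns are left untouched
    return dframe

Under these assumptions, 'submit_date'-style columns come back as datetimes while columns such as 'single_letter' stay strings, which is what the tests below assert.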
Example 3
    def setUp(self):
        TestBase.setUp(self)
        self.dataset = Dataset()
        self.dataset.save(
            self.test_dataset_ids['good_eats_with_calculations.csv'])
        dframe = recognize_dates(
            self.get_data('good_eats_with_calculations.csv'))
        self.dataset.save_observations(dframe)
        self.group = None
        self.places = 5
Example 4
    def test_recognize_dates_from_schema(self):
        schema = Schema({
            'submit_date': {
                SIMPLETYPE: DATETIME
            }
        })
        df_with_dates = recognize_dates(self.dframe, schema)

        for field in df_with_dates['submit_date']:
            self.assertTrue(isinstance(field, datetime))
Example 5
    def test_dframe(self):
        dataset = Dataset.create(self.test_dataset_ids['good_eats.csv'])
        dataset.save_observations(
            recognize_dates(self.get_data('good_eats.csv')))
        dframe = dataset.dframe()

        self.assertTrue(isinstance(dframe, DataFrame))
        self.assertTrue(all(self.get_data('good_eats.csv').reindex(
            columns=dframe.columns).eq(dframe)))
        columns = dframe.columns

        # ensure no reserved keys
        self.assertFalse(MONGO_ID_ENCODED in columns)

        # ensure date is converted
        self.assertTrue(isinstance(dframe.submit_date[0], datetime))
Example 6
    def test_dframe(self):
        dataset = Dataset.create(self.test_dataset_ids['good_eats.csv'])
        dataset.save_observations(
            recognize_dates(self.get_data('good_eats.csv')))
        records = list(Observation.find(dataset))
        dframe = dataset.dframe()

        self.assertTrue(isinstance(dframe, DataFrame))
        self.assertTrue(all(self.get_data('good_eats.csv').reindex(
                        columns=dframe.columns).eq(dframe)))
        columns = dframe.columns
        # ensure no reserved keys
        for key in MONGO_RESERVED_KEY_STRS:
            self.assertFalse(key in columns)
        # ensure date is converted
        self.assertTrue(isinstance(dframe.submit_date[0], datetime))
Example 7
def calculate_updates(dataset, new_data, new_dframe_raw=None,
                      parent_dataset_id=None, update_id=None):
    """Update dataset with `new_data`.

    This can result in race-conditions when:

    - deleting ``controllers.Datasets.DELETE``
    - updating ``controllers.Datasets.POST([dataset_id])``

    Therefore, perform these actions asynchronously.

    :param new_data: Data to update this dataset with.
    :param new_dframe_raw: DataFrame to update this dataset with.
    :param parent_dataset_id: If passed, add this ID as the parent ID
        column; default is None.
    :param update_id: The ID of the pending update to mark complete.
    """
    if not __update_is_valid(dataset, new_dframe_raw):
        dataset.remove_pending_update(update_id)
        return

    __ensure_ready(dataset, update_id)

    if new_dframe_raw is None:
        new_dframe_raw = dframe_from_update(dataset, new_data)

    new_dframe = recognize_dates(new_dframe_raw, dataset.schema)

    new_dframe = __add_calculations(dataset, new_dframe)

    # set parent id if provided
    if parent_dataset_id:
        new_dframe = add_parent_column(new_dframe, parent_dataset_id)

    dataset.append_observations(new_dframe)
    dataset.clear_summary_stats()

    propagate(dataset, new_dframe=new_dframe, update={'add': new_dframe_raw})

    dataset.update_complete(update_id)
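Per the docstring, this work is meant to run asynchronously to avoid racing the DELETE/POST controllers. The dispatch mechanism isn't shown on this page; a minimal sketch, assuming a Celery-style task queue and reusing the Dataset.find_one lookup seen in other examples here, might be:

from celery import shared_task

@shared_task
def calculate_updates_task(dataset_id, new_data, **kwargs):
    # Hypothetical async wrapper: pass the dataset ID rather than the
    # object so the task arguments stay serializable, then rehydrate.
    dataset = Dataset.find_one(dataset_id)
    calculate_updates(dataset, new_data, **kwargs)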
Example 8
def json_file_reader(content):
    return recognize_dates(pd.DataFrame(json.loads(content)))
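As a quick usage example (the payload and values below are invented for illustration):

import json

content = json.dumps([
    {'submit_date': '2013-01-05', 'rating': 4},
    {'submit_date': '2013-02-14', 'rating': 5},
])
dframe = json_file_reader(content)
# dframe['submit_date'] should now hold datetime values, not strings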
Example 9
    def recognize_dates(self):
        return recognize_dates(self)
Example 10
    def test_recognize_dates(self):
        with_dates = recognize_dates(self.dframe)
        for field in with_dates['single_letter']:
            self.assertTrue(isinstance(field, basestring))
Example 11
    def _verify_dataset(self, dataset_id, fixture_path):
        dframe = Dataset.find_one(dataset_id).dframe()
        expected_dframe = recognize_dates(
            pickle.load(open('%s%s' % (
                self.FIXTURE_PATH, fixture_path), 'rb')))
        self._check_dframes_are_equal(dframe, expected_dframe)
Example 12
    def test_recognize_dates_as_dates(self):
        df_with_dates = recognize_dates(self.dframe)

        for field in df_with_dates['submit_date']:
            self.assertTrue(isinstance(field, datetime))
Example 13
    def test_count(self):
        dataset = Dataset.create(self.test_dataset_ids['good_eats.csv'])
        dataset.save_observations(
            recognize_dates(self.get_data('good_eats.csv')))

        self.assertEqual(len(dataset.dframe()), dataset.count())
Example 14
    def _save_observations(self):
        return Observation.save(
            recognize_dates(self.get_data('good_eats.csv')), self.dataset)
Example 15
    def test_recognize_dates(self):
        dframe = self.get_data('soil_samples.csv')
        with_dates = recognize_dates(dframe)

        for field in with_dates['single_letter']:
            self.assertTrue(isinstance(field, basestring))