def test_dyad_aggression_by_year(self):
    """Check dyad_aggression_by_year() in its 'series' form and its default form.

    The expected values below are hard-coded, so (as the logged warning says)
    they only hold when the sample data is in use.
    """
    rv = PandasGdeltHelper.dyad_aggression_by_year('series')
    assert rv is not None
    logging.warning("These tests will all be dependent on using the sample data.")

    # Positional lookup is spelled out with .iloc; a bare integer key on a
    # non-integer index only worked through pandas' positional fallback,
    # which newer pandas versions deprecate.
    assert rv.iloc[18] == 1.0
    assert rv.index[18] == ('ALBGOV', 'CUB', 1982, False)

    # Sanity check on the third index level. This is not quite the same as
    # accessing one row by MultiIndex: it decodes the level value by hand.
    # MultiIndex.labels was renamed to .codes in pandas 0.24 and removed in
    # 1.0, so prefer .codes and fall back to .labels on older installs.
    index = rv.index
    level_codes = index.codes if hasattr(index, 'codes') else index.labels
    assert 1982 == index.levels[2][level_codes[2][27]]

    df = PandasGdeltHelper.dyad_aggression_by_year()
    assert df is not None
    assert 'AUS' == df.loc(0)[25]['actor1code']
def test_events_from_local_files(self):
    """Load the 'events' table through a helper built with data_source='local'.

    Skipped whenever LONG_TEST_TOLERANCE is below 120. Only checks that
    events() returns something other than None.
    """
    too_slow_for_this_run = LONG_TEST_TOLERANCE < 120
    if too_slow_for_this_run:
        skip_reason = ('Test takes 2 minutes or more. Would be better to work on some smaller '
                       'stubbed files.')
        self.skipTest(skip_reason)

    local_helper = PandasGdeltHelper(data_source='local', table_name='events')
    events = local_helper.events()
    assert events is not None
    # Known gap, surfaced at runtime: nothing here verifies the data origin.
    logging.warning("This isn't testing that the data being used is local data.")
def test_events_default(self):
    """Load the 'events' table through a helper built without a data_source.

    Skipped whenever LONG_TEST_TOLERANCE is below 120. Only checks that
    events() returns something other than None.
    """
    too_slow_for_this_run = LONG_TEST_TOLERANCE < 120
    if too_slow_for_this_run:
        skip_reason = ('Test takes 2 minutes or more. Would be better to work on some smaller '
                       'stubbed files.')
        self.skipTest(skip_reason)
        # Moving to smaller stubbed files would mean changing the interface so
        # that settings can be passed in -- which would be a very good thing.

    default_helper = PandasGdeltHelper(table_name='events')
    events = default_helper.events()
    assert events is not None
def prepare_data(self):
    """Fetch the events data and store a train/test split on the instance.

    Features are the 'fractiondate' and 'goldsteinscale' columns and the
    target is 'avgtone'; both are reshaped to one row per event. The split
    holds out test_size=0.33 and is written to self._X_train, self._X_test,
    self._y_train and self._y_test. The loaded data is kept on
    self._events_data.
    """
    events = PandasGdeltHelper.events()
    self._events_data = events

    feature_frame = events[['fractiondate', 'goldsteinscale']]
    features = np.array(feature_frame).reshape(events.shape[0], -1)
    # Reshaping gives the target an explicit column axis: (rows, -1).
    target = np.array(events.avgtone).reshape(events.shape[0], -1)

    self._X_train, self._X_test, self._y_train, self._y_test = train_test_split(
        features, target, test_size=0.33
    )

    # NOTE(review): this check runs only after the data has already been
    # indexed above, so a None result would fail earlier than this line.
    assert self._events_data is not None
def test_dyad_events_by_year(self):
    """Exercise the instance-based path: build a helper for a table, then fetch it.

    Current thinking on the 'conventional' path going forward: instantiate
    the helper class with a table name and load that table. What is still
    unclear is how to handle functionality that does not correspond to a
    table in the datafiles. For now, something like
    test_dyad_aggression_by_year keeps using class methods rather than
    instance methods.

    The obvious solution is to make PandasGdeltHelper smart enough to handle
    'virtual tables' such as dyad_aggression_by_year, which require some data
    munging. Open question: should the logic (what counts as an aggressive
    code?) live in the data helper or in the classification file?
    """
    dyad_helper = PandasGdeltHelper('dyad_events_by_year')
    assert dyad_helper is not None

    self.skipTest('fetch() method is not yet fully implemented.')

    # Presumably not reached while the skip above is in place (unittest's
    # skipTest raises); kept ready for when fetch() is implemented.
    fetched = dyad_helper.fetch()
    assert fetched.shape[0] > 0
    assert fetched.shape[1] > 0
    assert fetched is not None
def test_country_features(self):
    """Check that country_features() returns data carrying the derived columns.

    The previous assertion compared each selected column against None, which
    can never fail: selecting an existing column is never None, and a missing
    one raises KeyError before the assertion is evaluated. Column membership
    is checked instead so a missing column is reported as a test failure that
    names the offending columns.
    """
    df = PandasGdeltHelper.country_features()
    assert df is not None

    new_columns = ['proportion_actor1', 'aggregate_relationships']
    # Assumes country_features() returns a pandas DataFrame (it is indexed by
    # column name in the original test) -- TODO confirm.
    missing = [column for column in new_columns if column not in df.columns]
    assert not missing, "country_features() is missing columns: %s" % missing
def test_events_from_sample_data(self):
    """Load the 'events' table through a helper built with data_source='sample'.

    Only checks that events() returns something other than None.
    """
    sample_helper = PandasGdeltHelper(data_source='sample', table_name='events')
    events = sample_helper.events()
    assert events is not None
    # Known gap, surfaced at runtime: nothing here verifies the data origin.
    logging.warning("This isn't testing that the data being used is sample data.")