def setUp(self):
    """Load the CSV fixtures and wrap each one in a ``Dataset``.

    ``self.iris`` is built from ``iris.csv`` with the default ``Dataset``
    arguments; ``self.tips`` is built from ``tips.csv`` with ``tip`` as the
    dependent column.
    """
    # Classification fixture.
    self.iris = Dataset(pd.read_csv('iris.csv'))

    # Regression fixture: the target is the 'tip' column.
    self.tips = Dataset(pd.read_csv('tips.csv'), dependent_col='tip')
def generate_metafeatures_from_server(file_id, target_field, **kwargs):
    """Fetch a data set by id and generate its metafeatures.

    The text returned by ``get_file_from_server(file_id)`` is parsed with
    ``pd.read_csv`` using ``sep=None`` and the ``python`` engine, wrapped in
    a classification ``Dataset`` whose dependent column is *target_field*,
    and handed to ``generate_metafeatures``.

    Args:
        file_id: Identifier passed to ``get_file_from_server``.
        target_field: Name of the dependent (target) column.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_csv``.
            ``sep`` and ``engine`` are already set here, so passing either
            one raises ``TypeError``.

    Returns:
        Whatever ``generate_metafeatures`` returns for the data set.
    """
    # Expose the downloaded text as a file-like object for the CSV parser.
    text_buffer = StringIO(get_file_from_server(file_id))
    frame = pd.read_csv(text_buffer, sep=None, engine='python', **kwargs)

    wrapped = Dataset(
        frame,
        dependent_col=target_field,
        prediction_type='classification',
    )
    return generate_metafeatures(wrapped, target_field)
def get_metafeatures(df):
    """Compute every public metafeature of *df*.

    *df* is wrapped in a ``Dataset`` with ``'class'`` as the dependent
    column and ``'classification'`` as the prediction type. Each public,
    callable attribute of that object is then called with no arguments.

    Args:
        df: The data to describe; passed straight to ``Dataset``.

    Returns:
        An ``OrderedDict`` mapping each public callable's name to the value
        it returned, in the (alphabetical) order produced by ``dir()``.
    """
    dataset = Dataset(df, dependent_col='class', prediction_type='classification')
    meta_features = OrderedDict()
    for name in dir(dataset):
        # One leading-underscore test also covers dunder names. Filtering
        # before getattr avoids evaluating private properties/descriptors
        # whose values would be discarded anyway.
        if name.startswith('_'):
            continue
        attribute = getattr(dataset, name)
        if callable(attribute):
            meta_features[name] = attribute()
    return meta_features
def generate_metafeatures_from_filepath(input_file, target_field, **kwargs):
    """Generate metafeatures for a delimited file using dataset_describe.

    The file is parsed with ``pd.read_csv`` using ``sep=None`` and the
    ``python`` engine, wrapped in a classification ``Dataset`` whose
    dependent column is *target_field*, and handed to
    ``generate_metafeatures``.

    Args:
        input_file: Path or file-like object accepted by ``pd.read_csv``.
        target_field: Name of the dependent (target) column.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_csv``.
            ``sep`` and ``engine`` are already set here, so passing either
            one raises ``TypeError``.

    Returns:
        Whatever ``generate_metafeatures`` returns for the data set.
    """
    # Load the whole data set into memory.
    frame = pd.read_csv(input_file, sep=None, engine='python', **kwargs)

    return generate_metafeatures(
        Dataset(
            frame,
            dependent_col=target_field,
            prediction_type='classification',
        ),
        target_field,
    )