def get_catch22_features(series):
    """Run the complete catch22 feature set on a single time series.

    The result of ``catch22.catch22_all`` is returned untouched: a dict of
    the form ``{'names': [22 feature names], 'values': [22 feature values]}``.
    """
    # Local import: the catch22 package is only loaded when this is called.
    from catch22 import catch22_all

    return catch22_all(series)
Exemple #2
0
def featurize(data, sample_rate):
    """
    Replace every multivariate time series in ``data`` with its feature matrix, in place.

    For each matrix, every row (one univariate channel) is turned into a list of
    scalar features: the catch22 values followed by whatever
    ``mean_var_features`` returns. After the call, position (i, j) of each
    matrix holds the j-th feature computed on the i-th channel of the EEG signal.

    Parameters
    ----------
    data : list(numpy.ndarray(float))
        List of matrices of data for individual subjects, size channels by
        time series length. May be empty, in which case nothing happens.
    sample_rate : int
        Sample rate in Hz; forwarded to ``mean_var_features``.

    Returns
    -------
    None
        ``data`` is modified in place.
    """
    # fixed ``tau`` argument handed to mean_var_features (its meaning is defined there)
    tau = 5

    for i, matrix in enumerate(data):
        # Take the channel count from this matrix, not from data[0]: this keeps
        # an empty list from crashing and lets subjects differ in channel count.
        num_channels = matrix.shape[0]

        # flat list temporarily holding the features of all channels
        features = []

        for j, channel in enumerate(matrix):
            # this print statement can be removed, but lets you know that things are moving
            print("working on: " + str((i, j)))
            # catch22 features first, then the mean/variance based ones
            features += catch22.catch22_all(channel.tolist())['values']
            features += mean_var_features(channel, tau, sample_rate)

        # one row of features per channel
        data[i] = np.array(features).reshape(num_channels, -1)
Exemple #3
0
    def predict_proba(self, X, check_input=True):
        """Predict class probabilities from the catch22 features of each instance.

        Parameters
        ----------
        X : nested pandas DataFrame
            One row per instance, one series per cell. A single column is
            handled as univariate data, several columns as multivariate data.
        check_input : bool, default=True
            When True the converted data is validated with ``check_array``.
            When False it is only converted to a float64 numpy array.

        Returns
        -------
        The result of ``self.bagging_classifier_.predict_proba`` on the
        catch22 feature matrix.

        Raises
        ------
        ValueError
            If the converted array is not 2-D or 3-D, or its number of
            dimensions / series length differ from ``self.n_dims_`` /
            ``self.n_timestep_``.
        """
        # Unpack the nested DataFrame into plain nested lists.
        n_rows = len(X)
        if len(X.iloc[0]) == 1:  # univariate: (instances, timesteps)
            X = [np.array(X.iloc[i].iloc[0]).tolist() for i in range(n_rows)]
        else:  # multivariate: (instances, dimensions, timesteps)
            X = [
                [
                    np.array(X.iloc[i].iloc[j]).tolist()
                    for j in range(len(X.iloc[i]))
                ]
                for i in range(n_rows)
            ]

        if check_input:
            X = check_array(X, dtype=np.float64, allow_nd=True, order="C")
        else:
            # Without this the nested list would reach the ``X.ndim`` checks
            # below and fail with AttributeError.
            X = np.asarray(X, dtype=np.float64)

        if X.ndim < 2 or X.ndim > 3:
            raise ValueError("illegal input dimensions X.ndim ({})".format(
                X.ndim))

        if self.n_dims_ > 1 and X.ndim != 3:
            raise ValueError("illegal input dimensions X.ndim != 3")

        if X.shape[-1] != self.n_timestep_:
            raise ValueError("illegal input shape ({} != {})".format(
                X.shape[-1], self.n_timestep_))

        if X.ndim > 2 and X.shape[1] != self.n_dims_:
            # was ``self.n_dims`` (no such attribute) with an unbalanced message
            raise ValueError("illegal input shape ({} != {})".format(
                X.shape[1], self.n_dims_))

        if X.dtype != np.float64 or not X.flags.contiguous:
            X = np.ascontiguousarray(X, dtype=np.float64)

        # One flat row per instance (all dimensions concatenated).
        X = X.reshape(X.shape[0], self.n_dims_ * self.n_timestep_)

        # compute catch22 features, one row of values per instance
        X_catch22 = np.array([catch22_all(series)['values'] for series in X])

        # replace the rare nans / infs
        X_catch22[~np.isfinite(X_catch22)] = 0

        return self.bagging_classifier_.predict_proba(X_catch22)
Exemple #4
0
    def predict_proba(self, X):
        """Return class probabilities predicted from catch22 features.

        ``X`` is validated with ``check_X`` (multivariate allowed, coerced to
        numpy), flattened to one row per instance, and each row is summarised
        by ``catch22_all``. NaN and +/-inf feature values are set to 0 before
        the feature matrix is handed to ``self.classifier.predict_proba``.
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=False, coerce_to_numpy=True)

        # one flat series per instance
        flat = np.reshape(X, (X.shape[0], -1))

        X_c22 = np.array([catch22_all(row)["values"] for row in flat])
        # in-place clean-up of non-finite feature values
        np.nan_to_num(X_c22, copy=False, nan=0, posinf=0, neginf=0)

        return self.classifier.predict_proba(X_c22)
Exemple #5
0
def feature_channel(channel, fsamp=256, band=range(0, 45)):
    """Turn the time series of one channel into a flat list of features.

    The list holds the catch22 values followed by two spectral summaries
    derived from ``pyeeg.bin_power``: the mean of the binned power and the
    power-weighted mean bin index.

    Args:
        channel: 1-D array-like
        fsamp: sampling rate in Hz.
        band: band-pass in Hz.
    Returns:
        list: 1-D array-like
    """
    # catch22 part
    features = catch22.catch22_all(channel)['values']

    # spectral part: first element of bin_power's result
    # (presumably the power per bin -- confirm against pyeeg)
    binned = pyeeg.bin_power(channel, Band=band, Fs=fsamp)[0]
    mean_power = np.mean(binned)

    # normalise the power so it sums to one, then take the expected bin index
    weights = np.array(binned) / np.sum(binned)
    bin_index = np.arange(len(binned))
    mean_bin = np.sum(bin_index * weights)

    features.extend((mean_power, mean_bin))
    return features
Exemple #6
0
    def fit(self, X, y):
        """Train a random forest on the catch22 features of ``X``.

        Parameters
        ----------
        X : nested pandas DataFrame
            Time series in cells; passed through ``check_X`` with
            ``enforce_univariate=False`` and ``coerce_to_numpy=True``.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self : object
        """
        X = check_X(X, enforce_univariate=False, coerce_to_numpy=True)
        # one flat series per instance
        flat = np.reshape(X, (X.shape[0], -1))

        labels = np.asarray(y).reshape(-1, 1)
        self.classes_ = class_distribution(labels)[0][0]

        feature_rows = [catch22_all(row)["values"] for row in flat]

        self.classifier = RandomForestClassifier(
            n_jobs=self.n_jobs,
            n_estimators=self.n_estimators,
            random_state=self.random_state,
        )

        X_c22 = np.array(feature_rows)
        # zero out NaN / +inf / -inf in place
        np.nan_to_num(X_c22, copy=False, nan=0, posinf=0, neginf=0)

        self.classifier.fit(X_c22, y)
        self._is_fitted = True

        return self
Exemple #7
0
    def transform(self, X, y=None):
        """Compute the catch22 features of every instance in ``X``.

        Parameters
        ----------
        X : pandas DataFrame, input time series
        y : array_like, target values (optional, ignored)

        Returns
        -------
        pandas DataFrame with one row of catch22 values per input instance
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=False, coerce_to_numpy=True)

        # flatten every instance to a single series before extracting features
        flat = np.reshape(X, (X.shape[0], -1))
        rows = [catch22.catch22_all(row)["values"] for row in flat]

        return pd.DataFrame(rows)
Exemple #8
0
    def fit(self, X, y, sample_weight=None, check_input=True):
        """Fit a random catch22 feature forest classifier

        Parameters
        ----------
        X : nested pandas DataFrame
            One row per instance, one series per cell. A single column is
            handled as univariate data, several columns as multivariate data.
        y : array-like
            Class labels, either 1-D or a 2-D indicator matrix with a single
            non-zero entry per row.
        sample_weight : optional
            Forwarded to ``BaggingClassifier.fit``.
        check_input : bool, default=True
            When True ``X`` and ``y`` are validated with ``check_array``.
            When False they are only converted to numpy arrays.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the converted ``X`` is not 2-D or 3-D, or the number of labels
            does not match the number of instances.
        """
        # Unpack the nested DataFrame into plain nested lists.
        n_rows = len(X)
        if len(X.iloc[0]) == 1:  # univariate: (instances, timesteps)
            X2 = [np.array(X.iloc[i].iloc[0]).tolist() for i in range(n_rows)]
        else:  # multivariate: (instances, dimensions, timesteps)
            X2 = [
                [
                    np.array(X.iloc[i].iloc[j]).tolist()
                    for j in range(len(X.iloc[i]))
                ]
                for i in range(n_rows)
            ]

        if check_input:
            X = check_array(X2, dtype=np.float64, allow_nd=True, order="C")
            y = check_array(y, ensure_2d=False)
        else:
            # Previously the converted data was dropped here and the raw
            # DataFrame was treated as the numeric array.
            X = np.asarray(X2, dtype=np.float64)
            y = np.asarray(y)

        if X.ndim < 2 or X.ndim > 3:
            raise ValueError("illegal input dimension")

        n_samples = X.shape[0]
        self.n_timestep_ = X.shape[-1]
        n_dims = X.shape[1] if X.ndim > 2 else 1
        self.n_dims_ = n_dims

        if y.ndim == 1:
            self.classes_, y = np.unique(y, return_inverse=True)
        else:
            # indicator matrix: the column index of the non-zero entry is the label
            _, y = np.nonzero(y)
            if len(y) != n_samples:
                raise ValueError("Single label per sample expected.")
            self.classes_ = np.unique(y)

        if len(y) != n_samples:
            raise ValueError("Number of labels={} does not match "
                             "number of samples={}".format(len(y), n_samples))

        if X.dtype != np.float64 or not X.flags.contiguous:
            X = np.ascontiguousarray(X, dtype=np.float64)

        if not y.flags.contiguous:
            y = np.ascontiguousarray(y, dtype=np.intp)

        # Resolved only after the inputs passed validation.
        random_state = check_random_state(self.random_state)

        tree_classifier = treeClassifier(
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            metric=self.metric,
            metric_params=self.metric_params,
            random_state=random_state,
        )

        if n_dims > 1:
            tree_classifier.force_dim = n_dims

        self.bagging_classifier_ = BaggingClassifier(
            base_estimator=tree_classifier,
            bootstrap=self.bootstrap,
            n_jobs=self.n_jobs,
            n_estimators=self.n_estimators,
            random_state=self.random_state,
        )

        # One flat row per instance (all dimensions concatenated).
        X = X.reshape(n_samples, n_dims * self.n_timestep_)

        # compute catch22 features, one row of values per instance
        X_catch22 = np.array([catch22_all(series)['values'] for series in X])

        # Zero out the rare NaN / inf feature values so training sees the
        # same clean-up that is applied to features at prediction time.
        X_catch22[~np.isfinite(X_catch22)] = 0

        self.bagging_classifier_.fit(X_catch22, y, sample_weight=sample_weight)
        return self
Exemple #9
0
def _get_features(ts: Dict[str, Any]) -> Dict[str, Any]:
    """Return a ``{feature name: feature value}`` dict for ``ts["target"]``.

    The series stored under the ``"target"`` key is passed to
    ``catch22.catch22_all`` and its ``names`` / ``values`` lists are paired up.
    """
    result = catch22.catch22_all(ts["target"])
    names, values = result["names"], result["values"]
    return {name: value for name, value in zip(names, values)}
# Raw export, read from a fixed local path.
d = pd.read_csv(
    "/Users/trenthenderson/Documents/R/google-trends/data/31_Aug_data.csv")

#%%
#---------------- HCTSA -------------------------------

# Every distinct keyword gets its own catch22 feature table.
searches = d['interest_over_time_keyword'].unique()
search_data = []

for s in searches:
    # rows of this keyword, with incomplete rows removed
    tmp1 = d[d['interest_over_time_keyword'] == s].dropna()
    # hit counts as a numpy array (selected as a one-column frame)
    tmp2 = tmp1[['interest_over_time_hits']].to_numpy()

    # catch22 output as a frame, tagged with the keyword it belongs to
    results = pd.DataFrame.from_dict(catch22_all(tmp2))
    results['keyword'] = s
    search_data.append(results)

# stack the per-keyword tables into one frame
search_data = pd.concat(search_data)

#%%
#---------------- VISUALISATION -----------------------

# Standardise values: z-score within each group of rows sharing a feature name

standardised = search_data.groupby('names').transform(
    lambda x: (x - x.mean()) / x.std())
heat_data = search_data.assign(values=standardised)

heat_data = pd.pivot_table(heat_data,
Exemple #11
0
import catch22

# Print every catch22 feature of each whitespace-separated test data file.
for dataFile in ['../testData/test.txt', '../testData/test2.txt']:

    # The Python 2 form ``print('\n'), dataFile`` only prints the newline
    # under Python 3 and silently drops the file name.
    print('\n', dataFile)

    # ``with`` closes the file; ``split()`` tolerates blank lines and
    # repeated spaces, which ``split(' ')`` turned into un-parseable ''.
    with open(dataFile) as fh:
        data = [line.split() for line in fh]
    flat_data = [float(item) for sublist in data for item in sublist]

    catchOut = catch22.catch22_all(flat_data)

    featureNames = catchOut['names']
    featureValues = catchOut['values']

    for featureName, featureValue in zip(featureNames, featureValues):
        print('%s : %1.6f' % (featureName, featureValue))
 def series_features(series):
     """Return the catch22 output for ``series`` as a ``{name: value}`` dict."""
     result = catch22_all(series)
     return {
         name: value
         for name, value in zip(result['names'], result['values'])
     }
Exemple #13
0
def extract_stats(temp_list):
    """Return the catch22 feature values computed for ``temp_list``.

    The values are looked up by their ``'values'`` key rather than by their
    position in the result dict, so the function keeps returning the values
    even if ``catch22_all`` adds further keys to its result.
    """
    return catch22_all(temp_list)['values']
Exemple #14
0
    return d6


#%%

# Extract one series per recovery category (second argument 1 / 0)

did_recover = the_extractor(d4, 1)
did_not_recover = the_extractor(d4, 0)

#%%

# Apply catch22

did_recover_output = pd.DataFrame.from_dict(catch22_all(did_recover))
did_recover_output['category'] = 1

did_not_recover_output = pd.DataFrame.from_dict(catch22_all(did_not_recover))
did_not_recover_output['category'] = 0

#%%

# Bind all together
# (pd.concat instead of DataFrame.append, which was removed in pandas 2.0;
# like append, it keeps the original row indexes)

final = pd.concat([did_recover_output, did_not_recover_output])

# Recode binary to words

final['category_word'] = [
# Extract one series per five-year age band from d1

age_15, age_20, age_25, age_30, age_35, age_40, age_45 = [
    the_extractor(d1, band)
    for band in ("15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49")
]

#%%

# Apply catch22 to each band and tag the result with the band it came from


def _catch22_by_age(series, age_group):
    """Return the catch22 output of ``series`` as a frame labelled ``age_group``."""
    frame = pd.DataFrame.from_dict(catch22_all(series))
    frame['age_group'] = age_group
    return frame


age_15_output = _catch22_by_age(age_15, '15-19')

age_20_output = _catch22_by_age(age_20, '20-24')

age_25_output = _catch22_by_age(age_25, '25-29')

age_30_output = _catch22_by_age(age_30, '30-34')

age_35_output = _catch22_by_age(age_35, '35-39')

age_40_output = pd.DataFrame.from_dict(catch22_all(age_40))