def calculate_cooccurrence_matrix(z, n_groups=None):
    """Return the sample-by-sample co-occurrence matrix of a labelling.

    Entry ``(i, j)`` of the result is 1.0 when samples ``i`` and ``j`` carry
    the same label and 0.0 otherwise.

    Parameters
    ----------
    z : sequence of int
        Group label of each sample. Labels are used directly as row indices
        into an ``n_groups`` x ``n_groups`` identity matrix.
    n_groups : int, optional
        Number of groups. Defaults to the number of distinct labels in ``z``.

    Returns
    -------
    numpy.ndarray
        For 1-D ``z``, a float array of shape ``(len(z), len(z))``.
    """
    if n_groups is None:
        n_groups = np.unique(z).shape[0]

    # Dummy (one-hot) encode group membership: row k of the identity matrix
    # is the indicator vector of group k.
    indicator = np.eye(n_groups)[z]
    return np.dot(indicator, indicator.T)
def get_obsfeatures(features):
    """Return the distinct values decoded from the distinct ``features``.

    Every distinct entry of ``features`` is passed once to ``read_2bit``
    (defined elsewhere in this module); the decoded values are pooled and
    de-duplicated.

    Parameters
    ----------
    features : array-like
        Encoded feature values; duplicates are decoded only once.

    Returns
    -------
    numpy.ndarray
        Sorted unique decoded values (empty when ``features`` is empty).
    """
    decoded = []
    for ufeature in np.unique(features):
        # presumably read_2bit returns a list of decoded values; verify
        # against its definition.
        decoded += read_2bit(ufeature)
    return np.unique(decoded)
def get_locs(docents):
    """Collect location and person mentions from a sequence of entities.

    Parameters
    ----------
    docents : iterable
        Entity objects exposing ``label_`` and ``text`` attributes
        (presumably spaCy entities -- confirm against the caller).

    Returns
    -------
    tuple
        ``(locations, top_location, people)``.

        * No location found: ``([''], '', people)`` where ``people`` is the
          array of distinct person names, or ``['']`` if there are none.
        * Otherwise: the sorted distinct normalised location strings as a
          list, the most frequent one (ties go to the first in sorted
          order), and ``np.unique`` of the person names. Note that in this
          branch "no people" is an empty array, not ``['']``.
    """
    locations = []
    people = []
    for entity in docents:
        if entity.label_ in loc_labels:
            # Normalise: lowercase, then keep only a-z, commas and spaces.
            locations.append(re.sub("[^a-z, ]", "", entity.text.lower()))
        elif entity.label_ == 'PERSON':
            people.append(entity.text)

    if not locations:
        return [''], '', np.unique(people) if people else ['']

    unique_locs, counts = np.unique(locations, return_counts=True)
    return unique_locs.tolist(), unique_locs[np.argmax(counts)], np.unique(people)
def get_number_of_frames_per_particle(dpeaks, select=None):
    """Count the distinct frames in which each particle appears.

    TODO: add a selector and allow extraction of only a part of the data
    (``select`` is currently unused).

    Parameters
    ----------
    dpeaks : mapping of arrays
        Must provide ``dpeaks['Mp']`` (particle id per row) and
        ``dpeaks['frame']`` (frame per row), both supporting boolean-mask
        indexing.
    select : optional
        Reserved for future use; ignored.

    Returns
    -------
    tuple of numpy.ndarray
        ``(particle_ids, length_frames)`` where ``length_frames[i]`` is the
        number of distinct frames recorded for ``particle_ids[i]``.
    """
    import numpy as np

    particle_max = dpeaks['Mp'].max()
    particle_ids = []
    length_frames = []
    # NOTE(review): range() stops at particle_max - 1, so the particle with
    # the largest id is never reported -- confirm this is intended.
    for particle in range(particle_max):
        frames = dpeaks['frame'][dpeaks['Mp'] == particle]
        particle_ids.append(particle)
        length_frames.append(len(np.unique(frames)))
    return np.array(particle_ids), np.array(length_frames)
def _equi_depth_conversion(self, data, min_zero=True, col_name='KPI', B=4):
    """Bin ``data`` into at most ``B`` equal-depth (quantile) bins.

    Bin edges are the ``0, 1/B, ..., 1`` quantiles of the non-NaN values.
    When ties make several edges coincide, the duplicates are dropped so
    fewer than ``B`` bins may result.

    Parameters
    ----------
    data : numpy.ndarray or pandas.Series
        Numeric values; NaNs are ignored when computing the edges.
    min_zero, col_name :
        Unused by this method.
    B : int
        Requested number of bins (default four).

    Returns
    -------
    Bin index of every element as floats (``0.0 .. n_bins - 1``), or
    ``['Stable'] * len(data)`` when the values do not yield at least two
    distinct edges (e.g. constant data).
    """
    # ``x is np.nan`` is unreliable; np.isnan() is the dependable test.
    num = data[~np.isnan(data)]  # NaNs must not influence the split values

    # One edge per quantile, including the 100th percentile. The previous
    # loop stopped one short and left the last edge at 0.
    q = np.linspace(0.0, 1.0, B + 1)
    v = np.percentile(num, 100.0 * q)

    # pd.qcut rejects repeated edges, so keep only the first quantile that
    # produces each distinct edge value.
    _, idx_uni = np.unique(v, return_index=True)
    q_uni = q[idx_uni]
    B_uni = len(q_uni) - 1

    # The former guard ``abs(std / mean) < 0.0`` could never be true. A
    # single distinct edge is the "no variation" case it was meant to catch.
    if B_uni < 1:
        return ['Stable'] * len(data)

    ret = pd.qcut(data, q=q_uni, labels=list(range(B_uni)))
    # Float output so elements that fall in no bin (NaN input) stay NaN.
    return ret.astype(float)
def _leave1_range_out(x_set, y_set, ranges, shuffle):
    """Generate leave-one-range-out splits, class by class.

    For every class, each of its ranges is held out once as the test range
    while the remaining ranges of that same class form the training ranges.

    Parameters:
        x_set: np.ndarray
            X set of the dataset to be split (not used here).
        y_set: np.ndarray
            Y set of the dataset to be split; its distinct values define
            the classes.
        ranges: 2D array-like
            ``ranges[i]`` holds the ranges belonging to the i-th distinct
            class of ``y_set`` (in sorted order).
        shuffle: bool
            If True, shuffles the train and test ranges in place with
            ``np.random.shuffle``.

    Yields:
        class_out: Class whose range was taken out
        train_ranges: Flattened training ranges
        test_range: Test range

    Raises:
        ValueError: from ``np.hstack`` when a class has only one range, so
            nothing is left to train on.
    """
    classes = np.unique(y_set)
    for class_out, class_ranges in zip(classes, ranges):
        for range_out_index in range(len(class_ranges)):
            train_ranges = list(class_ranges)
            test_range = np.array(train_ranges.pop(range_out_index))
            # The second hstack flattens one more level when the first pass
            # is still 2-D; it leaves already-flat input unchanged.
            train_ranges = np.hstack(np.hstack(train_ranges))
            if shuffle:
                np.random.shuffle(train_ranges)
                np.random.shuffle(test_range)
            yield class_out, train_ranges, test_range
def t_unique(data1, pkey):
    """Filter ``data1`` when column ``pkey`` contains duplicate values.

    The first-occurrence index of every distinct ``data1[pkey]`` value is
    computed. If that is fewer than the number of rows, the indices are
    handed to ``t_filter`` (defined elsewhere in this module; presumably it
    reduces ``data1`` in place to those rows -- confirm). Nothing happens
    when all keys are already unique.

    Parameters
    ----------
    data1 : mapping of arrays
        Table-like object indexable by column name.
    pkey : str
        Name of the key column.

    Returns
    -------
    None
    """
    # Use the ``pkey`` argument here; the literal string 'pkey' only worked
    # when the column happened to be called that.
    _, ind = np.unique(data1[pkey], return_index=True)
    if ind.size < data1[pkey].size:
        t_filter(data1, ind)