Example #1
import numpy as np

def calculate_cooccurrence_matrix(z, n_groups=None):
    if n_groups is None:
        n_groups = np.unique(z).shape[0]

    # dummy encode group membership: row i is the one-hot vector of z[i]
    indicator = np.eye(n_groups)[z]

    # entry (i, j) is 1 iff samples i and j share a group
    return np.dot(indicator, indicator.T)
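A quick usage sketch (the group labels in z below are made up for illustration):

z = np.array([0, 1, 0, 2])
M = calculate_cooccurrence_matrix(z)
# M[0, 2] == 1 because samples 0 and 2 are both in group 0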
Example #2
import numpy as np

def get_obsfeatures(features):

    u_features = np.unique(features)

    # gather the observed features from each unique input
    # (read_2bit is a project-specific helper defined elsewhere)
    obs_features = []
    for ufeature in u_features:
        obs_features += read_2bit(ufeature)

    return np.unique(obs_features)
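A sketch with a stand-in read_2bit (the real helper is project-specific; the stub below just fabricates feature codes):

def read_2bit(name):  # stand-in stub for the project helper
    return [len(str(name)), 7]

print(get_obsfeatures(['a', 'a', 'bb']))  # -> [1 2 7]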
Example #3
import re

import numpy as np

def get_locs(docents):
    res = []
    people = []
    for entity in docents:
        # loc_labels is a module-level collection of location entity labels
        if entity.label_ in loc_labels:
            res.append(re.sub("[^a-z, ]", "", entity.text.lower()))
        elif entity.label_ == 'PERSON':
            people.append(entity.text)
    if not res:
        return [''], '', np.unique(people) if people else ['']
    else:
        # the most frequent location becomes the representative one
        res, counts = np.unique(res, return_counts=True)
        return res.tolist(), res[np.argmax(counts)], np.unique(people)
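A minimal sketch with stand-in entity objects; real callers would pass spaCy doc.ents, and the loc_labels value below is an assumption:

from collections import namedtuple

Ent = namedtuple('Ent', ['label_', 'text'])  # stand-in for spaCy entity spans
loc_labels = {'GPE', 'LOC'}                  # assumed contents of the global

ents = [Ent('GPE', 'Paris'), Ent('GPE', 'Paris'), Ent('PERSON', 'Ada')]
locs, top_loc, people = get_locs(ents)
# locs -> ['paris'], top_loc -> 'paris', people -> array(['Ada'])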
Example #4
def get_number_of_frames_per_particle(dpeaks, select=None):
    """
    Return the number of frames for each particle.

    TODO: add a selector and allow extracting only part of the data.
    """
    import numpy as np
    particle_max = dpeaks['Mp'].max()
    length_frames = []
    particle_ids = []
    for particle in range(particle_max + 1):  # + 1 so the highest particle id is included
        particle_ids.append(particle)
        length_frames.append(len(np.unique(dpeaks['frame'][dpeaks['Mp'] == particle])))
    return np.array(particle_ids), np.array(length_frames)
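A usage sketch with a made-up dpeaks mapping (any dict of NumPy arrays keyed by 'Mp' and 'frame' works the same way):

import numpy as np

dpeaks = {'Mp':    np.array([0, 0, 1, 1, 1]),
          'frame': np.array([0, 1, 0, 0, 2])}
ids, n_frames = get_number_of_frames_per_particle(dpeaks)
# ids -> [0, 1]; n_frames -> [2, 2]  (particle 1 appears in frames {0, 2})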
Example #5
    def _equi_depth_conversion(self, data, min_zero=True, col_name='KPI', B=4):
        """Auto binning; defaults to four bins. Returns 'Stable' if the values barely vary."""
        # requires numpy as np and pandas as pd imported at module level
        # the trick "i is np.nan" doesn't work for floats; use np.isnan() instead
        num = data[~np.isnan(data)]  # remove NaN for split-value calculation
        # num = [i for i in data if ~np.isnan(i)]  # alternative way is slower
        # small coefficient of variation: don't bother binning
        # (threshold value is an assumption; the original compared against 0.0, which never triggers)
        if np.abs(np.std(num) / np.mean(num)) < 1e-2:
            return ['Stable'] * len(data)
        # p1 could either be 0 or the first 20th percentile
        min_value = max(min(num), -1e10)
        v = np.zeros(B + 1)
        for it in range(B + 1):  # include the 100th percentile so the top edge is set
            v[it] = np.percentile(num, 100.0 / B * it)
        # drop duplicate bin edges so pd.qcut doesn't raise on ties
        v_uni, idx_uni = np.unique(v, return_index=True)
        q = np.linspace(0.0, 1.0, B + 1)
        q_uni = q[idx_uni]
        B_uni = len(q_uni) - 1
        ret = pd.qcut(data, q=q_uni, labels=np.arange(B_uni, dtype=float))
        return ret
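A sketch of the expected behavior, assuming obj is a hypothetical instance of the enclosing class (the data values are made up):

import numpy as np

data = np.array([1., 2., 3., 4., 5., 6., 7., 8., np.nan])
labels = obj._equi_depth_conversion(data)  # obj: hypothetical instance
# labels is a Categorical of quartile bins 0.0-3.0; the NaN entry stays NaN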
Example #6
    def _leave1_range_out(x_set, y_set, ranges, shuffle):
        """Generator that yields the range of the run left out for testing and the rest for fitting.

        Parameters:

        x_set: np.ndarray
            X set of the dataset to be split

        y_set: np.ndarray
            Y set of the dataset to be split

        ranges: 2D array-like
            Array holding the ranges for each class, where ranges[i]
            is the list of ranges for class i

        shuffle: bool
            If True, shuffles the data

        Yields:

        class_out:
            Class that was taken out

        train_ranges:
            Training ranges

        test_range:
            Test range
        """

        classes = np.unique(y_set)
        for class_out, class_ranges in zip(classes, ranges):
            for range_out_index in range(len(class_ranges)):
                train_ranges = list(class_ranges)
                test_range = train_ranges.pop(range_out_index)
                # flatten the remaining nested ranges into one index array
                train_ranges = np.hstack(np.hstack(train_ranges))
                test_range = np.array(test_range)
                if shuffle:
                    np.random.shuffle(train_ranges)
                    np.random.shuffle(test_range)

                yield class_out, train_ranges, test_range
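A usage sketch with two classes and two made-up index runs per class; x_set is unused inside the generator, so None is passed, and the function is called here as a plain function:

import numpy as np

y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
ranges = [[[0, 1], [2, 3]],   # runs for class 0
          [[4, 5], [6, 7]]]   # runs for class 1
for class_out, train, test in _leave1_range_out(None, y, ranges, shuffle=False):
    print(class_out, train, test)
# first yield: 0 [2 3] [0 1]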
Example #7
import numpy as np

def t_unique(data1, pkey):
    # keep only the first occurrence of each key
    # (t_filter is a project-specific helper defined elsewhere)
    x, ind = np.unique(data1[pkey], return_index=True)
    if ind.size < data1[pkey].size:
        t_filter(data1, ind)