def getLocalOnbeatHarmonies(self):
    # for third and fifth species counterpoint,
    # use the measures to define the local context timespans
    self.localHarmonyDict = {}
    # use context.measureOffsetMap
    # get the offset for each downbeat
    measureOffsets = self.score.measureOffsetMap()
    # get the start/stop offsets for each measure
    offsetSpans = pairwise(measureOffsets)
    # include the span of the final bar
    measureSpan = offsetSpans[0][1] - offsetSpans[0][0]
    finalSpanOnset = offsetSpans[-1][1]
    finalSpan = (finalSpanOnset, finalSpanOnset + measureSpan)
    offsetSpans.append(finalSpan)
    # gather the content of each local context
    for span in offsetSpans:
        offsetStart = span[0]
        offsetEnd = span[1]
        harmonicEssentials = []
        # partIdx = 0
        for part in self.score.parts:
            # get all the notes in the local span
            localPartElements = part.flat.recurse().getElementsByOffset(
                offsetStart, offsetEnd,
                includeEndBoundary=False,
                mustFinishInSpan=False,
                mustBeginInSpan=True,
                includeElementsThatEndAtStart=False).notesAndRests
            localPartNotes = [elem for elem in localPartElements if elem.isNote]
            # get onbeat consonances or resolutions of tied-over dissonances
            for elem in localPartElements:
                if elem.isNote and elem.offset == offsetStart:
                    if elem.tie is None:
                        harmonicEssentials.append(elem.pitch)
            for elem in localPartElements:
                isHarmonic = True
                if elem.isNote and elem.offset == offsetStart and elem.tie:
                    for n in harmonicEssentials:
                        if not vlChecker.isTriadicConsonance(elem, note.Note(n)):
                            isHarmonic = False
                            break
                    if elem.isNote and isHarmonic:
                        harmonicEssentials.append(elem.pitch)
                    else:
                        # TODO can't just look at the scale in minor,
                        #   because music21 uses natural minor
                        # TODO look for the actual resolution pitch,
                        #   which is down a step in the context
                        for resolution in localPartElements:
                            if (resolution.isNote
                                    and resolution.offset > offsetStart
                                    and parser.isStepDown(elem, resolution)):
                                # resolution = part.scale.next(elem, 'descending')
                                harmonicEssentials.append(resolution.pitch)
        self.localHarmonyDict[offsetStart] = harmonicEssentials
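# Usage sketch (an assumption, not part of the source): how pairwise()
# turns the measureOffsetMap downbeats into (start, stop) spans, shown
# with the itertools-recipe pairwise and a tiny two-measure music21 score.
from itertools import tee

from music21 import converter

def pairwise(iterable):
    # s -> (s0, s1), (s1, s2), (s2, s3), ...
    a, b = tee(iterable)
    next(b, None)
    return list(zip(a, b))

demo = converter.parse('tinyNotation: 4/4 c4 d e f g1')
downbeats = sorted(demo.measureOffsetMap().keys())   # [0.0, 4.0]
print(pairwise(downbeats))                           # [(0.0, 4.0)]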
def find_paths_multi_tables(self, list_of_tables, fix_first=False):
    '''
    Given a list of tables in any order, find a path that traverses all of them.

    If fix_first is True, the first element remains fixed (useful when
    breaking down a specific outcome by various other variables).
    '''
    # first get all combos; these are candidate incomplete paths
    # (they may be missing intermediary tables)
    if len(list_of_tables) == 1:
        return [list_of_tables]
    permutations = itertools.permutations(list_of_tables)
    if fix_first:
        permutations = [x for x in permutations if x[0] == list_of_tables[0]]
    # keep only permutations whose consecutive tables can be joined
    valid_incomplete_paths = []
    for permutation in permutations:
        is_valid = True
        for pair in u.pairwise(permutation):
            if len(self.find_paths_between_tables(start_table=pair[0],
                                                  destination_table=pair[1])) == 0:
                is_valid = False
        if is_valid:
            valid_incomplete_paths.append(permutation)
    # expand each valid permutation into complete paths by inserting
    # every possible route between each consecutive pair of tables
    unflattened_valid_complete_paths = []
    for valid_incomplete_path in valid_incomplete_paths:
        path_possibilities_pairwise = []
        for pair in u.pairwise(valid_incomplete_path):
            path_possibilities_pairwise.append(
                self.find_paths_between_tables(start_table=pair[0],
                                               destination_table=pair[1]))
        combos = itertools.product(*path_possibilities_pairwise)
        for combo in combos:
            unflattened_valid_complete_paths.append(list(combo))
    flattened_valid_complete_paths = []
    for path in unflattened_valid_complete_paths:
        flattened_valid_complete_paths.append(list(u.flatten(path)))
    flattened_valid_complete_paths = u.remove_adjacent_repeats(flattened_valid_complete_paths)
    return flattened_valid_complete_paths
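# Minimal sketch (hypothetical tables and joins, not from the source): a
# permutation survives the first pass only if every consecutive pair of
# tables returned by u.pairwise() has at least one join path between them.
direct_joins = {('orders', 'users'): [['orders', 'users']],
                ('users', 'events'): [['users', 'events']]}

def find_paths_between_tables(start_table, destination_table):
    # stand-in for the real method, which searches the schema graph
    return direct_joins.get((start_table, destination_table), [])

# ('orders', 'users', 'events') is kept: both adjacent pairs are joinable.
# ('orders', 'events', 'users') is dropped: no orders -> events path exists.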
def kmatrices(data, covfct, u, N=0):
    '''
    Input  (data)   ndarray, data
           (covfct) covariance modeling function
                    - spherical
                    - exponential
                    - gaussian
           (u)      unsampled point
           (N)      number of neighboring points to consider; if zero, use all
    '''
    # u needs to be two dimensional for cdist()
    if np.ndim(u) == 1:
        u = [u]
    # distance between u and each data point in P
    d = cdist(data[:, :2], u)
    # add these distances to P
    P = np.hstack((data, d))
    # if N > 0, take the N closest points,
    if N > 0:
        P = P[d[:, 0].argsort()[:N]]
    # otherwise, use all of the points
    else:
        N = len(P)
    # apply the covariance model to the distances
    k = covfct(P[:, 3])
    # check for NaN values in k
    if np.any(np.isnan(k)):
        raise ValueError('The vector of covariances, k, contains NaN values')
    # cast as a matrix
    k = np.matrix(k).T
    # form a matrix of distances between existing data points
    K = pairwise(P[:, :2])
    # apply the covariance model to these distances
    K = covfct(K.ravel())
    # check for NaN values in K
    if np.any(np.isnan(K)):
        raise ValueError('The matrix of covariances, K, contains NaN values')
    # re-cast as a NumPy array -- thanks M.L.
    K = np.array(K)
    # reshape into an N x N array
    K = K.reshape(N, N)
    # cast as a matrix
    K = np.matrix(K)
    return K, k, P
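# Usage sketch (assumptions: the geostatsmodels-style helpers cdist and
# pairwise are in scope, and covfct is any callable mapping distances to
# covariances; expcov below is a hypothetical example model).
import numpy as np

def expcov(h, sill=1.0, rng=10.0):
    # a simple exponential covariance model, C(h) = sill * exp(-3h / rng)
    return sill * np.exp(-3.0 * np.asarray(h) / rng)

pts = np.random.rand(50, 3) * 100.0          # columns: x, y, value
K, k, P = kmatrices(pts, expcov, u=(50.0, 50.0), N=16)
# K: 16 x 16 covariances among the neighbors of u
# k: 16 x 1 covariances between u and each neighbor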
def setupLocalContexts(self):
    # TODO this currently sets up measure-length contexts,
    # but would also like to set up harmonic spans for harmonic species
    # TODO create a custom offset map for harmonic species
    # and use the measure map for third species
    # offsetSpans = []
    # if harmonicSpecies == True:
    #     offsetSpans = []
    # else:
    # get the offset for each downbeat
    measureOffsets = self.score.measureOffsetMap()
    # get the start/stop offsets for each measure
    offsetSpans = pairwise(measureOffsets)
    # include the span of the final bar
    # measureSpan = offsetSpans[0][1] - offsetSpans[0][0]
    measureSpan = self.parts[0].getElementsByClass('Measure')[-1].barDuration.quarterLength
    finalSpanOnset = offsetSpans[-1][1]
    finalSpan = (finalSpanOnset, finalSpanOnset + measureSpan)
    offsetSpans.append(finalSpan)
    for span in offsetSpans[:-1]:
        offsetStart = span[0]
        offsetEnd = span[1]
        cxt = LocalContext()
        cxt.offset = offsetStart
        cxt.harmonyStart = self.localHarmonyDict[offsetStart]
        cxt.harmonyEnd = self.localHarmonyDict[offsetEnd]
        # create a new stream for each context
        cxt.score = stream.Score()
        # go through the parts of the global context
        # and add notes to the corresponding local parts
        for num, part in enumerate(self.score.parts):
            newpart = stream.Part()
            newpart.species = part.species
            cxt.score.append(newpart)
            for n in part.flat.notes:
                if offsetStart <= n.offset <= offsetEnd:
                    newpart.append(n)
            # part-related parsing initialization
            # newpart.buffer = [n for n in part.flat.notes
            #                   if not n.tie or n.tie.type == 'start']
            # newpart.stack = []
            # newpart.arcs = []
            # newpart.openHeads = []
            # newpart.openTransitions = []
        self.localContexts[cxt.offset] = cxt
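# Sketch (an assumption, not from the source): the inner loop above amounts
# to slicing each part into measure-length note lists by offset, which can
# be illustrated with plain music21 on a small three-measure example.
from music21 import converter

demo = converter.parse('tinyNotation: 4/4 c4 d e f g2 a b1')
downbeats = sorted(demo.measureOffsetMap().keys())
spans = list(zip(downbeats, downbeats[1:] + [downbeats[-1] + 4.0]))
for start, end in spans:
    in_span = [n.nameWithOctave for n in demo.flat.notes
               if start <= n.offset <= end]
    print((start, end), in_span)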
def variogram(data, lags, tol, method):
    '''
    Input:  (data)   NumPy array where the first two columns
                     are the spatial coordinates, x and y
            (lags)   the distances, h, between points
            (tol)    the tolerance we are comfortable with around each lag
            (method) either 'semivariogram' or 'covariogram'
    Output: (cv)     <2xN> NumPy array of lags and variogram values
    '''
    # calculate the pairwise distances
    pwdist = utilities.pairwise(data)
    # create a list of lists of indices of points having the ~same lag
    index = [lagindices(pwdist, lag, tol) for lag in lags]
    # calculate the variogram at different lags given some tolerance
    if method in ['semivariogram', 'semi', 'sv', 's']:
        v = [semivariance(data, indices) for indices in index]
    elif method in ['covariogram', 'cov', 'co', 'cv', 'c']:
        v = [covariance(data, indices) for indices in index]
    else:
        raise ValueError('method must be a semivariogram or covariogram keyword')
    # bundle the variogram values with their lags
    # (list() is needed so this also works under Python 3)
    return np.array(list(zip(lags, v))).T
def aggregate_df(self, df_original, groupby_columns, filters,
                 aggregate_column=None, aggregate_fxn='Count'):
    df = df_original.copy(deep=True)
    df = df.dropna()
    # generate filter permutations and do the actual filtering
    filter_filters = []
    for column in groupby_columns:
        filter = filters.get(column, None)
        if filter is None:
            series = df.loc[:, column]
            if is_numeric_dtype(series):
                min = u.reduce_precision(series.min(), 2)
                max = u.reduce_precision(series.max(), 2)
                label = f'({min}, {max})'
                df[column] = label
                filter_filters.append([label])
            else:
                filter_filters.append(sorted(series.unique(), key=lambda x: x.upper()))
        elif filter['type'] == 'list':
            filter_filters.append(filter['filter'])
            df = df[df[column].isin(filter['filter'])]
        elif filter['type'] == 'range':
            bin_cuts = self.get_bin_cuts(filter['filter']['min'],
                                         filter['filter']['max'],
                                         filter['filter']['bins'])
            # render half-open interval labels, e.g. '(0, 18]'
            bin_labels = [str(x) for x in u.pairwise(bin_cuts)]
            bin_labels = [x.replace(')', ']') for x in bin_labels]
            df[column] = pd.cut(df[column], bin_cuts,
                                include_lowest=True, labels=bin_labels).dropna()
            filter_filters.append(bin_labels)
    groupby_label_options = []
    for filter_combo in itertools.product(*filter_filters):
        label = ''
        for i in filter_combo:
            label += str(i) + '_'
        label = label[:-1]
        groupby_label_options.append(label)
    if len(df) > 0:
        if aggregate_column is None:
            # just get the counts then
            df = df.groupby(groupby_columns).size()
            if len(groupby_columns) > 1:
                df = df.unstack(fill_value=0).sort_index(axis=1).stack()
            df = df.reset_index(name='Count')
        else:
            g = df.groupby(groupby_columns, observed=True)
            if aggregate_fxn == 'Count':
                df = g[aggregate_column].value_counts().unstack(fill_value=0).sort_index(axis=1).reset_index()
            elif aggregate_fxn == 'Percents':
                df = (g[aggregate_column].value_counts(normalize=True) * 100).round(1).unstack(fill_value=0).sort_index(axis=1).reset_index()
            elif aggregate_fxn == 'Sum':
                df = g.sum().reset_index()
                df[aggregate_column] = df[aggregate_column].fillna(0)
            elif aggregate_fxn == 'Mean':
                df = g.mean().round(2).reset_index()
                df[aggregate_column] = df[aggregate_column].fillna(0)
            elif aggregate_fxn == 'Median':
                df = g.median().round(2).reset_index()
                df[aggregate_column] = df[aggregate_column].fillna(0)

        def get_breakdown_label(row, ind_variables):
            return_str = ''
            for x in ind_variables:
                return_str += str(row[x]) + '_'
            return_str = return_str[:-1]  # remove trailing underscore
            return return_str

        df['groupby_labels'] = df.apply(lambda x: get_breakdown_label(x, groupby_columns), axis=1)
    else:
        df['groupby_labels'] = None
    df = df.drop(columns=groupby_columns)
    # some groupbys will have 0 patients, but still display 0 for them
    # (note: DataFrame.append was removed in pandas 2.0; use pd.concat there)
    found_labels = list(df['groupby_labels'].value_counts().index)
    missing_labels = [x for x in groupby_label_options if x not in found_labels]
    if len(missing_labels) > 0:
        for missing_label in missing_labels:
            df = df.append({'groupby_labels': missing_label}, ignore_index=True)
        df = df.fillna(0)

    def find_sort_order(row):
        return groupby_label_options.index(row['groupby_labels'])

    df['sort_order'] = df.apply(lambda x: find_sort_order(x), axis=1)
    df = df.sort_values(by='sort_order')
    df = df.drop(columns=['sort_order'])
    return df
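# Sketch of the 'range' branch above (assumption: u.pairwise is the
# itertools recipe yielding consecutive bin-edge pairs; ages and bin_cuts
# are hypothetical demo data).
from itertools import tee

import pandas as pd

def pairwise(iterable):
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)

ages = pd.Series([3, 17, 42, 65, 80])
bin_cuts = [0, 18, 65, 100]
bin_labels = [str(p).replace(')', ']') for p in pairwise(bin_cuts)]
print(pd.cut(ages, bin_cuts, include_lowest=True, labels=bin_labels))
# labels come out as '(0, 18]', '(18, 65]', '(65, 100]'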
# (fragment: the first two statements are the body of a loop over the
# lines_num random lines, with i indexing the current line)
f = interpolate.interp1d(eta, z_s[i], kind='nearest', assume_sorted=True)
z += (f(proj[i])).reshape(N, M)

z /= np.sqrt(float(lines_num))

# point variance
print("\nsigma = ", np.sqrt(np.var(z.ravel())))

# resulting variogram
data = np.array([x[:int(N / 2), :int(M / 2)].ravel(),
                 y[:int(N / 2), :int(M / 2)].ravel(),
                 z[:int(N / 2), :int(M / 2)].ravel()]).transpose()
# data = np.array([x.ravel(), y.ravel(), z.ravel()]).transpose()

pwdist = pairwise(data)
tol = dx / 2.0
lags = np.arange(0.7 * dx, cov_model.length * 6.0, 2.0 * tol)
y_model = cov_model.variogram(lags)
index = [lagindices(pwdist, lag, tol) for lag in lags]
v = [semivariance(data, indices) for indices in index]
pts_lag_size = [len(indices) for indices in index]

# plot the empirical semivariogram against the model variogram
fig = plt.figure()
ax = fig.add_subplot(2, 1, 1)
ax.set_xticks(lags + tol, minor=True)
ax.xaxis.grid(True, which='minor')
ax.plot(lags, v, '-o', markersize=5)
ax.plot(lags, y_model, '-o', markersize=5)
ax = fig.add_subplot(2, 1, 2)
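# Self-contained sketch of the per-lag estimate computed above (assumption:
# this mirrors what the lagindices/semivariance helpers do internally).
import numpy as np
from scipy.spatial.distance import pdist, squareform

def empirical_semivariance(data, lag, tol):
    # data columns: x, y, value; average half the squared difference over
    # all point pairs whose separation falls within lag +/- tol
    d = squareform(pdist(data[:, :2]))
    i, j = np.where((d >= lag - tol) & (d < lag + tol))
    i, j = i[i < j], j[i < j]                # count each pair once
    return 0.5 * np.mean((data[i, 2] - data[j, 2]) ** 2)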