def bac_metric(solution, prediction, task='binary.classification'):
    '''Compute the normalized balanced accuracy.

    The binarization and the normalization differ for the multi-label
    and multi-class case.
    '''
    label_num = solution.shape[1]
    score = np.zeros(label_num)
    bin_prediction = binarize_predictions(prediction, task)
    [tn, fp, tp, fn] = acc_stat(solution, bin_prediction)
    # Bounding to avoid division by 0
    eps = 1e-15
    tp = sp.maximum(eps, tp)
    pos_num = sp.maximum(eps, tp + fn)
    tpr = tp / pos_num  # true positive rate (sensitivity)
    if (task != 'multiclass.classification') or (label_num == 1):
        tn = sp.maximum(eps, tn)
        neg_num = sp.maximum(eps, tn + fp)
        tnr = tn / neg_num  # true negative rate (specificity)
        bac = 0.5 * (tpr + tnr)
        base_bac = 0.5  # random predictions for binary case
    else:
        bac = tpr
        base_bac = 1. / label_num  # random predictions for multiclass case
    bac = mvmean(bac)  # average over all classes
    # Normalize: 0 for random, 1 for perfect
    score = (bac - base_bac) / sp.maximum(eps, (1 - base_bac))
    return score
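A minimal usage sketch for bac_metric, assuming binarize_predictions, acc_stat and mvmean are in scope from the same scoring module (they are not defined in this section) and that numpy/scipy are imported as np/sp as in the snippets here:

import numpy as np
import scipy as sp

solution = np.array([[1], [0], [1], [1]])            # one binary label per row
prediction = np.array([[0.9], [0.2], [0.8], [0.6]])  # probabilities, binarized internally
score = bac_metric(solution, prediction, task='binary.classification')
print(score)  # 1.0 for a perfect predictor, ~0.0 for random guessing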
def prior_log_loss(frac_pos, task=BINARY_CLASSIFICATION):
    """Baseline log loss.

    For multiple classes or labels, return the values for each column.
    """
    eps = 1e-15
    frac_pos_ = sp.maximum(eps, frac_pos)
    if task != MULTICLASS_CLASSIFICATION:  # binary case
        frac_neg = 1 - frac_pos
        frac_neg_ = sp.maximum(eps, frac_neg)
        pos_class_log_loss_ = -frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = -frac_neg * np.log(frac_neg_)
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
        # base_log_loss = mvmean(base_log_loss)
        # print('binary {}'.format(base_log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on.
    else:  # multiclass case
        fp = frac_pos_ / sum(frac_pos_)  # renormalize the lines in the multiclass case
        # Only ONE label is active per line in the multiclass case.
        pos_class_log_loss_ = -frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_)
    return base_log_loss
def nms(boxes, T=0.5):
    if len(boxes) == 0:
        return []
    boxes = boxes.astype("float")
    pick = []
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = sp.argsort(y2)
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        xx1 = sp.maximum(x1[i], x1[idxs[:last]])
        yy1 = sp.maximum(y1[i], y1[idxs[:last]])
        xx2 = sp.minimum(x2[i], x2[idxs[:last]])
        yy2 = sp.minimum(y2[i], y2[idxs[:last]])
        w = sp.maximum(0, xx2 - xx1 + 1)
        h = sp.maximum(0, yy2 - yy1 + 1)
        I = w * h
        # overlap_ratio = I / area[idxs[:last]]
        overlap_ratio = I / (area[i] + area[idxs[:last]] - I)
        idxs = sp.delete(idxs, sp.concatenate(([last],
                         sp.where(overlap_ratio > T)[0])))
    return boxes[pick].astype("int")
def nms(dets, proba, T):
    dets = dets.astype("float")
    if len(dets) == 0:
        return []
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = proba
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = sp.maximum(x1[i], x1[order[1:]])
        yy1 = sp.maximum(y1[i], y1[order[1:]])
        xx2 = sp.minimum(x2[i], x2[order[1:]])
        yy2 = sp.minimum(y2[i], y2[order[1:]])
        w = sp.maximum(0.0, xx2 - xx1 + 1)
        h = sp.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = sp.where(ovr <= T)[0]
        order = order[inds + 1]
    return keep
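A quick check of the score-ordered variant above (the suppression logic also matches the y2-sorted version before it). This is a sketch assuming numpy and the function above are in scope; the sp.maximum/sp.minimum aliases used throughout these snippets are the legacy scipy top-level ufuncs, which newer SciPy releases removed (np.maximum/np.minimum behave identically):

import numpy as np

# Two heavily overlapping boxes plus one distant box, as (x1, y1, x2, y2).
dets = np.array([[10, 10, 50, 50],
                 [12, 12, 52, 52],
                 [100, 100, 140, 140]])
proba = np.array([0.9, 0.8, 0.7])
print(nms(dets, proba, T=0.5))  # [0, 2]: the lower-scoring duplicate of box 0 is suppressed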
def reload(self):
    if self.set:
        return False
    else:
        m, n = self.A.shape
        self.W0 = sp.maximum(sp.matrix(sp.random.normal(size=(m, self.rank))), 0)
        self.H0 = sp.maximum(sp.matrix(sp.random.normal(size=(self.rank, n))), 0)
        return True
def __init__(self, A, r, eps=10 ** -4, T=500, **kwargs):
    self.rank = r
    self.tol = eps
    self.maxiter = T
    self.set = False
    try:
        self.A = sp.matrix(A)
    except ValueError:
        exit("Matrix incorrectly defined.")
    except Exception:
        exit("Unknown error occurred.")
    m, n = self.A.shape
    if "seed" in kwargs.keys():
        self.seed = kwargs["seed"]
    else:
        self.seed = False
    if "num" in kwargs.keys():
        self.num = kwargs["num"]  # how many times to repeat the procedure with randomly generated matrices
    else:
        self.num = 1
    if "W0" in kwargs.keys():
        try:
            self.W0 = sp.matrix(kwargs["W0"])
            if (m, r) != self.W0.shape:
                raise ValueError
            else:
                self.set = True
        except Exception:
            self.W0 = sp.maximum(sp.matrix(sp.random.normal(size=(m, r))), 0)
    else:
        self.W0 = sp.maximum(sp.matrix(sp.random.normal(size=(m, r))), 0)
    if "H0" in kwargs.keys():
        try:
            self.H0 = sp.matrix(kwargs["H0"])
            if (r, n) != self.H0.shape:  # was: H0.shape, a NameError
                raise ValueError
            else:
                self.set = True
        except Exception:
            self.H0 = sp.maximum(sp.matrix(sp.random.normal(size=(r, n))), 0)
    else:
        self.H0 = sp.maximum(sp.matrix(sp.random.normal(size=(r, n))), 0)
    if "rw" in kwargs.keys():
        self.rw = kwargs["rw"]  # was: rw, a NameError
    else:
        self.rw = 1
def balanced_accuracy(solution, prediction):
    y_type, solution, prediction = _check_targets(solution, prediction)

    if y_type not in ["binary", "multiclass", 'multilabel-indicator']:
        raise ValueError("{0} is not supported".format(y_type))

    if y_type == 'binary':
        # Do not transform into any multiclass representation
        pass
    elif y_type == 'multiclass':
        # Need to create a multiclass solution and a multiclass prediction
        max_class = int(np.max((np.max(solution), np.max(prediction))))
        solution_binary = np.zeros((len(solution), max_class + 1))
        prediction_binary = np.zeros((len(prediction), max_class + 1))
        for i in range(len(solution)):
            solution_binary[i, int(solution[i])] = 1
            prediction_binary[i, int(prediction[i])] = 1
        solution = solution_binary
        prediction = prediction_binary
    elif y_type == 'multilabel-indicator':
        solution = solution.toarray()
        prediction = prediction.toarray()
    else:
        raise NotImplementedError('bac_metric does not support task type %s'
                                  % y_type)

    fn = np.sum(np.multiply(solution, (1 - prediction)), axis=0, dtype=float)
    tp = np.sum(np.multiply(solution, prediction), axis=0, dtype=float)
    # Bounding to avoid division by 0
    eps = 1e-15
    tp = sp.maximum(eps, tp)
    pos_num = sp.maximum(eps, tp + fn)
    tpr = tp / pos_num  # true positive rate (sensitivity)

    if y_type in ('binary', 'multilabel-indicator'):
        tn = np.sum(np.multiply((1 - solution), (1 - prediction)),
                    axis=0, dtype=float)
        fp = np.sum(np.multiply((1 - solution), prediction),
                    axis=0, dtype=float)
        tn = sp.maximum(eps, tn)
        neg_num = sp.maximum(eps, tn + fp)
        tnr = tn / neg_num  # true negative rate (specificity)
        bac = 0.5 * (tpr + tnr)
    elif y_type == 'multiclass':
        label_num = solution.shape[1]
        bac = tpr
    else:
        raise ValueError(y_type)

    return np.mean(bac)  # average over all classes
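Here _check_targets is sklearn's private validation helper; its import path (e.g. sklearn.metrics._classification) is version-dependent, so treat this smoke test as a sketch under that assumption:

import numpy as np
import scipy as sp
from sklearn.metrics._classification import _check_targets  # private API; path varies by version

sol = np.array([0, 1, 2, 1])
pred = np.array([0, 2, 2, 1])
print(balanced_accuracy(sol, pred))  # mean per-class recall: (1 + 0.5 + 1) / 3 ~ 0.833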
def f1_metric(solution, prediction, task=BINARY_CLASSIFICATION):
    """
    Compute the normalized f1 measure.

    The binarization differs for the multi-label and multi-class case.
    A non-weighted average over classes is taken.
    The score is normalized.

    :param solution:
    :param prediction:
    :param task:
    :return:
    """
    label_num = solution.shape[1]
    score = np.zeros(label_num)
    bin_prediction = binarize_predictions(prediction, task)
    [tn, fp, tp, fn] = acc_stat(solution, bin_prediction)
    # Bounding to avoid division by 0
    eps = 1e-15
    true_pos_num = sp.maximum(eps, tp + fn)
    found_pos_num = sp.maximum(eps, tp + fp)
    tp = sp.maximum(eps, tp)
    tpr = tp / true_pos_num   # true positive rate (recall)
    ppv = tp / found_pos_num  # positive predictive value (precision)
    arithmetic_mean = 0.5 * sp.maximum(eps, tpr + ppv)
    # Harmonic mean:
    f1 = tpr * ppv / arithmetic_mean
    # Average over all classes
    f1 = np.mean(f1)
    # Normalize: 0 for random, 1 for perfect
    if (task != MULTICLASS_CLASSIFICATION) or (label_num == 1):
        # How to choose the "base_f1"?
        # For the binary/multilabel classification case, one may want to
        # predict all 1. In that case tpr = 1 and ppv = frac_pos, so
        # f1 = 2 * frac_pos / (1 + frac_pos), with
        #   frac_pos = mvmean(solution.ravel())
        # Or predict random values with probability 0.5, in which case
        # base_f1 = 0.5. The first solution is better only if frac_pos > 1/3.
        # The solution in which we predict according to the class prior
        # frac_pos gives f1 = tpr = ppv = frac_pos, which is worse than 0.5
        # if frac_pos < 0.5. So, because the f1 score is used when frac_pos
        # is small (typically < 0.1), the best is to assume base_f1 = 0.5.
        base_f1 = 0.5
    # For the multiclass case, this is not possible (though it does not make
    # much sense to use f1 for multiclass problems), so the best is to assign
    # values at random to get tpr = ppv = frac_pos, where frac_pos = 1/label_num.
    else:
        base_f1 = 1. / label_num
    score = (f1 - base_f1) / sp.maximum(eps, (1 - base_f1))
    return score
def periodic_jacobian(self, params, eps, relativeScale=False,
                      stepSizeCutoff=None):
    """
    Return a KeyedList of the derivatives of the model residuals w.r.t.
    parameters.

    The method uses finite differences.

    Inputs:
     params -- Parameters about which to calculate the jacobian
     eps -- Step size to take, may be vector or scalar.
     relativeScale -- If true, the eps is taken to be the fractional change
                      in parameter to use in finite differences.
     stepSizeCutoff -- Minimum step size to take.
    """
    res = self.resDict(params)

    orig_vals = scipy.array(params)

    if stepSizeCutoff is None:
        stepSizeCutoff = scipy.sqrt(_double_epsilon_)

    if relativeScale:
        eps_l = scipy.maximum(eps * abs(params), stepSizeCutoff)
    else:
        eps_l = scipy.maximum(eps * scipy.ones(len(params), scipy.float_),
                              stepSizeCutoff)

    J = KeyedList()  # will hold the result
    for resId in res.keys():
        J.set(resId, [])

    # Two-sided finite difference
    for ii in range(len(params)):
        params[ii] = orig_vals[ii] + eps_l[ii]
        resPlus = self.resDict(params)

        params[ii] = orig_vals[ii] - eps_l[ii]
        resMinus = self.resDict(params)

        params[ii] = orig_vals[ii]

        for resId in res.keys():
            res_deriv = (resPlus[resId] - resMinus[resId]) / (2. * eps_l[ii])
            J.get(resId).append(res_deriv)

    # NOTE: after call to ComputeResidualsWithScaleFactors the Model's
    # parameters get updated, must reset this:
    self.params.update(params)
    return J
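The loop above is the standard two-sided (central) difference, (f(x + h) - f(x - h)) / 2h, applied residual by residual. A self-contained illustration of the same stencil on a scalar function (central_diff is a hypothetical helper, not part of the class above):

import numpy as np

def central_diff(f, x, h=1e-6):
    # O(h^2)-accurate central difference, the same stencil as periodic_jacobian
    return (f(x + h) - f(x - h)) / (2.0 * h)

print(central_diff(np.sin, 0.0))  # ~1.0, i.e. cos(0)
print(central_diff(np.exp, 1.0))  # ~2.71828, i.e. e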
def hessian_elem(self, func, f0, params, i, j, epsi, epsj,
                 relativeScale, stepSizeCutoff, verbose):
    """
    Return the second partial derivative for func w.r.t. parameters i and j.

    f0: The value of the function at params
    epsi, epsj: Set the stepsizes to try
    relativeScale: If True, step i is of size p[i] * eps, otherwise it is eps
    stepSizeCutoff: The minimum stepsize to take
    """
    origPi, origPj = params[i], params[j]

    if relativeScale:
        # Step sizes are given by eps * the value of the parameter,
        # but the minimum step size is stepSizeCutoff
        hi, hj = scipy.maximum((epsi * abs(origPi), epsj * abs(origPj)),
                               (stepSizeCutoff, stepSizeCutoff))
    else:
        hi, hj = epsi, epsj

    if i == j:
        params[i] = origPi + hi
        fp = func(params)

        params[i] = origPi - hi
        fm = func(params)

        element = (fp - 2 * f0 + fm) / hi ** 2
    else:
        ## f(xi + hi, xj + hj)
        params[i] = origPi + hi
        params[j] = origPj + hj
        fpp = func(params)

        ## f(xi + hi, xj - hj)
        params[i] = origPi + hi
        params[j] = origPj - hj
        fpm = func(params)

        ## f(xi - hi, xj + hj)
        params[i] = origPi - hi
        params[j] = origPj + hj
        fmp = func(params)

        ## f(xi - hi, xj - hj)
        params[i] = origPi - hi
        params[j] = origPj - hj
        fmm = func(params)

        element = (fpp - fpm - fmp + fmm) / (4 * hi * hj)

    params[i], params[j] = origPi, origPj

    self._notify(event='hessian element', i=i, j=j, element=element)
    if verbose:
        print('hessian[%i, %i] = %g' % (i, j, element))

    return element
def generateThumbnail(inputFile, thumbSize):
    global size
    # logging.debug('Input File: %s\n' % inputFile)
    # logging.debug('Output File: %s\n' % outputFile)
    # logging.debug('Thumb Size: %s\n' % thumbSize)
    h5f = tables.openFile(inputFile)

    dataSource = HDFDataSource.DataSource(inputFile, None)
    md = MetaData.genMetaDataFromSourceAndMDH(dataSource,
                                              MetaDataHandler.HDFMDHandler(h5f))

    xsize = h5f.root.ImageData.shape[1]
    ysize = h5f.root.ImageData.shape[2]

    if xsize > ysize:
        zoom = float(thumbSize) / xsize
    else:
        zoom = float(thumbSize) / ysize

    size = (int(xsize * zoom), int(ysize * zoom))

    im = h5f.root.ImageData[min(md.EstimatedLaserOnFrameNo + 10,
                                (h5f.root.ImageData.shape[0] - 1)),
                            :, :].astype("f")

    im = im.T - min(md.Camera.ADOffset, im.min())

    h5f.close()

    im = maximum(minimum(1 * (255 * im) / im.max(), 255), 0)

    return im.astype("uint8")
def _sampling_matrix(hessian, cutoff=0, temperature=1, step_scale=1):
    # Basically need the SVD of the hessian - singular values and
    # eigenvectors: hessian = u * diag(sing_vals) * vh
    u, sing_vals, vh = scipy.linalg.svd(0.5 * hessian)

    # Scroll through the singular values and find the ones whose inverses
    # will be huge and set them to zero; also load up the array of singular
    # values that we store.
    # cutoff = (1.0/_.singVals[0])*1.0e03
    # double cutoff = _.singVals[0]*1.0e-02
    cutoff_sing_val = cutoff * max(sing_vals)

    D = 1.0 / scipy.maximum(sing_vals, cutoff_sing_val)

    ## Now fill in the sampling matrix ("square root" of the Hessian).
    ## Note that sqrt(D[i]) is taken here whereas Kevin took sqrt(D[j]);
    ## this is because vh is the transpose of his PT -JJW
    samp_mat = scipy.transpose(vh) * scipy.sqrt(D)

    # Divide the sampling matrix by an additional factor such
    # that the expected quadratic increase in cost will be about 1.
    cutoff_vals = scipy.compress(sing_vals < cutoff_sing_val, sing_vals)
    if len(cutoff_vals):
        scale = scipy.sqrt(len(sing_vals) - len(cutoff_vals)
                           + sum(cutoff_vals) / cutoff_sing_val)
    else:
        scale = scipy.sqrt(len(sing_vals))

    samp_mat /= scale
    samp_mat *= step_scale
    samp_mat *= scipy.sqrt(temperature)

    return samp_mat
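A sketch of how the matrix might be used, assuming the function above and numpy are in scope: singular values below the cutoff are clamped, so near-singular ("sloppy") Hessian directions get a bounded rather than divergent step size.

import numpy as np

hess = np.diag([4.0, 1.0, 1e-4])         # toy Hessian with one near-singular direction
S = _sampling_matrix(hess, cutoff=1e-2)  # 1/sing_val scaling, clamped at cutoff*max(sing_vals)
step = S.dot(np.random.randn(3))         # a candidate Monte-Carlo parameter step
print(step.shape)                        # (3,)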
def logloss(Y_true, Y_pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, Y_pred)
    pred = sp.minimum(1 - epsilon, pred)  # was clipping Y_pred, discarding the lower bound
    ll = sum(Y_true * sp.log(pred) +
             sp.subtract(1, Y_true) * sp.log(sp.subtract(1, pred)))  # was using unclipped Y_pred
    ll = ll * -1.0 / len(Y_true)
    return ll
def test(self):
    pot = lambda x, y, z: self.m / 2. * (self.wx ** 2 * x ** 2 +
                                         self.wy ** 2 * y ** 2 +
                                         self.wz ** 2 * z ** 2)
    u0 = 4 * pi * hbar ** 2 * self.a0 / self.m
    # Thomas-Fermi density: the whole (mu - V) term is divided by u0,
    # matching psiTF_1d below (the original divided only the potential).
    TF = lambda x, y, z: scipy.maximum((self.mu - pot(x, y, z)) / u0, 0)
    import mpmath
    print('hi')
    print(mpmath.quad(TF, [-self.Rx, self.Rx], [-self.Ry, self.Ry],
                      [-self.Rz, self.Rz]))
def psiTF_1d(self, x=None, w=None):
    if x is None:  # was: x == None, which fails elementwise on arrays
        x = self.x_1d
    if w is None:
        w = self.wx
    interaction = 4 * pi * hbar ** 2 * self.a1d / self.m
    return (scipy.maximum(0, (self.mu - self.harm_pot_1d(x, w)) / interaction)) ** .5
def hessian_elem(self, func, f0, params, i, j, epsi, epsj,
                 relativeScale, stepSizeCutoff, verbose):
    """
    Return the second partial derivative for func w.r.t. parameters i and j.

    f0: The value of the function at params
    epsi, epsj: Set the stepsizes to try
    relativeScale: If True, step i is of size p[i] * eps, otherwise it is eps
    stepSizeCutoff: The minimum stepsize to take
    """
    origPi, origPj = params[i], params[j]

    if relativeScale:
        # Step sizes are given by eps * the value of the parameter,
        # but the minimum step size is stepSizeCutoff
        hi, hj = scipy.maximum((epsi * abs(origPi), epsj * abs(origPj)),
                               (stepSizeCutoff, stepSizeCutoff))
    else:
        hi, hj = epsi, epsj

    if i == j:
        params[i] = origPi + hi
        fp = func(params)

        params[i] = origPi - hi
        fm = func(params)

        element = (fp - 2 * f0 + fm) / hi ** 2
    else:
        ## f(xi + hi, xj + hj)
        params[i] = origPi + hi
        params[j] = origPj + hj
        fpp = func(params)

        ## f(xi + hi, xj - hj)
        params[i] = origPi + hi
        params[j] = origPj - hj
        fpm = func(params)

        ## f(xi - hi, xj + hj)
        params[i] = origPi - hi
        params[j] = origPj + hj
        fmp = func(params)

        ## f(xi - hi, xj - hj)
        params[i] = origPi - hi
        params[j] = origPj - hj
        fmm = func(params)

        element = (fpp - fpm - fmp + fmm) / (4 * hi * hj)

    params[i], params[j] = origPi, origPj

    self._notify(event='hessian element', i=i, j=j, element=element)
    if verbose:
        print('hessian[%i, %i] = %g' % (i, j, element))

    return element
def generateThumbnail(inputFile, thumbSize):
    global size
    # logging.debug('Input File: %s\n' % inputFile)
    # logging.debug('Output File: %s\n' % outputFile)
    # logging.debug('Thumb Size: %s\n' % thumbSize)
    h5f = tables.openFile(inputFile)

    dataSource = HDFDataSource.DataSource(inputFile, None)
    md = MetaData.genMetaDataFromSourceAndMDH(
        dataSource, MetaDataHandler.HDFMDHandler(h5f))

    xsize = h5f.root.ImageData.shape[1]
    ysize = h5f.root.ImageData.shape[2]

    if xsize > ysize:
        zoom = float(thumbSize) / xsize
    else:
        zoom = float(thumbSize) / ysize

    size = (int(xsize * zoom), int(ysize * zoom))

    im = h5f.root.ImageData[min(md.EstimatedLaserOnFrameNo + 10,
                                (h5f.root.ImageData.shape[0] - 1)),
                            :, :].astype('f')

    im = im.T - min(md.Camera.ADOffset, im.min())

    h5f.close()

    im = maximum(minimum(1 * (255 * im) / im.max(), 255), 0)

    return im.astype('uint8')
def logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1 - epsilon, p)
    ll = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    ll = ll * -1.0 / len(y)
    return ll
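All the logloss variants in this section share the same pattern: clip predictions into [eps, 1 - eps], then average -(y log p + (1 - y) log(1 - p)). A quick numeric check for the version above, assuming scipy is imported as sp:

import numpy as np

y = np.array([1, 0, 1, 1])
p = np.array([0.9, 0.1, 0.8, 0.35])
print(logloss(p, y))  # ~0.37
# Without clipping, a confidently wrong p = 0 would give log(0) = -inf;
# with the epsilon bound the loss stays finite (~34.5 here).
print(logloss(np.array([0.0]), np.array([1])))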
def logloss(actual, predict):
    epsilon = 1e-15
    predict = sp.maximum(epsilon, predict)
    predict = sp.minimum(1 - epsilon, predict)  # was: sp.minum, an AttributeError
    loss = sum(actual * sp.log(predict) +
               sp.subtract(1, actual) * sp.log(sp.subtract(1, predict)))
    loss = loss * -1.0 / len(actual)
    return loss
def set_normal_free_energy(self):
    """Set free energy as a function of odorant; normal tuning curve."""
    self.eps_base = self.mu_eps + self.normal_eps_tuning_prefactor * \
        sp.exp(-(1. * sp.arange(self.Mm)) ** 2.0 /
               (2.0 * self.normal_eps_tuning_width) ** 2.0)

    self.eps_base += random_matrix(self.Mm, params=[0, self.sigma_eps],
                                   seed=self.seed_eps)

    # If dual signal, use the average of the FULL signal nonzero components
    if self.Kk_split == 0:
        self.eps = self.WL_scaling * sp.log(self.mu_Ss0) + self.eps_base
    else:
        self.eps = self.WL_scaling * sp.log(sp.average(self.Ss[self.Ss != 0])) \
            + self.eps_base

    # Apply max and min epsilon value to each component
    self.min_eps = random_matrix(
        self.Mm, params=[self.mu_min_eps, self.sigma_min_eps],
        seed=self.seed_eps)
    self.max_eps = random_matrix(
        self.Mm, params=[self.mu_max_eps, self.sigma_max_eps],
        seed=self.seed_eps)
    self.eps = sp.maximum(self.eps, self.min_eps)
    self.eps = sp.minimum(self.eps, self.max_eps)

    # If an array of signals, replicate for each signal.
    if len(self.Ss.shape) > 1:
        self.eps = sp.tile(self.eps, [self.Ss.shape[1], 1]).T
def get_rdots(cval, ccoh, sval, scoh, density, size, w, dirs, ndirs):
    """"""
    im = sp.zeros((size, size))
    gx, gy = sp.mgrid[-size // 2 + 1:size // 2 + 1,
                      -size // 2 + 1:size // 2 + 1]
    gr = sp.maximum(sp.absolute(gx), sp.absolute(gy))
    mask_c = (gr < w / 2.0)
    mask_s = (gr < w) * (gr >= w / 2.0)

    # get random dot locations first
    mask_c_dot = sp.zeros((size, size), dtype=bool)
    mask_s_dot = sp.zeros((size, size), dtype=bool)
    mask_c_dot[mask_c] = sp.random.choice([False, True],
                                          p=[1.0 - density, density],
                                          size=(mask_c.sum(),))
    mask_s_dot[mask_s] = sp.random.choice([False, True],
                                          p=[1.0 - density, density],
                                          size=(mask_s.sum(),))

    # then fill in with stimulus values (directions)
    im[mask_c_dot] = sp.random.choice(dirs, p=get_p(cval, ccoh, dirs, ndirs),
                                      size=(mask_c_dot.sum(),))
    im[mask_s_dot] = sp.random.choice(dirs, p=get_p(sval, scoh, dirs, ndirs),
                                      size=(mask_s_dot.sum(),))
    return im
def log_loss(self, act, pred, epsilon=1e-07):
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) +
             sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
def llfun(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) +
             sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
def get_population(stimulus, kind, noise_loc=NOISE_LOC,
                   noise_scale=NOISE_SCALE, **kwargs):
    """"""
    if kind in ['gaussian', 'normal']:
        population = get_population_gaussian(stimulus, **kwargs)
    elif kind == 'circular':
        population = get_population_circular(stimulus, **kwargs)
    elif kind == 'monotonic':
        population = get_population_monotonic(stimulus, **kwargs)
    elif kind == 'lognormal':
        population = get_population_lognormal(stimulus, **kwargs)
    elif kind == 'agaussian':
        population = get_population_agaussian(stimulus, **kwargs)
    else:
        raise ValueError('Invalid distribution type for the tuning curves')

    if noise_scale:
        noise = sp.random.normal(loc=noise_loc, scale=noise_scale,
                                 size=population.shape)
        population += sp.maximum(noise, 0)

    bg_values = sp.isnan(population).any(axis=0)
    population[:, bg_values] = 0
    population /= (population.max() + (population.max() == 0))

    return population
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1 - epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0 / len(y)
    return res
def pac_metric(solution, prediction, task='binary.classification'):
    '''Probabilistic Accuracy based on log_loss metric.

    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.
    '''
    debug_flag = False
    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = 'binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * sum(solution)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower).
    # Should always return the same thing except in the multi-label case,
    # for which the analytic solution makes more sense.
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num):
            base_prediction[k, :] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)
        diff = np.array(abs(the_base_log_loss - base_log_loss))
        if len(diff.shape) > 0:
            diff = max(diff)
        if (diff) > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is an NL operation.
    pac = mvmean(np.exp(-the_log_loss))
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
def _sampling_matrix(hessian, cutoff=0, temperature=1, step_scale=1):
    # Basically need the SVD of the hessian - singular values and
    # eigenvectors: hessian = u * diag(sing_vals) * vh
    u, sing_vals, vh = scipy.linalg.svd(0.5 * hessian)

    # Scroll through the singular values and find the ones whose inverses
    # will be huge and set them to zero; also load up the array of singular
    # values that we store.
    # cutoff = (1.0/_.singVals[0])*1.0e03
    # double cutoff = _.singVals[0]*1.0e-02
    cutoff_sing_val = cutoff * max(sing_vals)

    D = 1.0 / scipy.maximum(sing_vals, cutoff_sing_val)

    ## Now fill in the sampling matrix ("square root" of the Hessian).
    ## Note that sqrt(D[i]) is taken here whereas Kevin took sqrt(D[j]);
    ## this is because vh is the transpose of his PT -JJW
    samp_mat = scipy.transpose(vh) * scipy.sqrt(D)

    # Divide the sampling matrix by an additional factor such
    # that the expected quadratic increase in cost will be about 1.
    cutoff_vals = scipy.compress(sing_vals < cutoff_sing_val, sing_vals)
    if len(cutoff_vals):
        scale = scipy.sqrt(len(sing_vals) - len(cutoff_vals)
                           + sum(cutoff_vals) / cutoff_sing_val)
    else:
        scale = scipy.sqrt(len(sing_vals))

    samp_mat /= scale
    samp_mat *= step_scale
    samp_mat *= scipy.sqrt(temperature)

    return samp_mat
def _initParams_regressOut(self, Ycc, X, varXX):
    """
    Initialize the gp parameters:
      1) the variance of Kcc as Ycc.var(0).mean()
      2) X with the provided
      3) variance of interaction (if label is True) will be set to ~0
      4) residual to residual
    """
    X *= SP.sqrt(varXX / (X ** 2).mean())
    Y1 = self.Y - Ycc
    a = SP.array([SP.sqrt(Ycc.var(0).mean())])
    b = 1e-3 * SP.ones(1)
    c = Y1.var(0).mean() - varXX
    c = SP.maximum(1e-1, c)
    c = SP.array([SP.sqrt(c)])
    # gp hyper params
    params = limix.CGPHyperParams()
    if self.interaction:
        params['covar'] = SP.concatenate(
            [a, X.reshape(self.N * self.k, order='F'), SP.ones(1), b])
    else:
        params['covar'] = SP.concatenate(
            [a, X.reshape(self.N * self.k, order='F')])
    params['lik'] = c
    return params
def log_lpsvm(p, v, inplace=False):
    if inplace:
        out = v
    else:
        out = sp.zeros_like(v)
    out[:] = -(sp.maximum(1.0 - v, 0) ** p)
    return out
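A quick numeric check, assuming the function above with numpy/scipy in scope: it returns the negated p-th power of the hinge max(1 - v, 0), so margin-satisfying points (v >= 1) contribute -0.0.

import numpy as np

v = np.array([-0.5, 0.0, 0.5, 1.0, 2.0])
print(log_lpsvm(2, v))  # [-2.25 -1.   -0.25 -0.   -0.  ]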
def set_reach_dist(SetOfObjects, point_index, epsilon):
    """
    Sets reachability distance and ordering.
    This function is the primary workhorse of the OPTICS algorithm.

    SetofObjects: Instantiated and prepped instance of 'setOfObjects' class
    epsilon: Determines maximum object size that can be extracted. Smaller
        epsilons reduce run time. (float)
    """
    row = [SetOfObjects.data[point_index, :]]
    indices = np.argsort(row)
    distances = np.sort(row)

    if scipy.iterable(distances):
        unprocessed = indices[(SetOfObjects._processed[indices] < 1)[0].T]
        rdistances = scipy.maximum(
            distances[(SetOfObjects._processed[indices] < 1)[0].T],
            SetOfObjects._core_dist[point_index])
        SetOfObjects._reachability[unprocessed] = scipy.minimum(
            SetOfObjects._reachability[unprocessed], rdistances)
        if unprocessed.size > 0:
            return unprocessed[np.argsort(
                np.array(SetOfObjects._reachability[unprocessed]))[0]]
        else:
            return point_index
    else:
        return point_index
def solve2DDynamics(p, spikeTrain, dispFig=1):
    p['ic'] = sc.array([0.0001, p['q_Infty']])
    pulses = DiracComb(p, spikeTrain)
    c2, q2 = RK2_Autonomous(f=presynSTSP_2D, pars=p,
                            eParNames=['c_PreIn'], eParList=[pulses])
    rSS2 = Hill(c2, p['c_halfAct_r_muM'], p['c_coop_r'])
    nt = q2 * rSS2
    if dispFig == 1:
        f1 = gr.figure(figsize=(15, 7))
        rows = 3
        cols = 1
        ax = [f1.add_subplot(rows, cols, n + 1) for n in range(rows * cols)]
        # 'orange' is not a valid fmt-string color, so pass it via color=
        ax[0].plot(p['sampTimes'], c2, '.', color='orange', label=r'$c(t)$')
        ax[0].plot(p['sampTimes'], rSS2, 'b.', lw=3, alpha=0.4,
                   label=r'$r_{\infty}(c)$')
        ax[1].plot(p['sampTimes'], q2, 'k.', label=r'$q(t)$')
        ax[2].plot(p['sampTimes'], nt, 'k.', label=r'$r_{\infty}(c)q(t)$')
        ax[0].plot(spikeTrain,
                   sc.maximum(c2.max(), rSS2.max()) * sc.ones(len(spikeTrain)),
                   'r|', ms=10)
        ax[1].plot(spikeTrain, sc.ones(len(spikeTrain)), 'r|', ms=10)
        ax[2].plot(spikeTrain, -0.01 * sc.ones(len(spikeTrain)), 'r|', ms=10)
        [ax[n].legend() for n in range(rows * cols)]
    return c2, q2, rSS2
def llfun(act, pred, idx):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred[idx])
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act[idx] * sp.log(pred) +
             sp.subtract(1, act[idx]) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act[idx])
    return ll
def pac_metric(solution, prediction, task='binary.classification'):
    '''Probabilistic Accuracy based on log_loss metric.

    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.
    '''
    debug_flag = False
    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = 'binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * sum(solution)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower).
    # Should always return the same thing except in the multi-label case,
    # for which the analytic solution makes more sense.
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num):
            base_prediction[k, :] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)
        diff = np.array(abs(the_base_log_loss - base_log_loss))
        if len(diff.shape) > 0:
            diff = max(diff)
        if (diff) > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is an NL operation.
    pac = mvmean(np.exp(-the_log_loss))
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
def findnext(self):
    if self.nsam == 0:
        return [0.5 * (sp.matrix(self.upper) + sp.matrix(self.lower)), 0]
    if self.finished:
        raise RuntimeError("opt is finished")  # was StandardError (Python 2 only)
    self.cc = 0
    fudge = 2.
    EIwrap = lambda x, y: (-self.evalWEI(sp.matrix(x)), 0)
    [x, EImin, ierror] = DIRECT.solve(EIwrap, self.lower, self.upper,
                                      user_data=[], algmethod=1, maxf=4000)
    while self.cc == 0 and fudge <= self.fudgelimit:
        print("no nonzero EIs found over full range; trying closer to "
              "current min with length factor: " + str(fudge))
        u = sp.matrix(self.upper)
        l = sp.matrix(self.lower)
        dia = u - l
        lw = sp.maximum(l, self.best[0] - dia / fudge)
        up = sp.minimum(u, self.best[0] + dia / fudge)
        [x, EImin, ierror] = DIRECT.solve(EIwrap, lw, up, user_data=[],
                                          algmethod=1, maxf=4000)
        fudge *= 2.
    print("nonzero EIs: " + str(self.cc))
    if self.cc == 0:
        print("done. no nonzero EIs")
        self.finished = True
        # raise RuntimeError("opt is finished")
        return [self.best[0], 0.]
    return sp.matrix(x), -EImin
def logloss(act, pred):
    epsilon = 1e-4
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = -1.0 / len(act) * sum(act * sp.log(pred) +
                               sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    return ll
def plotgpsonly(TEClist, gpslist, plotdir, m, ax, fig, latlim, lonlim):
    """Makes a set of plots when only GPS data is available."""
    maxplot = len(gpslist)
    strlen = int(sp.ceil(sp.log10(maxplot)) + 1)
    fmstr = '{0:0>' + str(strlen) + '}_'
    plotnum = 0
    for gps_cur in gpslist:
        gpshands = []
        gpsmin = sp.inf
        gpsmax = -sp.inf
        for igpsn, (igps, igpslist) in enumerate(zip(TEClist, gps_cur)):
            print('Plotting GPS data from rec {0} of {1}'.format(igpsn, len(gps_cur)))
            # check if there's anything to plot
            if len(igpslist) == 0:
                continue
            (sctter, scatercb) = scatterGD(igps, 'alt', 3.5e5, vbounds=[0, 20],
                                           time=igpslist, gkey='vTEC',
                                           cmap='plasma', fig=fig, ax=ax,
                                           title='', cbar=True, err=.1, m=m)
            gpsmin = sp.minimum(igps.times[igpslist, 0].min(), gpsmin)
            gpsmax = sp.maximum(igps.times[igpslist, 0].max(), gpsmax)
            gpshands.append(sctter)
            scatercb.set_label('vTEC in TECu')
        # change the z order
        print('Plotting {0} of {1} plots'.format(plotnum, maxplot))
        plt.savefig(os.path.join(plotdir, fmstr.format(plotnum) + 'GPSonly.png'))
        plotnum += 1
        for i in reversed(gpshands):
            i.set_zorder(i.get_zorder() + 1)
def set_reach_dist(SetOfObjects, point_index, epsilon):
    # Assumes that the query returns ordered (smallest distance first)
    # entries. This is the case for the balltree query...
    # ...switching to a query structure that does not do this will break
    # things! And break in a non-obvious way: for cases where multiple
    # entries are tied in reachability distance, it will cause the next
    # point to be processed in random order instead of the closest point.
    # This may manifest in edge cases where different runs of OPTICS give
    # different ordered lists and hence different clustering structure...
    # removing reproducibility.
    distances, indices = SetOfObjects.query(SetOfObjects.data[point_index],
                                            SetOfObjects._nneighbors[point_index])

    # Check to see if there is more than one member in the neighborhood
    if scipy.iterable(distances):
        # Mask processed values
        unprocessed = indices[(SetOfObjects._processed[indices] < 1)[0].T]
        rdistances = scipy.maximum(
            distances[(SetOfObjects._processed[indices] < 1)[0].T],
            SetOfObjects._core_dist[point_index])
        SetOfObjects._reachability[unprocessed] = scipy.minimum(
            SetOfObjects._reachability[unprocessed], rdistances)

        # Check to see if everything is already processed;
        # if so, return control to the main loop
        if unprocessed.size > 0:
            # Define return order based on reachability distance
            return sorted(zip(SetOfObjects._reachability[unprocessed],
                              unprocessed),
                          key=lambda reachability: reachability[0])[0][1]
        else:
            return point_index
    else:
        # Not sure if this else statement is actually needed...
        return point_index
def evaluate_ll(y, yhat):
    epsilon = 1e-15
    yhat = sp.maximum(epsilon, yhat)
    yhat = sp.minimum(1 - epsilon, yhat)
    ll = sum(y * sp.log(yhat) +
             sp.subtract(1, y) * sp.log(sp.subtract(1, yhat)))
    ll = ll * -1.0 / len(y)
    return ll
def set_reach_dist(SetOfObjects, point_index, epsilon):
    """
    Sets reachability distance and ordering.
    This function is the primary workhorse of the OPTICS algorithm.

    SetofObjects: Instantiated and prepped instance of 'setOfObjects' class
    epsilon: Determines maximum object size that can be extracted. Smaller
        epsilons reduce run time. (float)
    """
    row = [SetOfObjects.data[point_index, :]]
    indices = np.argsort(row)
    distances = np.sort(row)

    if scipy.iterable(distances):
        unprocessed = indices[(SetOfObjects._processed[indices] < 1)[0].T]
        rdistances = scipy.maximum(
            distances[(SetOfObjects._processed[indices] < 1)[0].T],
            SetOfObjects._core_dist[point_index])
        SetOfObjects._reachability[unprocessed] = scipy.minimum(
            SetOfObjects._reachability[unprocessed], rdistances)
        if unprocessed.size > 0:
            return unprocessed[np.argsort(
                np.array(SetOfObjects._reachability[unprocessed]))[0]]
        else:
            return point_index
    else:
        return point_index
def logloss(act, pred):
    pred = sp.maximum(1e-15, pred)
    pred = sp.minimum(1 - 1e-15, pred)
    ll = sum(act * sp.log(pred) +
             sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
def logloss(self, y, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(y * sp.log(pred) +
             sp.subtract(1, y) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(y)
    return ll
def entropyloss(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    el = sum(act * sp.log10(pred) +
             sp.subtract(1, act) * sp.log10(sp.subtract(1, pred)))
    el = el * -1.0 / len(act)
    return el
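Since log10(x) = ln(x) / ln(10), this base-10 variant is just the natural-log logloss divided by ln(10). A sketch of that identity, assuming one of the natural-log logloss helpers above is in scope alongside this function:

import numpy as np

act = np.array([1, 0, 1])
pred = np.array([0.8, 0.3, 0.9])
print(entropyloss(act, pred))           # ~0.099
print(logloss(act, pred) / np.log(10))  # same value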
def logLoss(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) +
             sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
def degree_distrib(net, deg_type="total", node_list=None, use_weights=True,
                   log=False, num_bins=30):
    '''
    Computing the degree distribution of a network.

    Parameters
    ----------
    net : :class:`~nngt.Graph` or subclass
        the network to analyze.
    deg_type : string, optional (default: "total")
        type of degree to consider ("in", "out", or "total").
    node_list : list or numpy.array of ints, optional (default: None)
        Restrict the distribution to a set of nodes (default: all nodes).
    use_weights : bool, optional (default: True)
        use weighted degrees (do not take the sign into account: all weights
        are positive).
    log : bool, optional (default: False)
        use log-spaced bins.

    Returns
    -------
    counts : :class:`numpy.array`
        number of nodes in each bin
    deg : :class:`numpy.array`
        bins
    '''
    ia_node_deg = net.get_degrees(node_list, deg_type, use_weights)
    ra_bins = sp.linspace(ia_node_deg.min(), ia_node_deg.max(), num_bins)
    if log:
        ra_bins = sp.logspace(sp.log10(sp.maximum(ia_node_deg.min(), 1)),
                              sp.log10(ia_node_deg.max()), num_bins)
    counts, deg = sp.histogram(ia_node_deg, ra_bins)
    ia_indices = sp.argwhere(counts)
    return counts[ia_indices], deg[ia_indices]
def _set_reach_dist(setofobjects, point_index, epsilon):
    # Assumes that the query returns ordered (smallest distance first)
    # entries. This is the case for the balltree query...
    dists, indices = setofobjects.query(setofobjects.data[point_index],
                                        setofobjects._nneighbors[point_index])

    # Checks to see if there is more than one member in the neighborhood
    if sp.iterable(dists):
        # Masking processed values; n_pr is 'not processed'
        n_pr = indices[(setofobjects._processed[indices] < 1)[0].T]
        rdists = sp.maximum(dists[(setofobjects._processed[indices] < 1)[0].T],
                            setofobjects.core_dists_[point_index])
        new_reach = sp.minimum(setofobjects.reachability_[n_pr], rdists)
        setofobjects.reachability_[n_pr] = new_reach

        # Checks to see if everything is already processed;
        # if so, return control to the main loop
        if n_pr.size > 0:
            # Define return order based on reachability distance
            return n_pr[sp.argmin(setofobjects.reachability_[n_pr])]
        else:
            return point_index
    else:
        return point_index
def set_reach_dist(SetOfObjects, point_index, epsilon):
    # Assumes that the query returns ordered (smallest distance first)
    # entries. This is the case for the balltree query...
    # distances, indices = SetOfObjects.query(SetOfObjects.data[point_index],
    #                                         SetOfObjects._nneighbors[point_index])
    row = [SetOfObjects.data[point_index, :]]
    indices = np.argsort(row)
    distances = np.sort(row)

    # Checks to see if there is more than one member in the neighborhood
    if scipy.iterable(distances):
        # Masking processed values
        unprocessed = indices[(SetOfObjects._processed[indices] < 1)[0].T]
        rdistances = scipy.maximum(
            distances[(SetOfObjects._processed[indices] < 1)[0].T],
            SetOfObjects._core_dist[point_index])
        SetOfObjects._reachability[unprocessed] = scipy.minimum(
            SetOfObjects._reachability[unprocessed], rdistances)

        # Checks to see if everything is already processed;
        # if so, return control to the main loop
        if unprocessed.size > 0:
            # Define return order based on reachability distance
            return sorted(zip(SetOfObjects._reachability[unprocessed],
                              unprocessed),
                          key=lambda reachability: reachability[0])[0][1]
        else:
            return point_index
    else:
        # Not sure if this else statement is actually needed...
        return point_index
def addColorMap_grey(self, listeElectrodes, zVal, minZZ=None, maxZZ=None,
                     sVal=None, smin=0.0, smax=1.0):
    valDict = {}
    for elect, val in zip(listeElectrodes, zVal):
        valDict[elect] = val

    if minZZ is None:
        minZZ = min(zVal)
    if maxZZ is None:
        maxZZ = max(zVal)

    h = (getZZ(listeElectrodes, valDict) - minZZ) / (maxZZ - minZZ)

    s = None
    if sVal is not None:
        sDict = {}
        for elect, val in zip(listeElectrodes, sVal):
            sDict[elect] = val
        ss = getZZ(listeElectrodes, sDict)
        deltaSS = smax - smin
        if deltaSS == 0:
            s = (ss >= smin).astype(np.float32)
        else:
            s = (ss - smin) / deltaSS
        self.s = ss

    self.ColorMap = ones((h.shape[0], h.shape[1], 3))
    self.ColorMap[:, :, 0] = minimum(maximum(h, 0.0), 1.0)
    self.ColorMap[:, :, 1] = self.ColorMap[:, :, 0]
    self.ColorMap[:, :, 2] = self.ColorMap[:, :, 0]
    # else S = 1, V = 1
    self.ColorMap[isnan(h), :] = [1.0, 1.0, 1.0]  # use white for NaNs

    steps = arange(0, 1.1, 0.1)
    hsv = ones((1, 11, 3))
    hsv[:, :, 0] = -(steps - 1) * 2.0 / 3.0
    rgb = hsv_to_rgb(hsv)
    R = [((step, r, r)) for step, r in zip(steps, rgb[0, :, 0])]
    G = [((step, g, g)) for step, g in zip(steps, rgb[0, :, 1])]
    B = [((step, b, b)) for step, b in zip(steps, rgb[0, :, 2])]
    self.colorbarDict = {'red': tuple(R), 'green': tuple(G), 'blue': tuple(B),
                         'min': minZZ, 'max': maxZZ}
def cross_entropy(act, pred):
    # negative log-loss (cf. sklearn), returned per element
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = act * sp.log(pred) + sp.subtract(1, act) * sp.log(sp.subtract(1, pred))
    return -ll
def logloss(real, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(real * sp.log(pred) +
             sp.subtract(1, real) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(real)  # the sign flip was missing, making the loss negative
    return ll
def log_loss(act, pred):
    """Vectorised computation of logloss."""
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) +
             sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
def logloss(act, predicted):
    predicted = sp.minimum(1 - (1e-15), sp.maximum(1e-15, predicted))
    v1 = act * sp.log(predicted)
    v2 = sp.subtract(1, act)
    v3 = sp.log(sp.subtract(1, predicted))
    LogLoss = sum(v1 + v2 * v3)
    LogLoss = LogLoss * (-1.0 / len(act))
    return LogLoss
def logloss(self, act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    pred[pred >= 1] = 0.9999999  # redundant after the clipping above; kept from the original
    ll = sum(act * sp.log(pred) +
             sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
def log_loss(y_true, y_pred, eps=1e-15):
    """As used by Kaggle."""
    y_pred = sp.maximum(eps, y_pred)
    y_pred = sp.minimum(1 - eps, y_pred)
    ll = sum(y_true * sp.log(y_pred) +
             sp.subtract(1, y_true) * sp.log(sp.subtract(1, y_pred)))
    ll = ll * -1.0 / len(y_true)
    return ll
def logloss(label, prediction):
    epsilon = 1e-15
    prediction = sp.maximum(epsilon, prediction)
    prediction = sp.minimum(1 - epsilon, prediction)
    ll = sum(label * sp.log(prediction) +
             sp.subtract(1, label) * sp.log(sp.subtract(1, prediction)))
    ll = ll * -1.0 / len(label)
    print(ll)
    return ll  # also return the value so the metric is usable programmatically
def logloss_metric(p, y):
    logloss = 0
    for i in range(0, len(p)):
        for j in range(0, len(p[i])):
            if y[i] == float(j):
                logloss += np.log(
                    spss.maximum(spss.minimum(p[i][j], 1 - (1e-15)), 1e-15))
    return -logloss / float(len(y))
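The double loop above just accumulates log p[i][y[i]] for the true class of each row. A vectorized numpy equivalent, as a sketch assuming p is an (n_samples, n_classes) array and y holds integer class labels:

import numpy as np

def logloss_metric_vec(p, y):
    p = np.clip(np.asarray(p), 1e-15, 1 - 1e-15)
    y = np.asarray(y, dtype=int)
    # log-probability assigned to the true class of each sample
    return -np.mean(np.log(p[np.arange(len(y)), y]))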
def report(right_list, pre_list):
    epsilon = 1e-15
    act = right_list
    pred = sp.maximum(epsilon, pre_list)
    pred = sp.minimum(1 - epsilon, pred)  # was clipping pre_list again, discarding the lower bound
    ll = sum(act * sp.log(pred) +
             sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
def logloss(pred, dtrain):
    act = dtrain.get_label()
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) +
             sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return 'logloss', ll
def entropyloss(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    el = sum(act * sp.log10(pred) +
             sp.subtract(1, act) * sp.log10(sp.subtract(1, pred)))
    el = el * -1.0 / len(act)
    return el