def visualize_depth_image(data):
    data[data == 0.0] = np.nan

    maxdepth = np.nanmax(data)
    mindepth = np.nanmin(data)
    data = data.copy()
    data -= mindepth
    data /= (maxdepth - mindepth)

    gray = np.zeros(list(data.shape) + [3], dtype=data.dtype)
    data = (1.0 - data)
    gray[..., :3] = np.dstack((data, data, data))

    # use a greenish color to visualize missing depth
    gray[np.isnan(data), :] = (97, 160, 123)
    gray[np.isnan(data), :] /= 255

    gray = exposure.equalize_hist(gray)

    # set alpha channel
    gray = np.dstack((gray, np.ones(data.shape[:2])))
    gray[np.isnan(data), -1] = 0.5
    return gray * 255
def transform(self, data):
    assert np.isfinite(data).all()
    ntest = len(data)
    data = data.copy()
    data.shape = ntest, -1
    assert np.isfinite(data).all()

    print ">>> Computing traintest linear kernel"
    start = time.time()
    kernel_traintest = np.dot(data, self._train_data.T)
    assert not np.isnan(kernel_traintest).any()
    assert not np.isinf(kernel_traintest).any()
    kernel_traintest /= self._ktrace
    assert not np.isnan(kernel_traintest).any()
    assert not np.isinf(kernel_traintest).any()
    end = time.time()
    print "Time: %s" % (end - start)

    return self._clf.decision_function(kernel_traintest).ravel()
def ll(actual, predicted):
    """
    Computes the log likelihood.

    This function computes the log likelihood between two numbers,
    or for element between a pair of lists or numpy arrays.

    Parameters
    ----------
    actual : int, float, list of numbers, numpy array
             The ground truth value
    predicted : same type as actual
                The predicted value

    Returns
    -------
    score : double or list of doubles
            The log likelihood error between actual and predicted
    """
    actual = np.array(actual)
    predicted = np.array(predicted)
    err = np.seterr(all='ignore')
    score = -(actual * np.log(predicted) +
              (1 - actual) * np.log(1 - predicted))
    np.seterr(divide=err['divide'], over=err['over'],
              under=err['under'], invalid=err['invalid'])
    if type(score) == np.ndarray:
        score[np.isnan(score)] = 0
    else:
        if np.isnan(score):
            score = 0
    return score
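# Usage sketch for ll() above (illustrative only; assumes numpy is imported as np):
# for actual=[1, 0, 1] and predicted=[0.9, 0.2, 0.8] the element-wise log
# likelihood error is roughly [0.105, 0.223, 0.223].
example_scores = ll([1, 0, 1], [0.9, 0.2, 0.8])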
def _crop_out_special_values(self, ws):
    if ws.getNumberHistograms() != 1:
        # Strip zeros is only possible on 1D workspaces
        return

    y_vals = ws.readY(0)
    length = len(y_vals)
    # Find the first finite (non-NaN, non-inf) value
    start = 0
    for i in range(0, length):
        if not np.isnan(y_vals[i]) and not np.isinf(y_vals[i]):
            start = i
            break
    # Now find the last finite value
    stop = 0
    length -= 1
    for j in range(length, 0, -1):
        if not np.isnan(y_vals[j]) and not np.isinf(y_vals[j]):
            stop = j
            break
    # Find the appropriate X values and call CropWorkspace
    x_vals = ws.readX(0)
    start_x = x_vals[start]
    # Make sure we're inside the bin that we want to crop
    end_x = x_vals[stop + 1]

    return self._crop_to_x_range(ws=ws, x_min=start_x, x_max=end_x)
def responsetime(conn):
    """ Determine the average response time for tasks in bins """
    c = conn.cursor()
    results = c.execute("""
        select finished.time, event.time, finished.time - event.time as responsetime
        from event
        left join (select time, task_id from event
                   where type_id=""" + taskid("run_task") + """) as finished
            on event.task_id = finished.task_id
        where event.type_id=""" + taskid("add_task")).fetchall()
    results = np.matrix(results, dtype=float)
    runtimes = results[:, 2]
    nones = runtimes == np.array(None)
    (finished, nofinish) = (runtimes[~np.isnan(runtimes).all(axis=1)],
                            runtimes[np.isnan(runtimes).any(axis=1)])
    return {
        "completion": {
            "finished": finished.size,
            "dnf": nofinish.size,
        },
        "response_times": {
            "min": np.min(finished),
            "mean": np.mean(finished),
            "max": np.max(finished),
            "std": np.std(finished),
        }
    }
def __init__(self, data, classes, tree_features, n_trees=100):
    self.n_features = np.shape(data)[1]
    n_rows = np.shape(data)[0]
    n_nans = np.sum(np.isnan(data), 0)
    data = data[:, n_nans < n_rows]
    self.n_features = np.shape(data)[1]
    n_nans = np.sum(np.isnan(data), 1)
    data = data[n_nans < self.n_features, :]
    self.n_rows = np.shape(data)[0]

    if (tree_features > self.n_features):
        tree_features = self.n_features

    self.col_list = np.zeros((n_trees, tree_features), dtype='int')
    self.n_trees = n_trees
    self.bags = []

    for i in range(n_trees):
        cols = sample(range(self.n_features), tree_features)
        cols.sort()
        self.col_list[i, :] = cols
        data_temp = data[:, cols]
        n_nans = np.sum(np.isnan(data_temp), 1)
        data_temp = data_temp[n_nans == 0, :]
        classes_temp = classes[n_nans == 0]
        # bag = BaggingClassifier(n_estimators=1, max_features=tree_features)
        bag = RandomForestClassifier(n_estimators=1, max_features=tree_features)
        bag.fit(data_temp, classes_temp)
        self.bags.append(bag)
        print(np.shape(data_temp))
def reconstruct_coincidence(self, coincidence_events, station_numbers=None,
                            offsets=None, initial=None):
    """Reconstruct a single coincidence

    :param coincidence_events: a coincidence list consisting of one
                               or more (station_number, event) tuples.
    :param station_numbers: list of station numbers, to only use
                            events from those stations.
    :param offsets: dictionary with detector offsets for each station.
                    These detector offsets should be relative to one
                    detector from a specific station.
    :param initial: dictionary with already fitted shower parameters.
    :return: list of theta, phi, and station numbers.

    """
    if len(coincidence_events) < 1:
        return nan, nan, []

    if offsets is None:
        offsets = {}
    if initial is None:
        initial = {}

    # Subtract base timestamp to prevent loss of precision
    ts0 = int(coincidence_events[0][1]['timestamp'])
    ets0 = ts0 * int(1e9)
    self.cluster.set_timestamp(ts0)

    t, x, y, z, nums = ([], [], [], [], [])

    offsets = self.get_station_offsets(coincidence_events, station_numbers,
                                       offsets, ts0)

    for station_number, event in coincidence_events:
        if station_numbers is not None:
            if station_number not in station_numbers:
                continue
        t_off = offsets.get(station_number, NO_OFFSET)
        station = self.cluster.get_station(station_number)
        t_detectors = relative_detector_arrival_times(event, ets0,
                                                      offsets=t_off,
                                                      station=station)
        for t_detector, detector in zip(t_detectors, station.detectors):
            if not isnan(t_detector):
                dx, dy, dz = detector.get_coordinates()
                t.append(t_detector)
                x.append(dx)
                y.append(dy)
                z.append(dz)
        if not all(isnan(t_detectors)):
            nums.append(station_number)

    if len(t) >= 3 and 'core_x' in initial and 'core_y' in initial:
        theta, phi = self.curved.reconstruct_common(t, x, y, z, initial)
    elif len(t) == 3:
        theta, phi = self.direct.reconstruct_common(t, x, y, z, initial)
    elif len(t) > 3:
        theta, phi = self.fit.reconstruct_common(t, x, y, z, initial)
    else:
        theta, phi = (nan, nan)

    return theta, phi, nums
def nanallclose(x, y, rtol=1.0e-5, atol=1.0e-8):
    """Numpy allclose function which allows NaN

    Input
        x, y: Either scalars or numpy arrays

    Output
        True or False

    Returns True if all non-nan elements pass.
    """
    xn = numpy.isnan(x)
    yn = numpy.isnan(y)
    if numpy.any(xn != yn):
        # Presence of NaNs is not the same in x and y
        return False

    if numpy.all(xn):
        # Everything is NaN.
        # This will also take care of x and y being NaN scalars
        return True

    # Filter NaN's out
    if numpy.any(xn):
        x = x[~xn]
        y = y[~yn]

    # Compare non NaN's and return
    return numpy.allclose(x, y, rtol=rtol, atol=atol)
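# Usage sketch for nanallclose() above (illustrative only; assumes numpy is imported
# as numpy, matching the function body):
a = numpy.array([1.0, numpy.nan, 3.0])
b = numpy.array([1.0, numpy.nan, 3.0 + 1e-9])
nanallclose(a, b)                              # True: NaNs line up, the rest are close
nanallclose(a, numpy.array([1.0, 2.0, 3.0]))   # False: NaN positions differ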
def __set_static_gaus_pmfs(self):
    if np.logical_not(self.off_buff.is_full()):
        print "The long term buffer is not yet full. This may give undesirable results"

    # median RSS of off-state buffer
    cal_med = self.off_buff.get_no_nan_median()

    if (np.sum(cal_med == 127) > 0) | (np.sum(np.isnan(cal_med)) > 0):
        sys.stderr.write('At least one link has a median of 127 or is nan\n\n')
        quit()

    if (np.sum(np.isnan(self.off_buff.get_nanvar())) > 0):
        sys.stderr.write('the long term buffer has a nan')
        quit()

    cal_med_mat = np.tile(cal_med, (self.V_mat.shape[1], 1)).T

    # variance of RSS during calibration
    cal_var = np.maximum(self.off_buff.get_nanvar(), self.omega)  # 3.0
    cal_var_mat = np.tile(cal_var, (self.V_mat.shape[1], 1)).T

    # Compute the off_link emission probabilities for each link
    x = np.exp(- (self.V_mat - cal_med_mat)**2 / (2*cal_var_mat/1.0))  # 1.0
    self.off_links = self.__normalize_pmf(x)

    # Compute the on_link emission probabilities for each link
    x = np.exp(- (self.V_mat - (cal_med_mat - self.Delta))**2 / (self.eta*2*cal_var_mat))  # 3
    self.on_links = self.__normalize_pmf(x)
def _get_sum(self):
    """Compute sum of non NaN / Inf values in the array."""
    try:
        return self._sum
    except AttributeError:
        self._sum = self.no_nan.sum()
        # The following 2 lines are needed as in Python 3.3 with NumPy
        # 1.7.1, numpy.ndarray and numpy.memmap aren't hashable.
        if type(self._sum) is numpy.memmap:
            self._sum = numpy.asarray(self._sum).item()
        if self.has_nan and self.no_nan.mask.all():
            # In this case the sum is not properly computed by numpy.
            self._sum = 0
        if numpy.isinf(self._sum) or numpy.isnan(self._sum):
            # NaN may happen when there are both -inf and +inf values.
            if self.has_nan:
                # Filter both NaN and Inf values.
                mask = self.no_nan.mask + numpy.isinf(self[1])
            else:
                # Filter only Inf values.
                mask = numpy.isinf(self[1])
            if mask.all():
                self._sum = 0
            else:
                self._sum = numpy.ma.masked_array(self[1], mask).sum()
            # At this point there should be no more NaN.
            assert not numpy.isnan(self._sum)
        return self._sum
def test_align(self):
    left = create_test_data()
    right = left.copy(deep=True)
    right['dim3'] = ('dim3', list('cdefghijkl'))
    right['var3'][:-2] = right['var3'][2:]
    right['var3'][-2:] = np.random.randn(*right['var3'][-2:].shape)

    intersection = list('cdefghij')
    union = list('abcdefghijkl')

    left2, right2 = align(left, right, join='inner')
    self.assertArrayEqual(left2['dim3'], intersection)
    self.assertDatasetIdentical(left2, right2)

    left2, right2 = align(left, right, join='outer')
    self.assertVariableEqual(left2['dim3'], right2['dim3'])
    self.assertArrayEqual(left2['dim3'], union)
    self.assertDatasetIdentical(left2.labeled(dim3=intersection),
                                right2.labeled(dim3=intersection))
    self.assertTrue(np.isnan(left2['var3'][-2:]).all())
    self.assertTrue(np.isnan(right2['var3'][:2]).all())

    left2, right2 = align(left, right, join='left')
    self.assertVariableEqual(left2['dim3'], right2['dim3'])
    self.assertVariableEqual(left2['dim3'], left['dim3'])
    self.assertDatasetIdentical(left2.labeled(dim3=intersection),
                                right2.labeled(dim3=intersection))
    self.assertTrue(np.isnan(right2['var3'][:2]).all())

    left2, right2 = align(left, right, join='right')
    self.assertVariableEqual(left2['dim3'], right2['dim3'])
    self.assertVariableEqual(left2['dim3'], right['dim3'])
    self.assertDatasetIdentical(left2.labeled(dim3=intersection),
                                right2.labeled(dim3=intersection))
    self.assertTrue(np.isnan(left2['var3'][-2:]).all())
def analyze_symbols(symbols):
    number = 0
    total_bull_correct = np.zeros(len(patterns))
    total_bull_wrong = np.zeros(len(patterns))
    total_bear_correct = np.zeros(len(patterns))
    total_bear_wrong = np.zeros(len(patterns))
    for symbol in symbols:
        print symbol
        bc, bw, bco, bwr = evaluate_pattern(symbol)
        if bc is None:
            continue
        for i in range(len(bc)):
            if not np.isnan(bc[i]):
                total_bull_correct[i] += bc[i]
                total_bull_wrong[i] += bw[i]
            if not np.isnan(bco[i]):
                total_bear_correct[i] += bco[i]
                total_bear_wrong[i] += bwr[i]
        number += 1
    sum_bull = total_bull_correct + total_bull_wrong
    sum_bear = total_bear_correct + total_bear_wrong
    pgain = total_bull_correct*1.0/sum_bull
    plose = total_bear_correct*1.0/sum_bear
    keys = patterns
    for i in range(len(keys)):
        print keys[i], ": ", pgain[i], " ", sum_bull[i], " ", plose[i], " ", sum_bear[i]
def estimateBIsochrone(R, z, pot=None):
    """
    NAME:
       estimateBIsochrone
    PURPOSE:
       Estimate a good value for the scale of the isochrone potential by matching the slope of the rotation curve
    INPUT:
       R,z = coordinates (if these are arrays, the median estimated delta is returned, i.e., if this is an orbit)
       pot= Potential instance or list thereof
    OUTPUT:
       b if 1 R,Z given
       bmin,bmedian,bmax if multiple R given
    HISTORY:
       2013-09-12 - Written - Bovy (IAS)
    """
    if pot is None:  # pragma: no cover
        raise IOError("pot= needs to be set to a Potential instance or list thereof")
    if isinstance(R, nu.ndarray):
        bs = nu.array([estimateBIsochrone(R[ii], z[ii], pot=pot)
                       for ii in range(len(R))])
        return (nu.amin(bs[~nu.isnan(bs)]),
                nu.median(bs[~nu.isnan(bs)]),
                nu.amax(bs[~nu.isnan(bs)]))
    else:
        r2 = R**2. + z**2
        r = math.sqrt(r2)
        dlvcdlr = dvcircdR(pot, r) / vcirc(pot, r) * r
        try:
            b = optimize.brentq(lambda x: dlvcdlr - (x/math.sqrt(r2+x**2.) - 0.5*r2/(r2+x**2.)),
                                0.01, 100.)
        except:  # pragma: no cover
            b = nu.nan
        return b
def Column8(df, Nlen, Tlen):
    mA = np.zeros((Nlen*Tlen, Nlen*2+9), float)
    vb = np.zeros(Nlen*Tlen)
    i = 0
    for firmid, firmgroup in df.groupby('Firmid'):
        if not firmgroup['Dprice'].isnull().values.any():
            mA[i*Tlen:(i+1)*Tlen, i] = np.ones(Tlen)
            mA[i*Tlen:(i+1)*Tlen, i+Nlen] = firmgroup['Dmarket'].values
            mA[i*Tlen:(i+1)*Tlen, 2*Nlen] = firmgroup['Event'].values
            eu = firmgroup['Conc'].values
            where_are_NaNs = np.isnan(eu)
            eu[where_are_NaNs] = 0
            mis = firmgroup['Dumconc'].values
            where_are_NaNs = np.isnan(mis)
            mis[where_are_NaNs] = 0
            mA[i*Tlen:(i+1)*Tlen, 1+2*Nlen] = np.multiply(firmgroup['Do'].values, firmgroup['Event'].values)
            mA[i*Tlen:(i+1)*Tlen, 2+2*Nlen] = np.multiply(firmgroup['Di'].values, firmgroup['Event'].values)
            mA[i*Tlen:(i+1)*Tlen, 3+2*Nlen] = np.multiply(eu, firmgroup['Event'].values)
            mA[i*Tlen:(i+1)*Tlen, 4+2*Nlen] = np.multiply(mis, firmgroup['Event'].values)
            mA[i*Tlen:(i+1)*Tlen, 5+2*Nlen] = np.multiply(np.multiply(eu, firmgroup['Event'].values), firmgroup['Do'].values)
            mA[i*Tlen:(i+1)*Tlen, 6+2*Nlen] = np.multiply(np.multiply(mis, firmgroup['Event'].values), firmgroup['Do'].values)
            mA[i*Tlen:(i+1)*Tlen, 7+2*Nlen] = np.multiply(np.multiply(eu, firmgroup['Event'].values), firmgroup['Di'].values)
            mA[i*Tlen:(i+1)*Tlen, 8+2*Nlen] = np.multiply(np.multiply(mis, firmgroup['Event'].values), firmgroup['Di'].values)
            vb[i*Tlen:(i+1)*Tlen] = [p2f(x) for x in firmgroup['Dprice'].values]
            i += 1
    tmpp = inv(mA.T.dot(mA)).dot(mA.T)
    Xhat = tmpp.dot(vb)
    gamma = Xhat[-9:]
    print gamma
    return gamma
def calcForces_and_potentialE(F_x, F_y, old_or_new, x_positions, y_positions, V_atoms):
    """calculates x and y forces and potential energy per atom as summed over all
    contributions due to all neighbors, as functions of position and the
    parameters of the LJ potential"""
    for atom in xrange(Natoms):
        for i in xrange(Natoms):
            if i != atom:
                delx = x_positions[atom, old_or_new] - x_positions[i, old_or_new]
                dely = y_positions[atom, old_or_new] - y_positions[i, old_or_new]
                r_ij = np.sqrt((x_positions[atom, old_or_new] - x_positions[i, old_or_new])**2
                               + (y_positions[atom, old_or_new] - y_positions[i, old_or_new])**2)
                F_x[atom, old_or_new] = F_x[atom, old_or_new] - 24.0*epsilon*sigma**6 \
                    * delx * (1 - 2.0*(sigma/r_ij)**6) / r_ij**8
                F_y[atom, old_or_new] = F_y[atom, old_or_new] - 24.0*epsilon*sigma**6 \
                    * dely * (1 - 2.0*(sigma/r_ij)**6) / r_ij**8
                V_atoms[atom] = V_atoms[atom] + 4.0*epsilon \
                    * ((sigma/r_ij)**12 - (sigma/r_ij)**6)
        if np.isnan(F_x[atom, old_or_new]) or np.isinf(F_x[atom, old_or_new]):
            F_x[atom, old_or_new] = 0
        if np.isnan(F_y[atom, old_or_new]) or np.isinf(F_y[atom, old_or_new]):
            F_y[atom, old_or_new] = 0
        if np.isnan(V_atoms[atom]) or np.isinf(V_atoms[atom]):
            V_atoms[atom] = 0
    return F_x, F_y, V_atoms
def get_depth_color(self, value):
    vmin = -0.02
    vmax = 0.02
    if value < vmin:
        value = vmin
    elif value > vmax:
        value = vmax
    dv = vmax - vmin
    r = g = b = 1
    if value < (vmin + 0.25 * dv):
        r = 0
        g = 4 * (value - vmin) / dv
    elif value < (vmin + 0.5 * dv):
        r = 0
        b = 1 + 4 * (vmin + 0.25 * dv - value) / dv
    elif value < (vmin + 0.75 * dv):
        r = 4 * (value - vmin - 0.5 * dv) / dv
        b = 0
    else:
        g = 1 + 4 * (vmin + 0.75 * dv - value) / dv
        b = 0
    if np.isnan(r) or np.isnan(g) or np.isnan(b):
        r = b = g = 0
    return (np.array([b, g, r]) * 255).astype(int)
def _evaluate_projection(self, x, y):
    """
    kNNEvaluate - evaluate class separation in the given projection using a k-NN method

    Parameters
    ----------
    x - variables to evaluate
    y - class

    Returns
    -------
    scores
    """
    if self.percent_data_used != 100:
        rand = np.random.choice(len(x), int(len(x) * self.percent_data_used / 100),
                                replace=False)
        x = x[rand]
        y = y[rand]
    neigh = KNeighborsClassifier(n_neighbors=3) if self.attr_color.is_discrete else \
        KNeighborsRegressor(n_neighbors=3)
    assert ~(np.isnan(x).any(axis=None) | np.isnan(y).any(axis=None))
    neigh.fit(x, y)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        scores = cross_val_score(neigh, x, y, cv=3)
    return scores.mean()
def bootstrap(func, arglist, N, kwargs={}):
    '''Computes error via bootstrapping on an arbitrary function. The
    major restriction is that func is assumed to return a single, 1D,
    Numpy array. Bootstrap will also resample ALL of the elements of
    arglist. If you want to keep some inputs unchanged pass them as
    keywords. The func can have an arbitrary number of arguments and
    keyword arguments. If the output of func is a Ndarray of length N
    then bootstrap returns two arrays of length N. The first is the
    mean value over all bootstraps and the second is the stddev of the
    same.
    '''
    if type(arglist) != list:
        arglist = [arglist]

    size = len(arglist[0])
    resultarr = None
    for i in range(N):
        idx = np.random.randint(0, size, size)
        bootargs = [i[idx] for i in arglist]
        result = func(*bootargs, **kwargs)
        try:
            resultarr = np.vstack((resultarr, result))
        except ValueError:
            resultarr = result

    print np.isnan(resultarr).sum()
    return bn.nanmean(resultarr, axis=0), bn.nanstd(resultarr, axis=0)
def moments(data, circle, rotate, vheight, estimator=median, **kwargs):
    """Returns (height, amplitude, x, y, width_x, width_y, rotation angle)
    the gaussian parameters of a 2D distribution by calculating its
    moments.  Depending on the input parameters, will only output
    a subset of the above.
    """
    total = np.abs(data).sum()
    Y, X = np.indices(data.shape)  # python convention: reverse x,y np.indices
    y = np.argmax((X*np.abs(data)).sum(axis=1)/total)
    x = np.argmax((Y*np.abs(data)).sum(axis=0)/total)
    col = data[int(y), :]
    # FIRST moment, not second!
    width_x = np.sqrt(np.abs((np.arange(col.size)-y)*col).sum() / np.abs(col).sum())
    row = data[:, int(x)]
    width_y = np.sqrt(np.abs((np.arange(row.size)-x)*row).sum() / np.abs(row).sum())
    width = (width_x + width_y) / 2.
    height = estimator(data.ravel())
    amplitude = data.max() - height
    mylist = [amplitude, x, y]
    if (np.isnan(width_y) or np.isnan(width_x) or
            np.isnan(height) or np.isnan(amplitude)):
        raise ValueError("something is nan")
    if vheight:
        mylist = [height] + mylist
    if not circle:
        mylist = mylist + [width_x, width_y]
        if rotate:
            mylist = mylist + [0.]  # rotation "moment" is just zero...
            # also, circles don't rotate.
    else:
        mylist = mylist + [width]
    return mylist
def test_nan_inf(self):
    # Not-a-number
    q = u.Quantity('nan', unit='cm')
    assert np.isnan(q.value)

    q = u.Quantity('NaN', unit='cm')
    assert np.isnan(q.value)

    q = u.Quantity('-nan', unit='cm')  # float() allows this
    assert np.isnan(q.value)

    q = u.Quantity('nan cm')
    assert np.isnan(q.value)
    assert q.unit == u.cm

    # Infinity
    q = u.Quantity('inf', unit='cm')
    assert np.isinf(q.value)

    q = u.Quantity('-inf', unit='cm')
    assert np.isinf(q.value)

    q = u.Quantity('inf cm')
    assert np.isinf(q.value)
    assert q.unit == u.cm

    q = u.Quantity('Infinity', unit='cm')  # float() allows this
    assert np.isinf(q.value)

    # make sure these strings don't parse...
    with pytest.raises(TypeError):
        q = u.Quantity('', unit='cm')

    with pytest.raises(TypeError):
        q = u.Quantity('spam', unit='cm')
def reportPowerDeviationsDifference(self, book, sheetName, deviationsA, deviationsB, gradient):
    sh = book.add_sheet(sheetName, cell_overwrite_ok=True)

    for i in range(self.windSpeedBins.numberOfBins):
        sh.col(i + 1).width = 256 * 5

    for j in range(self.turbulenceBins.numberOfBins):
        turbulence = self.turbulenceBins.binCenterByIndex(j)
        row = self.turbulenceBins.numberOfBins - j - 1
        sh.write(row, 0, turbulence, self.percent_no_dp_style)

        for i in range(self.windSpeedBins.numberOfBins):
            windSpeed = self.windSpeedBins.binCenterByIndex(i)
            col = i + 1

            if j == 0:
                sh.write(self.turbulenceBins.numberOfBins, col, windSpeed, self.one_dp_style)

            if windSpeed in deviationsA.matrix:
                if turbulence in deviationsA.matrix[windSpeed]:
                    deviationA = deviationsA.matrix[windSpeed][turbulence]
                    deviationB = deviationsB.matrix[windSpeed][turbulence]
                    if not np.isnan(deviationA) and not np.isnan(deviationB):
                        diff = abs(deviationA) - abs(deviationB)
                        sh.write(row, col, diff, gradient.getStyle(diff))
def test_update_player(self):
    player_dict = io.create_player_dict({'jamesle01': ''})
    player_dict['jamesle01']['gamelog_url_list'] = [
        'http://www.basketball-reference.com/players/j/jamesle01/gamelog/2013/',
        'http://www.basketball-reference.com/players/j/jamesle01/gamelog/2015/',
        'http://www.basketball-reference.com/players/j/jamesle01/gamelog/2014/']
    loaded_dict = scraper.load_player(player_dict, 'jamesle01')
    assert loaded_dict['jamesle01']['gamelog_data'] is not None
    gd = loaded_dict['jamesle01']['gamelog_data']
    assert len(gd) == 285
    # Okay now pretend this URL was there all along as well
    player_dict['jamesle01']['gamelog_url_list'].append(
        'http://www.basketball-reference.com/players/j/jamesle01/gamelog/2016/')
    scraper.update_player(player_dict, 'jamesle01', 2016)
    gd = loaded_dict['jamesle01']['gamelog_data']
    assert len(gd) > 285
    # but I mean, I don't know exactly what it'll be since more games are still being played this year
    import datetime
    # so explicitly make sure this test is updated for the 2016-17 season
    assert datetime.datetime.today() <= datetime.datetime(year=2016, month=7, day=1)
    # Spot check a game to make sure the stats are what we expect
    test_game_dict = dict(gd.loc['2015-10-30'])
    reference_dict = {u'+/-': 7.0, u'3P': 0.0, u'3P%': 0.0, u'3PA': 2.0, u'AST': 4.0,
                      u'Age': u'30-304', u'BLK': 0.0, u'DFS': 41.3, u'DRB': 3.0,
                      u'Date': nan, u'FG': 13.0, u'FG%': 0.684, u'FGA': 19.0,
                      u'FT%': 0.6, u'FT': 3.0, u'FTA': 5.0, u'G': 3.0, u'GS': 1.0,
                      u'GmSc': 21.0, u'HomeAway': nan, u'MP': u'33:56', u'ORB': 2.0,
                      u'Opp': u'MIA', u'PF': 3.0, u'PTS': 29.0, u'Rk': 3.0,
                      u'STL': 1.0, u'TOV': 4.0, u'TRB': 5.0, u'Tm': u'CLE',
                      u'WinLoss': u'W (+10)'}
    self.assertItemsEqual(reference_dict.keys(), test_game_dict.keys())
    for k in reference_dict:
        # fortunately almost equal works fine if the items == each other so we can just pass in strings w/o worrying
        # unfortunately nan doesn't match :( :( so we might as well case it out anyway; nevermind
        if isinstance(reference_dict[k], float):
            if isnan(reference_dict[k]):
                assert isnan(test_game_dict[k])
            else:
                self.assertAlmostEqual(reference_dict[k], test_game_dict[k], places=3)
        else:
            self.assertEqual(reference_dict[k], test_game_dict[k])
def test_float_modulus_corner_cases(self):
    # Check remainder magnitude.
    for dt in np.typecodes['Float']:
        b = np.array(1.0, dtype=dt)
        a = np.nextafter(np.array(0.0, dtype=dt), -b)
        rem = self.mod(a, b)
        assert_(rem <= b, 'dt: %s' % dt)
        rem = self.mod(-a, -b)
        assert_(rem >= -b, 'dt: %s' % dt)

    # Check nans, inf
    with suppress_warnings() as sup:
        sup.filter(RuntimeWarning, "invalid value encountered in remainder")
        for dt in np.typecodes['Float']:
            fone = np.array(1.0, dtype=dt)
            fzer = np.array(0.0, dtype=dt)
            finf = np.array(np.inf, dtype=dt)
            fnan = np.array(np.nan, dtype=dt)
            rem = self.mod(fone, fzer)
            assert_(np.isnan(rem), 'dt: %s' % dt)
            # MSVC 2008 returns NaN here, so disable the check.
            #rem = self.mod(fone, finf)
            #assert_(rem == fone, 'dt: %s' % dt)
            rem = self.mod(fone, fnan)
            assert_(np.isnan(rem), 'dt: %s' % dt)
            rem = self.mod(finf, fone)
            assert_(np.isnan(rem), 'dt: %s' % dt)
def exact_roc(actuals, controls):
    """
    Computes the area under the ROC curve for separating two sets. Uses all
    possible thresholds and trapezoidal interpolation. Also returns arrays of
    the true positive rate and the false positive rate.
    """
    actuals = np.ravel(actuals)
    controls = np.ravel(controls)
    if np.isnan(actuals).any():
        raise RuntimeError('NaN found in actuals')
    if np.isnan(controls).any():
        raise RuntimeError('NaN found in controls')

    thresholds = np.hstack([-np.inf,
                            np.unique(np.concatenate((actuals, controls))),
                            np.inf])[::-1]
    true_pos_rate = np.empty(thresholds.size)
    false_pos_rate = np.empty(thresholds.size)
    num_act = float(len(actuals))
    num_ctr = float(len(controls))

    for i, value in enumerate(thresholds):
        true_pos_rate[i] = (actuals >= value).sum() / num_act
        false_pos_rate[i] = (controls >= value).sum() / num_ctr
    auc = np.dot(np.diff(false_pos_rate),
                 (true_pos_rate[0:-1] + true_pos_rate[1:]) / 2)
    return (auc, true_pos_rate, false_pos_rate)
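# Usage sketch for exact_roc() above (illustrative only; assumes numpy is imported as np):
# perfectly separated sets give an area under the curve of 1.0.
auc, tpr, fpr = exact_roc(np.array([0.9, 0.8]), np.array([0.1, 0.2]))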
def __init__(self, x, y):
    assert np.ndim(x) == 2 and np.ndim(y) == 2 and np.shape(x) == np.shape(y), \
        'x and y must be 2D arrays of the same size.'

    if np.any(np.isnan(x)) or np.any(np.isnan(y)):
        x = np.ma.masked_where((np.isnan(x)) | (np.isnan(y)), x)
        y = np.ma.masked_where((np.isnan(x)) | (np.isnan(y)), y)

    self.x_vert = x
    self.y_vert = y

    mask_shape = tuple([n - 1 for n in self.x_vert.shape])
    self.mask_rho = np.ones(mask_shape, dtype='d')

    # If a masked array is given for the vertices, modify the mask such that
    # non-existent grid points are masked. A cell requires all four
    # vertices to be defined as a water point.
    if isinstance(self.x_vert, np.ma.MaskedArray):
        mask = (self.x_vert.mask[:-1, :-1] | self.x_vert.mask[1:, :-1] |
                self.x_vert.mask[:-1, 1:] | self.x_vert.mask[1:, 1:])
        self.mask_rho = np.asarray(~(~np.bool_(self.mask_rho) | mask), dtype='d')

    if isinstance(self.y_vert, np.ma.MaskedArray):
        mask = (self.y_vert.mask[:-1, :-1] | self.y_vert.mask[1:, :-1] |
                self.y_vert.mask[:-1, 1:] | self.y_vert.mask[1:, 1:])
        self.mask_rho = np.asarray(~(~np.bool_(self.mask_rho) | mask), dtype='d')

    self._calculate_subgrids()
    self._calculate_metrics()
def update(self, tick):
    security = tick['security']
    quote_time = datetime.datetime.fromtimestamp(int(tick['data']['timestamp']))
    last_price = tick['data']['last']
    log.debug("tick update security %s with tick %s, price %s" %
              (security.symbol, quote_time, last_price))

    # update sma
    # appending new row to df is not efficient
    data = tick['data']
    row = [quote_time, float(data['volume']), float(data['bid']), float(data['ask']),
           float(data['last']), float(data['high']), float(data['low'])]
    new_serie = pd.Series(row, index=['datetime', 'volume', 'bid', 'ask',
                                      'last', 'high', 'low'])
    self.quotes = self.quotes.append(new_serie, ignore_index=True)

    self.sma_short = SMA(self.quotes, timeperiod=10, key='last')
    self.sma_mid = SMA(self.quotes, timeperiod=60, key='last')
    self.sma_long = SMA(self.quotes, timeperiod=200, key='last')

    if np.isnan(self.sma_long.iloc[-1]) or np.isnan(self.sma_mid.iloc[-1]) or \
            np.isnan(self.sma_short.iloc[-1]):
        log.info('not enough data, skip to reduce risk')
        return None

    action = None
    if security.symbol not in self.account.holdings:
        action = self.check_buy(security)
    # already have some holdings
    else:
        action = self.check_sell(security)

    log.info('strategy action {0}'.format(action))
    return action
def test_nan_arithmetic(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    def make_nan_contaminated_vector(size):
        shape = (size,)
        a = numpy.random.randn(*shape).astype(numpy.float32)
        #for i in range(0, shape[0], 3):
            #a[i] = float('nan')
        from random import randrange
        for i in range(size//10):
            a[randrange(0, size)] = float('nan')
        return a

    size = 1 << 20

    a = make_nan_contaminated_vector(size)
    a_gpu = cl_array.to_device(context, queue, a)
    b = make_nan_contaminated_vector(size)
    b_gpu = cl_array.to_device(context, queue, b)

    ab = a * b
    ab_gpu = (a_gpu * b_gpu).get()

    for i in range(size):
        assert numpy.isnan(ab[i]) == numpy.isnan(ab_gpu[i])
def test_autocorr(self):
    # Just run the function
    corr1 = self.ts.autocorr()

    # Now run it with the lag parameter
    corr2 = self.ts.autocorr(lag=1)

    # corr() with lag needs Series of at least length 2
    if len(self.ts) <= 2:
        self.assertTrue(np.isnan(corr1))
        self.assertTrue(np.isnan(corr2))
    else:
        self.assertEqual(corr1, corr2)

    # Choose a random lag between 1 and length of Series - 2
    # and compare the result with the Series corr() function
    n = 1 + np.random.randint(max(1, len(self.ts) - 2))
    corr1 = self.ts.corr(self.ts.shift(n))
    corr2 = self.ts.autocorr(lag=n)

    # corr() with lag needs Series of at least length 2
    if len(self.ts) <= 2:
        self.assertTrue(np.isnan(corr1))
        self.assertTrue(np.isnan(corr2))
    else:
        self.assertEqual(corr1, corr2)
def norm_range(data, mins, maxs, lowbound, highbound):
    """
    Normalizing the data with range normalization between lowbound and highbound

    Keyword parameters:

    data
      the data to be normalized, numpy.ndarray, each row is a sample

    mins, maxs
      arrays of minimum and maximum values that each feature can take

    lowbound, highbound
      the bounds of the normalization
    """
    denom = maxs - mins
    diff = highbound - lowbound
    addit = numpy.ndarray([data.shape[0], 1])
    addit.fill(lowbound)
    for i in range(data.shape[0]):  # for each feature vector
        data[i] = diff * (data[i] - mins) / denom + lowbound
        nanCounter = numpy.isnan(data[i])
        # If all data was nan, maintain nan
        if (sum(nanCounter) != data.shape[1]):
            data[i][numpy.isnan(data[i])] = (lowbound + highbound) / 2
    return data
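# Usage sketch for norm_range() above (illustrative only; assumes numpy is imported
# as numpy, matching the function body): rows are mapped into [0, 1] and the lone
# NaN is replaced by the midpoint of the bounds (0.5).
example = numpy.array([[0.0, 5.0], [10.0, numpy.nan]])
norm_range(example, numpy.array([0.0, 0.0]), numpy.array([10.0, 10.0]), 0.0, 1.0)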
def get_coded_data(cases_df, case_ids, coded_feature_names):
    """
    Retrieves the valences corresponding to case_ids, along with coded features, if any.
    Recode unknown valences to neutral.

    args:
      cases_df: A dataframe containing the case variables.
      case_ids: list of sorted case_ids
      coded_feature_names: list of column names to pull from cases_df
        (ie 'geniss' or ['geniss','casetyp1'])

    returns:
      valences: np array of valences
      coded_feature_array: np array of coded features
      filtered_cases_df: Dataframe containing the sorted, filtered case variables
    """
    UNKNOWN_VALENCE = 0
    NEUTRAL_VALENCE = 2

    if isinstance(coded_feature_names, str):
        coded_feature_names = [coded_feature_names]
    print "coded_feature_names: ", coded_feature_names

    valences = []
    coded_feature_list = []
    for case_id in case_ids:
        valence = cases_df[cases_df['caseid'] == case_id]['direct1'].values[0]
        if np.isnan(valence) == False:
            valence = int(valence)
        else:
            valence = 2
        if coded_feature_names is not None:
            coded_feature_row = cases_df[cases_df['caseid'] == case_id][coded_feature_names].values[0]
            clean_row = []  # clean row
            for val in coded_feature_row:
                if val and np.isnan(val) == False:
                    clean_row.append(int(val))
                else:
                    clean_row.append(0)
            assert clean_row[0] >= 0, ""
            coded_feature_list.append(clean_row)
        # Replacing unknown valence variables with neutral scores.
        if valence == UNKNOWN_VALENCE:
            valence = NEUTRAL_VALENCE
        valences.append(valence)

    # one-hot encoding
    if coded_feature_names is not None:
        enc = OneHotEncoder()
        coded_feature_array = enc.fit_transform(np.array(coded_feature_list))
        print "Coded Feature Array shape: ", coded_feature_array.shape
    else:
        coded_feature_array = np.array([])

    # Filter case df
    filtered_case_df = filter_cases_df(cases_df, case_ids)

    return np.array(valences), coded_feature_array, filtered_case_df
def plotting(self, filename, data, xstart, logx=False, yfull=True,
             rainbow=None, markers=None, pngdpi=300):
    """
    data is expected to be a dictionary of n profiles, each profile is again
    a dictionary {'x': [], 'y': []}. Each profile will be plotted in a curve.
    """
    if not self.legends:
        for i in xrange(len(data)):
            self.legends.append("legends[%d]" % (i))

    if rainbow is None:
        rainbow = ['b', 'g', 'r', 'c', 'm', 'orange', 'y', 'k', 'silver', 'coral',
                   'lime', 'brown', 'violet', 'navy', 'greenyellow']
        # rainbow = brewer2mpl.get_map('Set3', 'qualitative', 12).mpl_colors
        # see: Documentation @ https://github.com/jiffyclub/brewer2mpl/wiki
    if markers is None:
        markers = ['o', 's', '*', 'v', 'p', 'D', '^', '+', '<', '>', 'd', 'H', 'x']

    zorder0 = 5  # the lowest zorder for major curves

    # Sort the keys list as dictionary is non-ordered
    # and taking care of the ordering in legends as well
    data_keys = data.keys()
    data_keys.sort()
    order_legends = []
    for key in data_keys:
        idx = data.keys().index(key)
        order_legends.append(self.legends[idx])

    #====================================#
    #   Plotting commands start here     #
    #====================================#
    #----------------------------------------------
    import matplotlib
    # Use a non-interactive backend such as Agg (for PNGs), PDF, SVG, or PS.
    matplotlib.use('Agg')  # make sure to call this before pyplot
    #----------------------------------------------
    import matplotlib.pyplot as plt
    # plt.rc('text', usetex = True)

    fig = plt.figure(1, figsize=self.figsize)
    # ax = fig.add_subplot(111)
    ax = fig.add_axes(self.position)  # self.position = [left, bottom, width, height]

    # Plot the main data:
    #=====================
    for ialg, alg in enumerate(data_keys):
        if not logx:
            ax.plot(data[alg]['x'], data[alg]['y'], drawstyle='steps-post',
                    clip_on=False, color=rainbow[ialg % len(rainbow)],
                    lw=self.lineWidth, alpha=self.alpha, zorder=zorder0+ialg)
        else:
            ax.semilogx(data[alg]['x'], data[alg]['y'], drawstyle='steps-post',
                        clip_on=False, color=rainbow[ialg % len(rainbow)],
                        lw=self.lineWidth, alpha=self.alpha, zorder=zorder0+ialg)

    xLim = [xstart, max([max(data[alg]['x']) for alg in data_keys])]
    if logx and xLim[0] == 0:
        xLim[0] = 1

    # Plot the horizontal 'extended' line from the end of each curve to xLim[1]
    # and also put a cross to mark the end of the curve
    #==========================================================================
    for ialg, alg in enumerate(data_keys):
        if data[alg]['x'][-1] < xLim[1]:
            if not logx:
                # Extended horizontal line
                ax.plot([data[alg]['x'][-1], xLim[1]],
                        [data[alg]['y'][-2], data[alg]['y'][-2]],
                        clip_on=False, color=rainbow[ialg % len(rainbow)],
                        lw=0.9*self.lineWidth, alpha=self.alpha, zorder=zorder0+ialg)
                # Add a cross to mark the end of the curve
                ax.plot([data[alg]['x'][-1]], [data[alg]['y'][-2]], marker='x',
                        markeredgecolor=rainbow[ialg % len(rainbow)],
                        markersize=2.7*self.markerSize,
                        markeredgewidth=0.8*self.lineWidth, clip_on=False,
                        color=rainbow[ialg % len(rainbow)], lw=self.lineWidth,
                        alpha=1, zorder=zorder0+ialg)
            else:
                # Extended horizontal line
                ax.semilogx([data[alg]['x'][-1], xLim[1]],
                            [data[alg]['y'][-2], data[alg]['y'][-2]],
                            clip_on=False, color=rainbow[ialg % len(rainbow)],
                            lw=0.9*self.lineWidth, alpha=self.alpha, zorder=zorder0+ialg)
                # Add a cross to mark the end of the curve
                ax.semilogx([data[alg]['x'][-1]], [data[alg]['y'][-2]], marker='x',
                            markeredgecolor=rainbow[ialg % len(rainbow)],
                            markersize=2.7*self.markerSize,
                            markeredgewidth=0.8*self.lineWidth, clip_on=False,
                            color=rainbow[ialg % len(rainbow)], lw=self.lineWidth,
                            alpha=1, zorder=zorder0+ialg)

    # Plot markers for the curves:
    #==============================
    #xarr = np.arange(xstart, data[data_keys[0]]['x'][-1] + 1)  # generate data for the x-axis
    #print xarr[-1]
    #print data[data_keys[0]]['x'][-1]  # TODO: debug ecdf.py: why 40000.2 ???
    nMarkers = self.nMarkers  # 5 markers on each line
    for ialg, alg in enumerate(data_keys):
        if not logx:
            lenx = xLim[1] - xLim[0]
            alt = int(lenx / (nMarkers*len(data)))  # alternate among the first markers over lines (in idx unit)
            offset = int(lenx / nMarkers)  # offset between 2 consecutive markers of a line (in idx unit)
            # Generate estimated x's for markers
            estxMarkers = int(alt/2) + np.arange(start=ialg*alt, stop=lenx-0.5*offset,
                                                 step=offset, dtype=int)
        else:
            lenx = np.log10(xLim[1]) - np.log10(xLim[0])
            alt = lenx / (nMarkers*len(data))  # alternate among the first markers over lines (in idx unit)
            offset = lenx / nMarkers  # offset between 2 consecutive markers of a line (in idx unit)
            # Generate estimated x's for markers
            estxMarkers = alt/2 + np.log10(xLim[0]) + np.arange(start=ialg*alt,
                                                                stop=lenx-0.5*offset,
                                                                step=offset)
            estxMarkers = 10 ** estxMarkers
            if ialg == 0:
                # skip the very first (not so visible) marker on the semilogx scale
                estxMarkers = np.delete(estxMarkers, 0)

        # Sample the real x and y of the markers from the curve
        markerCoord = {'x': [], 'y': []}
        icur = 0
        for estx in estxMarkers:
            for idx, x in enumerate(data[alg]['x']):
                if idx < icur:
                    continue
                # TODO: this is added to avoid a list index error with DIFF maxfevals
                try:
                    data[alg]['x'][idx+1]
                except:
                    continue
                if estx == x or (estx > x and estx < data[alg]['x'][idx+1]):
                    markerCoord['x'].append(x)
                    markerCoord['y'].append(data[alg]['y'][idx])
                    icur = idx + 1
                    break

        # Plot the sampled markers. TODO: alpha doesn't work for marker!
        for x, y in zip(markerCoord['x'], markerCoord['y']):
            if not logx:
                ax.plot(x, y, ls='', clip_on=False, markerfacecolor='none',
                        marker=markers[ialg % len(markers)],
                        markeredgecolor=rainbow[ialg % len(rainbow)],
                        markersize=self.markerSize*(1 if markers[ialg % len(markers)] != '*' else 1.45),
                        markeredgewidth=0.7*self.lineWidth, alpha=self.alpha,
                        zorder=zorder0+ialg)
            else:
                ax.semilogx(x, y, ls='', clip_on=False, markerfacecolor='none',
                            marker=markers[ialg % len(markers)],
                            markeredgecolor=rainbow[ialg % len(rainbow)],
                            markersize=self.markerSize*(1 if markers[ialg % len(markers)] != '*' else 1.45),
                            markeredgewidth=0.7*self.lineWidth, alpha=self.alpha,
                            zorder=zorder0+ialg)

    ax.set_xlim(xLim)
    if yfull:
        ax.set_ylim([0, 1])

    ax.grid(True, which='both', color="gray", alpha=0.6,
            ls=self.gridLineStyle, lw=self.gridLineWeight)  # print visibility?
    #ax.grid(True, which='both', color="gray", alpha=0.1, ls='-', lw=0.3)
    #ax.grid(True, which='both', axis='x', color="gray", alpha=0.15, ls='-', lw=0.2)

    for axis in ['top', 'bottom', 'left', 'right']:
        ax.spines[axis].set_linewidth(self.axesLineWidth)

    #=============================#
    #  Make legends on the right  #
    #=============================#
    if self.rightLegend:
        xExtRatio = self.xExtRatio
        xSegLenRatio = self.xSegLenRatio
        yShrinkRatio = self.yShrinkRatio
        labelBottomTop = self.labelBottomTop

        yLim = ax.get_ylim()
        xTicks = ax.get_xticks()  # backup xticks for use after making legends on the right
        #if xTicks[0] < xLim[0]:  # the 1st element usually doesn't appear on the axis
        #    xTicks = np.delete(xTicks, 0)  # thus remove it of the backed up xTicks
        while xTicks[-1] > xLim[1]:
            xTicks = np.delete(xTicks, -1)
        if logx:
            while xTicks[0] < xLim[0]:
                xTicks = np.delete(xTicks, 0)

        endData = [data[key]['y'][-1] for key in data_keys]
        for idx, key in enumerate(data_keys):
            j = -1
            while np.isnan(endData[idx]):
                endData[idx] = data[key]['y'][j]
                j = j - 1
        idx = np.argsort(endData)

        xLength = xLim[1] - xLim[0]
        yLength = yLim[1] - yLim[0]
        if not logx:
            normFactor = (xLim[1] - xLim[0]) / xLim[1]
            xExt = xLim[1] + xExtRatio*xSegLenRatio * normFactor*xLength
        else:
            # helps adjust the extension parts when xstart is large
            normFactor = (np.log10(xLim[1]) - np.log10(xLim[0])) / np.log10(xLim[1])
            xExt = 10 ** (np.log10(xLim[1]) + xExtRatio*xSegLenRatio * normFactor*np.log10(xLength))
        yExt = np.linspace(yLim[0] + labelBottomTop[0]*yShrinkRatio*yLength,
                           yLim[1] - labelBottomTop[1]*yShrinkRatio*yLength,
                           num=len(data), endpoint=True)

        # Plot all extension segments:
        #==============================
        for k, alg in enumerate(data_keys):
            if not logx:
                ax.plot(np.array([xLim[1], xExt]),
                        np.array([endData[idx[k]], yExt[k]]),
                        clip_on=False, ls='-', lw=self.lineWidth,
                        solid_capstyle="round", color=rainbow[idx[k] % len(rainbow)],
                        alpha=self.alpha, marker=markers[idx[k] % len(markers)],
                        markeredgecolor=rainbow[idx[k] % len(rainbow)],
                        markersize=self.markerSize*(1 if markers[idx[k] % len(markers)] != '*' else 1.45),
                        markerfacecolor='none', markeredgewidth=0.7*self.lineWidth,
                        zorder=zorder0+idx[k])
                ax.text(xExt*1.015*((self.markerSize/7.5)**1)*normFactor, yExt[k],
                        r'%s' % (order_legends[idx[k]]),
                        verticalalignment='bottom',
                        fontsize=self.rightFontSize)  # verticalalignment='center'
            else:
                ax.semilogx(np.array([xLim[1], xExt]),
                            np.array([endData[idx[k]], yExt[k]]),
                            clip_on=False, ls='-', lw=self.lineWidth,
                            solid_capstyle="round", color=rainbow[idx[k] % len(rainbow)],
                            alpha=self.alpha, marker=markers[idx[k] % len(markers)],
                            markeredgecolor=rainbow[idx[k] % len(rainbow)],
                            markersize=self.markerSize*(1 if markers[idx[k] % len(markers)] != '*' else 1.45),
                            markerfacecolor='none', markeredgewidth=0.7*self.lineWidth,
                            zorder=zorder0+idx[k])
                ax.text(xExt * 10**(0.015*((self.markerSize/7.5)**1)*normFactor*np.log10(xExt)),
                        yExt[k], r'%s' % (order_legends[idx[k]]),
                        verticalalignment='bottom',
                        fontsize=self.rightFontSize)  # verticalalignment='center'

        # Plot the vertical separation line:
        #===================================
        if self.keepBox:
            #if not logx:
            #    ax.plot(np.array([xLim[1], xLim[1]]), np.array([yLim[0],yLim[1]]), 'k-', lw=0.7, clip_on=False, zorder=1)
            #else:
            #    ax.semilogx(np.array([xLim[1], xLim[1]]), np.array([yLim[0],yLim[1]]), 'k-', lw=0.7, clip_on=False, zorder=1)
            #ax.spines['right'].set_visible(False)
            ax.spines['right'].set_linewidth(0.7)

            # Plot extension part of bottom and top bars:
            if not logx:
                ax.plot(np.array([xLim[1], xLim[1] + xExtRatio * normFactor*xLength]),
                        np.array([yLim[0], yLim[0]]), 'k-',
                        lw=self.axesLineWidth, clip_on=False)
                ax.plot(np.array([xLim[1], xLim[1] + xExtRatio * normFactor*xLength]),
                        np.array([yLim[1], yLim[1]]), 'k-',
                        lw=self.axesLineWidth, clip_on=False)
            else:
                ax.semilogx(np.array([xLim[1], 10 ** (np.log10(xLim[1]) + xExtRatio * normFactor*np.log10(xLength))]),
                            np.array([yLim[0], yLim[0]]), 'k-',
                            lw=self.axesLineWidth, clip_on=False)
                ax.semilogx(np.array([xLim[1], 10 ** (np.log10(xLim[1]) + xExtRatio * normFactor*np.log10(xLength))]),
                            np.array([yLim[1], yLim[1]]), 'k-',
                            lw=self.axesLineWidth, clip_on=False)
        else:
            if not logx:
                ax.plot(np.array([xLim[1], xLim[1]]),
                        np.array([yLim[0], yExt[-1]+(yExt[0]-yLim[0])]),
                        clip_on=False, ls='-', c='k', lw=0.6)
            else:
                ax.semilogx(np.array([xLim[1], xLim[1]]),
                            np.array([yLim[0], yExt[-1]+(yExt[0]-yLim[0])]),
                            clip_on=False, ls='-', c='k', lw=0.6)
            ax.spines['right'].set_visible(False)
            ax.spines['top'].set_visible(False)
            ax.yaxis.set_ticks_position('left')  # turn of tick on right side

        # The following is not necessary anymore, thanks to: clip_on=False,
        # self.position, and extension of bottom and top bars
        # ax.set_xlim(xLim[0], xLim[1] + xLength*xExtRatio)
        ax.set_xticks(xTicks)
        ax.set_ylim(yLim)
    else:
        print "TODO: Make traditional legends"
        pass

    # Write text annotations:
    try:
        if type(self.note) != type(list()):
            annotations = [self.note]
        else:
            annotations = self.note
        yrange = yLim[1] - yLim[0]
        for i, string in enumerate(annotations):
            if not logx:
                ax.text(xLim[0] + 0.025*normFactor*xExt,
                        yLim[1] - 0.035*yrange - i*(0.06*((self.annotationFontSize/15)**1)*yrange),
                        r"%s" % string, verticalalignment='top',
                        fontsize=self.annotationFontSize,
                        bbox=dict(facecolor='white', edgecolor='none'))  # verticalalignment='bottom'
            else:
                ax.text(xLim[0] * 10**(0.025*normFactor*np.log10(xExt)),
                        yLim[1] - 0.035*yrange - i*(0.06*((self.annotationFontSize/15)**1)*yrange),
                        r"%s" % string, verticalalignment='top',
                        fontsize=self.annotationFontSize,
                        bbox=dict(facecolor='white', edgecolor='none'))  # verticalalignment='bottom'
    except:
        pass

    #ax.xaxis.tick_bottom()
    #ax.yaxis.tick_left()
    #ax.tick_params(direction='inout')  # for both axes
    # ax.xaxis.set_tick_params(direction='inout')
    #[line.set_zorder(3) for line in ax.lines]

    import matplotlib as mpl
    mpl.rcParams['axes.unicode_minus'] = False
    #from matplotlib.ticker import ScalarFormatter
    #majorFormatter = ScalarFormatter(useMathText=True, useOffset=False)
    #majorFormatter.set_scientific(True)
    #majorFormatter.set_powerlimits((-4,4))
    #ax.xaxis.set_major_formatter(majorFormatter)
    #ax.yaxis.set_major_formatter(majorFormatter)

    # Change fontsize for x and y ticks
    ax.tick_params(axis='x', labelsize=self.xTickLabelSize)
    ax.tick_params(axis='y', labelsize=self.yTickLabelSize)

    ax.set_xlabel(self.xlabel, fontsize=self.bottomFontSize)
    ax.set_ylabel(self.ylabel, fontsize=self.leftFontSize)
    ax.set_title(self.title, fontsize=self.topFontSize)

    # Save the plot to file:
    ext = filename[-4:]
    if not ext in ['.pdf', '.eps', '.png']:
        filename = filename + '.pdf'
    fig.savefig(filename)
    #fig.savefig(filename[:-4] + '.png', dpi=(pngdpi))
    # plt.show()
    plt.close(fig)
def convertNansToZeros(ma):
    nan_elements = np.flatnonzero(np.isnan(ma.data))
    if len(nan_elements) > 0:
        ma.data[nan_elements] = 0.0
    return ma
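# Usage sketch for convertNansToZeros() above (illustrative only; assumes `ma` exposes
# a flat .data array, e.g. a scipy.sparse matrix, and numpy imported as np):
from scipy.sparse import csr_matrix
m = convertNansToZeros(csr_matrix(np.array([[1.0, np.nan], [0.0, 2.0]])))
# m.toarray() -> [[1., 0.], [0., 2.]]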
def nan_fill(a):
    a = a.copy()
    nan_idx = np.where(np.isnan(a))[0]
    a[nan_idx] = a[nan_idx - 1]
    return a
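# Usage sketch for nan_fill() above (illustrative only; assumes numpy is imported as np):
# each NaN is replaced by the value immediately before it, so consecutive NaNs
# (or a leading NaN) are not handled.
nan_fill(np.array([1.0, np.nan, 3.0, np.nan]))  # -> array([1., 1., 3., 3.])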
def check_null_(self, X):
    nans = np.isnan(X)
    infs = np.isinf(X)
    nan_summary = np.sum(np.logical_or(nans, infs))
    if nan_summary > 0:
        raise ValueError("nans/inf in frame = %s" % (nan_summary))
def convertNansToOnes(pArray):
    nan_elements = np.flatnonzero(np.isnan(pArray))
    if len(nan_elements) > 0:
        pArray[nan_elements] = 1.0
    return pArray
# init
if iteration == 0:
    print('initializing the model...')
    sess.run(initializer)
    init_loss = sess.run(init_pass, {x_init: x_batch, y_init: y_batch})
    sess.graph.finalize()
else:
    xfs = np.split(x_batch, args.nr_gpu)
    yfs = np.split(y_batch, args.nr_gpu)
    feed_dict = {tf_lr: lr, tf_student_grad_scale: student_grad_scale}
    feed_dict.update({xs[i]: xfs[i] for i in range(args.nr_gpu)})
    feed_dict.update({ys[i]: yfs[i] for i in range(args.nr_gpu)})
    l, _ = sess.run([train_loss, train_step], feed_dict)
    train_iter_losses.append(l)
    if np.isnan(l):
        print('Loss is NaN')
        sys.exit(0)

if (iteration + 1) % print_every == 0:
    avg_train_loss = np.mean(train_iter_losses)
    losses_avg_train.append(avg_train_loss)
    train_iter_losses = []
    print('%d/%d train_loss=%6.8f bits/value=%.3f' % (
        iteration + 1, config.max_iter, avg_train_loss,
        avg_train_loss / config.ndim / np.log(2.)))
    corr = config.student_layer.corr.eval().flatten()

if (iteration + 1) % config.save_every == 0:
    current_time = time.time()
    eta_time = (config.max_iter - iteration
def check_invalid_values(x):
    return np.isnan(x).sum() + np.isinf(x).sum()
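# Usage sketch for check_invalid_values() above (illustrative only; assumes numpy is
# imported as np): counts NaNs and infinities together.
check_invalid_values(np.array([1.0, np.nan, np.inf, 2.0]))  # -> 2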
def run(self, counts, background=None, exposure=None):
    """
    Run image smoothing.

    Parameters
    ----------
    counts : `~gammapy.maps.WcsNDMap`
        Counts map
    background : `~gammapy.maps.WcsNDMap`
        Background map
    exposure : `~gammapy.maps.WcsNDMap`
        Exposure map

    Returns
    -------
    images : dict of `~gammapy.maps.WcsNDMap`
        Smoothed images; keys are:
            * 'counts'
            * 'background'
            * 'flux' (optional)
            * 'scales'
            * 'significance'.
    """
    pixel_scale = counts.geom.pixel_scales.mean()
    kernels = self.kernels(pixel_scale)

    cubes = {}
    cubes["counts"] = scale_cube(counts.data, kernels)

    if background is not None:
        cubes["background"] = scale_cube(background.data, kernels)
    else:
        # TODO: Estimate background with asmooth method
        raise ValueError("Background estimation required.")

    if exposure is not None:
        flux = (counts.data - background.data) / exposure.data
        cubes["flux"] = scale_cube(flux, kernels)

    cubes["significance"] = self._significance_cube(
        cubes, method=self.parameters["method"]
    )

    smoothed = self._reduce_cubes(cubes, kernels)

    result = {}
    for key in ["counts", "background", "scale", "significance"]:
        data = smoothed[key]

        # set remaining pixels with significance < threshold to mean value
        if key in ["counts", "background"]:
            mask = np.isnan(data)
            data[mask] = np.mean(locals()[key].data[mask])

        result[key] = WcsNDMap(counts.geom, data)

    if exposure is not None:
        data = smoothed["flux"]
        mask = np.isnan(data)
        data[mask] = np.mean(flux[mask])
        result["flux"] = WcsNDMap(counts.geom, data)

    return result
#######################################################################################################
# submit without pseudo
input_fn = 'ann_3tta_th4_test.csv'
#######################################################################################################
input_df = pd.read_csv(sub_dir + input_fn)
propagation_step = 100

test_ids = input_df.RunID
source_ids = input_df.SourceID
coarse_time = input_df.SourceTime

#=======================================================================================================================
x_trn = df_train.iloc[:, 1:100]

# scale train
X = x_trn.values
where_are_NaNs = np.isnan(X)
where_are_infs = np.isinf(X)
X[where_are_NaNs] = 0
X[where_are_infs] = 0

scaler = RobustScaler()
scaler.fit(X)
scaled_train_X = scaler.transform(X)
X = scaled_train_X
#scaler = joblib.load("scaler.save")

# bins for test segment
bins = np.arange(0, 3000, 30)
#=======================================================================================================================
# Parallel code
def assertEqualWithNan(self, actual, expected):
    """Like assertEqual, but NaN==NaN."""
    self.assertTrue(((actual == expected) |
                     (np.isnan(actual) & np.isnan(expected))).all())
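# Sketch of the comparison used above on plain arrays (illustrative only; assumes numpy
# is imported as np): NaN never equals NaN, so the extra isnan term makes NaN match NaN.
a = np.array([1.0, np.nan, 3.0])
b = np.array([1.0, np.nan, 3.0])
((a == b) | (np.isnan(a) & np.isnan(b))).all()  # -> True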
def calc_slope_vars(rn_sect, gain_sect, gdq_sect, group_time, max_seg):
    """
    Calculate the segment-specific variance arrays for the given integration.

    Parameters
    ----------
    rn_sect : ndarray
        read noise values for all pixels in data section, 2-D float

    gain_sect : ndarray
        gain values for all pixels in data section, 2-D float

    gdq_sect : ndarray
        data quality flags for pixels in section, 3-D int

    group_time : float
        Time increment between groups, in seconds.

    max_seg : int
        maximum number of segments fit

    Returns
    -------
    den_r3 : ndarray
        for a given integration, the reciprocal of the denominator of the
        segment-specific variance of the segment's slope due to read noise,
        3-D float

    den_p3 : ndarray
        for a given integration, the reciprocal of the denominator of the
        segment-specific variance of the segment's slope due to Poisson noise,
        3-D float

    num_r3 : ndarray
        numerator of the segment-specific variance of the segment's slope due
        to read noise, 3-D float

    segs_beg_3 : ndarray
        lengths of segments for all pixels in the given data section and
        integration, 3-D int
    """
    (nreads, asize2, asize1) = gdq_sect.shape
    npix = asize1 * asize2
    imshape = (asize2, asize1)

    # Create integration-specific sections of input arrays for determination
    #   of the variances.
    gdq_2d = gdq_sect[:, :, :].reshape((nreads, npix))
    gain_1d = gain_sect.reshape(npix)
    gdq_2d_nan = gdq_2d.copy()  # group dq with SATS will be replaced by nans
    gdq_2d_nan = gdq_2d_nan.astype(np.float32)

    wh_sat = np.where(np.bitwise_and(gdq_2d, constants.dqflags["SATURATED"]))
    if len(wh_sat[0]) > 0:
        gdq_2d_nan[wh_sat] = np.nan  # set all SAT groups to nan

    del wh_sat

    # Get lengths of semiramps for all pix [number_of_semiramps, number_of_pix]
    segs = np.zeros_like(gdq_2d)

    # Counter of semiramp for each pixel
    sr_index = np.zeros(npix, dtype=np.uint8)
    pix_not_done = np.ones(npix, dtype=bool)  # initialize to True

    i_read = 0
    # Loop over reads for all pixels to get segments (segments per pixel)
    while (i_read < nreads and np.any(pix_not_done)):
        gdq_1d = gdq_2d_nan[i_read, :]
        wh_good = np.where(gdq_1d == 0)  # good groups

        # if this group is good, increment those pixels' segments' lengths
        if len(wh_good[0]) > 0:
            segs[sr_index[wh_good], wh_good] += 1
        del wh_good

        # Locate any CRs that appear before the first SAT group...
        wh_cr = np.where(gdq_2d_nan[i_read, :].astype(np.int32) &
                         constants.dqflags["JUMP_DET"] > 0)

        # ... but not on final read:
        if (len(wh_cr[0]) > 0 and (i_read < nreads - 1)):
            sr_index[wh_cr[0]] += 1
            segs[sr_index[wh_cr], wh_cr] += 1

        del wh_cr

        # If current group is a NaN, this pixel is done (pix_not_done is False)
        wh_nan = np.where(np.isnan(gdq_2d_nan[i_read, :]))
        if len(wh_nan[0]) > 0:
            pix_not_done[wh_nan[0]] = False

        del wh_nan

        i_read += 1

    segs = segs.astype(np.uint8)
    segs_beg = segs[:max_seg, :]  # the leading nonzero lengths

    # Create reshaped version [ segs, y, x ] to simplify computation
    segs_beg_3 = segs_beg.reshape(max_seg, imshape[0], imshape[1])
    segs_beg_3 = remove_bad_singles(segs_beg_3)

    # Create a version 1 less for later calculations for the variance due to
    #   Poisson, with a floor=1 to handle single-group segments
    wh_pos_3 = np.where(segs_beg_3 > 1)

    segs_beg_3_m1 = segs_beg_3.copy()
    segs_beg_3_m1[wh_pos_3] -= 1
    segs_beg_3_m1[segs_beg_3_m1 < 1] = 1

    # For a segment, the variance due to Poisson noise
    #   = slope/(tgroup * gain * (ngroups-1)),
    #   where slope is the estimated median slope, tgroup is the group time,
    #   and ngroups is the number of groups in the segment.
    #   Here the denominator of this quantity will be computed, which will be
    #   later multiplied by the estimated median slope.

    # Suppress, then re-enable, harmless arithmetic warnings, as NaN will be
    #   checked for and handled later
    warnings.filterwarnings("ignore", ".*invalid value.*", RuntimeWarning)
    warnings.filterwarnings("ignore", ".*divide by zero.*", RuntimeWarning)
    den_p3 = 1. / (group_time * gain_1d.reshape(imshape) * segs_beg_3_m1)
    warnings.resetwarnings()

    # For a segment, the variance due to readnoise noise
    #   = 12 * readnoise**2 /(ngroups_seg**3. - ngroups_seg)/( tgroup **2.)
    num_r3 = 12. * (rn_sect / group_time)**2.  # always >0

    # Reshape for every group, every pixel in section
    num_r3 = np.dstack([num_r3] * max_seg)
    num_r3 = np.transpose(num_r3, (2, 0, 1))

    # Denominator den_r3 = 1./(segs_beg_3 **3.-segs_beg_3). The minimum number
    #   of allowed groups is 2, which will apply if there is actually only 1
    #   group; in this case den_r3 = 1/6. This covers the case in which there
    #   is only one good group at the beginning of the integration, so it will
    #   be compared to the plane of (near) zeros resulting from the reset. For
    #   longer segments, this value is overwritten below.
    den_r3 = num_r3.copy() * 0. + 1. / 6
    wh_seg_pos = np.where(segs_beg_3 > 1)

    # Suppress, then, re-enable harmless arithmetic warnings, as NaN will be
    #   checked for and handled later
    warnings.filterwarnings("ignore", ".*invalid value.*", RuntimeWarning)
    warnings.filterwarnings("ignore", ".*divide by zero.*", RuntimeWarning)
    den_r3[wh_seg_pos] = 1. / (segs_beg_3[wh_seg_pos]**3. -
                               segs_beg_3[wh_seg_pos])  # overwrite where segs>1
    warnings.resetwarnings()

    return (den_r3, den_p3, num_r3, segs_beg_3)
def multichannel_correction_Dam(self): from scipy import optimize import ctypes # An included library with Python install. ctypes.windll.user32.MessageBoxW( 0, "Select area for multichannel correction", "", 0) rs = self.SelectRectangle() x = [np.int(rs.corners[0][0]), np.int(rs.corners[0][2])] y = [np.int(rs.corners[1][0]), np.int(rs.corners[1][2])] #x[0] = 650 #x[1]=7104 #y[0] = 225 #y[1] = 293 nod_roi = self.OD[y[0]:(y[1] + 1), x[0]:(x[1] + 1), 0:3] nod_roi = nod_roi - self.OD0 self.alpha = np.zeros([self.OD.shape[0], self.OD.shape[1]]) self.beta = np.zeros([self.OD.shape[0], self.OD.shape[1]]) lim = 0.2 cont = 0.0 cont2 = 0.0 cont_lim = nod_roi.shape[0] * nod_roi.shape[1] * nod_roi.shape[2] F = np.zeros([3, 1]) J = np.zeros([3, 3]) coef1 = self.Calibration.DevicParam_A coef2 = self.Calibration.DevicParam_B coef3 = self.Calibration.DevicParam_n alpha_media = self.Calibration.AlphaCal beta_media = self.Calibration.BetaCal lambda_alpha = 1.0 / np.power(self.Calibration.SigmaAlphaCal, 2) lambda_beta = 1.0 / np.power(self.Calibration.SigmaBetaCal, 2) sigma_coef1 = self.Calibration.Sigma_A sigma_coef2 = self.Calibration.Sigma_B sigma_coef3 = self.Calibration.Sigma_n od0 = self.OD0 indice_medio = 0 alpha_average = 0 beta_average = 0 for h in range(nod_roi.shape[0]): if cont2 / cont_lim > 0.05: cont2 = 0.0 print( f'Multichannel correction process: {np.trunc(100*cont/cont_lim)}%' ) for w in range(nod_roi.shape[1]): netOD = np.array([nod_roi[h, w, c] for c in [0, 1, 2]]) D_pixel = coef1 * netOD + np.sign(netOD) * coef2 * np.power( np.abs(netOD), coef3) sigma_D = np.sqrt( np.power(netOD * sigma_coef1, 2) + np.power(np.abs(netOD), 2 * coef3) * np.power(sigma_coef2, 2)) nod = np.array([ 0.0 + (netOD[c] > 0) * netOD[c] + (netOD[c] < 0) * 0.00001 for c in [0, 1, 2] ]) dose = coef1 * nod + coef2 * np.power(nod, coef3) d_dose = coef1 + coef3 * coef2 * np.power(nod, coef3 - 1) d2_dose = (coef3 - 1) * coef3 * coef2 * np.power( nod, coef3 - 2) d3_dose = (coef3 - 2) * (coef3 - 1) * coef3 * coef2 * np.power( nod, coef3 - 3) Ca = np.sum(np.power(d_dose * nod / sigma_D, 2)) + lambda_alpha Cb = np.sum(np.power(d_dose * od0 / sigma_D, 2)) + lambda_beta Cab = np.sum(np.power(d_dose / sigma_D, 2) * nod * od0) Cia = np.sum(d_dose * nod * (D_pixel - dose) / np.power(sigma_D, 2)) + lambda_alpha * alpha_media Cib = np.sum(d_dose * od0 * (D_pixel - dose) / np.power(sigma_D, 2)) + lambda_beta * beta_media d_Ca = 2 * d_dose * nod * (nod * d2_dose + d_dose) / np.power( sigma_D, 2) d_Cb = 2 * d_dose * d2_dose * np.power(od0 / sigma_D, 2) d_Cab = d_dose * od0 * (2 * nod * d2_dose + d_dose) / np.power( sigma_D, 2) d_Cia = (d2_dose * nod * (D_pixel - dose) + d_dose * (D_pixel - dose) - np.power(d_dose, 2) * nod) / np.power(sigma_D, 2) d_Cib = (d2_dose * od0 * (D_pixel - dose) - np.power(d_dose, 2) * od0) / np.power(sigma_D, 2) alpha = (Cia * Cb - Cib * Cab) / (Ca * Cb - Cab * Cab) beta = (Cia * Cab - Cib * Ca) / (Cab * Cab - Ca * Cb) if np.isnan(alpha) or np.isnan(beta): xx = 1 d_alpha = (d_Cia * Cb + Cia * d_Cb - d_Cib * Cab - Cib * d_Cab - alpha * (d_Ca * Cb + Ca * d_Cb - 2. * Cab * d_Cab)) / ( Ca * Cb - Cab * Cab) d_beta = (d_Cia * Cab + Cia * d_Cab - d_Cib * Ca - Cib * d_Ca - beta * (2. 
* Cab * d_Cab - d_Ca * Cb - Ca * d_Cb)) / ( Cab * Cab - Ca * Cb) var_NOD = nod * alpha + od0 * beta d_var_NOD = alpha + nod * d_alpha + od0 * d_beta mu = dose + d_dose * var_NOD der_mu = d_dose * (alpha + 1) + d2_dose * var_NOD d_mu = d_dose * (d_var_NOD + 1) + d2_dose * var_NOD d_der_mu = d2_dose * (alpha + 1 + d_var_NOD ) + d_dose * d_alpha + d3_dose * var_NOD dif = (mu - D_pixel) / np.power(sigma_D, 2) F[0, 0] = np.sum(dif * der_mu / d_dose) F[1, 0] = dose[0] - dose[1] F[2, 0] = dose[0] - dose[2] for c in np.arange(3): J[0, c] = d_mu[c] * der_mu[c] / (d_dose[c] * np.power(sigma_D[c], 2))+dif[c] * d_der_mu[c] / \ d_dose[c] - dif[c] * der_mu[c] * d2_dose[c] / np.power(d_dose[c], 2) J[1, 0] = d_dose[0] J[1, 1] = -d_dose[1] J[2, 0] = d_dose[0] J[2, 2] = -d_dose[2] A = np.linalg.inv(J) indice_max = 0 dif_dosis = 1 F_new = np.zeros([3, 1]) NOD_pix = np.array([0.0, 0.0, 0.0]) + nod NOD_new = NOD_pix - np.matrix.transpose(np.linalg.solve(J, F)) NOD_new[NOD_new < 0] = 1e-10 while (indice_max <= 30) and (dif_dosis > 1.0e-10): nod = np.array([0.0, 0.0, 0.0]) + NOD_new dose = coef1 * nod + coef2 * np.power(nod, coef3) d_dose = coef1 + coef3 * coef2 * np.power(nod, coef3 - 1) d2_dose = (coef3 - 1) * coef3 * coef2 * np.power( nod, coef3 - 2) Ca = np.sum(np.power(d_dose * nod / sigma_D, 2), axis=1)[0] + lambda_alpha Cb = np.sum(np.power(d_dose * od0 / sigma_D, 2), axis=1)[0] + lambda_beta Cab = np.sum(np.power(d_dose / sigma_D, 2) * nod * od0, axis=1)[0] Cia = np.sum(d_dose * nod * (D_pixel - dose) / np.power(sigma_D, 2),axis=1)[0] + \ lambda_alpha * alpha_media Cib = np.sum(d_dose * od0 * (D_pixel - dose) / np.power(sigma_D, 2),axis=1)[0] + \ lambda_beta * beta_media alpha = (Cia * Cb - Cib * Cab) / (Ca * Cb - Cab * Cab) beta = (Cia * Cab - Cib * Ca) / (Cab * Cab - Ca * Cb) var_NOD = nod * alpha + od0 * beta mu = dose + d_dose * var_NOD der_mu = d_dose * (alpha + 1) + d2_dose * var_NOD dif = (mu - D_pixel) / np.power(sigma_D, 2) F_new[0, 0] = np.sum(dif * der_mu / d_dose, axis=1)[0] F_new[1, 0] = dose[0, 0] - dose[0, 1] F_new[2, 0] = dose[0, 0] - dose[0, 2] dif_NOD = NOD_new - NOD_pix dif_F = F_new - F denom = np.matmul(dif_NOD, np.matmul(A, dif_F))[0] if denom == 0: denom = 1 #dif_NODt = np.zeros([1,3]) #dif_NODt[0,:] = dif_NOD[:] A = A + np.matmul( np.matmul( np.transpose(dif_NOD) - np.matmul(A, dif_F), dif_NOD), A) / denom NOD_pix = np.array([0.0, 0.0, 0.0]) + NOD_new NOD_new = NOD_pix - np.matrix.transpose(np.matmul( A, F_new)) NOD_new[NOD_new < 0] = 1e-10 F = np.zeros([3, 1]) + F_new dosis_new = coef1 * NOD_new + coef2 * np.power( NOD_new, coef3) dif_dosis = np.max( np.abs( np.array([ dosis_new[0, 0] - dosis_new[0, 1], dosis_new[0, 0] - dosis_new[0, 2], dosis_new[0, 1] - dosis_new[0, 2] ]))) / np.min(dosis_new) indice_max = indice_max + 1 indice_medio = indice_medio + indice_max alpha_average = alpha_average + alpha beta_average = beta_average + beta self.alpha[h + y[0], w + x[0]] = alpha self.beta[h + y[0], w + x[0]] = beta for c in [0, 1, 2]: #aux = NOD_new[0, c] + od0[c] self.OD[h + y[0], w + x[0], c] = NOD_new[0, c] + od0[c] #self.OD[h + y[0], w + x[0], c] = (self.OD[h + y[0], w + x[0], c]- # od0[c]*(1+beta))/(1.0+alpha) + od0[c] cont2 = cont2 + 3.0 cont = cont + 3.0 indice_medio = indice_medio / (nod_roi.shape[0] * nod_roi.shape[1]) alpha_average = alpha_average / (nod_roi.shape[0] * nod_roi.shape[1]) beta_average = beta_average / (nod_roi.shape[0] * nod_roi.shape[1]) print(f'alpha average = {alpha_average}') print(f'beta average = {beta_average}') print(f'indice medio = {indice_medio}') 
        alpha_image = np.array(65535 * 0.5 + self.alpha * 1000)
        alpha_image = np.clip(alpha_image, 0, 65535)  # np.clip returns a new array; assign it so the clipping takes effect
        imname = 'AlphaMap_' + self.imagefilename
        tifffile.imwrite(self.workingdir + imname,
                         alpha_image.astype(np.uint16),
                         resolution=(self.dpi[0], self.dpi[1]))
        beta_image = np.array(65535 * 0.5 + self.beta * 1000)
        beta_image = np.clip(beta_image, 0, 65535)
        imname = 'BetaMap_' + self.imagefilename
        tifffile.imwrite(self.workingdir + imname,
                         beta_image.astype(np.uint16),
                         resolution=(self.dpi[0], self.dpi[1]))
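The pixel loop above refines the per-channel net optical densities with a quasi-Newton iteration: the inverse Jacobian A is seeded with np.linalg.inv(J) and then corrected with a Broyden-type rank-1 update instead of being recomputed every step. A minimal, self-contained sketch of that update for a generic root-finding problem (broyden_solve, _fd_jacobian and the test system are illustrative names, not part of the original module):

import numpy as np

def _fd_jacobian(func, x, eps=1e-6):
    """Forward-difference Jacobian, used only to seed the inverse."""
    f0 = func(x)
    J = np.empty((f0.size, x.size))
    for i in range(x.size):
        xp = x.copy()
        xp[i] += eps
        J[:, i] = (func(xp) - f0) / eps
    return J

def broyden_solve(func, x0, max_iter=30, tol=1e-10):
    """Solve func(x) = 0, updating an approximation H of the inverse Jacobian."""
    x = np.asarray(x0, dtype=float)
    f = func(x)
    H = np.linalg.inv(_fd_jacobian(func, x))
    for _ in range(int(max_iter)):
        dx = -H @ f                      # quasi-Newton step
        x_new = x + dx
        f_new = func(x_new)
        if np.linalg.norm(f_new) < tol:
            return x_new
        df = f_new - f
        denom = dx @ H @ df
        if denom == 0:                   # same guard as the pixel loop above
            denom = 1.0
        # Broyden rank-1 update of the inverse Jacobian (what A = A + ... does above)
        H = H + np.outer(dx - H @ df, dx) @ H / denom
        x, f = x_new, f_new
    return x

# small nonlinear system with solution v = [1, 1]
print(broyden_solve(lambda v: np.array([v[0] ** 2 + v[1] - 2.0, v[0] - v[1]]), x0=[2.0, 0.5]))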
def execute(self): init_process_logger('log.txt') self.output_log.setValue('log.txt') from os.path import basename from flyingpigeon import sdm from flyingpigeon.utils import archive, archiveextract, download self.status.set('Start process', 0) try: logger.info('reading the arguments') resources_raw = self.getInputValues(identifier='resources') csv_url = self.getInputValues(identifier='gbif')[0] period = self.getInputValues(identifier='period') period = period[0] indices = self.getInputValues(identifier='input_indices') archive_format = self.archive_format.getValue() logger.info('indices %s ' % indices) logger.debug('csv_url %s' % csv_url) except Exception as e: logger.error('failed to read in the arguments %s ' % e) raise try: logger.info('set up the environment') csv_file = download(csv_url) resources = archiveextract(resources_raw) except Exception as e: logger.error('failed to set up the environment %s ' % e) raise try: self.status.set('read in latlon coordinates', 10) latlon = sdm.latlon_gbifcsv(csv_file) logger.info('got occurence coordinates %s ' % csv_file) except Exception as e: logger.exception( 'failed to extract the latlon points from file: %s: %s' % (csv_file, e)) try: self.status.set('plot map', 20) from flyingpigeon.visualisation import map_gbifoccurrences # latlon = sdm.latlon_gbifdic(gbifdic) occurence_map = map_gbifoccurrences(latlon) except Exception as e: logger.exception('failed to plot occurence map %s' % e) ################################# # calculate the climate indices ################################# # get the indices ncs_indices = None try: self.status.set( 'start calculation of climate indices for %s' % indices, 30) ncs_indices = sdm.get_indices(resources=resources, indices=indices) logger.info('indice calculation done') except: msg = 'failed to calculate indices' logger.exception(msg) raise Exception(msg) try: self.status.set('get domain', 30) domains = set() for resource in ncs_indices: # get_domain works only if metadata are set in a correct way domains = domains.union([basename(resource).split('_')[1]]) if len(domains) == 1: domain = list(domains)[0] logger.debug('Domain %s found in indices files' % domain) else: logger.error('Not a single domain in indices files %s' % domains) except Exception as e: logger.exception('failed to get domains %s' % e) try: self.status.set('generating the PA mask', 20) PAmask = sdm.get_PAmask(coordinates=latlon, domain=domain) logger.info('PA mask sucessfully generated') except Exception as e: logger.exception('failed to generate the PA mask: %s' % e) try: self.status.set('Ploting PA mask', 25) from flyingpigeon.visualisation import map_PAmask PAmask_png = map_PAmask(PAmask) except Exception as e: logger.exception('failed to plot the PA mask: %s' % e) try: # sort indices indices_dic = None indices_dic = sdm.sort_indices(ncs_indices) logger.info('indice files sorted for %s Datasets' % len(indices_dic.keys())) except: msg = 'failed to sort indices' logger.exception(msg) raise Exception(msg) ncs_references = [] species_files = [] stat_infos = [] for count, key in enumerate(indices_dic.keys()): try: staus_nr = 40 + count * 10 self.status.set('Start processing of %s' % key, staus_nr) ncs = indices_dic[key] logger.info('with %s files' % len(ncs)) try: ncs_reference = sdm.get_reference(ncs_indices=ncs, period=period) ncs_references.extend(ncs_reference) logger.info('reference indice calculated %s ' % ncs_references) except: msg = 'failed to calculate the reference' logger.exception(msg) raise Exception(msg) try: gam_model, 
predict_gam, gam_info = sdm.get_gam( ncs_reference, PAmask) stat_infos.append(gam_info) self.status.set('GAM sucessfully trained', staus_nr + 5) except Exception as e: msg = 'failed to train GAM for %s : %s' % (key, e) logger.debug(msg) try: prediction = sdm.get_prediction(gam_model, ncs) self.status.set('prediction done', staus_nr + 7) except Exception as e: msg = 'failed to predict tree occurence %s' % e logger.exception(msg) # raise Exception(msg) try: self.status.set('land sea mask for predicted data', staus_nr + 8) from numpy import invert, isnan, nan, broadcast_arrays # , array, zeros, linspace, meshgrid mask = invert(isnan(PAmask)) mask = broadcast_arrays(prediction, mask)[1] prediction[mask is False] = nan except Exception as e: logger.debug('failed to mask predicted data: %s' % e) try: species_files.append(sdm.write_to_file(ncs[0], prediction)) logger.info('Favourabillity written to file') except Exception as e: msg = 'failed to write species file %s' % e logger.debug(msg) # raise Exception(msg) except Exception as e: msg = 'failed to calculate reference indices. %s ' % e logger.exception(msg) raise Exception(msg) try: archive_indices = None archive_indices = archive(ncs_indices, format=archive_format) logger.info('indices added to archive') except: msg = 'failed adding indices to archive' logger.exception(msg) raise Exception(msg) archive_references = None try: archive_references = archive(ncs_references, format=archive_format) logger.info('indices reference added to archive') except: msg = 'failed adding reference indices to archive' logger.exception(msg) raise Exception(msg) archive_predicion = None try: archive_predicion = archive(species_files, format=archive_format) logger.info('species_files added to archive') except: msg = 'failed adding species_files indices to archive' logger.exception(msg) raise Exception(msg) try: from flyingpigeon.visualisation import pdfmerge stat_infosconcat = pdfmerge(stat_infos) logger.info('stat infos pdfs merged') except: logger.exception('failed to concat images') _, stat_infosconcat = tempfile.mkstemp(suffix='.pdf', prefix='foobar-', dir='.') # self.output_csv.setValue(csv_file) self.output_gbif.setValue(occurence_map) self.output_PA.setValue(PAmask_png) self.output_indices.setValue(archive_indices) self.output_reference.setValue(archive_references) self.output_prediction.setValue(archive_predicion) self.output_info.setValue(stat_infosconcat) self.status.set('done', 100)
# In[31]:
import numpy as np
import pandas as pd  # needed below for pd.Series
np.nan == None

# In[32]:
np.nan == np.nan

# In[33]:
np.isnan(np.nan)

# In[34]:
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s

# In[35]:
s.index
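For reference, the cells above illustrate that np.nan compares unequal to everything, including itself, so ordinary equality can never detect missing values; np.isnan (or pandas.isna, which also accepts None) is the appropriate test. A small illustrative check, not part of the notebook:

import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, None, 3.0], dtype="float64")  # None is promoted to NaN in a float Series

print(np.nan == np.nan)        # False: NaN is unequal to everything, itself included
print(np.isnan(s.to_numpy()))  # [False  True  True False]
print(pd.isna(s).tolist())     # same result; pd.isna also handles None in object columns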
def testMeanNonInfNaN(self): prob = tf.random.uniform([int(1e4)], seed=test_util.test_seed()) dist = tfd.ContinuousBernoulli(probs=prob, validate_args=True) mean_ = self.evaluate(dist.mean()) self.assertFalse(np.any(np.isinf(mean_))) self.assertFalse(np.any(np.isnan(mean_)))
print(similarity) def cross_entropy(predictions, targets): N = predictions.shape[0] ce = -np.sum(targets * np.log(predictions)) / N return ce for factor in factors: probs = probs_true * factor probs[:2, 1] = 1 - factor probs[2:, 0] = 1 - factor predictions = torch.mm(probs, torch.transpose(probs, 0, 1)) print(predictions) print(cross_entropy(predictions.numpy(), similarity.numpy())) _loss = losses["MOESimVAELoss"].similarity(probs, similarity) _loss = torch.mean(torch.sum(_loss, dim=1), dim=0) _ce_loss = log_loss(probs_true[:, 0], probs[:, 0]) if np.isnan(_ce_loss): _ce_loss = 0. print(probs) print(_loss, _ce_loss) sim_losses.append(_loss.cpu().numpy()) ce_losses.append(_ce_loss) fig, ax = plt.subplots() ax.plot(factors, sim_losses, color="blue", label="SIM") ax.plot(factors, ce_losses, color="green", label="CE") ax.set_title("SIMILARITY losses", fontsize=10) ax.set_xlabel("factors") ax.grid(True, which="both") ax.legend() #############################################################################
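A quick sanity check of the cross_entropy helper defined above (the probabilities below are made-up illustration values, assuming cross_entropy is in scope):

import numpy as np

targets = np.array([[1.0, 0.0], [0.0, 1.0]])
predictions = np.array([[0.9, 0.1], [0.2, 0.8]])

# -(log(0.9) + log(0.8)) / 2 is roughly 0.164
print(cross_entropy(predictions, targets))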
def test_rpc_trade_status(default_conf, ticker, fee, mocker) -> None: mocker.patch('freqtrade.rpc.telegram.Telegram', MagicMock()) mocker.patch.multiple( 'freqtrade.exchange.Exchange', fetch_ticker=ticker, get_fee=fee, ) freqtradebot = get_patched_freqtradebot(mocker, default_conf) patch_get_signal(freqtradebot, (True, False)) rpc = RPC(freqtradebot) freqtradebot.state = State.RUNNING with pytest.raises(RPCException, match=r'.*no active trade*'): rpc._rpc_trade_status() freqtradebot.enter_positions() trades = Trade.get_open_trades() trades[0].open_order_id = None freqtradebot.exit_positions(trades) results = rpc._rpc_trade_status() assert results[0] == { 'trade_id': 1, 'pair': 'ETH/BTC', 'base_currency': 'BTC', 'open_date': ANY, 'open_date_hum': ANY, 'open_timestamp': ANY, 'is_open': ANY, 'fee_open': ANY, 'fee_open_cost': ANY, 'fee_open_currency': ANY, 'fee_close': fee.return_value, 'fee_close_cost': ANY, 'fee_close_currency': ANY, 'open_rate_requested': ANY, 'open_trade_value': 0.0010025, 'close_rate_requested': ANY, 'sell_reason': ANY, 'sell_order_status': ANY, 'min_rate': ANY, 'max_rate': ANY, 'strategy': ANY, 'timeframe': 5, 'open_order_id': ANY, 'close_date': None, 'close_date_hum': None, 'close_timestamp': None, 'open_rate': 1.098e-05, 'close_rate': None, 'current_rate': 1.099e-05, 'amount': 91.07468123, 'amount_requested': 91.07468123, 'stake_amount': 0.001, 'trade_duration': None, 'trade_duration_s': None, 'close_profit': None, 'close_profit_pct': None, 'close_profit_abs': None, 'current_profit': -0.00408133, 'current_profit_pct': -0.41, 'current_profit_abs': -4.09e-06, 'profit_ratio': -0.00408133, 'profit_pct': -0.41, 'profit_abs': -4.09e-06, 'profit_fiat': ANY, 'stop_loss_abs': 9.882e-06, 'stop_loss_pct': -10.0, 'stop_loss_ratio': -0.1, 'stoploss_order_id': None, 'stoploss_last_update': ANY, 'stoploss_last_update_timestamp': ANY, 'initial_stop_loss_abs': 9.882e-06, 'initial_stop_loss_pct': -10.0, 'initial_stop_loss_ratio': -0.1, 'stoploss_current_dist': -1.1080000000000002e-06, 'stoploss_current_dist_ratio': -0.10081893, 'stoploss_current_dist_pct': -10.08, 'stoploss_entry_dist': -0.00010475, 'stoploss_entry_dist_ratio': -0.10448878, 'open_order': None, 'exchange': 'bittrex', } mocker.patch('freqtrade.freqtradebot.FreqtradeBot.get_sell_rate', MagicMock(side_effect=ExchangeError("Pair 'ETH/BTC' not available"))) results = rpc._rpc_trade_status() assert isnan(results[0]['current_profit']) assert isnan(results[0]['current_rate']) assert results[0] == { 'trade_id': 1, 'pair': 'ETH/BTC', 'base_currency': 'BTC', 'open_date': ANY, 'open_date_hum': ANY, 'open_timestamp': ANY, 'is_open': ANY, 'fee_open': ANY, 'fee_open_cost': ANY, 'fee_open_currency': ANY, 'fee_close': fee.return_value, 'fee_close_cost': ANY, 'fee_close_currency': ANY, 'open_rate_requested': ANY, 'open_trade_value': ANY, 'close_rate_requested': ANY, 'sell_reason': ANY, 'sell_order_status': ANY, 'min_rate': ANY, 'max_rate': ANY, 'strategy': ANY, 'timeframe': ANY, 'open_order_id': ANY, 'close_date': None, 'close_date_hum': None, 'close_timestamp': None, 'open_rate': 1.098e-05, 'close_rate': None, 'current_rate': ANY, 'amount': 91.07468123, 'amount_requested': 91.07468123, 'trade_duration': ANY, 'trade_duration_s': ANY, 'stake_amount': 0.001, 'close_profit': None, 'close_profit_pct': None, 'close_profit_abs': None, 'current_profit': ANY, 'current_profit_pct': ANY, 'current_profit_abs': ANY, 'profit_ratio': ANY, 'profit_pct': ANY, 'profit_abs': ANY, 'profit_fiat': ANY, 'stop_loss_abs': 9.882e-06, 'stop_loss_pct': -10.0, 
'stop_loss_ratio': -0.1, 'stoploss_order_id': None, 'stoploss_last_update': ANY, 'stoploss_last_update_timestamp': ANY, 'initial_stop_loss_abs': 9.882e-06, 'initial_stop_loss_pct': -10.0, 'initial_stop_loss_ratio': -0.1, 'stoploss_current_dist': ANY, 'stoploss_current_dist_ratio': ANY, 'stoploss_current_dist_pct': ANY, 'stoploss_entry_dist': -0.00010475, 'stoploss_entry_dist_ratio': -0.10448878, 'open_order': None, 'exchange': 'bittrex', }
def __call__(self, test_rows, chunk_size): import sqlalchemy as sa from sqlalchemy.sql import elements with create_sa_connection(self._con, **(self._engine_kwargs or dict())) as con: self._con = str(con.engine.url) selectable = self._get_selectable(con) # process index_col index_col = self._index_col if index_col is not None: if not isinstance(index_col, (list, tuple)): index_col = (index_col,) new_index_col = [] for col in index_col: if isinstance(col, (sa.Column, elements.Label)): new_index_col.append(col.name) elif isinstance(col, str): new_index_col.append(col) elif col is not None: raise TypeError(f'unknown index_col type: {type(col)}') self._index_col = new_index_col # process columns columns = self._columns or [] new_columns = [] for col in columns: if isinstance(col, str): new_columns.append(col) else: new_columns.append(col.name) self._columns = new_columns if self._columns: collect_cols = self._columns + (self._index_col or []) else: collect_cols = [] test_df, shape = self._collect_info(con, selectable, collect_cols, test_rows) # reconstruct selectable using known column names if not collect_cols: self._columns = list(test_df.columns) if self._selectable is not None: self._selectable = None self._get_selectable(con, columns=self._columns + (self._index_col or [])) if self.method == 'partition': if not self.index_col or self.partition_col not in self.index_col: part_frame = test_df else: part_frame = test_df.index.to_frame() if not issubclass(part_frame[self.partition_col].dtype.type, (np.number, np.datetime64)): raise TypeError('Type of partition column should be numeric or datetime, ' f'now it is {test_df[self.partition_col].dtype}') if isinstance(test_df.index, pd.RangeIndex): index_value = parse_index(pd.RangeIndex(shape[0] if not np.isnan(shape[0]) else -1), str(selectable), self._con) else: index_value = parse_index(test_df.index) columns_value = parse_index(test_df.columns, store_data=True) dtypes = test_df.dtypes use_arrow_dtype = self._use_arrow_dtype if use_arrow_dtype is None: use_arrow_dtype = options.dataframe.use_arrow_dtype if use_arrow_dtype: dtypes = to_arrow_dtypes(dtypes, test_df=test_df) return self.new_dataframe(None, shape=shape, dtypes=dtypes, index_value=index_value, columns_value=columns_value, raw_chunk_size=chunk_size)
def sum_nan(y):
    if np.isnan(y).all():
        return float("nan")
    return np.nansum(y)
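A brief illustration of how sum_nan differs from calling np.nansum directly (np.nansum of an all-NaN input returns 0.0, while sum_nan keeps it as NaN):

import numpy as np

print(sum_nan(np.array([1.0, np.nan, 2.0])))  # 3.0, NaNs ignored
print(sum_nan(np.array([np.nan, np.nan])))    # nan, instead of np.nansum's 0.0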
def test_rpc_trade_statistics(default_conf, ticker, ticker_sell_up, fee, limit_buy_order, limit_sell_order, mocker) -> None: mocker.patch.multiple( 'freqtrade.rpc.fiat_convert.CoinGeckoAPI', get_price=MagicMock(return_value={'bitcoin': {'usd': 15000.0}}), ) mocker.patch('freqtrade.rpc.rpc.CryptoToFiatConverter._find_price', return_value=15000.0) mocker.patch('freqtrade.rpc.telegram.Telegram', MagicMock()) mocker.patch.multiple( 'freqtrade.exchange.Exchange', fetch_ticker=ticker, get_fee=fee, ) freqtradebot = get_patched_freqtradebot(mocker, default_conf) patch_get_signal(freqtradebot, (True, False)) stake_currency = default_conf['stake_currency'] fiat_display_currency = default_conf['fiat_display_currency'] rpc = RPC(freqtradebot) rpc._fiat_converter = CryptoToFiatConverter() res = rpc._rpc_trade_statistics(stake_currency, fiat_display_currency) assert res['trade_count'] == 0 assert res['first_trade_date'] == '' assert res['first_trade_timestamp'] == 0 assert res['latest_trade_date'] == '' assert res['latest_trade_timestamp'] == 0 # Create some test data freqtradebot.enter_positions() trade = Trade.query.first() # Simulate fulfilled LIMIT_BUY order for trade trade.update(limit_buy_order) # Update the ticker with a market going up mocker.patch.multiple( 'freqtrade.exchange.Exchange', fetch_ticker=ticker_sell_up ) trade.update(limit_sell_order) trade.close_date = datetime.utcnow() trade.is_open = False freqtradebot.enter_positions() trade = Trade.query.first() # Simulate fulfilled LIMIT_BUY order for trade trade.update(limit_buy_order) # Update the ticker with a market going up mocker.patch.multiple( 'freqtrade.exchange.Exchange', fetch_ticker=ticker_sell_up ) trade.update(limit_sell_order) trade.close_date = datetime.utcnow() trade.is_open = False stats = rpc._rpc_trade_statistics(stake_currency, fiat_display_currency) assert prec_satoshi(stats['profit_closed_coin'], 6.217e-05) assert prec_satoshi(stats['profit_closed_percent_mean'], 6.2) assert prec_satoshi(stats['profit_closed_fiat'], 0.93255) assert prec_satoshi(stats['profit_all_coin'], 5.802e-05) assert prec_satoshi(stats['profit_all_percent_mean'], 2.89) assert prec_satoshi(stats['profit_all_fiat'], 0.8703) assert stats['trade_count'] == 2 assert stats['first_trade_date'] == 'just now' assert stats['latest_trade_date'] == 'just now' assert stats['avg_duration'] == '0:00:00' assert stats['best_pair'] == 'ETH/BTC' assert prec_satoshi(stats['best_rate'], 6.2) # Test non-available pair mocker.patch('freqtrade.freqtradebot.FreqtradeBot.get_sell_rate', MagicMock(side_effect=ExchangeError("Pair 'ETH/BTC' not available"))) stats = rpc._rpc_trade_statistics(stake_currency, fiat_display_currency) assert stats['trade_count'] == 2 assert stats['first_trade_date'] == 'just now' assert stats['latest_trade_date'] == 'just now' assert stats['avg_duration'] == '0:00:00' assert stats['best_pair'] == 'ETH/BTC' assert prec_satoshi(stats['best_rate'], 6.2) assert isnan(stats['profit_all_coin'])
def transform(self, patient_df, pid=None): ''' Transformer method, taking as input a data-frame with irregularly sampled input data. The method assumes that the data-frame contains a time-stamp column, and the data-frame is sorted along the first axis in non-decreasing order with respect to the timestamp column. Pass the <pid> of the patient stay as additional information''' self._check_state() static_table = self.df_static[self.df_static["PatientID"] == pid] # No static data, patient is not valid, exclude on-the-fly if static_table.shape[0] == 0: print("WARNING: No static data in patient table...") return None # More than one row, select one of the rows arbitrarily if static_table.shape[0] > 1: print("WARNING: More than one row in static table...") static_table = static_table.take([0], axis=0) static_height = float(static_table["Height"]) static_gender = str(static_table["Sex"].values[0]).strip() assert (static_gender in ["F", "M", "U"]) if static_gender in ["F", "M"]: typical_weight = self.typical_weight_dict[static_gender] else: typical_weight = (self.typical_weight_dict["M"] + self.typical_weight_dict["F"]) / 2.0 personal_bmi = self.median_bmi_dict[self.key_dict[static_gender]] ## If either the endpoints or the features don't exist, log the failure but do nothing, the missing patients can be # latter added as a new group to the output H5 if patient_df.shape[0] == 0: print( "WARNING: p{} has missing features, skipping output generation..." .format(pid)) return None all_keys = list( set(patient_df.columns.values.tolist()).difference( set(["Datetime", "PatientID", "a_temp", "m_pm_1", "m_pm_2"]))) ts = patient_df["Datetime"] ts_arr = np.array(ts) n_ts = ts_arr.size if self.is_dim_reduced: hr = np.array(patient_df["vm1"]) else: hr = np.array(patient_df["v200"]) finite_hr = ts_arr[np.isfinite(hr)] if finite_hr.size == 0: print("WARNING: Patient {} has no HR, ignoring patient...".format( pid)) return None ts_min = ts_arr[np.isfinite(hr)][0] ts_max = ts_arr[np.isfinite(hr)][-1] max_ts_diff = (ts_max - ts_min) / np.timedelta64(1, 's') time_grid = np.arange( 0.0, min(max_ts_diff + 1.0, self.max_grid_length_secs), self.grid_period) time_grid_abs = [ ts_min + pdts.Timedelta(seconds=time_grid[idx]) for idx in range(time_grid.size) ] imputed_df_dict = {} imputed_df_dict[self.patient_id_key] = [int(pid)] * time_grid.size imputed_df_dict[self.rel_datetime_key] = time_grid imputed_df_dict[self.abs_datetime_key] = time_grid_abs ## There is nothing to do if the patient has no records, just return... if n_ts == 0: print( "WARNING: p{} has an empty record, skipping output generation..." .format(patient)) return None ## Initialize the storage for the imputed time grid, NANs for the non-pharma, 0 for pharma. for col in all_keys: if col[0] == "p": imputed_df_dict[col] = np.zeros(time_grid.size) elif col[0] == "v": imputed_df_dict[col] = mlhc_array.empty_nan(time_grid.size) else: print("ERROR: Invalid variable type") assert (False) imputed_df = pd.DataFrame(imputed_df_dict) norm_ts = np.array(ts - ts_min) / np.timedelta64(1, 's') # Schedule for order of variable imputation if self.is_dim_reduced: all_keys.remove("vm131") all_keys = ["vm131"] + all_keys else: all_keys.remove("v10000400") all_keys = ["v10000400"] + all_keys ## Impute all variables independently, with the two relevant cases pharma variable and other variable, # distinguishable from the variable prefix. 
We enforce that weight is the first variable to be imputed, so that # its time-gridded information can later be used by other custom formulae imputations that depend on it. for var_idx, variable in enumerate(all_keys): df_var = patient_df[variable] assert (n_ts == df_var.shape[0] == norm_ts.size) ## Non-pharma variable case if variable[0] == "v": valid_normal = False var_encoding = self.var_encoding_map[variable] # Saved a value in the dict of normal values if variable in self.normal_dict: saved_normal_var = self.normal_dict[variable] # Saved normal value is already numeric, no need to encode it here... if mlhc_math.is_numeric(saved_normal_var) and np.isfinite( saved_normal_var): global_impute_val = saved_normal_var valid_normal = True # Could not determine a valid normal value, have to fall back to pre-computed global statistic if not valid_normal: # Fill in the weight using BMI calculations if variable in ["v10000400", "vm131"]: # If we have an observed height can use BMI if np.isfinite(static_height): global_impute_val = personal_bmi * (static_height / 100)**2 else: global_impute_val = typical_weight # Fill in with the global statistic elif variable in self.global_impute_dict: global_impute_val = self.global_impute_dict[variable] # Rare case, no observation in the imputation data-set else: global_impute_val = np.nan # Default values where median/IQR interval not saved if variable not in self.interval_median_dict: fill_interval_secs = self.default_fill_interval_secs rolling_mean_secs = self.default_rolling_mean_secs fill_interval_secs = self.default_fill_interval_secs # We have to impose minimum period to have boundary conditions where the backward window for # slope estimation is empty or an observation is not even filled to the next grid point to the right. 
else: med_interval = self.interval_median_dict[variable] iqr_interval = self.interval_iqr_dict[variable] base_val = med_interval + 2 * iqr_interval fill_interval_secs = max(self.grid_period, base_val) rolling_mean_secs = max(2 * self.grid_period, 2 * base_val) return_mean_secs = max(2 * self.grid_period, base_val) raw_col = np.array(df_var) assert (raw_col.size == norm_ts.size) observ_idx = np.isfinite(raw_col) observ_ts = norm_ts[observ_idx] observ_val = raw_col[observ_idx] ## No values have been observed for this variable, it has to be imputed using the global mean if observ_val.size == 0: est_vals = mlhc_array.value_empty(time_grid.size, global_impute_val) imputed_df[variable] = est_vals imputed_df["{}_IMPUTED_STATUS_CUM_COUNT".format( variable)] = np.zeros(time_grid.size) imputed_df["{}_IMPUTED_STATUS_TIME_TO".format( variable)] = mlhc_array.value_empty( time_grid.size, -1.0) continue assert (np.isfinite(observ_val).all()) assert (np.isfinite(observ_ts).all()) if self.use_adaptive_impute: # Formulae imputation if variable in [ "v1000", "v1010", "v10020000", "v30005010", "v30005110", "vm13", "vm24", "vm31", "vm32" ]: existing_weight_col = np.array( imputed_df["vm131"] ) if self.is_dim_reduced else np.array( imputed_df["v10000400"]) est_vals, cum_count_ts, time_to_last_ms = bern_forward_fill.impute_forward_fill_new_only_ffill( observ_ts, observ_val, time_grid, global_impute_val, self.grid_period, fill_interval_secs=fill_interval_secs, rolling_mean_secs=rolling_mean_secs, return_mean_secs=return_mean_secs, var_type="non_pharma", var_encoding=var_encoding, variable_id=variable, weight_imputed_col=existing_weight_col, static_height=static_height, personal_bmi=personal_bmi) elif variable in ["v10000400", "vm131"]: est_vals, cum_count_ts, time_to_last_ms = bern_forward_fill.impute_forward_fill_new_only_ffill( observ_ts, observ_val, time_grid, global_impute_val, self.grid_period, var_type="weight") else: est_vals, cum_count_ts, time_to_last_ms = bern_forward_fill.impute_forward_fill_new_only_ffill( observ_ts, observ_val, time_grid, global_impute_val, self.grid_period, fill_interval_secs=fill_interval_secs, rolling_mean_secs=rolling_mean_secs, return_mean_secs=return_mean_secs, var_type="non_pharma", var_encoding=var_encoding, variable_id=variable) else: assert (False) est_vals = bern_forward_fill.impute_forward_fill( observ_ts, observ_val, time_grid, global_mean_var) assert (np.isnan(global_impute_val) or np.isfinite(est_vals).all()) imputed_df[variable] = est_vals imputed_df["{}_IMPUTED_STATUS_CUM_COUNT".format( variable)] = cum_count_ts imputed_df["{}_IMPUTED_STATUS_TIME_TO".format( variable)] = time_to_last_ms ## Pharma variable case, the doses have to be recomputed to the time-grid. 
The global imputation value is 0, because the rate assumed w/o observation # is 0 (no medication flow) elif variable[0] == "p": global_impute_val = 0.0 raw_col = np.array(df_var) assert (raw_col.size == norm_ts.size) observ_idx = np.isfinite(raw_col) observ_ts = norm_ts[observ_idx] observ_val = raw_col[observ_idx] ## No values have been observed for this pharma-variable, leave Zero in this series if observ_val.size == 0: continue assert (np.isfinite(observ_val).all()) assert (np.isfinite(observ_ts).all()) est_vals, cum_count_ts, time_to_last_ms = bern_forward_fill.impute_forward_fill_new_only_ffill( observ_ts, observ_val, time_grid, global_impute_val, self.grid_period, var_type="pharma") assert (np.isfinite(est_vals).all()) imputed_df[variable] = est_vals else: print("ERROR: Invalid variable, exiting...") assert (False) return imputed_df
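Stripped of the project specifics, the imputation above forward-fills irregular observations onto a regular time grid, carrying each value forward for at most a fixed interval. impute_forward_fill_new_only_ffill itself is project code and is not reproduced here; the helper below is an illustrative stand-in for that core idea:

import numpy as np

def forward_fill_to_grid(obs_ts, obs_val, grid, fill_interval_secs, default_val):
    """Carry each observation forward onto `grid`, but only for
    `fill_interval_secs` seconds; grid points further away than that
    (or before the first observation) get `default_val`."""
    # index of the most recent observation at or before each grid point
    idx = np.searchsorted(obs_ts, grid, side="right") - 1
    filled = np.full(grid.shape, default_val, dtype=float)
    valid = idx >= 0
    age = np.where(valid, grid - obs_ts[np.clip(idx, 0, None)], np.inf)
    use = valid & (age <= fill_interval_secs)
    filled[use] = obs_val[idx[use]]
    return filled

obs_ts = np.array([0.0, 300.0, 3000.0])   # observation times in seconds
obs_val = np.array([80.0, 82.0, 79.0])    # e.g. heart-rate samples
grid = np.arange(0.0, 3600.0, 300.0)      # 5-minute grid
print(forward_fill_to_grid(obs_ts, obs_val, grid, fill_interval_secs=900.0, default_val=np.nan))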
def generateGroupAdditivityValues(self, trainingSet, kunits, method='Arrhenius'): """ Generate the group additivity values using the given `trainingSet`, a list of 2-tuples of the form ``(template, kinetics)``. You must also specify the `kunits` for the family and the `method` to use when generating the group values. Returns ``True`` if the group values have changed significantly since the last time they were fitted, or ``False`` otherwise. """ warnings.warn( "Group additivity is no longer supported and may be" " removed in version 2.3.", DeprecationWarning) # keep track of previous values so we can detect if they change old_entries = dict() for label, entry in self.entries.items(): if entry.data is not None: old_entries[label] = entry.data # Determine a complete list of the entries in the database, sorted as in the tree groupEntries = self.top[:] for entry in self.top: groupEntries.extend(self.descendants(entry)) # Determine a unique list of the groups we will be able to fit parameters for groupList = [] for template, kinetics in trainingSet: for group in template: if group not in self.top: groupList.append(group) groupList.extend(self.ancestors(group)[:-1]) groupList = list(set(groupList)) groupList.sort(key=lambda x: x.index) if method == 'KineticsData': # Fit a discrete set of k(T) data points by training against k(T) data Tdata = numpy.array([300, 400, 500, 600, 800, 1000, 1500, 2000]) # Initialize dictionaries of fitted group values and uncertainties groupValues = {} groupUncertainties = {} groupCounts = {} groupComments = {} for entry in groupEntries: groupValues[entry] = [] groupUncertainties[entry] = [] groupCounts[entry] = [] groupComments[entry] = set() # Generate least-squares matrix and vector A = [] b = [] kdata = [] for template, kinetics in trainingSet: if isinstance(kinetics, (Arrhenius, KineticsData)): kd = [kinetics.getRateCoefficient(T) for T in Tdata] elif isinstance(kinetics, ArrheniusEP): kd = [kinetics.getRateCoefficient(T, 0) for T in Tdata] else: raise Exception( 'Unexpected kinetics model of type {0} for template {1}.' .format(kinetics.__class__, template)) kdata.append(kd) # Create every combination of each group and its ancestors with each other combinations = [] for group in template: groups = [group] groups.extend(self.ancestors(group)) combinations.append(groups) combinations = getAllCombinations(combinations) # Add a row to the matrix for each combination for groups in combinations: Arow = [1 if group in groups else 0 for group in groupList] Arow.append(1) brow = [math.log10(k) for k in kd] A.append(Arow) b.append(brow) for group in groups: groupComments[group].add("{0!s}".format(template)) if len(A) == 0: logging.warning( 'Unable to fit kinetics groups for family "{0}"; no valid data found.' 
.format(self.label)) return A = numpy.array(A) b = numpy.array(b) kdata = numpy.array(kdata) x, residues, rank, s = numpy.linalg.lstsq(A, b) for t, T in enumerate(Tdata): # Determine error in each group (on log scale) stdev = numpy.zeros(len(groupList) + 1, numpy.float64) count = numpy.zeros(len(groupList) + 1, numpy.int) for index in range(len(trainingSet)): template, kinetics = trainingSet[index] kd = math.log10(kdata[index, t]) km = x[-1, t] + sum([ x[groupList.index(group), t] for group in template if group in groupList ]) variance = (km - kd)**2 for group in template: groups = [group] groups.extend(self.ancestors(group)) for g in groups: if g not in self.top: ind = groupList.index(g) stdev[ind] += variance count[ind] += 1 stdev[-1] += variance count[-1] += 1 stdev = numpy.sqrt(stdev / (count - 1)) import scipy.stats ci = scipy.stats.t.ppf(0.975, count - 1) * stdev # Update dictionaries of fitted group values and uncertainties for entry in groupEntries: if entry == self.top[0]: groupValues[entry].append(10**x[-1, t]) groupUncertainties[entry].append(10**ci[-1]) groupCounts[entry].append(count[-1]) elif entry in groupList: index = groupList.index(entry) groupValues[entry].append(10**x[index, t]) groupUncertainties[entry].append(10**ci[index]) groupCounts[entry].append(count[index]) else: groupValues[entry] = None groupUncertainties[entry] = None groupCounts[entry] = None # Store the fitted group values and uncertainties on the associated entries for entry in groupEntries: if groupValues[entry] is not None: entry.data = KineticsData(Tdata=(Tdata, "K"), kdata=(groupValues[entry], kunits)) if not any( numpy.isnan(numpy.array( groupUncertainties[entry]))): entry.data.kdata.uncertainties = numpy.array( groupUncertainties[entry]) entry.data.kdata.uncertaintyType = '*|/' entry.shortDesc = "Group additive kinetics." entry.longDesc = "Fitted to {0} rates.\n".format( groupCounts[entry]) entry.longDesc += "\n".join(groupComments[entry]) else: entry.data = None elif method == 'Arrhenius': # Fit Arrhenius parameters (A, n, Ea) by training against k(T) data Tdata = numpy.array([300, 400, 500, 600, 800, 1000, 1500, 2000]) logTdata = numpy.log(Tdata) Tinvdata = 1000. / (constants.R * Tdata) A = [] b = [] kdata = [] for template, kinetics in trainingSet: if isinstance(kinetics, (Arrhenius, KineticsData)): kd = [kinetics.getRateCoefficient(T) for T in Tdata] elif isinstance(kinetics, ArrheniusEP): kd = [kinetics.getRateCoefficient(T, 0) for T in Tdata] else: raise Exception( 'Unexpected kinetics model of type {0} for template {1}.' .format(kinetics.__class__, template)) kdata.append(kd) # Create every combination of each group and its ancestors with each other combinations = [] for group in template: groups = [group] groups.extend(self.ancestors(group)) combinations.append(groups) combinations = getAllCombinations(combinations) # Add a row to the matrix for each combination at each temperature for t, T in enumerate(Tdata): logT = logTdata[t] Tinv = Tinvdata[t] for groups in combinations: Arow = [] for group in groupList: if group in groups: Arow.extend([1, logT, -Tinv]) else: Arow.extend([0, 0, 0]) Arow.extend([1, logT, -Tinv]) brow = math.log(kd[t]) A.append(Arow) b.append(brow) if len(A) == 0: logging.warning( 'Unable to fit kinetics groups for family "{0}"; no valid data found.' 
.format(self.label)) return A = numpy.array(A) b = numpy.array(b) kdata = numpy.array(kdata) x, residues, rank, s = numpy.linalg.lstsq(A, b) # Store the results self.top[0].data = Arrhenius( A=(math.exp(x[-3]), kunits), n=x[-2], Ea=(x[-1], "kJ/mol"), T0=(1, "K"), ) for i, group in enumerate(groupList): group.data = Arrhenius( A=(math.exp(x[3 * i]), kunits), n=x[3 * i + 1], Ea=(x[3 * i + 2], "kJ/mol"), T0=(1, "K"), ) elif method == 'Arrhenius2': # Fit Arrhenius parameters (A, n, Ea) by training against (A, n, Ea) values A = [] b = [] for template, kinetics in trainingSet: # Create every combination of each group and its ancestors with each other combinations = [] for group in template: groups = [group] groups.extend(self.ancestors(group)) combinations.append(groups) combinations = getAllCombinations(combinations) # Add a row to the matrix for each parameter if isinstance(kinetics, Arrhenius) or (isinstance(kinetics, ArrheniusEP) and kinetics.alpha.value_si == 0): for groups in combinations: Arow = [] for group in groupList: if group in groups: Arow.append(1) else: Arow.append(0) Arow.append(1) Ea = kinetics.E0.value_si if isinstance( kinetics, ArrheniusEP) else kinetics.Ea.value_si brow = [ math.log(kinetics.A.value_si), kinetics.n.value_si, Ea / 1000. ] A.append(Arow) b.append(brow) if len(A) == 0: logging.warning( 'Unable to fit kinetics groups for family "{0}"; no valid data found.' .format(self.label)) return A = numpy.array(A) b = numpy.array(b) x, residues, rank, s = numpy.linalg.lstsq(A, b) # Store the results self.top[0].data = Arrhenius( A=(math.exp(x[-1, 0]), kunits), n=x[-1, 1], Ea=(x[-1, 2], "kJ/mol"), T0=(1, "K"), ) for i, group in enumerate(groupList): group.data = Arrhenius( A=(math.exp(x[i, 0]), kunits), n=x[i, 1], Ea=(x[i, 2], "kJ/mol"), T0=(1, "K"), ) # Add a note to the history of each changed item indicating that we've generated new group values changed = False for label, entry in self.entries.items(): if entry.data is not None and old_entries.has_key(label): if (isinstance(entry.data, KineticsData) and isinstance(old_entries[label], KineticsData) and len(entry.data.kdata.value_si) == len( old_entries[label].kdata.value_si) and all( abs(entry.data.kdata.value_si / old_entries[label].kdata.value_si - 1) < 0.01)): #print "New group values within 1% of old." pass elif (isinstance(entry.data, Arrhenius) and isinstance(old_entries[label], Arrhenius) and abs(entry.data.A.value_si / old_entries[label].A.value_si - 1) < 0.01 and abs(entry.data.n.value_si / old_entries[label].n.value_si - 1) < 0.01 and abs(entry.data.Ea.value_si / old_entries[label].Ea.value_si - 1) < 0.01 and abs(entry.data.T0.value_si / old_entries[label].T0.value_si - 1) < 0.01): #print "New group values within 1% of old." pass else: changed = True break else: changed = True break return changed
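The 'KineticsData' branch above reduces to an ordinary least-squares fit of additive group contributions to log10 k(T): each training reaction contributes a 0/1 indicator row over the groups plus an intercept column, and numpy.linalg.lstsq solves for the group values. A toy version of that fit, with made-up group names and rate coefficients purely for illustration:

import numpy as np

# three hypothetical groups; each training reaction uses a subset of them
group_names = ["R_H", "C_sec", "O_rad"]
training = [
    (["R_H"], 1.0e7),
    (["C_sec"], 4.0e6),
    (["R_H", "C_sec"], 2.5e8),
    (["R_H", "O_rad"], 6.0e9),
]

A = np.array([[1 if g in groups else 0 for g in group_names] + [1]
              for groups, _ in training], dtype=float)   # last column is the family baseline
b = np.log10([k for _, k in training])

x, residues, rank, s = np.linalg.lstsq(A, b, rcond=None)
# the fitted values are log10 contributions; multiplying 10**value over a template reconstructs k
print(dict(zip(group_names + ["baseline"], x)))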
def local_thresholding_prop(conn_matrix, thr): from pynets import thresholding from pynets.stats import netstats ''' Threshold the adjacency matrix by building from the minimum spanning tree (MST) and adding successive N-nearest neighbour degree graphs to achieve target proportional threshold. ''' fail_tol = 10 conn_matrix = np.nan_to_num(conn_matrix) G = nx.from_numpy_matrix(conn_matrix) if not nx.is_connected(G): [G, _] = netstats.prune_disconnected(G) maximum_edges = G.number_of_edges() G = thresholding.weight_to_distance(G) min_t = nx.minimum_spanning_tree(G, weight="distance") len_edges = min_t.number_of_edges() upper_values = np.triu_indices(np.shape(conn_matrix)[0], k=1) weights = np.array(conn_matrix[upper_values]) weights = weights[~np.isnan(weights)] edgenum = int(float(thr) * float(len(weights))) if len_edges > edgenum: print("%s%s%s" % ('Warning: The minimum spanning tree already has: ', len_edges, ' edges, select more edges. Local Threshold will be applied by just retaining the Minimum ' 'Spanning Tree')) conn_matrix_thr = nx.to_numpy_array(G) return conn_matrix_thr k = 1 len_edge_list = [] while len_edges < edgenum and k <= np.shape(conn_matrix)[0] and (len(len_edge_list[-fail_tol:]) - len(set(len_edge_list[-fail_tol:]))) < (fail_tol-1): print(k) print(len_edges) len_edge_list.append(len_edges) # Create nearest neighbour graph nng = thresholding.knn(conn_matrix, k) number_before = nng.number_of_edges() # Remove edges from the NNG that exist already in the new graph/MST nng.remove_edges_from(min_t.edges()) if nng.number_of_edges() == 0 and number_before >= maximum_edges: break # Add weights to NNG for e in nng.edges(): nng.edges[e[0], e[1]]['weight'] = float(conn_matrix[e[0], e[1]]) # Obtain list of edges from the NNG in order of weight edge_list = sorted(nng.edges(data=True), key=lambda t: t[2]['weight'], reverse=True) # Add edges in order of connectivity strength for edge in edge_list: #print("%s%s" % ('Adding edge to mst: ', edge)) min_t.add_edges_from([edge]) min_t_mx = nx.to_numpy_array(min_t) len_edges = nx.from_numpy_matrix(min_t_mx).number_of_edges() if len_edges >= edgenum: #print(len_edges) break if (len(len_edge_list[-fail_tol:]) - len(set(len_edge_list[-fail_tol:]))) >= (fail_tol-1): print("%s%s%s" % ('Cannot apply local thresholding to achieve threshold of: ', thr, '. Using maximally saturated connected matrix instead...')) k += 1 conn_matrix_thr = nx.to_numpy_array(min_t, nodelist=sorted(min_t.nodes()), dtype=np.float64) if len(min_t.nodes()) < conn_matrix.shape[0]: raise RuntimeWarning("%s%s%s" % ('Cannot apply local thresholding to achieve threshold of: ', thr, '. Try a higher -thr or -min_thr')) return conn_matrix_thr
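Stripped of the project-specific plumbing, the local thresholding above keeps the minimum spanning tree of the distance-weighted graph (so the result stays connected) and then adds back the strongest remaining edges until the target proportion of edges is reached. A compact sketch of that core idea with networkx (the helper name and the random test matrix are illustrative):

import numpy as np
import networkx as nx

def mst_plus_strongest_edges(conn_matrix, thr):
    """Keep the MST for connectivity, then add the strongest remaining
    edges until a proportion `thr` of all edges is retained."""
    G = nx.from_numpy_array(conn_matrix)
    # convert weights to distances so that strong connections become short edges
    for u, v, d in G.edges(data=True):
        d["distance"] = 1.0 / d["weight"] if d["weight"] > 0 else np.inf
    mst = nx.minimum_spanning_tree(G, weight="distance")
    target = int(thr * G.number_of_edges())
    # remaining edges, strongest first
    rest = sorted((e for e in G.edges(data=True) if not mst.has_edge(e[0], e[1])),
                  key=lambda e: e[2]["weight"], reverse=True)
    for u, v, d in rest:
        if mst.number_of_edges() >= target:
            break
        mst.add_edge(u, v, **d)
    return nx.to_numpy_array(mst)

rng = np.random.default_rng(0)
m = rng.random((8, 8))
m = (m + m.T) / 2
np.fill_diagonal(m, 0)
print(np.count_nonzero(mst_plus_strongest_edges(m, thr=0.5)) // 2)  # number of retained edges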
print(Ei) run.remove_file("observables") for j, gamma in enumerate(gammas[i:]): # write the remaining values of observables as those corresponding to the delta = 0 # case, as non-zero d-band produces a higher energy fibril. scan['\\gamma_s'] = str(gamma) rp = ReadParams(scan=scan, loadsuf=loadsuf, savesuf=savesuf) run = SingleRun(rp, scan_dir=scan_dir) run.write_observables(E0, R0, eta0, delta0, surftwist0, "\\gamma_s") break if (np.isnan(Ri) or Ri <= 0) and gamma > 0.15: # if Ri is infinite, then the calculation failed. # Retry it with a different initial guess. print("Ri is NAN, trying again with Rguess = 1.0") # remove the current observables file, so that a new one can be written. run.remove_file("observables") if abs(float(scan['Rguess']) - 1.0) > 1e-10: Ri = 1.0 else: break else: # calculation ran smoothly.
def get_spike_rate_map(map_matrix, map_function, start_frames, end_frames, camera_positions = None, show_colorbar = True, title = None, ax = None, use_log = False, dummy = False, text_size = 48): delta_x = 187 # distance to leftmost x-coordinate from origin on field delta_y = 297 # distance to topmost y-coordinate from origin on field if dummy: img_map = map_matrix elif map_function == MAP_FUNCTION.threshold: img_map = np.full((355, 258), np.nan) for i in range(len(map_matrix)): for y in range(len(map_matrix[i])): for x in range(len(map_matrix[i, y])): if not np.isnan(map_matrix[i, y, x]): img_map[y, x] = map_matrix[i, y, x] elif map_function == MAP_FUNCTION.mean: img_map = np.nanmean(map_matrix, axis = 0) elif map_function == MAP_FUNCTION.min: img_map = np.nanmin(map_matrix, axis = 0) elif map_function == MAP_FUNCTION.max: img_map = np.nanmax(map_matrix, axis = 0) elif map_function == MAP_FUNCTION.std: img_map = np.nanstd(map_matrix, axis = 0) elif map_function == MAP_FUNCTION.var: img_map = np.nanvar(map_matrix, axis = 0) elif map_function == MAP_FUNCTION.percentile5: img_map = np.nanpercentile(map_matrix, 5, axis = 0) elif map_function == MAP_FUNCTION.percentile95: img_map = np.nanpercentile(map_matrix, 95, axis = 0) elif map_function == MAP_FUNCTION.median: img_map = np.nanpercentile(map_matrix, 50, axis = 0) elif map_function == MAP_FUNCTION.count: # count non-nan elements img_map = np.count_nonzero(~np.isnan(map_matrix), axis = 0) else: raise NotImplementedError('Map function not implemented!') if use_log: img_map = np.log1p(img_map) u = np.nanmean(img_map) std = np.nanstd(img_map) fig = figure(figsize = (15,15)) if ax is None else None im = None if ax: im = ax.imshow(img_map, vmin = max(0, u - 2 * std), vmax = u + 2 * std) else: im = plt.imshow(img_map, vmin = max(0, u - 2 * std), vmax = u + 2 * std) # camera positions if camera_positions is not None: cam_overlay = np.full((img_map.shape[0], img_map.shape[1], 4), [0,0,0,0], dtype = np.uint8) current_cmap = matplotlib.cm.get_cmap() current_cmap.set_bad(alpha=0) for i in range(len(start_frames)): start_frame = start_frames[i] end_frame = end_frames[i] for frame_num in range(start_frame, end_frame): y = delta_y - int(camera_positions[frame_num][2]) x = delta_x + int(camera_positions[frame_num][0]) if y < 0 or x < 0 or y >= img_map.shape[0] or x >= img_map.shape[1]: continue cam_overlay[y, x] = np.array([255, 0, 0, 255]) if ax: ax.imshow(cam_overlay) else: plt.imshow(cam_overlay) if title: if ax: ax.set_title(title, fontsize = text_size, y = 1.03) else: plt.suptitle(title, fontsize = text_size, y = 1.03) if ax: ax.axis('off') if show_colorbar: if ax: divider = make_axes_locatable(ax) cax = divider.append_axes('right', size='5%', pad=0.1) cbar = plt.colorbar(im, cax=cax, orientation='vertical') else: cbar = colorbar(im) cbar.ax.tick_params(labelsize = text_size) return fig, img_map, im
def stderror(v):
    # number of valid (non NaN) elements in the vector
    non_nan = np.count_nonzero(~np.isnan(v))
    return np.nanstd(v, ddof=1) / np.sqrt(non_nan)
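Usage is the usual standard error of the mean, computed only over the finite entries (assuming stderror above is in scope):

import numpy as np

v = np.array([1.0, 2.0, np.nan, 4.0])
print(np.nanstd(v, ddof=1))  # roughly 1.528, sample std over the 3 valid values
print(stderror(v))           # roughly 0.882, i.e. that std divided by sqrt(3)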
columns=['phase', 'pvalue', 'kappa'], data=theta_mod['rem']) # filtering swr_mod swr = pd.DataFrame(index=swr.index, columns=swr.columns, data=gaussFilt(swr.values, (10, ))) # Cut swr_mod from -500 to 500 nbins = 200 binsize = 5 times = np.arange(0, binsize * (nbins + 1), binsize) - (nbins * binsize) / 2 swr = swr.loc[:, times] # CHECK FOR NAN tmp1 = swr.index[np.unique(np.where(np.isnan(swr))[0])] tmp2 = theta.index[theta.isnull().any(1)] # CHECK P-VALUE tmp3 = theta.index[(theta['pvalue'] > 0.01).values] tmp = np.unique(np.concatenate([tmp1, tmp2, tmp3])) # copy and delete if len(tmp): swr_modth = swr.drop(tmp) theta_modth = theta.drop(tmp) swr_modth_copy = swr_modth.copy() neuron_index = swr_modth.index swr_modth = swr_modth.values ############################################################################################################### # PCA
if len(name_new) > 1: fname_new = cm.save_memmap_join( name_new, base_name='Yr', n_chunks=12, dview=dview) else: print('One file only, not saving!') fname_new = name_new[0] #%% # fname_new='Yr_d1_501_d2_398_d3_1_order_F_frames_369_.mmap' Yr, dims, T = cm.load_memmap(fname_new) d1, d2 = dims images = np.reshape(Yr.T, [T] + list(dims), order='F') Y = np.reshape(Yr, dims + (T,), order='F') #%% if np.min(images) < 0: raise Exception('Movie too negative, add_to_movie should be larger') if np.sum(np.isnan(images)) > 0: raise Exception('Movie contains nan! You did not remove enough borders') #%% Cn = cm.local_correlations(Y[:, :, :1000]) pl.imshow(Cn, cmap='gray') #%% if not is_patches: #%% K = 35 # number of neurons expected per patch gSig = [7, 7] # expected half size of neurons merge_thresh = 0.8 # merging threshold, max correlation allowed p = 2 # order of the autoregressive system cnm = cnmf.CNMF(n_processes, method_init=init_method, k=K, gSig=gSig, merge_thresh=merge_thresh, p=p, dview=dview, Ain=None, method_deconvolution='oasis', skip_refinement=False) cnm = cnm.fit(images)
def __init__(self, field, params, interpolation='trilinear', integration='simple', h_min=2e-3, h_max=2e4, len_max=500, tol=1e-2, iter_max=1e3, xx=np.array([0, 0, 0])): """ Creates the traced streamline for a specified vector field field. call signature: Stream(field, p, interpolation='trilinear', integration='simple', h_min=2e-3, h_max=2e4, len_max=500, tol=1e-2, iter_max=1e3, xx=np.array([0,0,0])): Keyword arguments: *field*: Vector field which is integrated over. *params*: Simulation and tracer parameters. *interpolation*: Interpolation of the vector field. 'mean': Take the mean of the adjacent grid point. 'trilinear': Weigh the adjacent grid points according to their distance. *integration*: Integration method. 'simple': low order method. 'RK6': Runge-Kutta 6th order. *h_min*: Minimum step length for and underflow to occur. *h_max*: Parameter for the initial step length. *len_max*: Maximum length of the streamline. Integration will stop if l >= len_max. *tol*: Tolerance for each integration step. Reduces the step length if error >= tol. *iter_max*: Maximum number of iterations. *xx*: Initial seed. """ # Tentative streamline length. self.tracers = np.zeros([iter_max, 3], dtype='float32') tol2 = tol**2 dh = np.sqrt(h_max*h_min) # Initial step size. # Declare some vectors. xMid = np.zeros(3) xSingle = np.zeros(3) xHalf = np.zeros(3) xDouble = np.zeros(3) # Initialize the coefficient for the 6th order adaptive time step RK. a = np.zeros(6); b = np.zeros((6, 5)); c = np.zeros(6); cs = np.zeros(6) k = np.zeros((6, 3)) a[1] = 0.2; a[2] = 0.3; a[3] = 0.6; a[4] = 1; a[5] = 0.875 b[1, 0] = 0.2 b[2, 0] = 3/40.; b[2, 1] = 9/40. b[3, 0] = 0.3; b[3, 1] = -0.9; b[3, 2] = 1.2 b[4, 0] = -11/54.; b[4, 1] = 2.5; b[4, 2] = -70/27.; b[4, 3] = 35/27. b[5, 0] = 1631/55296.; b[5, 1] = 175/512.; b[5, 2] = 575/13824. b[5, 3] = 44275/110592.; b[5, 4] = 253/4096. c[0] = 37/378.; c[2] = 250/621.; c[3] = 125/594.; c[5] = 512/1771. cs[0] = 2825/27648.; cs[2] = 18575/48384.; cs[3] = 13525/55296. cs[4] = 277/14336.; cs[5] = 0.25 # Do the streamline tracing. self.tracers[0, :] = xx outside = False stream_len = 0 len = 0 if integration == 'simple': while ((len < len_max) and (stream_len < iter_max-1) and (not np.isnan(xx[0])) and (outside == False)): # (a) single step (midpoint method) xMid = xx + 0.5*dh*vec_int_no_var(xx, field, params, interpolation) xSingle = xx + dh*vec_int_no_var(xMid, field, params, interpolation) # (b) two steps with half stepsize xMid = xx + 0.25*dh*vec_int_no_var(xx, field, params, interpolation) xHalf = xx + 0.5*dh*vec_int_no_var(xMid, field, params, interpolation) xMid = xHalf + 0.25*dh*vec_int_no_var(xHalf, field, params, interpolation) xDouble = xHalf + 0.5*dh*vec_int_no_var(xMid, field, params, interpolation) # (c) Check error (difference between methods). dist2 = np.sum((xSingle-xDouble)**2) if dist2 > tol2: dh = 0.5*dh if abs(dh) < h_min: print "Error: stepsize underflow" break else: len += np.sqrt(np.sum((xx-xDouble)**2)) xx = xDouble.copy() if abs(dh) < h_min: dh = 2*dh stream_len += 1 self.tracers[stream_len, :] = xx.copy() if (dh > h_max) or (np.isnan(dh)): dh = h_max # Check if this point lies outside the domain. 
if ((xx[0] < params.Ox-params.dx) or (xx[0] > params.Ox+params.Lx+params.dx) or (xx[1] < params.Oy-params.dy) or (xx[1] > params.Oy+params.Ly+params.dy) or (xx[2] < params.Oz) or (xx[2] > params.Oz+params.Lz)): outside = True if integration == 'RK6': while ((len < len_max) and (stream_len < iter_max-1) and (not np.isnan(xx[0])) and (outside == False)): k[0, :] = dh*vec_int_no_var(xx, field, params, interpolation) k[1, :] = dh*vec_int_no_var(xx + b[1, 0]*k[0, :], field, params, interpolation) k[2, :] = dh*vec_int_no_var(xx + b[2, 0]*k[0, :] + b[2, 1]*k[1, :], field, params, interpolation) k[3, :] = dh*vec_int_no_var(xx + b[3, 0]*k[0, :] + b[3, 1]*k[1, :] + b[3, 2]*k[2, :], field, params, interpolation) k[4, :] = dh*vec_int_no_var(xx + b[4, 0]*k[0, :] + b[4, 1]*k[1, :] + b[4, 2]*k[2, :] + b[4, 3]*k[3, :], field, params, interpolation) k[5, :] = dh*vec_int_no_var(xx + b[5, 0]*k[0, :] + b[5, 1]*k[1, :] + b[5, 2]*k[2, :] + b[5, 3]*k[3, :] + b[5, 4]*k[4, :], field, params, interpolation) xNew = xx + c[0]*k[0, :] + c[1]*k[1, :] + c[2]*k[2, :] + \ c[3]*k[3, :] + c[4]*k[4, :] + c[5]*k[5, :] xNewS = xx + cs[0]*k[0, :] + cs[1]*k[1, :] + cs[2]*k[2, :] + \ cs[3]*k[3, :] + cs[4]*k[4, :] + cs[5]*k[5, :] delta2 = np.dot((xNew-xNewS), (xNew-xNewS)) delta = np.sqrt(delta2) if delta2 > tol2: dh = dh*(0.9*abs(tol/delta))**0.2 if abs(dh) < h_min: print "Error: step size underflow" break else: len += np.sqrt(np.sum((xx-xNew)**2)) xx = xNew if abs(dh) < h_min: dh = 2*dh stream_len += 1 self.tracers[stream_len, :] = xx if (dh > h_max) or (np.isnan(dh)): dh = h_max # Check if this point lies outside the domain. if ((xx[0] < params.Ox-params.dx) or (xx[0] > params.Ox+params.Lx+params.dx) or (xx[1] < params.Oy-params.dy) or (xx[1] > params.Oy+params.Ly+params.dy) or (xx[2] < params.Oz) or (xx[2] > params.Oz+params.Lz)): outside = True if (dh > h_max) or (delta == 0) or (np.isnan(dh)): dh = h_max self.tracers = np.resize(self.tracers, (stream_len, 3)) self.len = len self.stream_len = stream_len self.params = params
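The 'simple' integrator above controls its error by comparing one full midpoint step against two half steps and halving dh whenever the two estimates disagree by more than tol. A stripped-down sketch of that step-doubling control for a generic ODE dx/dt = f(x) (function and variable names here are illustrative, not from the original class):

import numpy as np

def adaptive_midpoint_step(f, x, dh, tol=1e-4, h_min=1e-6, h_max=1.0):
    """One accepted step of midpoint step-doubling error control.
    Returns (x_next, dh_next)."""
    while True:
        x_single = x + dh * f(x + 0.5 * dh * f(x))            # one full midpoint step
        x_half = x + 0.5 * dh * f(x + 0.25 * dh * f(x))       # first half step
        x_double = x_half + 0.5 * dh * f(x_half + 0.25 * dh * f(x_half))
        # accept if the two estimates agree, otherwise halve the step size
        if np.sum((x_single - x_double) ** 2) <= tol ** 2:
            return x_double, min(max(dh, h_min), h_max)
        dh = 0.5 * dh
        if abs(dh) < h_min:
            raise RuntimeError("step size underflow")

# trace dx/dt = (-y, x), i.e. motion on a circle, for a few accepted steps
x, dh = np.array([1.0, 0.0]), 0.5
for _ in range(10):
    x, dh = adaptive_midpoint_step(lambda p: np.array([-p[1], p[0]]), x, dh)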
def balance(self, tournament, train_only=True, seed=0): """ Copy of data where specified eras have mean y of 0.5. Parameters ---------- tournament : int or str Which tournament's targets to balance. train_only : {True, False}, optional By default (True) only train eras are y balanced. No matter what the setting of `train_only` any era that contains a y that is NaN is not balanced. seed : int, optional Seed used by random number generator that selects which rows to keep. Default is 0. Returns ------- data : Data A copy of data where specified eras have mean y (for the given `tournament`) of 0.5. """ # This function is not written in a straightforward manner. # A few speed optimizations have been made. data = self if train_only: f = REGION_STR_TO_FLOAT['train'] eras = np.unique(data.era_float[data.region_float == f]).tolist() else: eras = data.unique_era(as_str=False).tolist() era = data.era_float y = data.y[tournament] index = np.arange(y.size) remove = [] rs = np.random.RandomState(seed) for e in eras: idx = era == e yi = y[idx] indexi = index[idx] n1 = yi.sum() if np.isnan(n1): continue n1 = int(n1) n0 = yi.size - n1 if n0 == n1: pass elif n0 > n1: ix = indexi[yi == 0] ix = rs.choice(ix, size=n0 - n1, replace=False) remove.append(ix) elif n0 < n1: ix = indexi[yi == 1] ix = rs.choice(ix, size=n1 - n0, replace=False) remove.append(ix) else: msg = "balance should not reach this line" # pragma: no cover raise RuntimeError(msg) # pragma: no cover idx = ~idx era = era[idx] y = y[idx] index = index[idx] if len(remove) == 0: data = data.copy() else: keep = set(range(data.shape[0])) - set(np.concatenate(remove)) keep = list(keep) df = data.df.take(keep) data = Data(df) return data
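The era balancing above amounts to per-era downsampling of the majority label: within each era, randomly chosen rows of the over-represented class are dropped until the two counts match, and eras containing NaN targets are skipped. A small stand-alone illustration on plain numpy arrays (the era/target values are made up):

import numpy as np

def balanced_keep_indices(era, y, seed=0):
    """Return row indices to keep so that each era has equal numbers of
    y == 0 and y == 1 rows; eras containing NaN targets are left untouched."""
    rs = np.random.RandomState(seed)
    keep = np.ones(y.size, dtype=bool)
    for e in np.unique(era):
        idx = np.flatnonzero(era == e)
        yi = y[idx]
        if np.isnan(yi).any():
            continue
        n1 = int(yi.sum())
        n0 = yi.size - n1
        if n0 == n1:
            continue
        majority = 0 if n0 > n1 else 1
        drop = rs.choice(idx[yi == majority], size=abs(n0 - n1), replace=False)
        keep[drop] = False
    return np.flatnonzero(keep)

era = np.array([1, 1, 1, 1, 2, 2, 2])
y = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0])
print(balanced_keep_indices(era, y))  # drops two era-1 rows with y == 1 and one era-2 row with y == 0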