def candlestick_trades(samplet, lookback, t, px, sz):
    """Run the C ``c_candlestick`` routine over trade data and return its output.

    Thin ctypes wrapper: declares the C signature on the library returned by
    ``_load_candlestick_lib()``, allocates the output buffer and calls the
    routine. The meaning of the six values per sample is defined by the C
    implementation, which is not visible here.

    Args:
        samplet: Float array of sample times; must be aligned and contiguous.
        lookback: Look-back value, passed to C as a ``double``.
        t: 1-D float array whose length is passed to C as the data length
            (presumably trade timestamps -- confirm against the C source).
        px: 1-D float array (presumably trade prices -- confirm).
        sz: 1-D float array (presumably trade sizes -- confirm).

    Returns:
        A flat float array of length ``6 * len(samplet)`` filled by the C code.
    """
    lib = _load_candlestick_lib()

    in_vector = np.ctypeslib.ndpointer(float, ndim=1,
                                       flags="aligned, contiguous")
    lib.c_candlestick.restype = None
    lib.c_candlestick.argtypes = [
        np.ctypeslib.c_intp,
        np.ctypeslib.ndpointer(float, flags="aligned, contiguous"),
        ctypes.c_double,
        np.ctypeslib.c_intp,
        in_vector,
        in_vector,
        in_vector,
        np.ctypeslib.ndpointer(float, ndim=1,
                               flags="aligned, contiguous," "writeable"),
    ]

    # len() replaces np.alen, which was removed in NumPy 1.23.
    samplelen = len(samplet)
    datalen = len(t)
    res = np.empty(6 * samplelen)
    lib.c_candlestick(samplelen, samplet, lookback, datalen, t, px, sz, res)
    return res
def eccentricity(data, exponent=1., metricpar={}, callback=None):
    """Return the eccentricity of every data point.

    For a finite ``exponent`` p this is ``(mean_j d(x_i, x_j)**p) ** (1/p)``;
    for an infinite exponent it is ``max_j d(x_i, x_j)``.

    Args:
        data: Either a 1-D condensed dissimilarity matrix (expanded with
            ``squareform``) or a 2-D array of row vectors (distances computed
            row by row with ``cdist``).
        exponent: The exponent p; ``np.inf``, ``'Inf'`` and ``'inf'`` select
            the maximum.
        metricpar: Keyword arguments forwarded to ``cdist``. Must be empty for
            dissimilarity input. The dict is only read, never modified.
        callback: Passed to ``progressreporter``; the resulting reporter is
            called with an integer percentage after each row (vector data only).

    Returns:
        1-D array with one eccentricity value per data point.
    """
    sup_norm = exponent in (np.inf, 'Inf', 'inf')

    if data.ndim == 1:
        assert metricpar == {}, 'No optional parameter is allowed for a dissimilarity matrix.'
        ds = squareform(data, force='tomatrix')
        if sup_norm:
            return ds.max(axis=0)
        n_points = float(len(ds))
        if exponent == 1.:
            return ds.sum(axis=0) / n_points
        return np.power(np.power(ds, exponent).sum(axis=0) / n_points,
                        1. / exponent)

    progress = progressreporter(callback)
    N = len(data)
    ecc = np.empty(N)
    for i in range(N):
        # One row of the distance matrix at a time keeps memory linear in N.
        row = cdist(data[(i,), :], data, **metricpar)
        if sup_norm:
            ecc[i] = row.max()
        elif exponent == 1.:
            ecc[i] = row.sum() / float(N)
        else:
            dsum = np.power(row, exponent).sum()
            ecc[i] = np.power(dsum / float(N), 1. / exponent)
        progress((i + 1) * 100 // N)
    return ecc
def get_data(self, orig):
    """Return the clipboard data, clipped to the length of ``orig``.

    The clipboard data is truncated only when it is longer than ``orig`` and
    ``orig`` has more than one element; otherwise it is returned in full.

    Args:
        orig: Sequence the data is destined for; only its length is used.

    Returns:
        A slice of ``self.serializer.clipboard_data``.
    """
    data = self.serializer.clipboard_data
    # len() replaces np.alen, which was removed in NumPy 1.23.
    data_len = len(data)
    orig_len = len(orig)
    if data_len > orig_len > 1:
        data_len = orig_len
    return data[0:data_len]
def _do_problem(self, problem, integrator, old_api=True, **integrator_params):
    """Integrate ``problem`` with a ``dae`` solver and assert the outcome.

    Args:
        problem: Object providing ``res``, ``z0``, ``zprime0``, ``stop_t``,
            ``info()`` and ``verify()``; an optional ``jac`` attribute is
            used as the Jacobian function.
        integrator: Integrator name handed to ``dae``. For ``'ida'`` every
            step must return flag ``0``; for any other name the flag must be
            positive.
        old_api: Selects how the result of ``step`` is read: a
            ``(flag, t)`` tuple when true, an object with ``.flag`` otherwise.
        **integrator_params: Extra options forwarded to ``set_options``.

    Raises:
        AssertionError: If a step reports an unexpected flag or
            ``problem.verify`` rejects the solution.
    """
    jac = getattr(problem, 'jac', None)
    ig = dae(integrator, problem.res, jacfn=jac, old_api=old_api)
    ig.set_options(old_api=old_api, **integrator_params)

    # Row 0 holds the initial condition, rows 1.. one solution per stop time.
    shape = (1 + len(problem.stop_t), len(problem.z0))
    z = empty(shape, float)
    zprime = empty(shape, float)

    ig.init_step(0., problem.z0, problem.zprime0, z[0], zprime[0])
    for i, stop_time in enumerate(problem.stop_t, start=1):
        soln = ig.step(stop_time, z[i], zprime[i])
        if old_api:
            flag, _ = soln
        else:
            flag = soln.flag
        if integrator == 'ida':
            assert flag == 0, (problem.info(), flag)
        else:
            assert flag > 0, (problem.info(), flag)

    assert problem.verify(array(z), array(zprime), [0.] + problem.stop_t), \
        (problem.info(),)
def get_data(self, orig):
    """Return ``self.data`` repeated or truncated to the length of ``orig``.

    When ``orig`` is longer than the stored data, the data is tiled end to
    end until it covers ``orig``; the result is then cut to ``len(orig)``.

    Args:
        orig: Sequence the data is destined for; only its length is used.

    Returns:
        Array-like of length ``len(orig)``.

    Raises:
        ZeroDivisionError: If ``self.data`` is empty and ``orig`` is not.
    """
    data = self.data
    # len() replaces np.alen, which was removed in NumPy 1.23.
    data_len = len(data)
    orig_len = len(orig)
    if orig_len > data_len:
        reps = (orig_len // data_len) + 1
        data = np.tile(data, reps)
    return data[0:orig_len]
def get_data(self, orig):
    """Return ``self.data`` repeated or truncated to the length of ``orig``.

    When ``orig`` is longer than the stored data, the data is tiled end to
    end until it covers ``orig``; the result is then cut to ``len(orig)``.

    Args:
        orig: Sequence the data is destined for; only its length is used.

    Returns:
        Array-like of length ``len(orig)``.

    Raises:
        ZeroDivisionError: If ``self.data`` is empty and ``orig`` is not.
    """
    data = self.data
    # len() replaces np.alen, which was removed in NumPy 1.23.
    data_len = len(data)
    orig_len = len(orig)
    if orig_len > data_len:
        # Floor division: a float repeat count makes np.tile raise on Python 3.
        reps = (orig_len // data_len) + 1
        data = np.tile(data, reps)
    return data[0:orig_len]
def tableSum(self):
    """Accumulate the weighted absolute grid sum into ``self.fsum``.

    Every cell ``interpedgrid[i, j]`` contributes
    ``newx[i] * 1.15 * _angles[j] * |interpedgrid[i, j]|``.

    Returns:
        The accumulated sum, which is also stored in ``self.fsum``.
    """
    self.fsum = 0
    # range/len replace xrange/numpy.alen (Python 2 only / removed in NumPy 1.23).
    for i in range(len(self.newx)):
        for j in range(len(self._angles)):
            self.fsum += (self.newx[i] * 1.15 * self._angles[j]
                          * math.fabs(self.interpedgrid[i, j]))
    return self.fsum
def competence(stochastic):
    """
    The competence function for TWalk.

    Returns 0 unless ``stochastic.dtype`` is one of ``float_dtypes`` and the
    value has more than 4 entries along its first axis; returns 2 when it has
    at least 10 entries and 1 otherwise.
    """
    if stochastic.dtype not in float_dtypes:
        return 0
    # np.alen was removed in NumPy 1.23. atleast_1d keeps its behaviour of
    # treating a scalar value as having length 1.
    length = len(np.atleast_1d(stochastic.value))
    if length > 4:
        return 2 if length >= 10 else 1
    return 0
def create_binary(filename, num, outfile, options):
    """Create patterned binary data with the first 7 characters of the filename
    interleaved with a byte ramp, e.g. A128 \\x00A128 \\x01A128 \\x02 etc.

    Args:
        filename: Path of the file to write.
        num: Number of bytes to generate.
        outfile: Name of the form ``root.ext``; ``root`` provides the repeated
            text prefix.
        options: Unused.

    Raises:
        ValueError: If ``outfile`` does not contain exactly one ``"."``.
    """
    root, _ = outfile.split(".")
    # NOTE(review): the prefix is the root plus one space, cut to 8 characters.
    # Roots shorter than 7 characters therefore repeat with a period shorter
    # than the 8-byte ramp stride -- confirm whether padding to 8 was intended.
    prefix = ("%s " % root)[0:8]
    # frombuffer on encoded bytes replaces np.fromstring on text, which is
    # deprecated and does not accept str on Python 3.
    a = np.frombuffer(prefix.encode("latin-1"), dtype=np.uint8)
    # Floor division: np.tile and np.arange need integer counts on Python 3.
    b = np.tile(a, (num // len(a)) + 1)[0:num]
    # Every 8th byte carries a ramp that wraps around at 256.
    b[7::8] = (np.arange(len(b) // 8) % 256).astype(np.uint8)
    with open(filename, "wb") as fh:
        fh.write(b.tobytes())
def get_bitplanes(self, segment_viewer, bytes_per_row, nr, count, byte_values, style, colors):
    """Render bit-plane bytes into an ``(nr, pixels_per_row, 3)`` RGB image.

    Args:
        segment_viewer: Supplies ``preferences.empty_background_color`` used
            for rows at index ``count`` and beyond.
        bytes_per_row: Number of source bytes per displayed row.
        nr: Number of rows in the output image.
        count: First row index painted with the empty background colour.
        byte_values: uint8 array of data bytes.
        style: uint8 array of per-byte style bits, parallel to ``byte_values``.
        colors: 5-tuple ``(color_registers, h_colors, m_colors, c_colors,
            d_colors)`` of palettes indexed by pixel value.

    Returns:
        uint8 array of shape ``(nr, pixels_per_row, 3)``.
    """
    bitplanes = self.bitplanes
    rem = len(byte_values) % bitplanes
    if rem > 0:
        # Pad up to the next multiple of ``bitplanes``. Appending ``rem``
        # zeros, as before, only lands on a multiple when
        # 2 * rem == bitplanes.
        pad = bitplanes - rem
        byte_values = np.append(byte_values, np.zeros(pad, dtype=np.uint8))
        style = np.append(style, np.zeros(pad, dtype=np.uint8))
    pixels_per_row = 8 * bytes_per_row // bitplanes
    bits = np.unpackbits(byte_values).reshape((-1, 8))
    pixels = np.empty((nr * bytes_per_row // bitplanes, pixels_per_row), dtype=np.uint8)
    self.get_bitplane_pixels(bits, pixels, bytes_per_row, pixels_per_row)
    pixels = pixels.reshape((nr, pixels_per_row))

    # Expand the per-byte style to one entry per pixel, then derive one
    # boolean mask per style category.
    s = self.get_bitplane_style(style)
    style_per_pixel = s.repeat(8).reshape((-1, pixels_per_row))
    normal = (style_per_pixel & self.ignore_mask) == 0
    highlight = (style_per_pixel & style_bits.selected_bit_mask) == style_bits.selected_bit_mask
    data = (style_per_pixel & style_bits.data_bit_mask) == style_bits.data_bit_mask
    comment = (style_per_pixel & style_bits.comment_bit_mask) == style_bits.comment_bit_mask
    match = (style_per_pixel & style_bits.match_bit_mask) == style_bits.match_bit_mask

    color_registers, h_colors, m_colors, c_colors, d_colors = colors
    bitimage = np.empty((nr, pixels_per_row, 3), dtype=np.uint8)
    for i in range(2**bitplanes):
        color_is_set = (pixels == i)
        # Later assignments win, so the order below is the style priority.
        bitimage[color_is_set & normal] = color_registers[i]
        bitimage[color_is_set & data] = d_colors[i]
        bitimage[color_is_set & comment] = c_colors[i]
        bitimage[color_is_set & match] = m_colors[i]
        bitimage[color_is_set & highlight] = h_colors[i]
    bitimage[count:,:,:] = segment_viewer.preferences.empty_background_color.Get(False)
    return bitimage
def do_change(self, editor, undo):
    """Write new data (and optionally style and comments) into the segment.

    Args:
        editor: Passed to ``prepare_data`` and ``get_clipped_indexes``.
        undo: Undo record; its ``flags`` receive the affected index range and
            the selection/byte-changed markers.

    Returns:
        Tuple ``(old_data, indexes, old_style, old_comment_info)`` holding
        what was overwritten; ``old_style`` and ``old_comment_info`` are
        ``None`` when style or comments were not touched.
    """
    self.prepare_data(editor)
    indexes = self.get_clipped_indexes(editor)
    data = self.get_data(self.segment.data[indexes])
    log.debug("orig data: %s", self.segment.data[indexes])
    log.debug("new data: %s", data)
    # The data may be shorter than the selection; only touch what it covers.
    indexes = indexes[0:len(data)]
    log.debug("indexes truncated to data length: %s", indexes)

    s = self.serializer
    if s.clipboard_relative_comment_indexes is not None:
        log.debug("relative comment indexes: %s", s.clipboard_relative_comment_indexes)
        subset = s.clipboard_relative_comment_indexes[s.clipboard_relative_comment_indexes < len(indexes)]
        log.debug("comment index subset: %s", subset)
        comment_indexes = indexes[subset]
        log.debug("new comment indexes: %s", comment_indexes)
        clamped_ranges = indexes_to_ranges(indexes)
        log.debug("clamped ranges: %s", clamped_ranges)
        old_comment_info = self.segment.get_comment_restore_data(clamped_ranges)
    else:
        old_comment_info = None

    undo.flags.index_range = indexes[0], indexes[-1]
    undo.flags.select_range = True
    undo.flags.byte_values_changed = True

    old_data = self.segment[indexes].copy()
    self.segment[indexes] = data

    style = self.get_style(data)
    if style is not None:
        old_style = self.segment.style[indexes].copy()
        self.segment.style[indexes] = style
    else:
        old_style = None

    if old_comment_info is not None:
        log.debug("setting comments: %s", s.clipboard_comments)
        self.segment.set_comments_at_indexes(clamped_ranges, comment_indexes, s.clipboard_comments)
    return (old_data, indexes, old_style, old_comment_info)
def weightedregionprops(L,ncc,dfore):
    """Fit one weighted ellipse to each labelled connected component.

    Args:
        L: Label image; pixels of component ``k`` hold the value ``k``
            (``1..ncc``).
        ncc: Number of connected components.
        dfore: Per-pixel weights, same shape as ``L``.

    Returns:
        List of ``Ellipse`` objects, one per component, built from the
        weighted mean and covariance of the pixel coordinates. Empty when
        ``ncc`` is 0.
    """
    if DEBUG_TRACKINGSETTINGS:
        print('in weightedregionprops, ncc = ' + str(ncc) + ', max(L) = ' + str(num.max(L)) + ', nnz(L) = ' + str(num.flatnonzero(L).shape) + ', sum(dfore) = ' + str(num.sum(num.sum(dfore))))
        for l in range(1,num.max(L)+1):
            print('nnz(L == %d) = '%l + str(len(num.flatnonzero(L==l))))

    if ncc == 0:
        return []

    # all connected components
    index = list(range(1,ncc+1))

    # the unnormalized weight matrix
    w = dfore

    def labelled_sum(values):
        # Per-component sum of ``values``, always as an array.
        return num.array(meas.sum(values,L,index),ndmin=1)

    # normalization terms; empty components get 1 to avoid dividing by zero
    z = labelled_sum(w)
    z[z==0] = 1

    # weighted centers
    cx = labelled_sum(w*params.GRID.X)
    cy = labelled_sum(w*params.GRID.Y)
    cx /= z
    cy /= z

    # weighted, uncentered second moments
    cx2 = labelled_sum(w*params.GRID.X2)
    cy2 = labelled_sum(w*params.GRID.Y2)
    cxy = labelled_sum(w*params.GRID.XY)
    cx2 /= z
    cy2 /= z
    cxy /= z

    # center the second moments to obtain (co)variances
    cx2 -= cx**2
    cy2 -= cy**2
    cxy -= cx*cy

    ellipses = []
    for i in range(len(cx)):
        # major, minor, angle from the covariance
        (sizeH,sizeW,angle) = cov2ell2(cx2[i],cy2[i],cxy[i])
        # clamp degenerate or undefined axes to a minimum size
        if (sizeH < .125) or num.isnan(sizeH):
            sizeH = .125
        if (sizeW < .125) or num.isnan(sizeW):
            sizeW = .125
        area = num.pi * sizeW * sizeH * 4
        ellipses.append(Ellipse(cx[i],cy[i],sizeW,sizeH,angle,area,-1))

    return ellipses
def fit_model(self):
    """Prepare the model: build the similarity matrix and per-item means.

    ``self.means[i]`` becomes the mean of the strictly positive entries of
    ``self.item_user_matrix[i]``, or ``0`` when the item has none. The
    similarity matrix is initialised first if it is still ``None``.
    """
    if self.similarity_matrix is None:
        self._init_similarity_matrix()

    self.means = []
    # range/len replace xrange/np.alen (Python 2 only / removed in NumPy 1.23).
    for i in range(self.dataset.n_items):
        row = self.item_user_matrix[i]
        rated = row[row > 0]
        self.means.append(np.mean(rated) if len(rated) != 0 else 0)
def add_xexboot_header(bytes, bootcode=None, title="DEMO", author="an atari user"):
    """Prepend a 384-byte boot sector block to XEX data.

    The payload is zero-padded to a whole number of 128-byte sectors. The
    boot code is copied to the start of a zeroed 384-byte block, and the
    unpadded payload size is stored little-endian in bytes 9-10 of that block.

    Args:
        bytes: uint8 array holding the XEX payload.
        bootcode: Optional uint8 array of custom boot code. When omitted, the
            module-level ``xexboot_header`` is used (assumed to be a
            bytes-like object -- confirm).
        title: Title inserted at offset 268; ignored with custom ``bootcode``.
        author: Author inserted at offset 308; ignored with custom
            ``bootcode``.

    Returns:
        uint8 array: the boot block followed by the padded payload.
    """
    sec_size = 128
    xex_size = len(bytes)
    # Floor division keeps the sizes integral on Python 3; np.zeros rejects floats.
    num_sectors = (xex_size + sec_size - 1) // sec_size
    padded_size = num_sectors * sec_size
    if xex_size < padded_size:
        bytes = np.append(bytes, np.zeros([padded_size - xex_size], dtype=np.uint8))

    if bootcode is None:
        bootcode = np.frombuffer(xexboot_header, dtype=np.uint8)
    else:
        # don't insert title or author in user supplied bootcode; would have to
        # assume that the user supplied everything desired in their own code!
        title = ""
        author = ""

    bootsize = len(bootcode)
    bootsectors = np.zeros([384], dtype=np.uint8)
    bootsectors[0:bootsize] = bootcode
    # Patch the size into the copy so the caller's boot code is left untouched.
    bootsectors[9:11].view(dtype="<u2")[0] = xex_size

    insert_string(bootsectors, 268, title, 0b11000000)
    insert_string(bootsectors, 308, author, 0b01000000)

    image = np.append(bootsectors, bytes)
    return image
def compressed_submatrix(dm, idx):
    '''
    Extract from a compressed distance matrix the corresponding matrix for a
    subset of points without bringing the matrix into square form first.

    The indices in the list C{idx} must be in increasing order. The number of
    observations in C{dm} is obtained from C{n_obs(dm)}.

    @param dm: compressed distance matrix
    @type dm: numpy.ndarray(N*(N-1)/2, dtype=float)

    @param idx: indices of the subset
    @type idx: numpy.ndarray(n, dtype=int)

    @return: compressed distance matrix
    @rtype: numpy.ndarray(n*(n-1)/2, dtype=float)
    '''
    N = n_obs(dm)
    # len() replaces np.alen, which was removed in NumPy 1.23.
    n = len(idx)
    res = np.empty(n*(n-1)//2, dtype=dm.dtype)
    for r in range(n-1):
        # Offset of row r in the condensed result.
        s = (2*n-1-r)*r//2
        t = idx[r]
        # Condensed positions in ``dm`` of the pairs (t, idx[r+1:]).
        i = idx[r+1:] + (2*N-3-t)*t//2-1
        res[s:s+n-1-r] = dm[i]
    return res
def distance_to_measure(data, k, metricpar={}, callback=None):
    r'''.. math::

      \mathit{distance\_to\_measure}(x) = \sqrt{\frac 1k\sum^k_{j=1}d(x,\nu_j(x))^2},

    where :math:`\nu_1(x),\ldots,\nu_k(x)` are the :math:`k` nearest neighbors
    of :math:`x` in the data set. Again, the first nearest neighbor is
    :math:`x` itself with distance 0.

    ``data`` is either a 1-D condensed dissimilarity matrix or a 2-D array of
    row vectors. For vector data only the Euclidean metric is implemented;
    any other ``metricpar['metric']`` raises ``ValueError``. ``metricpar`` is
    only read, never modified, and ``callback`` is unused.

    Reference: [R4]_.
    '''
    if data.ndim == 1:
        assert metricpar == {}, ('No optional parameter is allowed for a '
                                 'dissimilarity matrix.')
        # dm data
        ds = squareform(data, force='tomatrix')
        N = len(ds)
        r = np.empty(N)
        for i in range(N):
            s = np.sort(ds[i, :])
            assert s[0] == 0.
            # s[0] is the point itself, so s[1:k] are the remaining k-1 terms.
            d = s[1:k]
            r[i] = np.sqrt((d * d).sum() / float(k))
        return r

    # vector data
    if metricpar == {} or metricpar['metric'] == 'euclidean':
        from scipy.spatial import cKDTree
        T = cKDTree(data)
        d, j = T.query(data, k + 1)
        d = d[:, 1:k]
        return np.sqrt((d * d).sum(axis=1) / k)

    # Previously printed the undefined name ``kwargs``, so a NameError hid
    # the intended ValueError.
    print(metricpar)
    raise ValueError('Not implemented')
def testLBP(format, formatMask, path, output):
    """Nearest-neighbour classification on LBP histograms, written to CSV.

    For every image listed in the CSV at ``path`` (columns ``idx`` and
    ``path``) the masked image is turned into a local-binary-pattern
    histogram. Each image is then assigned the class of its nearest other
    image under the L1 distance.

    Args:
        format: Format string producing the image file name from a ``path``
            entry.
        formatMask: Format string producing the mask file name.
        path: CSV file describing the data set.
        output: Prefix for the four result files ``lbp_distances.csv``,
            ``lbp_guessedClasses.csv``, ``lbp_correct.csv`` and
            ``lbp_real.csv``.
    """
    dataset = pd.read_csv(path)
    idxCls = dataset['idx']
    fnList = dataset['path']

    lbps = [
        local_binary_pattern(
            cv2.bitwise_and(imread(format.format(x)),
                            imread(formatMask.format(x))),
            lbpP, lbpR, lbpMethod)
        for x in fnList
    ]
    # The bin edges are shared by all images, so compute them once.
    bins = range(int(np.max(lbps)) + 1)
    histograms = [np.histogram(x, bins=bins)[0] for x in lbps]

    distances = prw.pairwise_distances(histograms, metric='l1')
    # An image must never be matched with itself.
    np.fill_diagonal(distances, math.inf)
    guessedClasses = np.argmin(distances, axis=1)
    correct = [idxCls[guessedClasses[i]] == idxCls[i]
               for i in range(len(idxCls))]

    with open(output + 'lbp_distances.csv', 'w', newline='') as fp:
        csv.writer(fp, delimiter=',').writerows(distances)
    with open(output + 'lbp_guessedClasses.csv', 'w', newline='') as fp:
        csv.writer(fp, delimiter=',').writerow(guessedClasses)
    with open(output + 'lbp_correct.csv', 'w', newline='') as fp:
        csv.writer(fp, delimiter=',').writerow(correct)
    with open(output + 'lbp_real.csv', 'w', newline='') as fp:
        csv.writer(fp, delimiter=',').writerow(idxCls)
def circ_interp(x, y, num, long_arc=False):
    """
    Given two vectors of angle values in radians, perform circular
    interpolation between them.

    :param x: a vector of angle values (in radians)
    :param y: a vector of angle values (in radians)
    :param num: the number of interpolated values
    :param long_arc: interpolate through the long arc
    :type x: numpy.ndarray
    :type y: numpy.ndarray
    :type num: int
    :type long_arc: bool
    :return: a matrix of original and interpolated values, the original
        vectors x and y are in the first and the last matrix columns
    :rtype: numpy.ndarray
    """
    delta = circ_dist(x, y)
    # len() replaces np.alen, which was removed in NumPy 1.23.
    n = len(x)
    steps = num + 2

    # Row-constant fractions 0, 1/(num+1), ..., 1 for each of the n angles.
    interpolation_mask = np.tile(np.arange(steps) / float(num + 1), (n, 1))

    result = np.repeat(x, steps).reshape(n, steps)
    delta = np.repeat(delta, steps).reshape(n, steps)

    if not long_arc:
        result += delta * interpolation_mask
    else:
        # Go the other way round the circle: opposite sign, complementary arc.
        result += -np.sign(delta) * (2*np.pi - np.abs(delta)) * \
            interpolation_mask
    return result
def recall_gain(tp, fn, fp, tn):
    """Calculates Recall Gain from the contingency table

    This function calculates Recall Gain from the entries of the contingency
    table: number of true positives (TP), false negatives (FN), false positives
    (FP), and true negatives (TN). More information on Precision-Recall-Gain
    curves and how to cite this work is available at
    http://www.cs.bris.ac.uk/~flach/PRGcurves/.

    Entries with ``tn + fn == 0`` get a gain of exactly 1.

    Args:
        tp (float) or (numpy.ndarray): True Positives
        fn (float) or (numpy.ndarray): False Negatives
        fp (float) or (numpy.ndarray): False Positives
        tn (float) or (numpy.ndarray): True Negatives

    Returns:
        (float) or (numpy.ndarray)
    """
    n_pos = tp + fn
    n_neg = fp + tn
    with np.errstate(divide='ignore', invalid='ignore'):
        rg = 1. - (n_pos/n_neg) * (fn/tp)
    # np.size replaces np.alen (removed in NumPy 1.23) and, like it, reports
    # 1 for a scalar.
    if np.size(rg) > 1:
        rg[tn + fn == 0] = 1
    elif tn + fn == 0:
        rg = 1
    return rg
def window(C_t, type=None, width=None, mirror=False):
    """
    Filters (half-sided) time series C(t) through one of two different window
    functions. The implemented functions are cos(π/2 t/T) and
    exp[-1/2 (t/λT)²]. Appropriate window functions must have an area of
    unity in frequency domain, which corresponds to a value of one at time
    zero in time domain.

    for Gaussian: width λ = σ / T

    Args:
        C_t: 1-D array holding the time series.
        type: ``'cos'``, ``'exp'``, or ``None``/``'None'`` for no window.
        width: Window width as a fraction of the series length. Defaults to
            1.0 for ``'cos'`` and 0.3 for ``'exp'``; ``np.inf`` disables the
            window.
        mirror: If true, return the series mirrored about time zero.

    Returns:
        The windowed series, or its mirrored version.

    Raises:
        SystemExit: If ``type`` is not a known window name.
    """
    n = len(C_t)
    if width == np.inf:
        type = None

    if type == 'cos':
        if width is None:
            width = 1.0
        # np.linspace and np.zeros require integer counts.
        cut = int(np.floor(n * width))
        taper = np.cos(0.5 * np.pi * np.linspace(0., 1., cut))
        C = C_t * np.append(taper, np.zeros(n - cut))
    elif type == 'exp':
        if width is None:
            width = 0.3
        C = C_t * np.exp(-0.5 * (np.linspace(0., 1., n) / width)**2)
    elif type is None or type == 'None':
        C = C_t
    else:
        sys.exit("\nError: Window type '{0}' unknown. Exit Program!".format(type))

    if mirror:
        # explicit mirroring, e.g. [1., 0.5, 0.] -> [0., 0.5, 1., 0.5]
        return np.append(C[::-1].conj(), C[1:-1])  # C(-t) = C*(t)
    return C
def fill_masked_regions(self, themap, magic=N.inf):
    """Fill masked regions (defined where values == magic) in themap.

    Each masked cell is replaced by the mean of the unmasked cells in the
    smallest square window around it that contains at least one unmasked
    cell. Cells are filled in place and in order, so earlier fills can feed
    later ones. Finally every NaN in the map is set to 0.

    Args:
        themap: 2-D array, modified in place.
        magic: Value that marks a masked cell.

    Returns:
        ``themap``. If every cell is masked there is nothing to average from
        and the map is returned unchanged.
    """
    masked_boxes = N.where(themap == magic)  # locations of masked regions
    if masked_boxes[0].size == themap.size:
        # Without this guard the window search below would never terminate.
        return themap

    n_rows, n_cols = themap.shape[0], themap.shape[1]
    for x, y in zip(masked_boxes[0], masked_boxes[1]):
        half = 1
        while True:
            # Window of half-width ``half`` around (x, y), clipped to the map.
            x1 = max(x - half, 0)
            x2 = min(x + 1 + half, n_rows)
            y1 = max(y - half, 0)
            y2 = min(y + 1 + half, n_cols)

            cutout = themap[x1:x2, y1:y2].ravel()
            goodcutout = cutout[cutout != magic]
            if len(goodcutout) > 0:
                themap[x, y] = N.nansum(goodcutout)/float(len(goodcutout))
                break
            half += 1

    themap[N.where(N.isnan(themap))] = 0.0
    return themap
def learn_option(option, environment_name, num_episodes, max_steps):
    """Learn a policy for ``option`` with Sarsa(lambda) and pickle the result.

    Output files share the prefix ``option-<label>-to-<target>``:
    ``-score.csv`` (one row per episode), ``-trajectory`` (handed to the
    trajectory recorder) and ``-policy.pl`` (the pickled option).

    :param option: option to learn; provides ``label``, ``target``,
        ``initial_states`` and ``initial_state()``, and receives ``basis``
        and ``weights``
    :param environment_name: argument passed to ``PinballRLGlue``
    :param num_episodes: number of episodes; a falsy value means one episode
        per entry of ``option.initial_states``
    :type num_episodes: int
    :param max_steps: step limit passed to ``RL_episode``
    :type max_steps: int
    :return: the option, with ``basis`` and ``weights`` set
    """
    from pyrl.agents.sarsa_lambda import sarsa_lambda
    from pyrl.rlglue import RLGlueLocal as RLGlueLocal
    from pyrl.environments.pinball import PinballRLGlue
    import pyflann  # NOTE(review): not referenced below; kept in case the import is required
    import options
    import csv
    try:
        import cPickle as pickle
    except ImportError:  # Python 3
        import pickle

    prefix = 'option-%d-to-%d'%(option.label, option.target)
    score_path = prefix + '-score.csv'
    # Start from an empty score file; rows are appended per episode below.
    with open(score_path, 'w'):
        pass

    # Create agent and environments
    agent = sarsa_lambda(epsilon=0.01, alpha=0.001, gamma=0.9, lmbda=0.9,
                         params={'name':'fourier', 'order':4})

    # Wrap the environment with the option's pseudo-reward
    environment = options.TrajectoryRecorder(
        options.PseudoRewardEnvironment(PinballRLGlue(environment_name), option, 10000),
        prefix + '-trajectory')

    # Connect to RL-Glue
    rlglue = RLGlueLocal.LocalGlue(environment, agent)
    rlglue.RL_init()

    # Execute episodes
    if not num_episodes:
        num_episodes = len(option.initial_states)
        print('Learning %d episodes'%(num_episodes,))

    for i in range(num_episodes):
        initial_state = option.initial_state()
        rlglue.RL_env_message('set-start-state %f %f %f %f'
                              %(initial_state[0], initial_state[1], initial_state[2], initial_state[3]))

        terminated = rlglue.RL_episode(max_steps)

        total_steps = rlglue.RL_num_steps()
        total_reward = rlglue.RL_return()

        with open(score_path, 'a') as f:
            writer = csv.writer(f)
            writer.writerow([i, total_steps, total_reward, terminated])

    rlglue.RL_cleanup()

    # Save function approximation
    option.basis = agent.basis
    option.weights = agent.weights[0,:,:]

    with open(prefix + '-policy.pl', 'wb') as policy_file:
        pickle.dump(option, policy_file)

    return option
def kNN_distance(data, k, metricpar={}, callback=None):
    r'''The distance to the :math:`k`-th nearest neighbor as an (inverse)
    measure of density.

    The value returned for each point is entry ``k`` (0-based) of its sorted
    distances, where entry 0 is the point itself at distance 0. With this
    implementation ``k=1`` therefore yields the distance to the nearest
    *other* point.

    NOTE(review): earlier documentation stated that :math:`k=1` refers to the
    point itself and :math:`k=2` to the nearest other point; that does not
    match the indexing used here -- confirm which convention is intended.

    ``data`` is either a 1-D condensed dissimilarity matrix or a 2-D array of
    row vectors. For vector data only the Euclidean metric is implemented;
    any other ``metricpar['metric']`` raises ``ValueError``. ``metricpar`` is
    only read, never modified, and ``callback`` is unused.
    '''
    if data.ndim == 1:
        assert metricpar == {}, ('No optional parameter is allowed for a '
                                 'dissimilarity matrix.')
        # dm data
        ds = squareform(data, force='tomatrix')
        N = len(ds)
        r = np.empty(N)
        for i in range(N):
            s = np.sort(ds[i, :])
            assert s[0] == 0.
            r[i] = s[k]
        return r

    # vector data
    if metricpar == {} or metricpar['metric'] == 'euclidean':
        from scipy.spatial import cKDTree
        T = cKDTree(data)
        d, j = T.query(data, k + 1)
        return d[:, k]

    print(metricpar)
    raise ValueError('Not implemented')
def __init__(self, pos):
    """Parse a ``;``-separated particle record into its fields.

    With ``n = (count - 3) // 4`` the values are laid out as: position (n),
    velocity (n), fitness (1), personal best position (n), personal best
    value (1), global best position (n), global best value (1).

    Args:
        pos: String of numbers separated by ``";"``.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    # float() replaces np.float, an alias that was removed from NumPy.
    vals = np.array([float(val) for val in pos.split(";")])
    # Floor division: slice bounds must be integers on Python 3.
    numOfVariables = (len(vals) - 3) // 4

    index = 0
    self.particlePosition = vals[index : index + numOfVariables]
    index += numOfVariables
    self.velocity = vals[index : index + numOfVariables]
    index += numOfVariables
    self.fitness = vals[index : index + 1]
    index += 1
    self.persBestPos = vals[index : index + numOfVariables]
    index += numOfVariables
    self.persBestVal = vals[index : index + 1]
    index += 1
    self.globalBestPos = vals[index : index + numOfVariables]
    index += numOfVariables
    self.globalBestVal = vals[index : index + 1]
def plot_counts(ax, dictorigin, x_locator, x_formatter, bin_edges_in, snum, enum):
    """Plot binned event counts with a cumulative-count overlay.

    A black histogram of ``dictorigin["time"]`` is drawn on ``ax``; a green
    cumulative-count curve is drawn on a twin y-axis.

    Args:
        ax: Matplotlib axes to draw on.
        dictorigin: Mapping whose ``"time"`` entry holds the event times.
        x_locator: Major tick locator applied to both x-axes.
        x_formatter: Major tick formatter applied to both x-axes.
        bin_edges_in: Histogram bin edges; nothing is drawn with fewer than
            two edges.
        snum: Lower x-limit; applied only when both limits are truthy.
        enum: Upper x-limit; applied only when both limits are truthy.
    """
    # compute all data needed
    event_times = dictorigin["time"]
    # len() replaces np.alen, which was removed in NumPy 1.23.
    cumcounts = np.arange(1, len(event_times) + 1)
    if len(bin_edges_in) < 2:
        return
    binsize = bin_edges_in[1] - bin_edges_in[0]
    binsize_str = binsizelabel(binsize)

    # histogram of counts per bin
    ax.hist(
        event_times, bin_edges_in, cumulative=False, histtype="bar", color="black", edgecolor=None
    )
    ax.grid(True)
    ax.xaxis_date()
    plt.setp(ax.get_xticklabels(), rotation=90, horizontalalignment="center", fontsize=7)
    ax.set_ylabel("# Earthquakes\n%s" % binsize_str, fontsize=8)
    ax.xaxis.set_major_locator(x_locator)
    ax.xaxis.set_major_formatter(x_formatter)
    if snum and enum:
        ax.set_xlim(snum, enum)

    # cumulative counts on a second y-axis
    ax2 = ax.twinx()
    p2, = ax2.plot(event_times, cumcounts, "g", lw=2.5)
    ax2.yaxis.get_label().set_color(p2.get_color())
    # colour the y tick labels green to match the curve
    plt.setp(plt.getp(ax2, "yticklabels"), color="g")
    ax2.set_ylabel("Cumulative\n# Earthquakes", fontsize=8)
    ax2.xaxis.set_major_locator(x_locator)
    ax2.xaxis.set_major_formatter(x_formatter)
    if snum and enum:
        ax2.set_xlim(snum, enum)
    return
def availability(self):
    """Return, for every magnet set, the list of indices of its entries.

    Returns:
        Dict mapping each key of ``self.magnet_sets`` to
        ``[0, 1, ..., len(set) - 1]``.
    """
    # list(...) gives a real list on Python 3 as well, where range() is lazy.
    return {key: list(range(len(magnets)))
            for key, magnets in self.magnet_sets.items()}
def ExpectationMaximization(dataset):
    """Fit a 10-component mixture to ``dataset`` by iterated EM-style updates.

    Parameters come from ``initParameters`` and component densities from
    ``gaussian2``. Each pass updates every component in turn and prints the
    accumulated ``-log`` of the component responsibilities' sums; iteration
    stops once its absolute value drops below 0.1.

    Args:
        dataset: 2-D array with one sample per row.

    Returns:
        Tuple ``(w, mu, sigma2)``: mixing weights, means and variances.
    """
    # dimension of the space
    N = len(dataset[0])
    m = 10
    minw = 0.01
    minsigma = 0.01
    # mu: expectation
    # sigma2: variance
    # w: mixing weight
    mu, sigma2, w = initParameters(m, N)
    epsi = 0.1
    conv = False
    while not conv:
        Elikelihood = 0
        # for each mixture component
        for j in range(m):
            # Expectation
            # gamma: responsibility values
            gamma = w[j] * gaussian2(dataset, mu[j], sigma2[j], N)
            Nwj = np.sum(gamma)
            gamma = gamma/Nwj
            # Maximization (of the likelihood)
            gammat = np.array([gamma]).T
            mu[j] = np.sum(gammat * dataset, 0) / Nwj
            sigma2[j] = np.sum(gammat * ((dataset - mu[j]) ** 2), 0) / Nwj
            # NOTE(review): N is the data dimension, not the sample count --
            # confirm this normalisation is intended.
            w[j] = Nwj/N
            # prevent variances from reaching 0; a list (not a lazy map
            # object) so the row assignment also works on Python 3
            sigma2[j] = [sig2 * (sig2 >= minsigma) or minsigma
                         for sig2 in sigma2[j]]
            # prevent mixing coefficient from reaching 0
            if w[j] < minw:
                w[j] = minw
            Elikelihood -= np.log(Nwj)
        print(Elikelihood)
        conv = np.abs(Elikelihood) < epsi
    return (w, mu, sigma2)
def parse_tile_map(self, panel, tile_map):
    """Rebuild the tile palette in ``panel`` from ``tile_map``.

    The panel's sizer is cleared. For every entry of ``tile_map`` a label is
    added, followed by a wrapping row with one button per tile.

    Args:
        panel: wx panel whose sizer receives the widgets.
        tile_map: Iterable of entries ``(label, tiles, tiles, ...)``, where
            each ``tiles`` is an indexable sequence of tile values.
    """
    sizer = panel.GetSizer()
    sizer.Clear(True)
    self.tile_map = tile_map
    self.categories = []
    self.items = []
    self.pattern_to_item = {}
    for items in tile_map:
        label = items[0]
        t = wx.StaticText(panel, -1, label)
        sizer.Add(t, 0, wx.EXPAND, 0)
        self.categories.append(t)
        w = wx.WrapSizer()
        for tiles in items[1:]:
            # range/len replace np.arange/np.alen (removed in NumPy 1.23).
            for i in range(len(tiles)):
                # A one-element slice, so the button keeps a sequence.
                data = tiles[i:i+1]
                bmp = self.segment_viewer.machine.antic_font.get_image(data[0], self.zoom)
                btn = TileButton(panel, -1, bmp, style=wx.BORDER_NONE|wx.BU_EXACTFIT)
                btn.SetBackgroundColour(self.bg)
                btn.tile_data = data
                btn.Bind(wx.EVT_BUTTON, self.on_tile_clicked)
                w.Add(btn, 0, wx.ALL, 0)
                self.items.append(btn)
                self.pattern_to_item[tuple(data)] = btn
        sizer.Add(w, 0, wx.EXPAND, 0)
    self.Layout()
def create_segments(labels, pos_scores, neg_scores):
    """Group instances with identical score pairs into segments.

    Instances are ordered by decreasing ``pos_scores``, ties broken by
    increasing ``neg_scores``. Consecutive instances with the same
    ``(pos_score, neg_score)`` pair form one segment.

    Args:
        labels: 1-D array of labels; ``0`` counts as negative, anything else
            as positive.
        pos_scores: 1-D array of positive-class scores.
        neg_scores: 1-D array of negative-class scores.

    Returns:
        Dict with the equally long arrays ``'pos_score'``, ``'neg_score'``,
        ``'pos_count'`` and ``'neg_count'``, one entry per segment.
    """
    # len() replaces np.alen, which was removed in NumPy 1.23.
    n = len(labels)
    # reorder by decreasing pos_scores, using increasing neg_scores in breaking ties
    new_order = np.lexsort((neg_scores, -pos_scores))
    labels = labels[new_order]
    pos_scores = pos_scores[new_order]
    neg_scores = neg_scores[new_order]

    # Allocate for the worst case (every instance its own segment) and trim
    # to the used part at the end.
    segments = {name: np.zeros(n)
                for name in ('pos_score', 'neg_score', 'pos_count', 'neg_count')}
    j = -1
    for i, label in enumerate(labels):
        starts_segment = (i == 0
                          or pos_scores[i-1] != pos_scores[i]
                          or neg_scores[i-1] != neg_scores[i])
        if starts_segment:
            j += 1
            segments['pos_score'][j] = pos_scores[i]
            segments['neg_score'][j] = neg_scores[i]
        if label == 0:
            segments['neg_count'][j] += 1
        else:
            segments['pos_count'][j] += 1

    for name in segments:
        segments[name] = segments[name][0:j+1]
    return segments
def classify(data, trueclass, traindata, final_set,a):
    """Train a linear SVM on selected rows and score it on the remaining rows.

    Args:
        data: 2-D feature array, one row per sample.
        trueclass: 2-D array whose first column holds the true labels.
        traindata: 2-D array; column 0 holds training labels and column 1 the
            row indices of the training samples in ``data``.
        final_set: Integer used in the output file name
            ``parkinsons/foo<final_set>.csv``.
        a: Sorted array of all row identifiers; entries located via
            ``np.searchsorted`` for the training rows are excluded from the
            test set.

    Returns:
        The misclassification rate on the test rows. The test features,
        predictions and true labels are also written to the CSV file.
    """
    X = np.vstack(data[traindata[:,1],:])

    # Positions of the training rows within ``a``; everything else is test data.
    b = [traindata[:,1]]
    C = np.searchsorted(a, b)
    # len() replaces np.alen, which was removed in NumPy 1.23.
    D = np.delete(np.arange(len(a)), C)
    D = np.array(D)
    D = D.reshape(D.size,-1)

    true_labels = np.ravel(np.vstack(trueclass[D[:,0],0]))
    test_data = np.vstack(data[D[:,0],:])

    y = np.ravel(np.vstack(traindata[:,0]))
    clf = svm.SVC(kernel='linear')
    clf.fit(X,y)

    # predict labels for the remaining rows
    labels = clf.predict(test_data)
    predicted_labels = labels.reshape(labels.size,-1)

    np.savetxt("parkinsons/foo%d.csv"%final_set,
               np.concatenate((test_data, predicted_labels,np.vstack(trueclass[D[:,0],0])), axis=1),
               fmt='%0.5f',delimiter=",")

    print(true_labels)
    print(labels)

    misclassify_rate = 1-accuracy_score(true_labels,labels)
    print("Misclassification rate = %f" %misclassify_rate)
    return misclassify_rate
def clean(self, mask=None, verbose=None):
    """
    Given the mask, we replace the actual problematic pixels with the masked
    5x5 median value. This mimics what is done in L.A.Cosmic, but it's a bit
    harder to do in python, as there is no readymade masked median. So for now
    we do a loop...

    Saturated stars, if calculated, are also masked : they are not "cleaned",
    but their pixels are not used for the interpolation.

    We will directly change self.cleanarray. Instead of using the self.mask,
    you can supply your own mask as argument. This might be useful to apply
    this cleaning function iteratively. But for the true L.A.Cosmic, we don't
    use this, i.e. we use the full mask at each iteration.

    Raises RuntimeError if a 5x5 window around a cosmic pixel contains no
    flagged pixel at all, which should be impossible.
    """
    # Identity tests: comparing an array with ``== None`` is elementwise and
    # cannot be used as a condition.
    if verbose is None:
        verbose = self.verbose
    if mask is None:
        mask = self.mask

    if verbose:
        print("Cleaning cosmic affected pixels ...")

    # mask is a 2D boolean array, True meaning "here is a cosmic".
    # List of the indices of cosmic affected pixels :
    cosmicindices = np.argwhere(mask)

    # We put cosmic ray pixels to inf to flag them :
    self.cleanarray[mask] = np.inf

    # Now we want to have a 2 pixel frame of inf padding around our image.
    # The medians are evaluated in this independent copy, skipping the inf.
    w = self.cleanarray.shape[0]
    h = self.cleanarray.shape[1]
    padarray = np.zeros((w + 4, h + 4)) + np.inf
    padarray[2:w + 2, 2:h + 2] = self.cleanarray.copy()

    # In the padded copy we also flag the saturated stars, if available :
    if self.satstars is not None:
        padarray[2:w + 2, 2:h + 2][self.satstars] = np.inf

    # A loop through every cosmic pixel :
    for cosmicpos in cosmicindices:
        x = cosmicpos[0]
        y = cosmicpos[1]
        # remember the shift due to the padding !
        cutout = padarray[x:x + 5, y:y + 5].ravel()
        # Of these 25 pixels we keep the ones that are not flagged.
        goodcutout = cutout[cutout != np.inf]

        if len(goodcutout) >= 25:
            # This never happened, but you never know ...
            raise RuntimeError("Mega error in clean !")
        elif len(goodcutout) > 0:
            replacementvalue = np.median(goodcutout)
        else:
            # No good pixels at all : a huge cosmic, we have to improvise ...
            print("OH NO, I HAVE A HUUUUUUUGE COSMIC !!!!!")
            replacementvalue = self.guessbackgroundlevel()

        # We update the cleanarray, but measure the medians in the padarray,
        # so to not mix things up...
        self.cleanarray[x, y] = replacementvalue

    # That's it.
    if verbose:
        print("Cleaning done")

    # NOTE: the IRAF listing of the LACosmic cleaning step that followed here
    # as a string literal is cut off in this view and is not reproduced.
import numpy as np
from DataHandler import DataHandler

# Smoke test for DataHandler: pull batches of 20000 rows from
# "state_responses.csv" until the handler reports it is done, printing the
# status, the batch sizes and a running batch counter each time.
data_handler = DataHandler(16, 4, "state_responses.csv")
status = True
count = 1
while status:
    status, features, labels = data_handler.get_next_batch(20000)
    print(status)
    # np.alen was removed in NumPy 1.23; atleast_1d keeps its tolerance for
    # scalar (or None) results on the final batch.
    print(len(np.atleast_1d(features)))
    print(len(np.atleast_1d(labels)))
    print(str(count))
    print("\n\n")
    count = count + 1
else: #Add experience to memory memoryS = np.concatenate((memoryS,tempGameS),axis=0) memoryRR = np.concatenate((memoryRR,tempGameRR),axis=0) memoryA = np.concatenate((memoryA,tempGameA),axis=0) memorySA = np.concatenate((memorySA,tempGameSA),axis=0) memoryR = np.concatenate((memoryR,tempGameR),axis=0) memoryW = np.concatenate((memoryW,tempGameW),axis=0) if gameR.mean() > max_game_average : max_game_average = gameR.mean() #if memory is full remove first element if np.alen(memoryR) >= max_memory_len: memorySA = memorySA[gameR.shape[0]:] memoryR = memoryR[gameR.shape[0]:] memoryA = memoryA[gameR.shape[0]:] memoryS = memoryS[gameR.shape[0]:] memoryRR = memoryRR[gameR.shape[0]:] memoryW = memoryW[gameR.shape[0]:] qs=s if done and game > num_initial_observation and not PLAY_GAME: last_game_average = gameR.mean() if game > 3 and game %2 ==0: # train on all memory print("Experience Replay")
def __len__(self):
    """Return the number of entries in ``self.order``."""
    # len() replaces np.alen, which was removed in NumPy 1.23.
    return len(self.order)
def run_case():
    """Run the model on the compact Uber data set and print the HOLL score.

    Loads training, validation and prediction data from
    ``../datasets/compact_uber.mat``, builds the block-diagonal squared
    exponential kernel and inverts it via Cholesky. It then optimises the
    variational parameters with ``mgvm.vi.inference_model_opt`` and prints
    the elapsed time, the optimiser message and the accumulated ``uc.holl``
    score on the validation outputs.
    """
    print('--> Load training set')
    # The file is read once; every array below comes from the same load.
    dataset = sio.loadmat('../datasets/compact_uber.mat')
    x_t = dataset['x_t']  # training inputs
    y_t = dataset['y_t'] - np.pi  # training outputs
    # len() replaces np.alen, which was removed in NumPy 1.23.
    n_data = len(y_t)
    print('--> Load validation set')
    y_v = dataset['y_v'] - np.pi  # validation outputs
    print('--> Load prediction set')
    x_p = dataset['x_p']  # prediction inputs
    n_pred = len(x_p)
    n_totp = n_data + n_pred

    print('--> Calculate kernel')
    # Set kernel parameters
    noise = 1E-1
    params = {
        's2': 250.00,
        'ell2': 5.0E+1**2,
    }
    k1 = np.array([[4.0]])
    k2 = np.array([[7.0]])  # previously 7.0

    y_t = y_t.reshape(n_data, 1)
    x_t = x_t.reshape(n_data, 1)
    x_p = x_p.reshape(n_pred, 1)
    x = np.vstack((x_p, x_t))

    # Calculate kernels
    mat_k_cc = kernels.se_iso(x, x, params)
    mat_k_ss = kernels.se_iso(x, x, params)
    mat_k = np.bmat([[mat_k_cc, np.zeros_like(mat_k_cc)],
                     [np.zeros_like(mat_k_ss), mat_k_ss]])
    mat_k = np.asarray(mat_k)
    mat_k += noise * np.eye(mat_k.shape[0])

    # Find inverse through the Cholesky factor
    mat_ell = la.cholesky(mat_k, lower=True)
    mat_kin = la.solve(mat_ell.T, la.solve(mat_ell, np.eye(mat_ell.shape[0])))

    print('--> Initialising model variables')
    psi_p = (2 * np.random.rand(n_pred, 1) - 1) * 2
    mf_k1 = np.log(np.random.rand(n_totp, 1) * 0.1)
    mf_m1 = (2 * np.random.rand(n_totp, 1) - 1) * 2

    # The three parameter groups are stacked into one vector; these are
    # their boundaries within it.
    n_psi = psi_p.shape[0]
    n_k1 = mf_k1.shape[0]
    n_m1 = mf_m1.shape[0]
    n_var = n_psi + n_k1 + n_m1
    idx = np.arange(0, n_var)

    config = {
        'N_data': n_data,
        'N_pred': n_pred,
        'c_data': np.cos(y_t),
        's_data': np.sin(y_t),
        'c_2data': np.cos(2 * y_t),
        's_2data': np.sin(2 * y_t),
        'Kinv': mat_kin,
        'idx_psi_p': idx[0:n_psi],
        'idx_mf_k1': idx[n_psi:n_psi + n_k1],
        'idx_mf_m1': idx[n_psi + n_k1:n_psi + n_k1 + n_m1],
        'k1': k1,
        'k2': k2,
    }

    xin = np.vstack((psi_p, mf_k1, mf_m1))

    print('--> Starting optimisation')
    t0 = time()
    results = mgvm.vi.inference_model_opt(xin, config)
    tf = time()

    print('Total elapsed time: ' + str(tf - t0) + ' s')
    print(results.message)

    # Keep all values between -pi and pi
    new_psi_p = uc.cfix(results.x[config['idx_psi_p']])

    # Predictions
    print('--> Saving and displaying results')
    holl_score = 0.
    for ii in range(0, n_pred):
        m1_idx = new_psi_p[ii]
        m2_idx = new_psi_p[ii]
        holl_score += uc.holl(y_v[ii], m1_idx, m2_idx, k1, k2)

    print('HOLL score: ' + str(holl_score))
return z if __name__ == "__main__": pkls_path = 'F:/Insula-Gcamp6/record/result_pkl/20_gonogo/after_ex/news/' result_path = 'F:/Insula-Gcamp6/record/result_pkl/20_gonogo/after_ex/news/' filename = 'F:/Insula-Gcamp6/record/result_pkl/20_gonogo/after_ex/news/l_#f1_gonogo80record__r(red)_left(blue)_190707.pkl' #print(filename + ' start!!\r') data = unpickle(filename) odor1 = data['odor1'] odor2 = data['odor2'] print(np.max(odor2), np.min(odor2)) name = 'l_#f1_gonogo80record__r(red)_left(blue)_190707' result_name = result_path + name + '_mean_result' for k in range(np.alen(odor1)): odor1[k, :] = z_score(odor1[k, 0:100], odor1[k, :]) for k in range(np.alen(odor2)): odor2[k, :] = z_score(odor2[k, 0:100], odor2[k, :]) cue1_mean_cal = np.zeros(700) cue1_error_cal = np.zeros(700) cue2_mean_cal = np.zeros(700) cue2_error_cal = np.zeros(700) tr1, _ = np.shape(odor1) tr2, _ = np.shape(odor2) for k in range(700): cue1_mean_cal[k] = np.mean(odor1[:, k]) cue1_error_cal[k] = np.std(odor1[:, k]) / np.sqrt(tr1) cue2_mean_cal[k] = np.mean(odor2[:, k]) cue2_error_cal[k] = np.std(odor2[:, k]) / np.sqrt(tr2)
def encoder_run(spa):
    """Train the location-aware auto-encoder for one sparseness level and score it.

    The function loads the training triples for sparseness ``spa`` (and the
    module-level ``case``), scatters them into a dense matrix, trains or
    loads the per-location auto-encoders, fills in the missing entries and
    finally prints MAE/RMSE on the test triples, both overall and per
    location group.

    Parameters
    ----------
    spa : int
        Sparseness level; it selects the train/test files and the directory
        holding the stored auto-encoder values.

    All results are printed; the function returns ``None``. It relies on
    module-level configuration (``base_path``, ``case``, ``us_shape``,
    ``NoneValue``, ``isUserAutoEncoder``, ``name_list_train``,
    ``name_list_pr``, ...) and on the globals ``W`` and ``S``, which are
    printed at the end.
    """
    train_data = base_path + '/Dataset/ws/train/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test/sparseness%d/test%d.txt' % (spa,
                                                                          case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/loc_ae_values/spa%d' % (spa)
    if isUserAutoEncoder:
        loc_path += '/user_info.txt'
    else:
        loc_path += '/ws_info.txt'

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - now), n))

    print('转换数据到矩阵开始')
    tnow = time.time()
    # Columns of the triples file: row index, column index, observed value.
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('地域信息加载开始')
    tnow = time.time()
    lp = Location.LocationProcesser(loc_path)
    print('地域信息加载结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    R = preprocess(R)
    print('预处理数据结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # A disabled experiment that restricted R to one location's rows/columns
    # used to live here as commented-out code.

    print('训练模型开始')
    tnow = time.time()
    lae = LocBPAE.LocAutoEncoder(lp, oeg, R, hidden_node,
                                 [actfunc1, deactfunc1, actfunc1, deactfunc1],
                                 isUserAutoEncoder)
    if not isUserAutoEncoder:
        R = R.T
    if loadvalues and lae.exitValue(values_path, name_list_train):
        lae.loadValue(values_path, name_list_train)
    if continue_train:
        lae.train_by_names(name_list_train, learn_param, repeat, values_path)
    lae.loadValue(values_path, name_list_pr)

    # Fill in predictions location by location, walking the list backwards so
    # that earlier names overwrite later ones where their indices overlap.
    PR = np.zeros_like(R)
    for loc_name in reversed(name_list_pr):
        nind = lae.getIndexByLocName(loc_name)
        PR[nind, :] = lae.fill(loc_name, R[nind, :])
    if not isUserAutoEncoder:
        R = R.T
        PR = PR.T
    print(R)
    print()
    print(PR)
    print()

    # Undo the scaling and keep the observed values where they exist.
    # NOTE(review): R is scaled before being compared with NoneValue, which is
    # only equivalent to the unscaled test when NoneValue is 0 - confirm.
    PR = PR * 20.0
    R = R * 20.0
    PR = np.where(R != NoneValue, R, PR)
    print(PR)
    print('训练模型开始结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), n))

    print('评测开始')
    tnow = time.time()
    # Per-group accumulators: [member indices, abs-error sum, sq-error sum,
    # sample count] (+ the group name for lp_vect).
    tmp_vect = [[lae.getIndexByLocName(loc_name), 0.0, 0.0, 0]
                for loc_name in name_list_pr]
    lp_vect = [[lae.getIndexByLocName(loc_name), 0.0, 0.0, 0, loc_name]
               for loc_name in lae.loc_aes]
    # Both accumulator lists are updated identically, so walk them together.
    all_vect = tmp_vect + lp_vect

    mae = 0.0
    rmse = 0.0
    cot = 0
    for tc in trdata:
        uid = int(tc[0])
        sid = int(tc[1])
        if tc[2] <= 0:
            continue
        rt = PR[uid, sid]
        tm = abs(rt - tc[2])
        trm = (rt - tc[2])**2
        mae += tm
        rmse += trm
        cot += 1
        tagind = uid if isUserAutoEncoder else sid
        for v in all_vect:
            if tagind not in v[0]:
                continue
            v[1] += tm
            v[2] += trm
            v[3] += 1

    for v in tmp_vect:
        if v[3] == 0:
            continue
        print('pr_bef->\t', v[1])
        v[1] = v[1] / v[3]
        v[2] = np.sqrt(v[2] / v[3])
    for loc_name, v in zip(name_list_pr, tmp_vect):
        print('pr->\t' + loc_name + ':\t', v[1:])
    print()
    for v in lp_vect:
        if v[3] == 0:
            continue
        print('lp_bef->\t\t', v[4], ':\t\t', v[1])
        v[1] = v[1] / v[3]
        v[2] = np.sqrt(v[2] / v[3])
    for v in lp_vect:
        print('lp->:\t\t', v[1:])

    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))
    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' %
          ((time.time() - now), spa, mae, rmse))
    print(W)
    print(S)
def encoder_run(spa):
    """Run the auto-encoder + collaborative-filtering experiment for one sparseness.

    Pipeline: load the training triples, pre-fill the matrix with the
    matrix-factorisation baseline, train (or load) the BP auto-encoder,
    reconstruct the matrix, randomly blank part of it, build the two
    similarity matrices (``W`` over rows, ``SW`` over columns), derive the
    top-k neighbour lists and print MAE/RMSE on the test triples.

    Parameters
    ----------
    spa : int
        Sparseness level; it selects the data files and the cache paths.

    The globals ``loc_tab``, ``W`` and ``S`` are written. Everything else is
    read from module-level configuration. Returns ``None``.
    """
    # Declared up front; `global` applies to the whole function body anyway.
    global loc_tab, W, S

    def fill_similarity(rows, sim, count):
        """Fill `sim` symmetrically with exp(-sqrt(sum(diff**2) / axis1)) per row pair."""
        for i in range(count - 1):
            if i % 50 == 0:
                print('----->step%d' % (i))
            a = rows[i, :]
            for j in range(i + 1, count):
                b = rows[j, :]
                # Only positions known in both rows contribute to the distance.
                deta = np.subtract(a,
                                   b,
                                   out=np.zeros_like(a),
                                   where=((a != NoneValue) & (b != NoneValue)))
                ws = 0.0 + np.sum(deta**2)
                # NOTE(review): the column-wise matrix is also normalised by
                # axis1, exactly as in the original - confirm this is intended.
                sim[i, j] = sim[j, i] = 1.0 / math.exp(np.sqrt(ws / axis1))

    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    SW_path = base_path + '/Dataset/ws/BP_CF_SW_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/ae_values_space/spa%d' % (spa)
    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - now), n))

    print('转换数据到矩阵开始')
    tnow = time.time()
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    # Pre-fill with the matrix-factorisation baseline.
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)
    Preprocess.preprocessMF_rat(R, mf, rat=cmp_rat)
    print(np.sum(R - oriR))
    R /= 20.0
    oriR /= 20.0
    print('预处理数据结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow),
                                                len(loc_tab)))

    print('训练模型开始')
    tnow = time.time()
    tx = us_shape[1] if isUserAutoEncoder else us_shape[0]
    encoder = BPAE.BPAutoEncoder(tx, hidden_node, actfunc1, deactfunc1,
                                 actfunc1, deactfunc1, check_none)
    if not isUserAutoEncoder:
        R = R.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, learn_param, repeat, None)
        encoder.saveValues(values_path)
    PR = encoder.calFill(R)
    print(R)
    print()
    print(PR)
    print()

    # Undo the /20 scaling and keep observed values where they exist.
    PR = PR * 20.0
    R = R * 20
    oriR = oriR * 20
    PR = np.where(R != NoneValue, R, PR)
    print(PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
    print('训练模型开始结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('随机删除开始')
    tnow = time.time()
    Preprocess.random_empty(PR, cut_rate)
    print('随机删除开始,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('计算相似度矩阵开始')
    tnow = time.time()
    oR = R
    R = PR
    fill_similarity(R, W, axis0)
    np.savetxt(W_path, W, '%.30f')

    # Column-wise similarity: reuse the cached file when allowed.
    R = PR.T
    SW = np.zeros((axis1, axis1))
    if os.path.exists(SW_path) and load_SW:
        SW = np.loadtxt(SW_path, np.float64)
    else:
        fill_similarity(R, SW, axis1)
        np.savetxt(SW_path, SW, '%.10f')
    R = PR
    print('计算相似度矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('生成相似列表开始')
    tnow = time.time()
    # Indices of the k (resp. sk) most similar rows, best first.
    S = np.argsort(-W)[:, 0:k]
    SS = np.argsort(-SW)[:, 0:sk]
    print('生成相似列表开始结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), n))

    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    print('oR', oR)
    print('R', R)
    for tc in trdata:
        if tc[2] <= 0:
            continue
        urt = predict(int(tc[0]), int(tc[1]), R, W, S)
        srt = predict_for_s(int(tc[0]), int(tc[1]), R.T, SW, SS)
        # Blend the row-based and column-based predictions.
        rt = cf_w * urt + (1 - cf_w) * srt
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2])**2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))
    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' %
          ((time.time() - now), spa, mae, rmse))
    print(W)
def clean_cosmics(img,
                  mask,
                  badpixmask=None,
                  method='median',
                  boxsize=5,
                  verbose=False,
                  timit=False):
    """
    Replace the flux in pixels flagged as cosmic-ray hits.

    Each flagged pixel is replaced either by the median of the surrounding
    non-flagged pixels or by the value of a spline fitted through them,
    depending on the "method" kwarg. Flagged pixels come from the function
    "identify_cosmics".

    INPUT:
    "img"        - a 2-dim image
    "mask"       - a 2-dim boolean mask, where True identifies pixels affected
                   by cosmic rays (this MUST have the same dimensions as "img"!!!)
    "badpixmask" - a 2-dim mask of otherwise bad pixels (other than cosmics),
                   which are not going to be replaced but are not used in the
                   calculation of the replacement values

    KWARGS:
    "method"  - 'median' : replace with the median of the surrounding
                           non-cosmic-affected pixels
              - 'spline' : replace with a cubic spline interpolation through
                           the surrounding non-cosmic-affected pixels
                           (NOT fully implemented yet)
    "boxsize" - edge length of the box of surrounding pixels; must be odd.
                Default is 5, ie a box of 5x5 pixels centred on the affected
                pixel. An even value triggers an interactive prompt.
    "verbose" - for debugging...
    "timit"   - boolean - do you want to measure execution run time?

    OUTPUT:
    A cleaned copy of "img"; the input image is not modified.

    This routine borrows heavily from the python translation of LACosmic by
    Malte Tewes!

    TODO: implement surface fit method
    """
    if timit:
        start_time = time.time()

    # The prompt function is called raw_input on Python 2 and input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input

    # check that img and mask really do have the same size
    if img.shape != mask.shape:
        print(
            "ERROR: Image and cosmic pixel mask do not have the same dimensions!!!"
        )
        quit()
        return

    # if badpixmask is supplied, check that it has the same dimensions as well
    if (badpixmask is not None) and badpixmask.shape != mask.shape:
        print(
            'WARNING: Bad pixel mask has different dimension than image and cosmic pixel mask!!!'
        )
        choice = None
        while choice is None:
            choice = ask(
                'Do you want to continue without using the bad pixel mask? ["y"/"n"] : '
            )
            if choice in ['n', 'N', 'no', 'No']:
                quit()
                return
            elif choice in ['y', 'Y', 'yes', 'Yes']:
                print('OK, ignoring bad pixel mask...')
                # Actually drop it; the original kept the mismatched mask and
                # failed later when indexing with it.
                badpixmask = None
            else:
                print('Invalid input! Please try again...')
                choice = None

    # check that boxsize is an odd number
    while (boxsize % 2) == 0:
        print(
            'ERROR: size of the box for median/interpolation needs to be an odd number, please try again!'
        )
        boxsize = int(ask('Enter an odd number for the box size: '))
    boxsize = int(boxsize)

    # create a copy of the image which is to be manipulated
    cleaned = img.copy()

    if verbose:
        print("Cleaning cosmic-affected pixels ...")

    # mask is True where there is a cosmic; list those positions once, then
    # flag the affected pixels with inf so they are ignored below.
    cosmicindices = np.argwhere(mask)
    cleaned[mask] = np.inf

    # Surround the image with a frame of inf so boxes around edge pixels need
    # no special casing. Integer division keeps padsize usable as an index.
    w = cleaned.shape[0]
    h = cleaned.shape[1]
    padsize = boxsize // 2
    padarray = np.full((w + boxsize - 1, h + boxsize - 1), np.inf)
    padarray[padsize:w + padsize, padsize:h + padsize] = cleaned

    # Otherwise-bad pixels are excluded from the statistics as well, but they
    # are flagged only in padarray, so they keep their value in the output.
    if badpixmask is not None:
        padarray[padsize:w + padsize, padsize:h + padsize][badpixmask] = np.inf

    backup_value = None  # global fallback, computed only if ever needed

    for x, y in cosmicindices:
        # Thanks to the padding, the box centred on (x, y) starts at (x, y).
        box = padarray[x:x + boxsize, y:y + boxsize]
        cutout = box.ravel()
        goodcutout = cutout[cutout != np.inf]

        if goodcutout.size >= boxsize * boxsize:
            # The centre pixel is always flagged, so this cannot happen.
            raise RuntimeError("Mega error in clean !")
        elif goodcutout.size > 0:
            if method == 'median':
                replacementvalue = np.median(goodcutout)
            elif method == 'spline':
                print('WARNING: THIS IS NOT FULLY IMPLEMENTED YET!!!!!')
                goodbox = np.argwhere(box != np.inf)
                xx = goodbox[:, 1]
                yy = goodbox[:, 0]
                zz = goodcutout
                spline_func = ipol.interp2d(xx, yy, zz, kind='cubic')
                replacementvalue = spline_func(padsize, padsize)
            else:
                raise RuntimeError('invalid kwarg for "method" !')
        else:
            # No good pixel in the whole box: fall back to the median of all
            # good pixels of the image.
            print(
                "WARNING: Huge cosmic ray encounterd - it fills the entire (" +
                str(boxsize) + "x" + str(boxsize) +
                ")-pixel cutout! Using backup value...")
            if backup_value is None:
                backup_value = np.median(padarray[padarray != np.inf])
            replacementvalue = backup_value

        # Values are read from padarray and written to cleaned, so the result
        # does not depend on the order in which the cosmics are treated.
        cleaned[x, y] = replacementvalue

    if verbose:
        print("Cleaning done!")

    if timit:
        print('Time elapsed: ' + str(np.round(time.time() - start_time, 1)) +
              ' seconds')

    # return the cleaned image
    return cleaned
def JCAMP_calc_xsec(jcamp_dict,
                    wavemin=None,
                    wavemax=None,
                    skip_nonquant=True,
                    debug=False):
    '''
    Taking as input a JDX file, extract the spectrum information and transform the absorption spectrum
    from existing units to absorption cross-section.

    This function also corrects for unphysical data (such as negative transmittance values, or
    transmission above 1.0), and calculates absorbance if transmittance given. Instead of a return
    value, the function inserts the information into the input dictionary ("wavelengths" in
    micrometers, "wavenumbers" in 1/cm, and "xsec").

    Note that the conversion assumes that the measurements were collected for gas at a temperature of
    296K (23 degC). The "y" array of the dictionary is corrected in place.

    Parameters
    ----------
    jcamp_dict : dict
        A JCAMP spectrum dictionary.
    wavemin : float, optional
        The shortest wavelength in the spectrum to limit the calculation to. (Currently not used.)
    wavemax : float, optional
        The longest wavelength in the spectrum to limit the calculation to. (Currently not used.)
    skip_nonquant: bool
        If True then return a dictionary of "None" placeholders if the spectrum is missing \
        quantitative data (path length or partial pressure). If False, then try to fill in missing \
        quantitative values with defaults.
    debug : bool
        If True, print a message whenever a default value is substituted.

    Returns
    -------
    None, except when `skip_nonquant` is True and quantitative data is missing, in which case
    ``{'info': None, 'x': None, 'xsec': None, 'y': None}`` is returned.

    Raises
    ------
    ValueError
        If the x or y units are not recognized, or if the number of points differs from "npoints".
    '''

    x = jcamp_dict['x']
    y = jcamp_dict['y']

    T = 296.0  ## the temperature (23 degC) used by NIST when collecting spectra
    R = 1.0355E-25  ## the constant for converting data (includes the gas constant)

    ## Note: normally when we convert from wavenumber to wavelength units, the ordinate must be nonuniformly
    ## rescaled in order to compensate. But this is only true if we resample the abscissa to a uniform sampling
    ## grid. In this case here, we keep the sampling grid nonuniform in wavelength space, such that each digital
    ## bin retains its proportionality to energy, which is what we want.
    xunits = jcamp_dict['xunits'].lower()
    if xunits in ('1/cm', 'cm-1', 'cm^-1'):
        jcamp_dict['wavenumbers'] = array(x)  ## note that array() always performs a copy
        x = 10000.0 / x
        jcamp_dict['wavelengths'] = x
    elif xunits in ('micrometers', 'um', 'wavelength (um)'):
        jcamp_dict['wavelengths'] = x
        jcamp_dict['wavenumbers'] = 10000.0 / x
    elif xunits in ('nanometers', 'nm', 'wavelength (nm)'):
        ## 1000 nm = 1 um, so nanometers are *divided* by 1000 to get micrometers.
        x = x / 1000.0
        jcamp_dict['wavelengths'] = x
        jcamp_dict['wavenumbers'] = 10000.0 / x
    else:
        raise ValueError(
            'Don\'t know how to convert the spectrum\'s x units ("' +
            jcamp_dict['xunits'] + '") to micrometers.')

    ## Correct for any unphysical negative values.
    y[y < 0.0] = 0.0

    ## Make sure "y" refers to absorbance.
    yunits = jcamp_dict['yunits'].lower()
    if yunits == 'transmittance':
        ## If in transmittance, then any y > 1.0 are unphysical.
        y[y > 1.0] = 1.0

        ## Convert to absorbance; zero transmittance has no finite absorbance.
        okay = (y > 0.0)
        y[okay] = log10(1.0 / y[okay])
        y[logical_not(okay)] = nan

        jcamp_dict['absorbance'] = y
    elif yunits == 'absorbance':
        pass
    elif yunits == '(micromol/mol)-1m-1 (base 10)':
        ## Already quantitative: rescale directly and stop here.
        jcamp_dict['yunits'] = 'xsec (m^2))'
        jcamp_dict['xsec'] = y / 2.687e19
        return
    else:
        raise ValueError(
            'Don\'t know how to convert the spectrum\'s y units ("' +
            jcamp_dict['yunits'] + '") to absorbance.')

    ## Determine the effective path length "ell" of the measurement chamber, in meters.
    if 'path length' in jcamp_dict:
        (val, unit) = jcamp_dict['path length'].lower().split()[0:2]
        if unit == 'cm':
            ell = float(val) / 100.0
        elif unit == 'm':
            ell = float(val)
        elif unit == 'mm':
            ell = float(val) / 1000.0
        else:
            ell = 0.1
    else:
        if skip_nonquant:
            return ({'info': None, 'x': None, 'xsec': None, 'y': None})
        ell = 0.1
        if debug:
            print(
                'Path length variable not found. Using 0.1m as a default ...')

    assert (len(x) == len(y))

    if 'npoints' in jcamp_dict:
        if len(x) != jcamp_dict['npoints']:
            npts_retrieved = str(len(x))
            msg = '"' + jcamp_dict['title'] + '": Number of data points retrieved (' + npts_retrieved + \
                  ') does not equal the expected length (npoints = ' + str(jcamp_dict['npoints']) + ')!'
            raise ValueError(msg)

    ## For each gas, manually define the pressure "p" at which the measurement was taken (in units of mmHg).
    ## These values are obtained from the NIST Infrared spectrum database, which for some reason did not
    ## put the partial pressure information into the header.
    if 'partial_pressure' in jcamp_dict:
        p = float(jcamp_dict['partial_pressure'].split()[0])
        p_units = jcamp_dict['partial_pressure'].split()[1]
        if p_units.lower() == 'mmhg':
            pass
        elif p_units.lower() == 'ppm':
            p = p * 759.8 * 1.0E-6  ## scale PPM units at atmospheric pressure to partial pressure in mmHg
    else:
        if debug:
            print('Partial pressure variable value for ' +
                  jcamp_dict['title'] +
                  ' is missing. Using the default p = 150.0 mmHg ...')
        if skip_nonquant:
            return ({'info': None, 'x': None, 'xsec': None, 'y': None})
        p = 150.0

    ## Convert the absorbance units to cross-section in meters squared per molecule.
    xsec = y * T * R / (p * ell)

    ## Add the "xsec" values to the data dictionary.
    jcamp_dict['xsec'] = xsec

    return
# Stratified cross-validation over every dataset: for each Monte Carlo
# repetition and each fold, fit one density model per class on that class's
# training samples.
n_folds = 10
for i, (name, dataset) in enumerate(mldata.datasets.items()):
    print('Dataset number {}'.format(i))
    if name == 'MNIST':
        # TODO get a stratified portion of the validation set [0:60000]
        dataset._data = dataset._data[-10000:]
        dataset._target = dataset._target[-10000:]
    mldata.sumarize_datasets(name)
    for mc in np.arange(mc_iterations):
        skf = StratifiedKFold(dataset.target, n_folds=n_folds, shuffle=True)
        test_folds = skf.test_folds
        for test_fold in np.arange(n_folds):
            x_train, y_train, x_test, y_test = separate_sets(
                dataset.data, dataset.target, test_fold, test_folds)
            n_training = len(y_train)
            for actual_class in dataset.classes:
                tr_class = x_train[y_train == actual_class, :]
                t_labels = (y_test == actual_class).astype(int)
                n_class = len(tr_class)
                # Explicit float division: a plain `/` truncates to 0 under
                # Python 2 integer semantics.
                prior = n_class / float(n_training)
                # Skip classes with too few training samples or with no
                # positive example in the test fold.
                if n_class > 1 and not all(t_labels == 0):
                    # Number of features, capped at the number of samples.
                    n_c = min(tr_class.shape[1], n_class)

                    # Train a Density estimator
                    model_mymvn = MyMultivariateNormal(covariance_type='diag')
                    model_mymvn.fit(tr_class)

                    model_mvn = MultivariateNormal(covariance_type='diag')
                    model_mvn.fit(tr_class)
gameR[(gameR.shape[0] - 1) - i][0]) if memoryR.shape[0] == 1: memorySA = gameSA memoryR = gameR memoryA = gameA memoryS = gameS else: #Add experience to memory memorySA = np.concatenate((memorySA, gameSA), axis=0) memoryS = np.concatenate((memoryS, gameS), axis=0) memoryR = np.concatenate((memoryR, gameR), axis=0) memoryA = np.concatenate((memoryA, gameA), axis=0) #if memory is full remove first element if np.alen(memorySA) >= max_memory_len: memorySA = memorySA[gameR.shape[0]:] memoryR = memoryR[gameR.shape[0]:] memoryA = memoryA[gameR.shape[0]:] memoryS = memoryS[gameR.shape[0]:] #Update the states qs = s #Retrain every X failures after num_initial_observation if done and game >= num_initial_observation: if game % 5 == 0: print("Training game# ", game, "momory size", memorySA.shape[0]) #training Reward predictor model
def run_cf(spa):
    """Run the plain collaborative-filtering experiment for one sparseness level.

    Loads the training triples and the location table, builds the rating
    matrix (transposed when ``isICF`` is set), computes or loads the pairwise
    similarity matrix ``W``, derives the top-k neighbour lists and their
    similarity sums, and prints MAE/RMSE over the test triples.

    Parameters
    ----------
    spa : int
        Sparseness level; it selects the data files and the ``W`` cache file.

    The globals ``R``, ``W``, ``S``, ``sumS`` and ``loc_tab`` are written.
    Returns ``None``.
    """
    global R, W, S, sumS, loc_tab

    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'

    print('开始实验,isICF=%s,稀疏度=%d,case=%d' % (isICF, spa, case))
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - now), n))

    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    # Report the size of the location table; the original re-printed the
    # number of training rows here.
    n = len(loc_tab)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), n))

    print('转换数据到矩阵开始')
    tnow = time.time()
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    if isICF:
        R = R.T
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('计算相似度矩阵开始')
    tnow = time.time()
    if readWcache and os.path.exists(W_path):
        W = np.loadtxt(W_path, np.float128)
    else:
        # The measure is symmetric, so each pair is computed once and mirrored.
        for i in range(axis0):
            if i % 50 == 0:
                print('----->step%d' % (i))
            a = R[i, :]
            alog = a != NoneValue
            for j in range(i + 1, axis0):
                b = R[j, :]
                blog = b != NoneValue
                # Only positions known in both rows contribute.
                delta = np.subtract(a, b, out=np.zeros_like(a), where=alog & blog)
                ws = np.sum(delta**2)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws))
        # A row must never be selected as its own neighbour.
        diag = np.arange(axis0)
        W[diag, diag] = 0
        np.savetxt(W_path, W, '%.30f')
    print('计算相似度矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('生成相似列表开始')
    tnow = time.time()
    # Indices of the k most similar rows, best first, and their similarity sum.
    S = np.argsort(-W)[:, 0:k]
    for i in range(axis0):
        sumS[i] = np.sum(W[i, S[i]])
    print('生成相似列表开始结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), n))

    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    for tc in trdata:
        if tc[2] <= 0:
            continue
        rt = predict(int(tc[0]), int(tc[1]))
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2])**2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))
    print('实验结束,总耗时 %.2f秒,isICF=%s,稀疏度=%d,MAE=%.3f,RMSE=%.3f\n' % ((time.time() - now), isICF, spa, mae, rmse))
    print('----------------------------------------------------------\n')
    print(W)
    print(S)
def bspleval(x, knots, coeffs, order, debug=False):
    '''
    Evaluate a B-spline at a set of points.

    Parameters
    ----------
    x : list or ndarray
        The set of points at which to evaluate the spline.
    knots : list or ndarray
        The set of knots used to define the spline.
    coeffs : list of ndarray
        The set of spline coefficients.
    order : int
        The order of the spline.
    debug : bool
        If True, print the inputs and plot the basis functions and the curve.

    Returns
    -------
    y : ndarray
        The value of the spline at each point in x.
    '''

    k = order
    # Convert up front so that plain lists work in the comparisons below.
    t = np.asarray(knots)
    x = np.atleast_1d(x)
    m = len(t)
    npts = len(x)
    if npts == 0:
        # Nothing to evaluate; avoids indexing the last point below.
        return np.zeros(0)

    B = np.zeros((m - 1, k + 1, npts))

    if debug:
        print('k=%i, m=%i, npts=%i' % (k, m, npts))
        print('t=', t)
        print('coeffs=', coeffs)

    ## Create the zero-order B-spline basis functions.
    for i in range(m - 1):
        B[i, 0, :] = np.float64(np.logical_and(x >= t[i], x < t[i + 1]))

    if (k == 0):
        ## The half-open intervals exclude the final point; force it on.
        B[m - 2, 0, -1] = 1.0

    ## Next iteratively define the higher-order basis functions, working from lower order to higher.
    for j in range(1, k + 1):
        for i in range(m - j - 1):
            ## A zero-width knot span contributes nothing (avoids 0/0).
            left_span = t[i + j] - t[i]
            if (left_span == 0.0):
                first_term = 0.0
            else:
                first_term = ((x - t[i]) / left_span) * B[i, j - 1, :]

            right_span = t[i + j + 1] - t[i + 1]
            if (right_span == 0.0):
                second_term = 0.0
            else:
                second_term = ((t[i + j + 1] - x) / right_span) * B[i + 1, j - 1, :]

            B[i, j, :] = first_term + second_term
        B[m - j - 2, j, -1] = 1.0

    if debug:
        plt.figure()
        for i in range(m - 1):
            plt.plot(x, B[i, k, :])
        plt.title('B-spline basis functions')

    ## Evaluate the spline by multiplying the coefficients with the highest-order basis functions.
    y = np.zeros(npts)
    for i in range(m - k - 1):
        y += coeffs[i] * B[i, k, :]

    if debug:
        plt.figure()
        plt.plot(x, y)
        plt.title('spline curve')
        plt.show()

    return (y)
def div_by_behav(odor1, odor2, air, pump, lick, delay=2):
    """Cut the behaviour traces into fixed-length windows around each odor onset.

    An onset is a sample where an odor signal steps up by exactly 1 and is
    still 1 twenty samples later. Odor 1 is checked before odor 2. For every
    onset a 1400-sample window starting ``delay * 100`` samples before the
    onset is copied from ``lick``, ``pump`` and ``air``.

    Parameters
    ----------
    odor1, odor2 : sequence
        Odor valve signals (0/1).
    air, pump, lick : sequence
        Behaviour traces to be cut.
    delay : int, optional
        Pre-onset part of each window, in units of 100 samples.

    Returns
    -------
    all_lick, all_pump, all_airpuff : ndarray, shape (trials, 1400)
        One row per counted onset. Parts of a window that fall outside the
        recording stay zero.
    odor_list : list of int
        The odor (1 or 2) of every counted onset, in order.

    Notes
    -----
    After an extracted onset the scan jumps ahead 1000 samples, so an onset
    that follows another one by less than that is counted in ``odor_list``
    but not extracted; its row stays zero and a warning is printed. (The
    original looped forever in that situation.)
    """
    window = 1400      # samples stored per trial
    refractory = 1000  # samples skipped after an extracted onset
    confirm = 20       # the odor must still be on this long after the edge
    pre = delay * 100  # samples kept before the onset

    odor1_changed = np.diff(odor1)
    odor2_changed = np.diff(odor2)
    n_steps = len(odor1_changed)

    def rising(changed, signal, idx):
        """True if `signal` steps up at idx and is still on `confirm` samples later."""
        probe = idx + confirm
        # An edge too close to the end cannot be confirmed and is ignored.
        return (idx < len(changed) and changed[idx] == 1
                and probe < len(signal) and signal[probe] == 1)

    def onset_odor(idx):
        """Return 1 or 2 for an onset of that odor at idx, otherwise 0."""
        if rising(odor1_changed, odor1, idx):
            return 1
        if rising(odor2_changed, odor2, idx):
            return 2
        return 0

    # First pass: count every onset so the output arrays can be allocated.
    odor_list = [odor for odor in map(onset_odor, range(n_steps)) if odor]
    trials = len(odor_list)
    print(trials, 'trials')
    print(max(odor_list) if odor_list else 0, odor_list.count(1),
          odor_list.count(2))

    all_lick = np.zeros((trials, window))
    all_pump = np.zeros((trials, window))
    all_airpuff = np.zeros((trials, window))
    targets = ((all_lick, lick), (all_pump, pump), (all_airpuff, air))

    # Second pass: copy the window around each onset.
    now_trial = -1
    i = 0
    while i < n_steps and now_trial + 1 < trials:
        odor = onset_odor(i)
        if not odor:
            i += 1
            continue
        now_trial += 1
        start = i - pre
        stop = start + window
        # A window starting before the recording is shifted right in the row
        # instead of letting the negative index wrap around to the end.
        offset = max(-start, 0)
        for dest, src in targets:
            seg = src[max(start, 0):stop]
            dest[now_trial, offset:offset + len(seg)] = seg
        if odor_list[now_trial] != odor:
            print(odor_list[now_trial], odor, ' diff!!!!')
        i += refractory

    if now_trial + 1 < trials:
        print('WARNING: only', now_trial + 1, 'of', trials,
              'trials extracted; onsets closer than', refractory,
              'samples to the previous one are skipped')
    return all_lick, all_pump, all_airpuff, odor_list
(eigval,eigvect) = np.linalg.eigh(laplasian) eigval = np.array(eigval) eigval = eigval.astype(int) ei = np.argsort(eigval) # Saving the eigen values into a text file. np.savetxt("eigenvalues.csv", eigval, delimiter=" ") G = nx.from_numpy_matrix(eigvect) nx.draw_networkx(G,with_labels=True) firstkmat = eigvect[ei[::-1][0:4]] firstkmat = np.transpose(firstkmat) # Clustering using kmeans++ and the number of clusters are chosen from the eigen value plot. kmeans = KMeans(n_clusters=np.alen(firstkmat[0]), init='k-means++', max_iter=100, precompute_distances=True) kmeans.fit(firstkmat) labels = kmeans.predict(firstkmat) clusters_dict = {} def get_key(val): for key, value in username_list_dict.items(): if val == value: return key for i in range(0,len(labels)): ls = clusters_dict[labels[i]] if labels[i] not in clusters_dict: clusters_dict[labels[i]]=[] ls = [] if not len(clusters_dict.get(labels[i]))==0 :
''' #Add experience to memory memorySA = np.concatenate((memorySA,gameSA),axis=0) #memoryS = np.concatenate((memoryS,gameS),axis=0) memoryR = np.concatenate((memoryR,gameR),axis=0) #memoryA = np.concatenate((memoryA,gameA),axis=0) ''' if memoryR.shape[0] == 1: memoryR = tempGameR memorySA = tempGameSA else: memorySA = np.concatenate((memorySA, tempGameSA), axis=0) memoryR = np.concatenate((memoryR, tempGameR), axis=0) #if memory is full remove first element if np.alen(memoryR) >= max_memory_len: memoryR = memoryR[np.alen(gameR):] memorySA = memorySA[np.alen(gameR):] #print("memory full. mem len ", np.alen(memoryX)) #for l in range(np.alen(gameR)): #memorySA = np.delete(memorySA, 0, axis=0) #memoryR = np.delete(memoryR, 0, axis=0) #memoryA = np.delete(memoryA, 0, axis=0) #memoryS = np.delete(memoryS, 0, axis=0) #Update the states previous_state = np.copy(qs) qs = s #Retrain every X failures after num_initial_observation if done and game >= num_initial_observation:
if i == (game_y.shape[0] - 1): logging.info( 'Training game: #%s steps: %s last reward: %s end score: %s', cur_game, step, reward, game_y[game_epoch][0]) # Memory is experience if memory_x.shape[0] == 1: memory_x = game_x memory_y = game_y # Add experience to memory else: memory_x = np.concatenate((memory_x, game_x), axis=0) memory_y = np.concatenate((memory_y, game_y), axis=0) # Clear memory if full if np.alen(memory_x) >= max_memory: logging.debug('Memory filled at %s. Clearing memory...', np.alen(memory_x)) for exp in range(np.alen(game_x)): memory_x = np.delete(memory_x, 0, axis=0) memory_y = np.delete(memory_y, 0, axis=0) # Update states q_state = state # Retrain every 10 sets if done: if cur_game % training_sets == 0: logging.info('Training game: #%s Memory: %s', cur_game, memory_x.shape[0]) history = model.fit(memory_x,
print("Xtrain")
print(Xtrain)
print(Xtrain.shape)

# Everything after the first 300 samples is held out for testing.
Xtest = X[300:]
print("Xtest")
print(Xtest)
print(Xtest.shape)

T = readExcel(excelfile)
print("T shape:")
print(T.shape)

# convert Y = 1 and N = 0;
# One vectorised comparison replaces the per-cell loop: 'Y' becomes 1 and
# everything else 0. The object dtype forces an element-wise comparison.
TB = (np.asarray(T, dtype=object) == 'Y').astype(int)

# Separate training and testing class labels
Ttrain = TB[:300]
Ttest = TB[300:]

# Prepare the training data
TtrainSmile = Ttrain[:, 0]
TtrainBlink = Ttrain[:, 1]
TtrainGood = Ttrain[:, 2]
print("Traing good:")
def run(args):
    """Validate the command-line arguments, then process every HDF5 stack.

    The input files come from ``args.files`` or, failing that, from the list
    file ``args.listfile`` (one path per line).  Each file must exist, be a
    regular file and end in ``.h5``/``.hdf5``.  For every input an output base
    name is derived (optionally redirected via ``args.destdir`` or rewritten
    via ``args.substitute``, given as ``replace:with``).  Files whose outputs
    (one per entry of ``SUFFIXES``) already exist are skipped unless
    ``args.overwrite`` is set.

    For each remaining file the stack is loaded from the ``'data'`` dataset,
    frames whose shift distance exceeds ``args.shift_threshold`` are excluded
    (unless ``args.no_shifts``), and the std image, ROIs and traces are
    computed and saved.

    Exits the process with status 1 on any validation error and with status 0
    when there is nothing to do.  Errors raised while processing a single file
    are printed and do not stop the remaining files.
    """
    if args.files:
        files = args.files
    elif args.listfile:
        with open(args.listfile) as f:
            files = list(map(str.strip, f.readlines()))
    else:
        parser.print_usage()
        sys.exit(0)

    # --- validate the input files -------------------------------------
    failed = False
    for f in files:
        ext = os.path.splitext(f)
        if not os.path.exists(f):
            eprint('Error: %s does not exist.' % f)
            failed = True
        elif not (ext[1] == '.h5' or ext[1] == '.hdf5'):
            eprint('Error: %s is not a hdf5 file.' % f)
            failed = True
        elif not os.path.isfile(f):
            eprint('Error: %s is not a file.' % f)
            failed = True
    if failed:
        sys.exit(1)

    # --- validate the output options ----------------------------------
    sub = None
    if args.substitute:
        split = args.substitute.split(':')
        if len(split) != 2:
            eprint('Error: invalid subsitution syntax "%s". Syntax is "replace:with".' % args.substitute)
            failed = True
        sub = split
    if args.destdir:
        if not os.path.isdir(args.destdir):
            eprint('Error: destination dir "%s" does not exist.' % args.destdir)
            failed = True
    if failed:
        sys.exit(1)

    # --- derive the shift file and output base name for each input ----
    bases = []
    shifts_fns = []
    for f in files:
        base_name = os.path.splitext(f)[0]
        remove_suffix = '_aligned'
        if base_name.endswith(remove_suffix):
            base_name = base_name[:-len(remove_suffix)]
        shifts_fn = base_name + '_shifts.npy'
        shifts_fns.append(shifts_fn)
        if not args.no_shifts and not os.path.exists(shifts_fn):
            eprint('Error: "%s" does not exist.' % shifts_fn)
            failed = True
        if args.destdir:
            # destdir takes precedence over a substitution
            base_name = os.path.join(args.destdir, os.path.basename(base_name))
        elif sub:
            if base_name.find(sub[0]) == -1:
                eprint('Error: filename "%s" does not contain "%s" for substitution.' % (f, sub[0]))
                failed = True
            base_name = base_name.replace(*sub)
        bases.append(base_name)
    if failed:
        sys.exit(1)

    # --- decide which files actually need processing -------------------
    necessary = []
    if not args.no_verbose:
        print('Arguments look good. This will be processed:')
    for f, b in zip(files, bases):
        this_necessary = not all([os.path.isfile(b + s) for s in SUFFIXES]) or args.overwrite
        necessary.append(this_necessary)
        if not args.no_verbose:
            print(('' if this_necessary else '[SKIP] ') + f)
            for suffix in SUFFIXES:
                print((' -> ' if this_necessary else '[ALREADY EXISTS] ') + '%s%s' % (b, suffix))
            print()

    necessary_files = [(f, shifts_fn, b)
                       for f, shifts_fn, b, n in zip(files, shifts_fns, bases, necessary)
                       if n]
    if not necessary_files:
        print('Nothing to process.')
        sys.exit(0)

    template = segmentation.load_template()
    for f, shifts_fn, b in prog_percent(necessary_files):
        print(f)
        print('='*len(f))
        # Best effort: a failure on one file is reported and the loop goes on.
        try:
            base = b
            if not args.no_shifts:
                print('Loading shifts...')
                shifts = np.load(shifts_fn)
                # Euclidean length of each per-frame shift vector
                shift_dists = np.sqrt(np.sum(np.square(shifts), axis=1))

            print('Loading stack...')
            with h5py.File(f, "r") as f_:
                # List all groups
                print("Keys: %s" % f_.keys())
                start = time.time()
                stack = f_['data'][()]
                end = time.time()
                print('Time to load file: ', end - start)

            print('Computing std...')
            # len() replaces np.alen, which was removed in NumPy 1.23.
            n_frames = len(stack)
            if not args.no_shifts:
                invalid_frames = [i for i in np.arange(n_frames)
                                  if shift_dists[i] > args.shift_threshold]
            else:
                invalid_frames = []
            valid_frames = segmentation.valid_frames(invalid_frames, length=n_frames)
            std = segmentation.std(stack, valid_frames=valid_frames)
            print('Saving std...')
            io.save(base + STD_DEV_SUFFIX, std, spacing=io.SPACING_JAKOB)

            print('Finding rois...')
            rois = segmentation.find_rois_template(std, template=template)
            print('Saving rois...')
            np.save(base + ROIS_SUFFIX, rois)

            print('Getting traces...')
            traces = segmentation.get_traces(stack, rois, use_radius=5)
            print('Saving traces...')
            np.save(base + TRACES_SUFFIX, traces)
        except Exception as e:
            print('An exception occured:')
            print(e)
def div_by_cue(cal_time, cal_data, cue1, cue2, cue_hz, delay=2):
    """Cut ``cal_data`` into fixed-length trials aligned to cue onsets.

    A trial starts wherever ``np.diff(cue1)`` or ``np.diff(cue2)`` equals 1.
    The cue clock advances by ``1 / cue_hz`` per cue sample; the matching
    ``cal_data`` index ``k`` is searched in a +/-20 sample window around
    ``round(i / 20)`` as the sample whose cumulative ``cal_time`` interval
    contains the current cue time.  Each trial row holds 700 samples:
    ``delay * 50`` samples before ``k`` followed by the samples from ``k`` on.

    Parameters
    ----------
    cal_time : sequence of float
        Per-sample durations of ``cal_data`` (summed to obtain elapsed time).
    cal_data : 1-d array
        Signal to be divided into trials.
    cue1, cue2 : 1-d arrays
        Cue traces; a step of +1 between consecutive samples marks an onset.
    cue_hz : number
        Sampling rate of the cue traces.
    delay : number, optional
        Pre-onset window, in units of 50 samples.

    Returns
    -------
    cue1_cal, cue2_cal : ndarray, shape (n_trials, 700)
        Trials for each cue.
    all_cue_cal : ndarray, shape (n_trials_total, 700)
        Trials of both cues in order of occurrence.
    cue_order : list of int
        1 or 2 for each stored trial, in order of occurrence.
    """
    cue_interval = 1 / cue_hz
    t = 0
    delay = delay * 50
    change_cue1 = np.diff(cue1)
    # Every trial contributes one rising and one falling edge, hence the / 2.
    cue1_trialnum = np.sum(np.abs(change_cue1) / 2)
    change_cue2 = np.diff(cue2)
    cue2_trialnum = np.sum(np.abs(change_cue2) / 2)
    print("tn", cue1_trialnum, cue2_trialnum)
    all_cue_cal = np.zeros((int(cue1_trialnum + cue2_trialnum), 700))
    cue1_cal = np.zeros((int(cue1_trialnum), 700))
    cue2_cal = np.zeros((int(cue2_trialnum), 700))
    all_cue = 0
    now_cue1 = 0
    now_cue2 = 0
    cue_order = []

    # len() replaces np.alen, which was removed in NumPy 1.23.
    for i in range(len(change_cue1)):
        if change_cue1[i] == 1:
            trial_start = round(i / 20)
            for k in range(trial_start - 20, trial_start + 20):
                if np.sum(cal_time[0:k]) <= t and np.sum(cal_time[0:k + 1]) > t:
                    if len(cal_data[k:k + 600]) < 600:
                        # Not enough data left for this trial: drop its row.
                        cue1_cal = np.delete(cue1_cal, now_cue1, 0)
                        all_cue_cal = np.delete(all_cue_cal, all_cue, 0)
                        print('cue1' + str(now_cue1))
                    else:
                        cue1_cal[now_cue1, 0:delay] = cal_data[k - delay:k]
                        cue1_cal[now_cue1, delay:] = cal_data[k:k + (700 - delay)]
                        all_cue_cal[all_cue, :] = cue1_cal[now_cue1, :]
                        cue_order.append(1)
                        now_cue1 += 1
                        all_cue += 1
                    break
        elif change_cue2[i] == 1:
            trial_start = round(i / 20)
            for k in range(trial_start - 20, trial_start + 20):
                if np.sum(cal_time[0:k]) <= t and np.sum(cal_time[0:k + 1]) > t:
                    if len(cal_data[k:k + 600]) < 600:
                        # Not enough data left for this trial: drop its row.
                        cue2_cal = np.delete(cue2_cal, now_cue2, 0)
                        all_cue_cal = np.delete(all_cue_cal, all_cue, 0)
                        print('cue2' + str(now_cue2))
                    else:
                        cue2_cal[now_cue2, 0:delay] = cal_data[k - delay:k]
                        cue2_cal[now_cue2, delay:] = cal_data[k:k + (700 - delay)]
                        all_cue_cal[all_cue, :] = cue2_cal[now_cue2, :]
                        cue_order.append(2)
                        now_cue2 += 1
                        all_cue += 1
                    break
        t += cue_interval

    # NOTE(review): only the FIRST all-zero row is removed (the loop stops
    # right after deleting it) -- confirm whether every empty trial should go.
    for i in range(len(all_cue_cal)):
        if np.max(all_cue_cal[i, :]) == 0:
            all_cue_cal = np.delete(all_cue_cal, i, 0)
            print('empty trial!', i)
            break
    print(np.shape(all_cue_cal))
    return cue1_cal, cue2_cal, all_cue_cal, cue_order
def actor_experience_replay():
    """Retrain the action predictor on the better-than-average experiences.

    Works on the module-level replay memories ``memoryR`` (rewards),
    ``memoryS`` (states), ``memoryA`` (actions) and ``memoryW`` (weights):

    1. keep only the samples whose reward is above the mean reward;
    2. draw ``min(experience_replay_size, n_kept)`` of them at random
       (with replacement);
    3. give each drawn sample a weight that grows with its distance above
       the mean reward and accept it with that probability;
    4. fit ``action_predictor_model`` on the accepted state/action pairs,
       if there are any.
    """
    tR = memoryR
    tX = memoryS
    tY = memoryA
    tW = memoryW

    target = tR.mean()

    # Keep only experiences whose reward beats the mean.
    # (len() replaces np.alen, which was removed in NumPy 1.23.)
    train_C = np.arange(len(tR))
    train_C = train_C[tR.flatten() > target]
    tX = tX[train_C, :]
    tY = tY[train_C, :]
    tW = tW[train_C, :]
    tR = tR[train_C, :]

    # Random mini-sample (with replacement) of the kept experiences.
    train_A = np.random.randint(tY.shape[0],
                                size=int(min(experience_replay_size, len(tR))))
    tX = tX[train_A, :]
    tY = tY[train_A, :]
    tW = tW[train_A, :]
    tR = tR[train_A, :]

    train_B = np.arange(len(tR))

    # Both training sets start with a dummy zero row so np.vstack always has
    # something to stack onto; the row is stripped again before training.
    tX_train = np.zeros(shape=(1, num_env_variables))
    tY_train = np.zeros(shape=(1, num_env_actions))
    for i in range(len(train_B)):
        # predictTotalRewards cannot be used here unless the Q-model has been
        # trained, so the weight is derived from the reward alone.
        d = math.fabs(memoryR.max() - target)
        tW[i] = math.fabs(tR[i] - (target + 0.000000000005)) / d
        tW[i] = math.exp(1 - (1 / tW[i]**2))
        # Stochastic acceptance: the larger the weight, the likelier the
        # sample makes it into the training set.
        if tW[i] > np.random.rand(1):
            tX_train = np.vstack((tX_train, tX[i]))
            tY_train = np.vstack((tY_train, tY[i]))

    print("%8d were better results than pr" % len(tX_train))
    # Remove the dummy first row before training.
    tX_train = tX_train[1:]
    tY_train = tY_train[1:]
    print("%8d were better After removing first element" % len(tX_train))
    if len(tX_train) > 0:
        action_predictor_model.fit(tX_train, tY_train,
                                   batch_size=mini_batch,
                                   nb_epoch=training_epochs, verbose=0)
memoryW = tempGameW else: #Add experience to memory memoryS = np.concatenate((memoryS, tempGameS), axis=0) memoryRR = np.concatenate((memoryRR, tempGameRR), axis=0) memoryA = np.concatenate((memoryA, tempGameA), axis=0) memorySA = np.concatenate((memorySA, tempGameSA), axis=0) memoryR = np.concatenate((memoryR, tempGameR), axis=0) memoryW = np.concatenate((memoryW, tempGameW), axis=0) if gameR.mean() > max_game_average: max_game_average = gameR.mean() #if memory is full remove first element if np.alen(memoryR) >= max_memory_len: memorySA = memorySA[gameR.shape[0]:] memoryR = memoryR[gameR.shape[0]:] memoryA = memoryA[gameR.shape[0]:] memoryS = memoryS[gameR.shape[0]:] memoryRR = memoryRR[gameR.shape[0]:] memoryW = memoryW[gameR.shape[0]:] qs = s if done and game > num_initial_observation and not PLAY_GAME: last_game_average = gameR.mean() if game > 3 and game % 2 == 0: # train on all memory print("Experience Replay") #for i in range(3):
def propose(self):
    """Draw a new value for ``f_eval`` conditional on ``eps_p_f``.

    Builds the covariance ``C_eval`` plus the nugget ``V`` (each diagonal
    entry divided by the number of indices in the matching ``ti`` group),
    derives the conditional mean and covariance of ``f`` given the group
    means of ``eps_p_f``, samples ``f_eval`` from them and finally redraws
    the rest of the field with ``self.f.rand()``.

    Raises ``np.linalg.LinAlgError`` when the first Cholesky factorization
    fails.  When the conditional covariance is not positive definite a
    warning is issued and the method returns without updating anything.
    """
    if self.verbose:
        print_(self._id + ' proposing')

    fc = pm.gp.fast_matrix_copy

    eps_p_f = pm.utils.value(self.eps_p_f)
    f = pm.utils.value(self.f_eval)
    # NOTE(review): scratch3 is overwritten below before it is read in this
    # method, so this loop looks redundant -- confirm before removing.
    for i in xrange(len(self.scratch3)):
        self.scratch3[i] = np.sum(eps_p_f[self.ti[i]] - f[i])

    # Compute Cholesky factor of covariance of eps_p_f, C(x,x) + V
    C_eval_value = pm.utils.value(self.C_eval)
    C_eval_shape = C_eval_value.shape

    # Get the Cholesky factor of C_eval, plus the nugget.
    # I don't think you can use S_eval for speed, unfortunately.
    in_chol = fc(C_eval_value, self.scratch1)

    v_val = pm.utils.value(self.V)
    for i in xrange(pm.utils.value(C_eval_shape)[0]):
        # len(np.atleast_1d(x)) is the exact equivalent of np.alen(x), which
        # was deprecated in NumPy 1.18 and removed in 1.23.
        in_chol[i, i] += v_val[i] / len(np.atleast_1d(self.ti[i]))

    info = pm.gp.linalg_utils.dpotrf_wrap(in_chol)
    if info > 0:
        raise np.linalg.LinAlgError

    # Compute covariance of f conditional on eps_p_f.
    offdiag = fc(C_eval_value, self.scratch2)
    offdiag = pm.gp.trisolve(in_chol, offdiag, uplo='U', transa='T',
                             inplace=True)

    C_step = offdiag.T * offdiag
    C_step *= -1
    C_step += C_eval_value

    # Compute mean of f conditional on eps_p_f.
    for i in xrange(len(self.scratch3)):
        self.scratch3[i] = np.mean(eps_p_f[self.ti[i]])
    m_step = pm.utils.value(self.M_eval) + np.dot(
        offdiag.T,
        pm.gp.trisolve(in_chol, (self.scratch3 - self.M_eval.value),
                       uplo='U', transa='T')).view(np.ndarray).ravel()

    sig_step = C_step
    info = pm.gp.linalg_utils.dpotrf_wrap(C_step.T)
    if info > 0:
        warnings.warn(
            'Full conditional covariance was not positive definite.')
        return

    # Update value of f.
    self.f_eval.value = m_step + np.dot(
        sig_step,
        np.random.normal(size=sig_step.shape[1])).view(np.ndarray).ravel()

    # Propose the rest of the field from its conditional prior.
    self.f.rand()
def test_alen(self):
    """Calling ``np.alen`` must be reported as deprecated."""
    def call_alen():
        return np.alen(np.array([1, 2, 3]))

    self.assert_deprecated(call_alen)
def _group_dicom_files_by_attribute(dicom_file_list, attribute):
    """Group DICOM file names by the value of one header attribute.

    Returns a dict mapping each attribute value to the list of files carrying
    it.  Raises ``AttributeError`` as soon as a file lacks the attribute.
    """
    grouped = {}
    for dcm_name in dicom_file_list:
        value = getattr(pydicom.read_file(dcm_name), attribute)
        grouped.setdefault(value, []).append(dcm_name)
    return grouped


def process_dicom_series(
    dicom_series_dict,
    series_uid,
    parent_sorting_field="PatientName",
    return_extra=True,
    individual_file=False,
    initial_sop_class_name_default="UNKNOWN",
):
    """Read one DICOM series and yield the images/structures/doses it holds.

    This is a generator.  Depending on the SOP class name of the first file
    in the series it yields tuples of the form

    * ``("IMAGES", metadata, initial_dicom, image)``
    * ``("STRUCTURES", metadata, dicom_object, structure_image_list)``
    * ``("DOSES", metadata, dicom_object, scaled_dose_image)``

    where ``metadata`` is a dict with ``parent_sorting_data`` and
    ``study_uid`` (plus ``structure_name_list`` for structures).

    Parameters
    ----------
    dicom_series_dict : dict
        Maps series UIDs to lists of DICOM file names.
    series_uid : str
        The series to process.
    parent_sorting_field : str, optional
        DICOM field whose (sanitized, upper-cased) value is reported as
        ``parent_sorting_data``.
    return_extra : bool, optional
        Not used by this function; only forwarded to the per-file fallback.
    individual_file : str or False, optional
        When set, only this file is processed instead of the whole series.
    initial_sop_class_name_default : str, optional
        SOP class name assumed when the first file has no SOPClassUID.

    Notes
    -----
    When an image series cannot be read as a whole, every file is processed
    on its own and its results are yielded.  An MR series that contains more
    than one sequence is split and yields one image per sequence.
    """
    if not individual_file:
        logger.info(f" Processing series UID: {series_uid}")
        dicom_file_list = dicom_series_dict[series_uid]
    else:
        logger.info(f" Processing individual file: {individual_file}")
        dicom_file_list = [individual_file]

    logger.info(f" Number of DICOM files: {len(dicom_file_list)}")

    initial_dicom = pydicom.read_file(dicom_file_list[0])

    # Get the data in the parent sorting field, clean with RegEx
    parent_sorting_data = re.sub(
        r"[^\w]", "_", str(initial_dicom[parent_sorting_field].value)).upper()

    if parent_sorting_data == "":
        logger.error(
            f"Could not find any data in {parent_sorting_field}. This is very bad, the data cannot be sorted properly."
        )
        # TODO: implement a routine to let a user correlate a root directory
        # with a name.
        parent_sorting_data = "TEMP"

    try:
        initial_dicom_sop_class_name = initial_dicom.SOPClassUID.name
    except AttributeError:
        logger.warning(
            f"Could not find DICOM SOP Class UID, using {initial_sop_class_name_default}."
        )
        initial_dicom_sop_class_name = initial_sop_class_name_default

    try:
        study_uid = initial_dicom.StudyInstanceUID
    except AttributeError:
        study_uid = "00001"

    # TODO: handle secondary capture image storage.  This can include JPEGs
    # with written information on them, which is typically not very useful;
    # it could be dumped to file or the DICOM file saved as-is.

    # Check the potential types of DICOM files
    if ("Image" in initial_dicom_sop_class_name
            and initial_dicom_sop_class_name != "Secondary Capture Image Storage"):
        # Load as a primary image
        sorted_file_list = safe_sort_dicom_image_list(dicom_file_list)

        try:
            image = sitk.ReadImage(sorted_file_list)
        except RuntimeError:
            logger.warning(" Could not read image into SimpleITK.")
            if individual_file:
                # Already down to a single file: there is nothing smaller to
                # fall back to, and recursing again would never terminate.
                return
            logger.info(" Processing files individually.")
            # `return <generator>` inside a generator silently discards the
            # sub-generator; delegate with `yield from` so that the per-file
            # results actually reach the caller.
            for dicom_file in dicom_file_list:
                yield from process_dicom_series(
                    dicom_series_dict,
                    series_uid,
                    parent_sorting_field=parent_sorting_field,
                    return_extra=return_extra,
                    individual_file=dicom_file,
                    initial_sop_class_name_default=initial_sop_class_name_default,
                )
            return

        dicom_file_metadata = {
            "parent_sorting_data": parent_sorting_data,
            "study_uid": study_uid,
        }

        # TODO (integrity check): read in all the files here, check the slice
        # location and determine if any are missing.

        if initial_dicom.Modality == "PT":
            # TODO: PET SUV conversion, e.g.
            #   scaling_factor = get_suv_bw_scale_factor(initial_dicom)
            #   image *= scaling_factor
            pass

        # CHECKPOINT: some DCE MRI sequences have the same series UID.
        # Here we check the sequence name, and split if necessary.
        if initial_dicom.Modality == "MR":
            # Group the files by SequenceName, falling back to
            # SeriesDescription and then AcquisitionComments when a file
            # does not carry the preferred attribute.
            try:
                sequence_dict = _group_dicom_files_by_attribute(
                    dicom_file_list, "SequenceName")
            except AttributeError:
                try:
                    logger.warning(
                        " MRI sequence name not found. The SeriesDescription will be used instead."
                    )
                    sequence_dict = _group_dicom_files_by_attribute(
                        dicom_file_list, "SeriesDescription")
                except AttributeError:
                    logger.warning(
                        " MRI SeriesDescription not found. The AcquisitionComments will be used instead."
                    )
                    sequence_dict = _group_dicom_files_by_attribute(
                        dicom_file_list, "AcquisitionComments")
            sequence_names = np.unique(list(sequence_dict.keys()))

            if initial_dicom.Manufacturer == "GE MEDICAL SYSTEMS":
                # GE use the DICOM tag (0019, 10a2) [Raw data run number]
                # in Diffusion weighted MRI sequences
                # We need to separate this out to get the difference sequences
                if initial_dicom.SeriesDescription == "Diffusion Weighted":
                    # number_of_images / images_per_seq
                    num_images_per_seq = initial_dicom[(0x0021, 0x104F)].value

                    sequence_names = np.unique([
                        f"DWI_{str( ( pydicom.read_file(x)['InstanceNumber'].value - 1) // num_images_per_seq )}"
                        for x in dicom_file_list
                    ])

                    sequence_name_index_dict = {
                        name: index
                        for index, name in enumerate(sequence_names)
                    }

                    sequence_dict = {}
                    for dcm_name in dicom_file_list:
                        dcm_obj = pydicom.read_file(dcm_name)
                        var = f"DWI_{str( ( dcm_obj['InstanceNumber'].value - 1) // num_images_per_seq )}"
                        var_to_index = sequence_name_index_dict[var]

                        if var_to_index not in sequence_dict.keys():
                            sequence_dict[var_to_index] = [dcm_name]
                        else:
                            sequence_dict[var_to_index].append(dcm_name)

                    sequence_names = sorted(sequence_dict.keys())

            # len() replaces np.alen, which was removed in NumPy 1.23.
            if len(sequence_names) > 1:
                logger.warning(
                    " Two MR sequences were found under a single series UID.")
                logger.warning(" These will be split into separate images.")

                # Split up the DICOM file list by sequence name
                for sequence_name in sequence_names:
                    dicom_file_list_by_sequence = sequence_dict[sequence_name]

                    logger.info(sequence_name)
                    logger.info(len(dicom_file_list_by_sequence))

                    sorted_file_list = safe_sort_dicom_image_list(
                        dicom_file_list_by_sequence)

                    initial_dicom = pydicom.read_file(sorted_file_list[0],
                                                      force=True)

                    image_by_sequence = sitk.ReadImage(sorted_file_list)

                    dicom_file_metadata_by_sequence = {
                        "parent_sorting_data": parent_sorting_data,
                        "study_uid": study_uid,
                    }

                    yield "IMAGES", dicom_file_metadata_by_sequence, initial_dicom, image_by_sequence
                return  # Stop iteration

        yield "IMAGES", dicom_file_metadata, initial_dicom, image

    if "Structure" in initial_dicom_sop_class_name:
        # Load as an RT structure set
        # This should be done individually for each file

        logger.info(f" Number of files: {len(dicom_file_list)}")
        for index, dicom_file in enumerate(dicom_file_list):
            dicom_object = pydicom.read_file(dicom_file, force=True)

            # We must also read in the corresponding DICOM image
            # This can be found by matching the references series UID to the
            # series UID
            # TODO: what happens if there is an RT structure set with
            # different referenced sequences?

            # Get the "ReferencedFrameOfReferenceSequence", first item
            referenced_frame_of_reference_item = (
                dicom_object.ReferencedFrameOfReferenceSequence[0])

            # Get the "RTReferencedStudySequence", first item
            # This retrieves the study UID
            # This might be useful, but would typically match the actual
            # StudyInstanceUID in the DICOM object
            rt_referenced_series_item = (
                referenced_frame_of_reference_item.RTReferencedStudySequence[0])

            # Get the "RTReferencedSeriesSequence", first item
            # This retrieves the actual referenced series UID, which we need
            # to match imaging parameters
            rt_referenced_series_again_item = (
                rt_referenced_series_item.RTReferencedSeriesSequence[0])

            # Get the appropriate series instance UID
            image_series_uid = rt_referenced_series_again_item.SeriesInstanceUID
            logger.info(
                f" Item {index}: Matched SeriesInstanceUID = {image_series_uid}"
            )

            # Read in the corresponding image
            sorted_file_list = safe_sort_dicom_image_list(
                dicom_series_dict[image_series_uid])
            image = sitk.ReadImage(sorted_file_list)

            initial_dicom = pydicom.read_file(sorted_file_list[0], force=True)

            (
                structure_name_list,
                structure_image_list,
            ) = transform_point_set_from_dicom_struct(image, dicom_object)

            dicom_file_metadata = {
                "parent_sorting_data": parent_sorting_data,
                "study_uid": study_uid,
                "structure_name_list": structure_name_list,
            }

            yield "STRUCTURES", dicom_file_metadata, dicom_object, structure_image_list

    if "Dose" in initial_dicom_sop_class_name:
        # Load as an RT Dose distribution
        # This should be done individually for each file

        logger.info(f" Number of files: {len(dicom_file_list)}")
        for index, dicom_file in enumerate(dicom_file_list):
            dicom_object = pydicom.read_file(dicom_file, force=True)

            # CHECKPOINT: there should only be a single RT dose file (with
            # each series UID).  If there are more, yield each.
            dicom_file_metadata = {
                "parent_sorting_data": parent_sorting_data,
                "study_uid": study_uid,
            }

            # We must read in as a float otherwise when we multiply by one
            # later it will not work!
            raw_dose_image = sitk.ReadImage(dicom_file, sitk.sitkFloat32)

            dose_grid_scaling = dicom_object.DoseGridScaling

            logger.debug(f" Dose grid scaling: {dose_grid_scaling} Gy")

            scaled_dose_image = raw_dose_image * dose_grid_scaling

            yield "DOSES", dicom_file_metadata, dicom_object, scaled_dose_image

    # TODO:
    #  1. (DONE) Implement conversion of dose files (to NIFTI images)
    #  2. Implement conversion of RT plan files to text dump
    #  3. Do something with other files (e.g. Deformable Image Registration)
    return
def _StartCountStride(elem, shape, dimensions=None, grp=None, datashape=None,
                      put=False, use_get_vars=False):
    """Return start, count, stride and indices needed to store/extract data
    into/from a netCDF variable.

    This function is used to convert a slicing expression into a form that is
    compatible with the nc_get_vars function. Specifically, it needs
    to interpret integers, slices, Ellipses, and 1-d sequences of integers
    and booleans.

    Numpy uses "broadcasting indexing" to handle array-valued indices.
    "Broadcasting indexing" (a.k.a "fancy indexing") treats all multi-valued
    indices together to allow arbitrary points to be extracted. The index
    arrays can be multidimensional, and more than one can be specified in a
    slice, as long as they can be "broadcast" against each other.
    This style of indexing can be very powerful, but it is very hard
    to understand, explain, and implement (and can lead to hard to find bugs).
    Most other python packages and array processing
    languages (such as netcdf4-python, xray, biggus, matlab and fortran)
    use "orthogonal indexing" which only allows for 1-d index arrays and
    treats these arrays of indices independently along each dimension.

    The implementation of "orthogonal indexing" used here requires that
    index arrays be 1-d boolean or integer. If integer arrays are used,
    the index values must be sorted and contain no duplicates.

    In summary, slicing netcdf4-python variable objects with 1-d integer or
    boolean arrays is allowed, but may give a different result than slicing a
    numpy array.

    Numpy also supports slicing an array with a boolean array of the same
    shape. For example x[x>0] returns a 1-d array with all the positive values
    of x. This is also not supported in netcdf4-python, if x.ndim > 1.

    Orthogonal indexing can be used in to select netcdf variable slices
    using the dimension variables. For example, you can use v[lat>60,lon<180]
    to fetch the elements of v obeying conditions on latitude and longitude.
    Allow for this sort of simple variable subsetting is the reason we decided
    to deviate from numpy's slicing rules.

    This function is used both by the __setitem__ and __getitem__ method of
    the Variable class.

    Parameters
    ----------
    elem : tuple of integer, slice, ellipsis or 1-d boolean or integer
        sequences used to slice the netCDF Variable (Variable[elem]).
    shape : tuple containing the current shape of the netCDF variable.
    dimensions : sequence
        The name of the dimensions.
        __setitem__.
    grp : netCDF Group
        The netCDF group to which the variable being set belongs to.
    datashape : sequence
        The shape of the data that is being stored. Only needed by __setitem__
    put : True|False (default False). If called from __setitem__, put is True.
    use_get_vars : True|False (default False). When False, integer sequences
        and slices whose stride is not +/-1 are kept as (or converted to)
        integer sequences instead of strided slices, so that the strided
        nc_get_vars call is avoided.

    Returns
    -------
    start : ndarray (..., n)
        A starting indices array of dimension n+1. The first n
        dimensions identify different independent data chunks. The last
        dimension can be read as the starting indices.
    count : ndarray (..., n)
        An array of dimension (n+1) storing the number of elements to get.
    stride : ndarray (..., n)
        An array of dimension (n+1) storing the steps between each datum.
    indices : ndarray (..., n)
        An array storing the indices describing the location of the
        data chunk in the target/source array (__getitem__/__setitem__).

    Raises
    ------
    IndexError
        For invalid, multidimensional or out-of-range indices, for more than
        one ellipsis, or when the data shape does not conform to the slice.
    ValueError
        If the slicing expression has more entries than the variable has
        dimensions.

    Notes:

    netCDF data is accessed via the function:
       nc_get_vars(grpid, varid, start, count, stride, data)

    Assume that the variable has dimension n, then

    start is a n-tuple that contains the indices at the beginning of data
    chunk.
    count is a n-tuple that contains the number of elements to be accessed.
    stride is a n-tuple that contains the step length between each element.
    """
    # Adapted from pycdf (http://pysclint.sourceforge.net/pycdf)
    # by Andre Gosselin..
    # Modified by David Huard to handle efficiently fancy indexing with
    # sequences of integers or booleans.

    nDims = len(shape)
    if nDims == 0:
        nDims = 1
        shape = (1, )

    # is there an unlimited dimension? (only defined for __setitem__)
    if put:
        hasunlim = False
        unlimd = {}
        if dimensions:
            for i in range(nDims):
                dimname = dimensions[i]
                # is this dimension unlimited?
                # look in current group, and parents for dim.
                dim = _find_dim(grp, dimname)
                unlimd[dimname] = dim.isunlimited()
                if unlimd[dimname]:
                    hasunlim = True
    else:
        hasunlim = False

    # When a single array or (non-tuple) sequence of integers is given
    # as a slice, assume it applies to the first dimension,
    # and use ellipsis for remaining dimensions.
    if np.iterable(elem):
        if type(elem) == np.ndarray or (type(elem) != tuple and \
            np.array([_is_int(e) for e in elem]).all()):
            elem = [elem]
            for n in range(len(elem) + 1, nDims + 1):
                elem.append(slice(None, None, None))
    else:
        # Convert single index to sequence
        elem = [elem]

    # ensure there is at most 1 ellipse
    # we cannot use elem.count(Ellipsis), as with fancy indexing would occur
    # np.array() == Ellipsis which gives ValueError: The truth value of an
    # array with more than one element is ambiguous. Use a.any() or a.all()
    if sum(1 for e in elem if e is Ellipsis) > 1:
        raise IndexError(
            "At most one ellipsis allowed in a slicing expression")

    # replace boolean arrays with sequences of integers.
    newElem = []
    IndexErrorMsg=\
    "only integers, slices (`:`), ellipsis (`...`), and 1-d integer or boolean arrays are valid indices"
    i = 0
    for e in elem:
        # string-like object try to cast to int
        # needs to be done first, since strings are iterable and
        # hard to distinguish from something castable to an iterable numpy array.
        if type(e) in [str, bytes, unicode]:
            try:
                e = int(e)
            except Exception:
                raise IndexError(IndexErrorMsg)
        ea = np.asarray(e)
        # Raise error if multidimensional indexing is used.
        if ea.ndim > 1:
            raise IndexError("Index cannot be multidimensional")
        # set unlim to True if dimension is unlimited and put==True
        # (called from __setitem__)
        if hasunlim and put and dimensions:
            try:
                dimname = dimensions[i]
                unlim = unlimd[dimname]
            except IndexError:
                # more slices than dimensions (issue 371)
                unlim = False
        else:
            unlim = False
        # convert boolean index to integer array.
        if np.iterable(ea) and ea.dtype.kind == 'b':
            # check that boolen array not too long
            if not unlim and shape[i] != len(ea):
                msg = """ Boolean array must have the same shape as the data along this dimension."""
                raise IndexError(msg)
            ea = np.flatnonzero(ea)
        # an iterable (non-scalar) integer array.
        if np.iterable(ea) and ea.dtype.kind == 'i':
            # convert negative indices in 1d array to positive ones.
            ea = np.where(ea < 0, ea + shape[i], ea)
            if np.any(ea < 0):
                raise IndexError("integer index out of range")
            # if unlim, let integer index be longer than current dimension
            # length.
            if ea.shape != (0, ):
                elen = shape[i]
                if unlim:
                    elen = max(ea.max() + 1, elen)
                if ea.max() + 1 > elen:
                    msg = "integer index exceeds dimension size"
                    raise IndexError(msg)
            newElem.append(ea)
        # integer scalar
        elif ea.dtype.kind == 'i':
            newElem.append(e)
        # slice or ellipsis object
        elif type(e) == slice or type(e) == type(Ellipsis):
            if not use_get_vars and type(e) == slice and e.step not in [None,-1,1] and\
               dimensions is not None and grp is not None:
                # convert strided slice to integer sequence if possible
                # (this will avoid nc_get_vars, which is slow - issue #680).
                start = e.start if e.start is not None else 0
                step = e.step
                if e.stop is None and dimensions is not None and grp is not None:
                    stop = len(_find_dim(grp, dimensions[i]))
                else:
                    stop = e.stop
                    if stop < 0:
                        stop = len(_find_dim(grp, dimensions[i])) + stop
                try:
                    ee = np.arange(start, stop, step)
                    if len(ee) > 0:
                        e = ee
                except Exception:
                    # best effort: keep the original slice if it cannot be
                    # expanded into an integer sequence.
                    pass
            newElem.append(e)
        else:
            # castable to a scalar int, otherwise invalid
            try:
                e = int(e)
                newElem.append(e)
            except Exception:
                raise IndexError(IndexErrorMsg)
        # An ellipsis stands for all the dimensions not spelled out.
        if type(e) == type(Ellipsis):
            i += 1 + nDims - len(elem)
        else:
            i += 1
    elem = newElem

    # replace Ellipsis and integer arrays with slice objects, if possible.
    newElem = []
    for e in elem:
        ea = np.asarray(e)
        # Replace ellipsis with slices.
        if type(e) == type(Ellipsis):
            # The ellipsis stands for the missing dimensions.
            newElem.extend(
                (slice(None, None, None), ) * (nDims - len(elem) + 1))
        # Replace sequence of indices with slice object if possible.
        elif np.iterable(e) and len(e) > 1:
            start = e[0]
            stop = e[-1] + 1
            step = e[1] - e[0]
            try:
                ee = range(start, stop, step)
            except ValueError:
                # start, stop or step is not valid for a range
                ee = False
            if ee and len(e) == len(ee) and (e == np.arange(start, stop, step)).all():
                # don't convert to slice unless abs(stride) == 1
                # (nc_get_vars is very slow, issue #680)
                if not use_get_vars and step not in [1, -1]:
                    newElem.append(e)
                else:
                    newElem.append(slice(start, stop, step))
            else:
                newElem.append(e)
        elif np.iterable(e) and len(e) == 1:
            newElem.append(slice(e[0], e[0] + 1, 1))
        else:
            newElem.append(e)
    elem = newElem

    # If slice doesn't cover all dims, assume ellipsis for rest of dims.
    if len(elem) < nDims:
        for n in range(len(elem) + 1, nDims + 1):
            elem.append(slice(None, None, None))

    # make sure there are not too many dimensions in slice.
    if len(elem) > nDims:
        raise ValueError(
            "slicing expression exceeds the number of dimensions of the variable"
        )

    # Compute the dimensions of the start, count, stride and indices arrays.
    # The number of elements in the first n dimensions corresponds to the
    # number of times the _get method will be called.
    sdim = []
    for i, e in enumerate(elem):
        # at this stage e is a slice, a scalar integer, or a 1d integer array.
        # integer array: _get call for each True value
        if np.iterable(e):
            # len() replaces np.alen, which was removed in NumPy 1.23.
            sdim.append(len(e))
        # Scalar int or slice, just a single _get call
        else:
            sdim.append(1)

    # broadcast data shape when assigned to full variable (issue #919)
    try:
        fullslice = elem.count(slice(None, None, None)) == len(elem)
    except Exception:
        # fails if elem contains a numpy array.
        fullslice = False
    if fullslice and datashape and put and not hasunlim:
        datashape = broadcasted_shape(shape, datashape)

    # pad datashape with zeros for dimensions not being sliced (issue #906)
    # only used when data covers slice over subset of dimensions
    if datashape and len(datashape) != len(elem) and\
       len(datashape) == sum(1 for e in elem if type(e) == slice):
        datashapenew = ()
        i = 0
        for e in elem:
            if type(e) != slice and not np.iterable(e):
                # scalar integer slice
                datashapenew = datashapenew + (0, )
            else:
                # slice object
                datashapenew = datashapenew + (datashape[i], )
                i += 1
        datashape = datashapenew

    # Create the start, count, stride and indices arrays.
    sdim.append(max(nDims, 1))
    start = np.empty(sdim, dtype=int)
    count = np.empty(sdim, dtype=int)
    stride = np.empty(sdim, dtype=int)
    indices = np.empty(sdim, dtype=object)

    for i, e in enumerate(elem):

        ea = np.asarray(e)

        # set unlim to True if dimension is unlimited and put==True
        # (called from __setitem__). Note: grp and dimensions must be set.
        if hasunlim and put and dimensions:
            dimname = dimensions[i]
            unlim = unlimd[dimname]
        else:
            unlim = False

        #    SLICE    #
        if type(e) == slice:

            # determine length parameter for slice.indices.

            # shape[i] can be zero for unlim dim that hasn't been written to
            # yet.
            # length of slice may be longer than current shape
            # if dimension is unlimited (and we are writing, not reading).
            if unlim and e.stop is not None and e.stop > shape[i]:
                length = e.stop
            elif unlim and e.stop is None and datashape != ():
                try:
                    if e.start is None:
                        length = datashape[i]
                    else:
                        length = e.start + datashape[i]
                except IndexError:
                    raise IndexError("shape of data does not conform to slice")
            else:
                # NOTE(review): `dim` is whatever the unlimited-dimension
                # scan at the top of the function left behind (the last
                # dimension looked up) -- confirm this is the intended one.
                if unlim and datashape == () and len(dim) == 0:
                    # writing scalar along unlimited dimension using slicing
                    # syntax (var[:] = 1, when var.shape = ())
                    length = 1
                else:
                    length = shape[i]

            beg, end, inc = e.indices(length)
            n = len(range(beg, end, inc))

            start[..., i] = beg
            count[..., i] = n
            stride[..., i] = inc
            indices[..., i] = slice(None)

        #    ITERABLE    #
        elif np.iterable(e) and np.array(e).dtype.kind in 'i':
            # Sequence of integers
            start[..., i] = np.apply_along_axis(lambda x: e * x, i,
                                                np.ones(sdim[:-1]))
            indices[..., i] = np.apply_along_axis(
                lambda x: np.arange(sdim[i]) * x, i, np.ones(sdim[:-1], int))

            count[..., i] = 1
            stride[..., i] = 1

        #   all that's left is SCALAR INTEGER    #
        else:
            if e >= 0:
                start[..., i] = e
            elif e < 0 and (-e <= shape[i]):
                start[..., i] = e + shape[i]
            else:
                raise IndexError("Index out of range")

            count[..., i] = 1
            stride[..., i] = 1
            # Use -1 instead of 0 to indicate that
            # this dimension shall be squeezed.
            indices[..., i] = -1

    return start, count, stride, indices  #, out_shape
def sample(self, p0, model0, lnprob0=None, lnlike0=None, iterations=1,
           thin=1, storechain=True):
    """
    Advance the chain ``iterations`` steps as a generator.

    Each step proposes a jump (possibly to a different model with a
    different number of parameters) via ``self._get_jump`` and accepts or
    rejects it with a Metropolis-Hastings test on the log-posterior.

    :param p0:
        The initial parameter vector. It is copied into a new array, so
        the caller's object is never modified.

    :param model0:
        The initial model index. Models are 1-based: ``model - 1`` is used
        to index the per-model bookkeeping containers.

    :param lnprob0: (optional)
        The initial log-posterior value. If either ``lnprob0`` or
        ``lnlike0`` is ``None``, both are recomputed with
        ``self._get_lnprob(p, 0)``.

    :param lnlike0: (optional)
        The initial log-likelihood value (see ``lnprob0``).

    :param iterations: (optional)
        The number of iterations to perform.

    :param thin: (optional)
        Store the state in the internal chains only every ``thin``-th
        iteration. Thinning affects storage only; a state is still
        yielded at every iteration.

    :param storechain: (optional)
        If ``True`` append the thinned states to ``self._chain``,
        ``self._lnprob`` and ``self._lnlikelihood``.

    At each iteration, this generator yields

    * ``p``, the current parameter vector.

    * ``model``, the current (1-based) model index.

    * ``lnprob``, the current log-posterior value.

    * ``lnlike``, the current log-likelihood value.

    ``self.naccepted`` and ``self._nmod`` are updated in place and must
    already exist with one entry per model; they are not reset here.
    """
    p = np.copy(np.array(p0))

    # If we have no lnprob or lnlike, compute them for the starting point
    # (zero newly added parameters).
    if lnprob0 is None or lnlike0 is None:
        lnprob0, lnlike0 = self._get_lnprob(p, 0)

    lnprob = lnprob0
    lnlike = lnlike0
    model = model0

    # One independent list per model for the stored chain values.
    self._chain = [[] for _ in range(self.nmodel)]
    self._lnprob = [[] for _ in range(self.nmodel)]
    self._lnlikelihood = [[] for _ in range(self.nmodel)]
    self._modchain = np.zeros(iterations)

    self.iterations = 0
    for i in range(iterations):
        self.iterations += 1

        # Propose a jump in parameter (and possibly model) space.
        q, newmod = self._get_jump(p, model)
        q = np.array(q)

        # Change in the number of parameters (can be negative).
        # np.alen was removed from NumPy; atleast_1d keeps its behaviour
        # of counting a 0-d array as length 1.
        newpar = len(np.atleast_1d(q)) - len(np.atleast_1d(p))

        # Evaluate the posterior at the proposed point.
        newlnprob, newlnlike = self._get_lnprob(q, newpar)

        # Metropolis-Hastings step: an uphill move (diff >= 0) is always
        # accepted; a downhill move is accepted when a uniform draw does
        # not exceed exp(diff). A random number is drawn only for
        # downhill moves.
        diff = newlnprob - lnprob
        if diff < 0:
            diff = np.exp(diff) - np.random.rand()

        if diff >= 0:
            p = q
            model = newmod
            lnprob = newlnprob
            lnlike = newlnlike
            self.naccepted[model - 1] += 1

        # Record which model the chain occupies after this step.
        self._modchain[i] = model
        self._nmod[model - 1] += 1

        # Save chain values under the current model.
        if (i + 1) % thin == 0 and storechain:
            self._chain[model - 1].append(p)
            self._lnprob[model - 1].append(lnprob)
            self._lnlikelihood[model - 1].append(lnlike)

        yield p, model, lnprob, lnlike
good_pump = np.array([]) good_air = np.array([]) # omission_cals = np.array([]) omission_odor = [] omission_lick = np.array([]) omission_pump = np.array([]) omission_air = np.array([]) for i in range(win_num): acc_gos = sum(goacc_list[i * 10:(i + 1) * 10]) go_nums = odor_list[i * 10:(i + 1) * 10].count(1) print(go_nums) if acc_gos / go_nums <= 0.5: print('omi!', i) if np.alen(omission_cals) == 0: omission_cals = cal[i * 10:(i + 1) * 10, :] omission_odor = odor_list[i * 10:(i + 1) * 10] omission_lick = lick[i * 10:(i + 1) * 10, :] omission_pump = pump[i * 10:(i + 1) * 10, :] omission_air = airpuff[i * 10:(i + 1) * 10, :] else: omission_cals = np.vstack( (omission_cals, cal[i * 10:(i + 1) * 10, :])) for k in range(len(odor_list[i * 10:(i + 1) * 10])): omission_odor.append(odor_list[i * 10 + k]) omission_lick = np.vstack( (omission_lick, lick[i * 10:(i + 1) * 10, :])) omission_pump = np.vstack( (omission_pump, pump[i * 10:(i + 1) * 10, :])) omission_air = np.vstack(
def _find_frame_at(cal_time, t, centre, halfwidth=20):
    """Return the frame index ``k`` whose time span contains ``t``, or None.

    Only indices in ``[centre - halfwidth, centre + halfwidth)`` are tried,
    clamped at 0 so that a negative ``k`` never slices ``cal_time`` from the
    end. Frame ``k`` contains ``t`` when
    ``sum(cal_time[:k]) <= t < sum(cal_time[:k + 1])``; for non-negative
    entries of ``cal_time`` at most one ``k`` can satisfy this.
    """
    for k in range(max(centre - halfwidth, 0), centre + halfwidth):
        if np.sum(cal_time[:k]) <= t < np.sum(cal_time[:k + 1]):
            return k
    return None


def div_by_cue(cal_time, cal_data, cue1, cue2, cue3, cue_hz):
    """Cut ``cal_data`` into fixed-length windows around each cue onset.

    An onset of a cue is a sample index ``i`` where ``np.diff(cue)[i] == 1``.
    Its time is taken as ``i / cue_hz`` (accumulated step by step) and is
    mapped to the index ``k`` of ``cal_data`` whose cumulative ``cal_time``
    interval contains that time. The stored window is the 100 samples before
    ``k`` followed by the 600 samples starting at ``k``.

    :param cal_time: per-sample durations of ``cal_data``; only its running
        sum is used. NOTE(review): presumably frame intervals of a calcium
        recording in the same time unit as ``1 / cue_hz`` - confirm.
    :param cal_data: 1-D sequence of samples to be windowed.
    :param cue1: cue-1 signal; its length sets the number of steps scanned.
    :param cue2: cue-2 signal; must be at least as long as ``cue1``.
    :param cue3: cue-3 signal; must be at least as long as ``cue1``.
    :param cue_hz: sampling rate of the cue signals.
    :returns: ``(cue1_cal, cue2_cal, cue3_cal, cue_order)``. Each ``cueN_cal``
        is a float array with 700 columns and one row per stored trial;
        ``cue_order`` lists the cue number (1, 2 or 3) of every stored trial
        in chronological order.

    Rows are pre-allocated from the number of edges in each cue signal
    (``sum(|diff|) / 2`` truncated to int). A trial whose window does not fit
    inside ``cal_data`` is reported with ``print`` and its row is removed. A
    trial for which no matching frame is found in the search range leaves its
    pre-allocated row untouched (all zeros).

    If several cues have an onset at the same index, only the lowest-numbered
    cue is processed for that index.
    """
    pre_len, post_len = 100, 600   # samples kept before / from the onset
    # NOTE(review): 20 looks like the number of cue samples per cal_data
    # frame, used for a first guess of the frame index - confirm.
    cue_per_frame = 20

    cue_interval = 1 / cue_hz
    changes = [np.diff(cue) for cue in (cue1, cue2, cue3)]
    trial_counts = [np.sum(np.abs(change) / 2) for change in changes]
    print("tn", *trial_counts)

    # One buffer per cue, each sized from that cue's own trial count.
    windows = [np.zeros((int(count), pre_len + post_len))
               for count in trial_counts]
    filled = [0, 0, 0]   # next free row in each buffer
    cue_order = []

    t = 0
    for i in range(len(changes[0])):
        for idx, change in enumerate(changes):
            if change[i] != 1:
                continue
            k = _find_frame_at(cal_time, t, round(i / cue_per_frame))
            if k is not None:
                row = filled[idx]
                # An onset closer than pre_len samples to the start has no
                # complete pre-onset window; treat it like a truncated tail.
                pre = cal_data[k - pre_len:k] if k >= pre_len else ()
                post = cal_data[k:k + post_len]
                if len(pre) < pre_len or len(post) < post_len:
                    windows[idx] = np.delete(windows[idx], row, 0)
                    print('cue' + str(idx + 1) + str(row))
                else:
                    windows[idx][row, :pre_len] = pre
                    windows[idx][row, pre_len:] = post
                    filled[idx] += 1
                    cue_order.append(idx + 1)
            # Mirror the original if/elif chain: first matching cue wins.
            break
        t += cue_interval

    return windows[0], windows[1], windows[2], cue_order