def indicate(self, M, trans, i):
    ''' indicate which M belongs to population i given transition parameter '''
    ts = np.insert(np.insert(trans, self.n_pop - 1, np.inf), 0, -np.inf)
    ind = (M >= ts[i]) & (M < ts[i + 1])
    return ind
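# Minimal standalone sketch of the thresholding pattern above (the values are
# made up, not from the original model): padding the transition points with
# -inf/+inf turns n_pop-1 cut points into n_pop half-open bins, and the mask
# picks out the members of bin i.
import numpy as np

trans = np.array([0.5, 1.5])                              # assumed cut points for n_pop = 3
ts = np.insert(np.insert(trans, 2, np.inf), 0, -np.inf)   # [-inf, 0.5, 1.5, inf]
M = np.array([0.1, 0.7, 2.0])
print((M >= ts[1]) & (M < ts[2]))                         # [False  True False]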
def produce_smoothed_images(get_component, replace_component, bins, output_path, paths):
    start_img = io.imread(paths[0])
    start_cdf = get_cdf(get_component(start_img), bins)
    end_img = io.imread(paths[-1])
    end_cdf = get_cdf(get_component(end_img), bins)
    delta_cdf = end_cdf - start_cdf

    for i, path in enumerate(paths[1:-1]):
        percentage = i / len(paths[1:-1])
        target_cdf = start_cdf + (delta_cdf * percentage)

        img = io.imread(path)
        values = get_component(img)
        cdf = get_cdf(values, bins)

        # In order to match the length of "bins" for the interpolation below
        # we prepend a 0
        target_cdf = numpy.insert(target_cdf, 0, 0)
        cdf = numpy.insert(cdf, 0, 0)

        matched = match(values, cdf, target_cdf, bins)
        matched = matched.reshape(values.shape)

        img = replace_component(img, matched)

        result_path = os.path.join(output_path, os.path.basename(path))
        io.imsave(result_path, img)
        print('Done with', result_path)
def _get_radius_neighbors(self, query, max_depth, bin_queries, radius):
    """Finds radius neighbors from the candidates obtained.

    Their distances from query are smaller than radius.
    Returns radius neighbors and distances.
    """
    ratio_within_radius = 1
    threshold = 1 - self.radius_cutoff_ratio
    total_candidates = np.array([], dtype=int)
    total_neighbors = np.array([], dtype=int)
    total_distances = np.array([], dtype=float)

    while max_depth > self.min_hash_match and ratio_within_radius > threshold:
        left_mask = self._left_mask[max_depth]
        right_mask = self._right_mask[max_depth]
        candidates = []
        for i in range(self.n_estimators):
            start, stop = _find_matching_indices(self.trees_[i], bin_queries[i],
                                                 left_mask, right_mask)
            candidates.extend(self.original_indices_[i][start:stop].tolist())
        candidates = np.setdiff1d(candidates, total_candidates)
        total_candidates = np.append(total_candidates, candidates)
        ranks, distances = self._compute_distances(query, candidates)
        m = np.searchsorted(distances, radius, side="right")
        positions = np.searchsorted(total_distances, distances[:m])
        total_neighbors = np.insert(total_neighbors, positions, candidates[ranks[:m]])
        total_distances = np.insert(total_distances, positions, distances[:m])
        ratio_within_radius = total_neighbors.shape[0] / float(total_candidates.shape[0])
        max_depth = max_depth - 1
    return total_neighbors, total_distances
def correct_missing_doms(self, scalerarray, no_channels):
    """
    Backup method in case the geometry is not given. Very back-of-the-envelope.
    Not used at the moment.

    Corrects an artifact of storing variable-length arrays in a table.
    Changes to the SNDAQ geometry removed certain DOMs from the snall data
    array, so this puts them back into the array at the right locations.
    We need to remove the last 7 or 8 dummy entries produced when reading
    the data from file and insert zeros at the appropriate places in the array.

    :param scalerarray: Scaler array with the missing DOMs shifted to the end
    :param no_channels: Number of active channels assumed for the file
    :returns: Scaler array with the correct location mapping
    """
    if no_channels == 5153:
        return np.insert(scalerarray[:-7],
                         [45, 403, 1308, 1925, 2278, 3594, 4061], 0)
    elif no_channels == 5152:
        return np.insert(scalerarray[:-8],
                         [45, 403, 1308, 1925, 2278, 3594, 4061, 5069], 0)
    else:
        raise RuntimeError("No. of channels (= %d) is not supported" % no_channels)
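# Illustrative sketch of the call pattern above (toy data, not real DOM geometry):
# when np.insert is given a list of positions, each fill value is placed before
# that index of the *original* array, which re-opens the slots for missing DOMs.
import numpy as np

arr = np.arange(10)
print(np.insert(arr, [2, 5, 7], 0))   # [0 1 0 2 3 4 0 5 6 0 7 8 9]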
def cells(self, cells, grid):
    from lxml import etree as ET

    if len(cells) == 1:
        meshio_type = list(cells.keys())[0]
        num_cells = len(cells[meshio_type])
        xdmf_type = meshio_to_xdmf_type[meshio_type][0]
        topo = ET.SubElement(
            grid,
            "Topology",
            TopologyType=xdmf_type,
            NumberOfElements=str(num_cells),
        )
        dt, prec = numpy_to_xdmf_dtype[cells[meshio_type].dtype.name]
        dim = "{} {}".format(*cells[meshio_type].shape)
        data_item = ET.SubElement(
            topo,
            "DataItem",
            DataType=dt,
            Dimensions=dim,
            Format=self.data_format,
            Precision=prec,
        )
        data_item.text = self.numpy_to_xml_string(cells[meshio_type])
    elif len(cells) > 1:
        total_num_cells = sum(c.shape[0] for c in cells.values())
        topo = ET.SubElement(
            grid,
            "Topology",
            TopologyType="Mixed",
            NumberOfElements=str(total_num_cells),
        )
        total_num_cell_items = sum(numpy.prod(c.shape) for c in cells.values())
        dim = total_num_cell_items + total_num_cells
        # Lines translate to Polylines, and one needs to specify the exact
        # number of nodes. Hence, prepend 2.
        if "line" in cells:
            cells["line"] = numpy.insert(cells["line"], 0, 2, axis=1)
            dim += len(cells["line"])
        dim = str(dim)
        cd = numpy.concatenate(
            [
                # prepend column with xdmf type index
                numpy.insert(value, 0, meshio_type_to_xdmf_index[key], axis=1).flatten()
                for key, value in cells.items()
            ]
        )
        dt, prec = numpy_to_xdmf_dtype[cd.dtype.name]
        data_item = ET.SubElement(
            topo,
            "DataItem",
            DataType=dt,
            Dimensions=dim,
            Format=self.data_format,
            Precision=prec,
        )
        data_item.text = self.numpy_to_xml_string(cd)
    return
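# Standalone sketch of the column-prepend used for the mixed-topology branch:
# np.insert with axis=1 at index 0 adds a constant leading column per cell row
# (42 is an arbitrary placeholder here, not a real XDMF type index).
import numpy as np

conn = np.array([[0, 1, 2], [2, 3, 0]])   # two triangle cells
print(np.insert(conn, 0, 42, axis=1))     # [[42  0  1  2] [42  2  3  0]]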
def insert_initial(val, blankAvg, InducedAvg, LOQ):
    Initial = initial_pt(val, blankAvg, LOQ)
    masked_vals = mask_apply(InducedAvg, LOQ, blankAvg)
    Initial_ACF = Initial / (10E6)
    sample_ACF = np.insert(masked_vals[0], 0, Initial_ACF)
    sample_time = np.insert(masked_vals[1], 0, 0)
    return (sample_ACF, sample_time)
def execEnd(self, eventIdx):
    # execute an end-breaking or depolymerization event.
    oligoEndBreak = self.ald['end'][eventIdx // 2]
    leftRight = eventIdx % 2 * 2 - 1
    lr = -(leftRight + 1) // 2
    unitMoving = oligoEndBreak.ends[lr]
    oligo_vanish, form_oligo, self.event_code = oligoEndBreak.end_break(leftRight, self.units)
    if form_oligo:  # not empty
        mono = form_oligo['monomer']
        if mono:  # monomer + monomer (mergeOligo)
            idx = np.where([x in [mono, unitMoving] for x in self.monomers])[0]
            self.monomers = np.delete(self.monomers, idx)
            self.oligos = np.insert(self.oligos, 0, form_oligo['oligo'])
        else:  # monomer + multimer (mergeOligo)
            idx = np.where([unitMoving is x for x in self.monomers])[0]
            self.monomers = np.delete(self.monomers, idx)
    else:  # empty, add the end to monomers
        self.monomers = np.insert(self.monomers, 0, unitMoving)
        unitMoving.energize()
    if oligo_vanish:
        idx = np.where([oligoEndBreak is x for x in self.oligos])[0]
        self.oligos = np.delete(self.oligos, idx)
        idx = np.where([unitMoving is not x for x in oligoEndBreak.subunits])[0]
        nonmoving_unit = oligoEndBreak.subunits[idx[0]]
        self.monomers = np.insert(self.monomers, 0, nonmoving_unit)
        nonmoving_unit.energize()
def load_from_classifier(classifier):
    forest0 = load_forest_from_classifier(classifier, 'forest0.npz')
    hist0 = load_ndarray(classifier, 'hist0.npy')
    prior = np.true_divide(hist0[0].sum(axis=0), hist0[0].sum())
    hist0 = np.insert(normalize(hist0), 0, 0, axis=0)
    forest1 = load_forest_from_classifier(classifier, 'forest1.npz')
    hist1 = load_ndarray(classifier, 'hist1.npy')
    hist1 = np.insert(normalize(hist1), 0, 0, axis=0)
    svmmodels = []
    try:
        training_bosts = normalize(load_ndarray(classifier, 'bosts.npy')).T
        NLABELS = hist0.shape[2]
        for i in range(1, NLABELS):
            model = classifier.read('svmmodel%d' % i)
            tmp = tempfile.NamedTemporaryFile()
            tmp.write(model)
            tmp.flush()
            svmmodels.append(load_model(tmp.name))
            tmp.close()
    except KeyError:
        training_bosts = None
    return forest0, hist0, forest1, hist1, training_bosts, svmmodels, prior
def value_counts(self, dropna=True):
    """
    Returns a Series containing counts of unique values.

    Parameters
    ----------
    dropna : boolean, default True
        Don't include counts of NaN, even if NaN is in sp_values.

    Returns
    -------
    counts : Series
    """
    keys, counts = algos._value_counts_arraylike(self.sp_values, dropna=dropna)
    fcounts = self.sp_index.ngaps
    if fcounts > 0:
        if self._null_fill_value and dropna:
            pass
        else:
            if self._null_fill_value:
                mask = pd.isnull(keys)
            else:
                mask = keys == self.fill_value

            if mask.any():
                counts[mask] += fcounts
            else:
                keys = np.insert(keys, 0, self.fill_value)
                counts = np.insert(counts, 0, fcounts)

    if not isinstance(keys, pd.Index):
        keys = pd.Index(keys)
    result = pd.Series(counts, index=keys)
    return result
def _prepend_image(self, document, im, rtn_length, column_dtypes, column_set, columns):
    image = im[IMAGE]
    first_dt = im[IMAGE_TIME]
    if not first_dt.tzinfo:
        first_dt = first_dt.replace(tzinfo=mktz('UTC'))
    document[INDEX] = np.insert(document[INDEX], 0, np.uint64(datetime_to_ms(first_dt)))
    for field in image:
        if field == INDEX:
            continue
        if columns and field not in columns:
            continue
        if field not in document or document[field] is None:
            col_dtype = np.dtype(str if isinstance(image[field], string_types) else 'f8')
            document[field] = self._empty(rtn_length, dtype=col_dtype)
            column_dtypes[field] = col_dtype
            column_set.add(field)
        val = image[field]
        document[field] = np.insert(document[field], 0, document[field].dtype.type(val))
    # Now insert rows for fields in document that are not in the image
    for field in set(document).difference(set(image)):
        if field == INDEX:
            continue
        logger.debug("Field %s is missing from image!" % field)
        if document[field] is not None:
            val = np.nan
            document[field] = np.insert(document[field], 0, document[field].dtype.type(val))
    return document
def calculate(self):
    ephem_location = ephem.Observer()
    ephem_location.lat = self.location.latitude.to(u.rad) / u.rad
    ephem_location.lon = self.location.longitude.to(u.rad) / u.rad
    ephem_location.elevation = self.location.height / u.meter
    ephem_location.date = ephem.Date(self.time.datetime)

    if self.data is None:
        self.alt = Latitude([], unit=u.deg)
        self.az = Longitude([], unit=u.deg)
        self.names = Column([], dtype=np.str)
        self.vmag = Column([])
    else:
        ra = Longitude(self.data["ra"], u.h)
        dec = Latitude(self.data["dec"], u.deg)
        c = SkyCoord(ra, dec, frame='icrs')
        altaz = c.transform_to(AltAz(obstime=self.time, location=self.location))
        self.alt = altaz.alt
        self.az = altaz.az
        self.names = self.data['name']
        self.vmag = self.data['mag']

    for ephemeris in self.ephemerides:
        ephemeris.compute(ephem_location)
        self.vmag = np.insert(self.vmag, [0], ephemeris.mag)
        self.alt = np.insert(self.alt, [0], (ephemeris.alt.znorm * u.rad).to(u.deg))
        self.az = np.insert(self.az, [0], (ephemeris.az * u.rad).to(u.deg))
        self.names = np.insert(self.names, [0], ephemeris.name)
    return self.names, self.vmag, self.alt, self.az
def calcEarthParams(layerThickness, layerResistivity):
    """"""
    nLayers = len(layerResistivity["min"])  # or 'max'
    thicknessParam = np.empty((nLayers,))
    resistivityParam = np.empty((nLayers,))

    # Iterate through the layers, applying the p formula to both
    # thickness and resistivity
    for i in range(nLayers):
        # Generate a random number to control where in the range of
        # possible values the true value of p could lie. This precedes the
        # MC iteration, so take one p value with a grain of salt, but many
        # with a salt shaker
        randomNumber = np.random.random_sample()

        if i < (nLayers - 1):  # Skip last depth (infinite)
            thicknessP = (layerThickness["max"][i] -
                          layerThickness["min"][i]) * randomNumber + layerThickness["min"][i]
            thicknessParam = np.insert(thicknessParam, i, thicknessP)
            del thicknessP

        resistivityP = (layerResistivity["max"][i] -
                        layerResistivity["min"][i]) * randomNumber + layerResistivity["min"][i]
        resistivityParam = np.insert(resistivityParam, i, resistivityP)
        del resistivityP

    return (thicknessParam[:nLayers - 1], resistivityParam[:nLayers])
def trainNN(self, imagesTrainSet, labelsTrainSet, etha):
    self.reset_weights()
    trainingSetSize = labelsTrainSet.shape[0]
    j = 0
    while j < 30:
        i = 0
        # print("Round: " + str(j + 1))
        while i < trainingSetSize:
            x = imagesTrainSet[i].ravel()  # Convert 28x28 pixel image into a (784,) vector
            x = np.array([0 if val == 0 else 1 for val in x])
            x_a = np.insert(x, 0, values=1, axis=0)  # Augmented Feature vector

            net_hidd = np.dot(self.w1, x_a)
            y = self.signum(net_hidd)
            y_a = np.insert(y, 0, values=1, axis=0)  # Augmented Feature vector

            net_out = np.dot(self.w2, y_a)
            z = self.signum(net_out)

            lab = np.array([1 if k == self.labels[i] else 0 for k in range(10)])
            J = z - lab
            J = np.sum(0.5 * J * J)
            if J < 1 and self.enableWeightDecay:
                break

            out_sensitivity = (lab - z) * self.signum_prime(net_out)
            net_hidd_prime = self.signum_prime(net_hidd)
            hid_sensitivity = np.dot(self.w2.T, out_sensitivity) * np.insert(net_hidd_prime, 0, 1)

            grad_hidd_out = etha * np.outer(out_sensitivity, y_a.T)
            grad_in_hidd = etha * np.outer(hid_sensitivity[1:], x_a.T)

            self.update_weights_bias(grad_in_hidd, grad_hidd_out)
            i += 1
        j += 1
    return self.w1, self.w2
def fit(self, X, sample_weight=None, **kwargs):
    # Checks
    X = check_array(X)

    if sample_weight is not None and len(sample_weight) != len(X):
        raise ValueError

    # Compute histogram and edges
    h, e = np.histogramdd(X, bins=self.bins, range=self.range,
                          weights=sample_weight, normed=True)

    # Add empty bins for out of bound samples
    for j in range(X.shape[1]):
        h = np.insert(h, 0, 0., axis=j)
        h = np.insert(h, h.shape[j], 0., axis=j)
        e[j] = np.insert(e[j], 0, -np.inf)
        e[j] = np.insert(e[j], len(e[j]), np.inf)

    if X.shape[1] == 1 and self.interpolation:
        inputs = e[0][2:-1] - (e[0][2] - e[0][1]) / 2.
        inputs[0] = e[0][1]
        inputs[-1] = e[0][-2]
        outputs = h[1:-1]
        self.interpolation_ = interp1d(inputs, outputs,
                                       kind=self.interpolation,
                                       bounds_error=False, fill_value=0.)

    self.histogram_ = h
    self.edges_ = e
    self.ndim_ = X.shape[1]
    return self
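# Minimal sketch (synthetic data) of the padding above: an extra zero bin on
# each side plus -inf/+inf edges lets out-of-range queries fall into an empty
# bin instead of indexing past the histogram.
import numpy as np

h, e = np.histogram(np.random.rand(100), bins=5, density=True)
h = np.insert(h, 0, 0.)
h = np.insert(h, len(h), 0.)
e = np.insert(e, 0, -np.inf)
e = np.insert(e, len(e), np.inf)
print(h[np.searchsorted(e, 10.0) - 1])   # 0.0 for a query far outside the data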
def forwardPropPredict(nn_params, input_layer_size, hidden_layer_size, num_labels, X):
    length1 = (input_layer_size + 1) * hidden_layer_size
    nn1 = nn_params[:length1]
    T1 = nn1.reshape((hidden_layer_size, input_layer_size + 1))
    nn2 = nn_params[length1:]
    T2 = nn2.reshape((num_labels, 1 + hidden_layer_size))

    m = X.shape[0]  # number of training examples, useful for calculations
    max_pred = 0
    predictions = []

    # for each training example
    train_ex = -1  # training example number we're on (ie. which row of input matrix)
    for x in X:
        train_ex += 1
        # forward propagation
        a1 = x
        a1 = np.insert(a1, 0, 1, axis=0)
        z2 = np.dot(T1, a1)
        a2 = sigmoid(z2)
        a2 = np.insert(a2, 0, 1, axis=0)
        z3 = np.dot(T2, a2)
        a3 = sigmoid(z3)
        predictions.append(int(np.argmax(a3)))
    return predictions
def _summarize_simulations(self, lmda, sim_vector, date_index, h, past_values):
    """ Summarizes a simulation vector and a mean vector of predictions

    Parameters
    ----------
    lmda : np.array
        Past volatility values for the model

    sim_vector : np.array
        N simulation predictions for h-step ahead forecasts

    date_index : pd.DateIndex or np.array
        Dates for the simulations

    h : int
        How many steps ahead are forecast

    past_values : int
        How many past observations to include in the forecast plot

    intervals : Boolean
        Would you like to show prediction intervals for the forecast?
    """
    mean_values = np.append(lmda, np.array([np.mean(i) for i in sim_vector]))
    error_bars = []
    for pre in range(5, 100, 5):
        error_bars.append(np.insert([np.percentile(i, pre) for i in sim_vector], 0, mean_values[-h-1]))
    forecasted_values = np.insert([np.mean(i) for i in sim_vector], 0, mean_values[-h-1])
    plot_values = mean_values[-h-past_values:]
    plot_index = date_index[-h-past_values:]
    return error_bars, forecasted_values, plot_values, plot_index
def next(self):
    totim, dt, kper, kstp, swrstp, success = self.read_header()
    if success == False:
        # print 'SWR_Stage.next() object reached end of file'
        return 0.0, 0.0, 0, 0, 0, False, self.null_record
    else:
        if self.type > 0:
            #r = numpy.zeros((self.items+1))
            r = numpy.zeros((self.items + 2))
            for rec in range(0, self.nrecord):
                #nlay = self.read_integer()
                nlay = self.reachlayers[rec]
                for lay in range(0, nlay):
                    this_lay = self.read_integer()
                    this_items = self.read_items()
                    this_r = numpy.insert(this_items, [0], this_lay)
                    this_r = numpy.insert(this_r, [0], rec + 1)
                    #print totim,this_lay,numpy.shape(r),numpy.shape(this_r)
                    r = numpy.vstack((r, this_r))
            r = numpy.delete(r, 0, axis=0)
            return totim, dt, kper, kstp, swrstp, True, r
        else:
            r = self.read_record()
            # print 'SWR data read for time step ',kstp,',stress period \
            # ',kper,'and swr step ',swrstp
            return totim, dt, kper, kstp, swrstp, True, r
def insert(self, obj, values):
    """
    Insert values before the given indices in the column and return
    a new `~astropy.table.Column` object.

    Parameters
    ----------
    obj : int, slice or sequence of ints
        Object that defines the index or indices before which ``values`` is inserted.
    values : array_like
        Value(s) to insert.  If the type of ``values`` is different
        from that of quantity, ``values`` is converted to the matching type.
        ``values`` should be shaped so that it can be broadcast appropriately

    Returns
    -------
    out : `~astropy.table.Column`
        A copy of column with ``values`` and ``mask`` inserted.  Note that the
        insertion does not occur in-place: a new column is returned.
    """
    if self.dtype.kind == 'O':
        # Even if values is array-like (e.g. [1,2,3]), insert as a single
        # object.  Numpy.insert instead inserts each element in an array-like
        # input individually.
        data = np.insert(self, obj, None, axis=0)
        data[obj] = values
    else:
        # Explicitly convert to dtype of this column.  Needed because numpy 1.7
        # enforces safe casting by default, so .  This isn't the case for 1.6 or 1.8+.
        values = np.asarray(values, dtype=self.dtype)
        data = np.insert(self, obj, values, axis=0)
    out = data.view(self.__class__)
    out.__array_finalize__(self)
    return out
def cdf_vals_from_data(data, numbins=None, maxbins=None):
    # make sure data is a numpy array
    data = numpy.array(data)

    # by default, use numbins equal to number of distinct values
    # TODO: shouldn't this be one per possible x val?
    if numbins == None:
        numbins = numpy.unique(data).size
    if maxbins != None and numbins > maxbins:
        numbins = maxbins

    # bin the data and count fraction of points in each bin (for PDF)
    rel_bin_counts, min_bin_x, bin_size, _ =\
        stats.relfreq(data, numbins, (data.min(), data.max()))

    # bin the data and count each bin (cumulatively) (for CDF)
    cum_bin_counts, min_bin_x, bin_size, _ =\
        stats.cumfreq(data, numbins, (data.min(), data.max()))

    # normalize bin counts so rightmost count is 1
    cum_bin_counts /= cum_bin_counts.max()

    # make array of x-vals (lower end of each bin)
    x_vals = numpy.linspace(min_bin_x, min_bin_x + bin_size*numbins, numbins)

    # CDF always starts at y=0
    cum_bin_counts = numpy.insert(cum_bin_counts, 0, 0)  # y = 0
    cdf_x_vals = numpy.insert(x_vals, 0, x_vals[0])  # x = min x

    return cum_bin_counts, cdf_x_vals, rel_bin_counts, x_vals
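# Tiny sketch of the last two inserts above (toy values): duplicating the first
# x value and prepending y=0 pins the plotted CDF so it starts at zero.
import numpy

x_vals = numpy.array([1.0, 2.0, 3.0])
cum_bin_counts = numpy.array([0.4, 0.8, 1.0])
print(numpy.insert(cum_bin_counts, 0, 0))   # [0.  0.4 0.8 1. ]
print(numpy.insert(x_vals, 0, x_vals[0]))   # [1. 1. 2. 3.]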
def insert(array, obj, values):
    """Insert values along the given axis before the given indices.

    Parameters:
    -----------
    arr : array_like
        Input array.
    obj : int, slice or sequence of ints
        Object that defines the index or indices before which values is inserted.
    values : array_like
        Values to insert into arr. If the type of values is different from that of arr,
        values is converted to the type of arr.
    axis : int, optional
        Axis along which to insert values. If axis is None then arr is flattened first.

    Returns:
    --------
    out : ndarray
        A copy of arr with values inserted. Note that insert does not occur in-place:
        a new array is returned. If axis is None, out is a flattened array.
    """
    if isphysicalquantity(array):
        return np.insert(array.value, obj, values.value) * q[array.unit]
    else:
        return np.insert(array, obj, values)
def loess_query(x_query, X, y, alpha):
    if not isinstance(x_query, np.ndarray):
        x_query = np.array(x_query)
    elif isinstance(x_query, np.matrix):
        x_query = x_query.A
    if not isinstance(X, np.matrix):
        raise TypeError('X must be of type np.matrix')
    if isinstance(y, np.ndarray):
        y = np.mat(y).T
    if alpha <= 0 or alpha > 1:
        raise ValueError('ALPHA must be between 0 and 1')

    # inserting constant ones into X and X_QUERY for intercept term
    X = np.insert(X, obj=0, values=1, axis=1)
    x_query = np.insert(x_query, obj=0, values=1)

    # computing weights matrix using a tricube weight function
    W = weights_matrix(x_query, X, alpha)

    # computing theta from closed form solution to locally weighted linreg
    theta = (X.T * W * X).I * X.T * W * y

    # returning prediction
    return np.matrix.dot(theta.A.T, x_query)
def transform(self, pos=(0, 0), angle=0, scale=1):
    '''In-plane transformation function. Update the 3D transform based on the 2D changes'''
    center = self.shape * self.spacing / 2. + (self.shape + 1) % 2 * self.spacing / 2.
    inv = self.xfm.transform.homogeneous_inverse
    wpos = self.handle.center.representation.world_position
    wpos -= center
    if not isinstance(scale, (tuple, list, np.ndarray)):
        scale = [scale, scale]
    if self.axis == 1:
        trans = np.insert(pos[:2][::-1], self.axis, 0)
        wpos = np.insert(wpos[:2][::-1], self.axis, self.ipw_3d.ipw.slice_position)
        #angle = -angle
    else:
        trans = np.insert(pos[:2], self.axis, 0)
        wpos = np.insert(wpos[:2], self.axis, self.ipw_3d.ipw.slice_position)
    scale = np.insert(scale, self.axis, 1)

    self.parent._undolist.append(self.xfm.transform.matrix.to_array())

    self.xfm.transform.post_multiply()
    self.xfm.transform.translate(-wpos)
    self.xfm.transform.rotate_wxyz(np.degrees(angle), *self.ipw_3d.ipw.normal)
    self.xfm.transform.scale(scale)
    self.xfm.transform.translate(wpos)
    self.xfm.transform.translate(trans)
    self.xfm.transform.pre_multiply()

    self.xfm.widget.set_transform(self.xfm.filter.transform)
    self.xfm.update_pipeline()
    self.parent.update_slabs()

    np.save("/tmp/last_xfm.npy", self.parent.get_xfm())
def eta2direct(self, x):
    """eta2direct(x)

    Args:
        - x (``array-like``): a chromosome encoding an MGA trajectory in the eta encoding

    Returns:
        ``numpy.array``: a chromosome encoding the MGA trajectory using the direct encoding

    Raises:
        - ValueError: when the tof_encoding is not 'eta'
    """
    if self.tof_encoding != 'eta':
        raise ValueError(
            "cannot call this method if the tof_encoding is not 'eta'")

    # decision vector is [t0, n1, n2, n3, ... ]
    n = len(x) - 1
    dt = self.tof
    T = [0] * n
    T[0] = dt * x[1]
    for i in range(1, len(T)):
        T[i] = (dt - sum(T[:i])) * x[i + 1]
    np.insert(T, 0, [0])
    return T
def hawkesfeat(timeseries, args):
    '''
    Generate hawkes feature: positive rate/negative rate
    args['params']: 1X8 ndarray containing the params of the hawkes process
    '''
    # Assign parameters
    params = args['params'] if 'params' in args.keys() else np.array([0.2, 0.2, 0.2, 0.7, 0.7, 0.2, 1.0, 1.0])
    # Utilize the rate calculation function in the hawkes simulator
    sim = simulator(theta=params)
    sim.sethistory(timeseries)
    rate = sim.historydata[:, 2] / sim.historydata[:, 3]
    rate = np.insert(rate, 0, params[0] / params[1]).reshape(-1, 1)
    time = np.insert(sim.historydata[:, 0], 0, 0.0).reshape(-1, 1)
    time = np.cumsum(time, axis=0)
    value = np.hstack((time, rate))
    value = value.astype(object, copy=False)
    value[:, 0] = Vsecond2delta(value[:, 0])
    anchor = timeseries.values[0]
    anchor[1] = 0.0
    value = value + anchor
    rateseries = pd.DataFrame(value, columns=['time', 'quantity'])
    rateseries.index = rateseries['time']
    rateseries = rateseries.reindex(timeseries.index, method='ffill')
    return rateseries
def balance_workload(nproc, popsize, *index, **kwds):
    """divide popsize elements on 'nproc' chunks

    nproc: int number of nodes
    popsize: int number of jobs
    index: int rank of node(s) to calculate for (using slice notation)
    skip: int rank of node upon which to not calculate (i.e. the master)

    returns (begin, end) index vectors
    """
    _skip = False
    skip = kwds.get('skip', None)
    if skip is not None and skip < nproc:
        nproc = nproc - 1
        _skip = True
    count = np.round(popsize/nproc)
    counts = count * np.ones(nproc, dtype=np.int)
    diff = popsize - count*nproc
    counts[:diff] += 1
    begin = np.concatenate(([0], np.cumsum(counts)[:-1]))
    #return counts, index #XXX: (#jobs, begin index) for all elements
    if _skip:
        if skip == nproc:  # remember: nproc has been reduced
            begin = np.append(begin, begin[-1]+counts[-1])
            counts = np.append(counts, 0)
        else:
            begin = np.insert(begin, skip, begin[skip])
            counts = np.insert(counts, skip, 0)
    if not index:
        return begin, begin+counts #XXX: (begin, end) index for all elements
    #if len(index) > 1:
    #    return lookup((begin, begin+counts), *index) # index a slice
    return lookup((begin, begin+counts), *index) # index a single element
def chans(self, invert=False):
    """ Method to convert the bit mask into a string of channel
        ranges in CASA format. e.g. [3,10],[25,50] => "3~10;25~50"

        Parameters
        ----------
        None

        Returns
        -------
        string containing the formatted channel ranges
    """
    output = ""
    if invert:
        basechan = np.append(1 - self._chans, 0)
        shiftchan = np.insert(1 - self._chans, 0, 0)
    else:
        basechan = np.append(self._chans, 0)
        shiftchan = np.insert(self._chans, 0, 0)
    diff = basechan - shiftchan
    st = np.where(diff == 1)[0]
    en = np.where(diff == -1)[0]
    first = True
    for seg in zip(st, en):
        if not first:
            output += ";"
        else:
            first = False
        output += str(seg[0] + self._startchan) + "~" + str(seg[1] - 1 + self._startchan)
    return output
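# Standalone sketch of the append/insert shift trick above (toy mask):
# differencing a mask against a copy shifted by one exposes run starts (+1)
# and run ends (-1).
import numpy as np

chans = np.array([0, 1, 1, 1, 0, 1, 1, 0])
base = np.append(chans, 0)
shift = np.insert(chans, 0, 0)
diff = base - shift
print(np.where(diff == 1)[0], np.where(diff == -1)[0])   # starts [1 5], ends [4 7]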
def data_concatenate(list_data_neuro):
    """
    Tool function for blk_align_to_evt, makes sure the blocks contain the same number of signals

    :param list_data_neuro: a list of data_neuro
    :return:                concatenated data_neuro
    """
    data_neuro_all = {}
    for i, data_neuro in enumerate(list_data_neuro):
        if i == 0:  # if the first block, copy it
            data_neuro_all = data_neuro
        else:  # for next incoming blocks
            if len(data_neuro['ts']) == len(data_neuro_all['ts']):
                # check if ts length matches, otherwise raise error
                # check if signals match, if not match, fill the missing signal with all zeros
                if not np.array_equal(data_neuro['signal_info'], data_neuro_all['signal_info']):
                    for indx_signal_new, signal_new in enumerate(data_neuro['signal_info']):
                        # if emerging signal
                        if signal_new not in data_neuro_all['signal_info']:
                            data_neuro_all['signal_info'] = np.insert(data_neuro_all['signal_info'], indx_signal_new, signal_new)
                            data_neuro_all['data'] = np.insert(data_neuro_all['data'], indx_signal_new, 0.0, axis=2)
                    for indx_signal_old, signal_old in enumerate(data_neuro_all['signal_info']):
                        # if missing signal
                        if signal_old not in data_neuro['signal_info']:
                            data_neuro['signal_info'] = np.insert(data_neuro['signal_info'], indx_signal_old, signal_old)
                            data_neuro['data'] = np.insert(data_neuro['data'], indx_signal_old, 0.0, axis=2)
                # concatenate
                data_neuro_all['data'] = np.concatenate((data_neuro_all['data'], data_neuro['data']), axis=0)
            else:
                print('function data_concatenate can not work with data of different "ts" length')
                warnings.warn('function data_concatenate can not work with data of different "ts" length')
    return data_neuro_all
def get_affine_inliers_RANSAC(num_m, xy1_m, xy2_m,
                              acd1_m, acd2_m, xy_thresh_sqrd, sigma_thresh_sqrd=None):
    '''Computes initial inliers by iteratively computing affine transformations
    between matched keypoints'''
    aff_inliers = []
    # Enumerate All Hypothesis (Match transformations)
    for mx in xrange(num_m):
        xy1 = xy1_m[:, mx].reshape(2, 1)  # XY Positions
        xy2 = xy2_m[:, mx].reshape(2, 1)
        A1 = matrix(insert(acd1_m[:, mx], [1.], 0.)).reshape(2, 2)
        A2 = matrix(insert(acd2_m[:, mx], [1.], 0.)).reshape(2, 2)
        # Compute Affine Transform
        # from img1 to img2 = (E2\E1)
        Aff = linalg.inv(A2).dot(A1)
        #
        # Transform XY-Positions
        xy1_mAt = xy2 + Aff.dot((xy1_m - xy1))
        xy_err_sqrd = sum(power(xy1_mAt - xy2_m, 2), 0)
        _inliers = find(xy_err_sqrd < xy_thresh_sqrd)
        #
        # Transform Ellipse Geometry (solved on paper)
        if not sigma_thresh_sqrd is None:
            scale1_mAt = (acd1_m[0]*Aff[0, 0]) *\
                         (acd1_m[1]*Aff[1, 0] + acd1_m[2]*Aff[1, 1])
            scale2_m = acd2_m[0] * acd2_m[2]
            scale_err = np.abs(scale1_mAt - scale2_m)
            _inliers_scale = find(scale_err < sigma_thresh_sqrd)
            _inliers = np.bitwise_and(_inliers, _inliers_scale)
        # If this hypothesis transformation is better than the ones we have
        # previously seen then set it as the best
        if len(_inliers) > len(aff_inliers):
            aff_inliers = _inliers
            #bst_xy_err = xy_err_sqrd
    return aff_inliers
def set_params(self):
    r"""
    Internally, scipy.signal works with systems of the form

    .. math::

        ar_{poly}(L) X_t = ma_{poly}(L) \epsilon_t

    where L is the lag operator.

    To match this, we set

    .. math::

        ar_{poly} = (1, -\phi_1, -\phi_2, ..., -\phi_p)

        ma_{poly} = (1, \theta_1, \theta_2, ..., \theta_q)

    In addition, ar_poly must be at least as long as ma_poly.
    This can be achieved by padding it out with zeros when required.
    """
    # === set up ma_poly === #
    ma_poly = np.asarray(self._theta)
    self.ma_poly = np.insert(ma_poly, 0, 1)  # The array (1, theta)

    # === set up ar_poly === #
    if np.isscalar(self._phi):
        ar_poly = np.array(-self._phi)
    else:
        ar_poly = -np.asarray(self._phi)
    self.ar_poly = np.insert(ar_poly, 0, 1)  # The array (1, -phi)

    # === pad ar_poly with zeros if required === #
    if len(self.ar_poly) < len(self.ma_poly):
        temp = np.zeros(len(self.ma_poly) - len(self.ar_poly))
        self.ar_poly = np.hstack((self.ar_poly, temp))
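# Quick sketch (hypothetical phi/theta values) of the polynomial layout that
# set_params builds for scipy.signal: the AR lags enter with a flipped sign and
# the AR side is padded with zeros when it is shorter than the MA side.
import numpy as np

phi = np.array([0.5])                # assumed AR coefficients
theta = np.array([0.4, 0.3])         # assumed MA coefficients
ar_poly = np.insert(-phi, 0, 1)      # (1, -0.5)
ma_poly = np.insert(theta, 0, 1)     # (1, 0.4, 0.3)
if len(ar_poly) < len(ma_poly):
    ar_poly = np.hstack((ar_poly, np.zeros(len(ma_poly) - len(ar_poly))))
print(ar_poly, ma_poly)              # [ 1.  -0.5  0. ] [1.  0.4 0.3]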
def polyadd(p1, p2):
    s1 = np.size(p1)
    s2 = np.size(p2)
    length = max(s1, s2)
    p1 = np.insert(p1, np.zeros(length - s1 >= 0 and length - s1 or 0, dtype=int), 0)
    p2 = np.insert(p2, np.zeros(length - s2 >= 0 and length - s2 or 0, dtype=int), 0)
    return p1 + p2
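# Usage sketch for polyadd above (assumes numpy imported as np, as elsewhere in
# this file): coefficient arrays are left-padded with zeros to a common length
# before adding, so polynomials of different degree line up.
import numpy as np

print(polyadd(np.array([1, 2, 3]), np.array([4, 5])))   # [1 6 8], i.e. x**2 + 6x + 8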
np.sort(arr, order=['grad_year', 'gpa']))

# NUMPY FUNCTIONS FOR APPENDING ARRAYS BY INSERTING, DELETING, JOINING,
# SPLITTING, CHANGING SHAPE AND CONVERTING ARRAY TO A DIFFERENT TYPE
my_array = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.int64)
my_2d_array = my_array
new_array = np.append(my_array, [7, 8, 9, 10])
new_2darray = np.append(my_2d_array, [[7], [8]], axis=1)  # you switch around the shape of the array

# Print `my_2d_array`
print(my_2d_array)

# print the array returned by np.insert (the original my_array is unchanged)
print("Array after inserting: \n", np.insert(my_array, 1, 5))

# print the array returned by np.delete (the original my_array is unchanged)
print("Array after deleting an element: \n", np.delete(my_array, [1]))

# Select elements at (1,0), (0,1), (1,2) and (0,0)
print("Elements at (1,0), (0,1), (1,2) and (0,0) of my_2d_array :\n",
      my_2d_array[[1, 0, 1, 0], [0, 1, 2, 0]])

# Select a subset of the rows and columns
print("Subset of my_2d_array : \n",
      my_2d_array[[1, 0, 1, 0]][:, [0, 1, 2, 0]])

# ARRAY INDEXING
index_array = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
def update(self, map, state, PLAN, world_view): # this flag is set for one time step to signal a redraw in the viewer # planner_flag = 1 # return simple waypoint path # planner_flag = 2 # return dubins waypoint path # planner_flag = 3 # plan path through city using straight-line RRT # planner_flag = 4 # plan path through city using dubins RRT planner_flag = 5 # plan path through city using modified dubins RRT if planner_flag == 1: self.waypoints.type = 'fillet' self.waypoints.num_waypoints = 4 Va = 25 self.waypoints.ned[:, 0:self.waypoints.num_waypoints] \ = np.array([[0, 0, -100], [1000, 0, -100], [0, 1000, -100], [1000, 1000, -100]]).T self.waypoints.airspeed[:, 0:self.waypoints.num_waypoints] \ = np.array([[Va, Va, Va, Va]]) elif planner_flag == 2: self.waypoints.type = 'dubins' self.waypoints.num_waypoints = 4 Va = 25 self.waypoints.ned[:, 0:self.waypoints.num_waypoints] \ = np.array([[0, 0, -100], [1000, 0, -100], [0, 1000, -100], [1000, 1000, -100]]).T self.waypoints.airspeed[:, 0:self.waypoints.num_waypoints] \ = np.array([[Va, Va, Va, Va]]) self.waypoints.course[:, 0:self.waypoints.num_waypoints] \ = np.array([[np.radians(0), np.radians(45), np.radians(45), np.radians(-135)]]) elif planner_flag == 3: self.waypoints.type = 'fillet' self.waypoints.num_waypoints = 0 Va = 25 primaryWaypoints = np.array([[0., 0., -100.], [2000., 0., -100.], [0., 1200., -100.], [3000., 3000., -100.]]).T primaryWaypointsAirspeed = np.array([[Va, Va, Va, Va]]) for i in range(0, np.size(primaryWaypoints,1)): # current configuration vector format: N, E, D, Va if i == 0 and np.sqrt((state.pn - primaryWaypoints[0,0])**2 + (state.pe - primaryWaypoints[1,0])**2) > 150: wpp_start = np.array([state.pn, state.pe, primaryWaypoints[0,0], state.Va]) self.waypoints.ned[:, self.waypoints.num_waypoints] = wpp_start[0:3] self.waypoints.airspeed[:, self.waypoints.num_waypoints] = wpp_start.item(3) self.waypoints.num_waypoints += 1 elif i == 0: self.waypoints.ned[:, self.waypoints.num_waypoints] = np.array([primaryWaypoints[0, 0], primaryWaypoints[1, 0], primaryWaypoints[2, 0]]) self.waypoints.airspeed[:, self.waypoints.num_waypoints] = primaryWaypointsAirspeed.item(0) self.waypoints.num_waypoints += 1 continue else: wpp_start = np.array([primaryWaypoints[0,i-1], primaryWaypoints[1,i-1], primaryWaypoints[2,i-1], primaryWaypointsAirspeed.item(i-1)]) wpp_end = np.array([primaryWaypoints[0, i], primaryWaypoints[1, i], primaryWaypoints[2, i], primaryWaypointsAirspeed.item(i)]) waypoints = self.rrt.planPath(wpp_start, wpp_end, map) numNew = waypoints.num_waypoints-1 numOld = self.waypoints.num_waypoints if numNew >1: self.waypoints.ned[:, numOld:numOld + numNew] = waypoints.ned[:, 1:numNew+1] self.waypoints.airspeed[:,numOld:numOld + numNew] = wpp_end.item(3) * np.ones((1, numNew)) else: self.waypoints.ned[:, numOld] = waypoints.ned[:, 1] self.waypoints.airspeed[:,numOld] = wpp_end.item(3) self.waypoints.num_waypoints = numNew + numOld elif planner_flag == 4: self.waypoints.type = 'dubins' self.waypoints.num_waypoints = 0 Va = 25 numberWaypoints = 4 primaryWaypoints = np.array([[0., 0., -100.], [2000., 0., -100.], [0., 1200., -100.], [3000., 3000., -100.]]).T primaryWaypointsAirspeed = np.array([[Va, Va, Va, Va]]) primaryCourseAngles = np.array([[np.radians(0), np.radians(45), np.radians(45), np.radians(-135)]]) # numberWaypoints = 2 # primaryWaypoints = np.array([[0., 0., -100.], # [1000., 0., -100.]]).T # primaryWaypointsAirspeed = np.array([[Va, Va]]) # primaryCourseAngles = np.array([[np.radians(0), # np.radians(45)]]) for i 
in range(0, numberWaypoints): # current configuration vector format: N, E, D, Va if i == 0 and np.sqrt((state.pn - primaryWaypoints[0,0])**2 + (state.pe - primaryWaypoints[1,0])**2) > PLAN.R_min: wpp_start = np.array([state.pn, state.pe, -state.h, state.chi, state.Va]) self.waypoints.ned[:, self.waypoints.num_waypoints] = wpp_start[0:3] self.waypoints.course[:, self.waypoints.num_waypoints] = wpp_start.item(3) self.waypoints.airspeed[:, self.waypoints.num_waypoints] = wpp_start.item(4) self.waypoints.num_waypoints += 1 elif i == 0: self.waypoints.ned[:, self.waypoints.num_waypoints] = np.array([primaryWaypoints[0, 0], primaryWaypoints[1, 0], primaryWaypoints[2, 0]]) self.waypoints.course[:, self.waypoints.num_waypoints] = primaryCourseAngles.item(0) self.waypoints.airspeed[:, self.waypoints.num_waypoints] = primaryWaypointsAirspeed.item(0) self.waypoints.num_waypoints += 1 continue else: wpp_start = np.array([primaryWaypoints[0, i - 1], primaryWaypoints[1, i - 1], primaryWaypoints[2, i - 1], primaryCourseAngles.item(i-1), primaryWaypointsAirspeed.item(i-1)]) wpp_end = np.array([primaryWaypoints[0, i], primaryWaypoints[1, i], primaryWaypoints[2, i], primaryCourseAngles.item(i), primaryWaypointsAirspeed.item(i)]) waypoints = self.rrtDubins.planPath(wpp_start, wpp_end, PLAN.R_min, map) numNew = waypoints.num_waypoints-1 numOld = self.waypoints.num_waypoints if numNew > 1: self.waypoints.ned[:, numOld:numOld + numNew] = waypoints.ned[:, 1:numNew+1] self.waypoints.course[:,numOld:numOld + numNew] = waypoints.course[:,1:numNew+1] self.waypoints.airspeed[:, numOld:(numOld + numNew)] = wpp_end.item(4) * np.ones((1, numNew)) else: self.waypoints.ned[:, numOld] = waypoints.ned[:, 1] self.waypoints.course[:, numOld] = waypoints.course[:, 1] self.waypoints.airspeed[:, numOld] = wpp_end.item(4) * np.ones((1, numNew)) self.waypoints.num_waypoints = numNew + numOld elif planner_flag == 5: # self.waypoints.type = ['dubins','dubins','dubins','dubins'] self.waypoints.type = ['straight_line'] # self.waypoints.type = 'dubins' self.waypoints.num_waypoints = 0 Va = 25 numberWaypoints = 4 primaryWaypoints = np.array([[0., 0., -100.], [2000., 0., -100.], [0., 1500., -100.], [3200., 3200., -100.]]).T world_view.drawWaypointPoints(primaryWaypoints) primaryWaypointsAirspeed = np.array([[Va, Va, Va, Va]]) primaryCourseAngles = np.array([[np.radians(0), np.radians(45), np.radians(45), np.radians(-135)]]) # Make new points before the real waypoints. In line with chi from previous waypoint pointing. #At least radius open from collision?? Or just check collision? 
j = 0 prevChi = 0 while j < numberWaypoints-1: chi = np.arctan2((primaryWaypoints[1,j+1] - primaryWaypoints[1,j]), (primaryWaypoints[0,j+1] - primaryWaypoints[0,j])) primaryCourseAngles[:,j] = prevChi+.01 distBetween = PLAN.R_min*2 newWay = primaryWaypoints[:,j+1] - distBetween * np.array([np.cos(chi), np.sin(chi), 0.]).T primaryWaypoints = np.insert(primaryWaypoints, j+1, newWay, 1) primaryWaypointsAirspeed = np.insert(primaryWaypointsAirspeed, j + 1, Va, 1) primaryCourseAngles = np.insert(primaryCourseAngles, j + 1, chi, 1) numberWaypoints += 1 prevChi = chi j += 2 #Make sure far enough apart for i in range(0, numberWaypoints): # current configuration vector format: N, E, D, Va if i == 0 and np.sqrt((state.pn - primaryWaypoints[0,0])**2 + (state.pe - primaryWaypoints[1,0])**2) > PLAN.R_min: even = True wpp_start = np.array([state.pn, state.pe, -state.h, state.chi, state.Va]) self.waypoints.ned[:, self.waypoints.num_waypoints] = wpp_start[0:3] self.waypoints.course[:, self.waypoints.num_waypoints] = wpp_start.item(3) self.waypoints.airspeed[:, self.waypoints.num_waypoints] = wpp_start.item(4) self.waypoints.num_waypoints += 1 elif i == 0: even = True self.waypoints.ned[:, self.waypoints.num_waypoints] = np.array([primaryWaypoints[0, 0], primaryWaypoints[1, 0], primaryWaypoints[2, 0]]) self.waypoints.course[:, self.waypoints.num_waypoints] = primaryCourseAngles.item(0) self.waypoints.airspeed[:, self.waypoints.num_waypoints] = primaryWaypointsAirspeed.item(0) self.waypoints.num_waypoints += 1 continue else: if even: even = False else: even = True wpp_start = np.array([primaryWaypoints[0, i - 1], primaryWaypoints[1, i - 1], primaryWaypoints[2, i - 1], primaryCourseAngles.item(i-1), primaryWaypointsAirspeed.item(i-1)]) wpp_end = np.array([primaryWaypoints[0, i], primaryWaypoints[1, i], primaryWaypoints[2, i], primaryCourseAngles.item(i), primaryWaypointsAirspeed.item(i)]) if even and i != 0: self.waypoints.ned[:, self.waypoints.num_waypoints] = np.array([primaryWaypoints[0, i], primaryWaypoints[1, i], primaryWaypoints[2, i]]) self.waypoints.course[:, self.waypoints.num_waypoints] = primaryCourseAngles.item(i) self.waypoints.airspeed[:, self.waypoints.num_waypoints] = primaryWaypointsAirspeed.item(i) self.waypoints.type.append('straight_line') self.waypoints.num_waypoints += 1 continue waypoints = self.rrtDubinsProj.planPath(wpp_start, wpp_end, PLAN.R_min, map) numNew = waypoints.num_waypoints-1 numOld = self.waypoints.num_waypoints if numNew > 1: self.waypoints.ned[:, numOld:numOld + numNew] = waypoints.ned[:, 1:numNew+1] self.waypoints.course[:,numOld:numOld + numNew] = waypoints.course[:,1:numNew+1] self.waypoints.airspeed[:, numOld:(numOld + numNew)] = wpp_end.item(4) * np.ones((1, numNew)) for newI in range(0, numNew-1): self.waypoints.type.append('dubins') else: self.waypoints.ned[:, numOld] = waypoints.ned[:, 1] self.waypoints.course[:, numOld] = waypoints.course[:, 1] self.waypoints.airspeed[:, numOld] = wpp_end.item(4) * np.ones((1, numNew)) if even: self.waypoints.type.append('straight_line') else: self.waypoints.type.append('dubins') self.waypoints.num_waypoints = numNew + numOld else: print("Error in Path Planner: Undefined planner type.") return self.waypoints
def calculate_ptdf(branches, buses, index_set_branch, index_set_bus, reference_bus, base_point=BasePointType.FLATSTART, sparse_index_set_branch=None, mapping_bus_to_idx=None): """ Calculates the sensitivity of the voltage angle to real power injections Parameters ---------- branches: dict{} The dictionary of branches for the test case buses: dict{} The dictionary of buses for the test case index_set_branch: list The list of keys for branches for the test case index_set_bus: list The list of keys for buses for the test case reference_bus: key value The reference bus key value base_point: egret.model_library_defn.BasePointType The base-point type for calculating the PTDF matrix sparse_index_set_branch: list The list of keys for branches needed to compute a sparse PTDF matrix If this is None, a dense PTDF matrix is returned mapping_bus_to_idx: dict A map from bus names to indices for matrix construction. If None, will be inferred from index_set_bus. """ _len_bus = len(index_set_bus) if mapping_bus_to_idx is None: mapping_bus_to_idx = { bus_n: i for i, bus_n in enumerate(index_set_bus) } _len_branch = len(index_set_branch) _ref_bus_idx = mapping_bus_to_idx[reference_bus] ## check if the network is connected graph = construct_connection_graph(branches, mapping_bus_to_idx) connected = check_network_connection(graph, index_set_bus) J = _calculate_J11(branches, buses, index_set_branch, index_set_bus, mapping_bus_to_idx, base_point, approximation_type=ApproximationType.PTDF) A = calculate_adjacency_matrix_transpose(branches, index_set_branch, index_set_bus, mapping_bus_to_idx) M = A @ J if sparse_index_set_branch is None or len( sparse_index_set_branch) == _len_branch: ## the resulting matrix after inversion will be fairly dense, ## the scipy documenation recommends using dense for the inversion ## as well ref_bus_mask = np.ones(_len_bus, dtype=bool) ref_bus_mask[_ref_bus_idx] = False # M is now (A^T B_d A) with # row and column of reference # bus removed J0 = M[ref_bus_mask, :][:, ref_bus_mask] # (B_d A) with reference bus column removed B_dA = J[:, ref_bus_mask].A if connected: try: PTDF = np.linalg.solve(J0.T.A, B_dA.T).T except np.linalg.LinAlgError: logger.warning( "Matrix not invertible. Calculating pseudo-inverse instead." ) SENSI = np.linalg.pinv(J0.A, rcond=1e-7) PTDF = np.matmul(B_dA, SENSI) else: logger.warning( "Using pseudo-inverse method as network is disconnected") SENSI = np.linalg.pinv(J0.A, rcond=1e-7) PTDF = np.matmul(B_dA, SENSI) # insert 0 column for reference bus PTDF = np.insert(PTDF, _ref_bus_idx, np.zeros(_len_branch), axis=1) elif len(sparse_index_set_branch) < _len_branch: ref_bus_row = sp.coo_matrix(([1], ([0], [_ref_bus_idx])), shape=(1, _len_bus)) ref_bus_col = sp.coo_matrix(([1], ([_ref_bus_idx], [0])), shape=(_len_bus, 1)) J0 = sp.bmat([[M, ref_bus_col], [ref_bus_row, 0]], format='coo') B = np.array([], dtype=np.int64).reshape(_len_bus + 1, 0) _sparse_mapping_branch = { i: branch_n for i, branch_n in enumerate(index_set_branch) if branch_n in sparse_index_set_branch } ## TODO: Maybe just keep the sparse PTDFs as a dict of ndarrays? ## Right now the return type depends on the options ## passed in for idx, branch_name in _sparse_mapping_branch.items(): b = np.zeros((_len_branch, 1)) b[idx] = 1 _tmp = J.transpose() @ b _tmp = np.vstack([_tmp, 0]) B = np.concatenate((B, _tmp), axis=1) row_idx = list(_sparse_mapping_branch.keys()) PTDF = sp.lil_matrix((_len_branch, _len_bus)) _ptdf = sp.linalg.spsolve(J0.transpose().tocsr(), B).T PTDF[row_idx] = _ptdf[:, :-1] return PTDF
def print_condition_number(df):
    X = np.insert(np.array(df.values), 0, 1, axis=1)
    xpx = np.matmul(np.transpose(X), X)
    eigvals = [np.real(eig) for eig in np.linalg.eigvals(xpx)]
    print('Condition Number:' + str(abs(max(eigvals) / min(eigvals))))
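# Hypothetical usage of print_condition_number above (made-up dataframe): the
# ratio of extreme eigenvalues of X'X, with the bias column inserted, flags
# collinearity; here column 'b' is nearly 2*'a', so the printed number is large.
import numpy as np
import pandas as pd

df = pd.DataFrame({'a': [1.0, 2.0, 3.0, 4.0], 'b': [2.1, 3.9, 6.2, 8.1]})
print_condition_number(df)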
def test_insert_2():
    x = np.array([1, 2, 3])
    y = np.insert(x, 0, 4)
    z = nw.insert(x, 0, 4)
    assert_almost_equal(y, z)
def test_pose(opt):
    if not os.path.isdir(opt.output_dir):
        os.makedirs(opt.output_dir)

    ##### init #####
    input_uint8 = tf.placeholder(tf.uint8,
                                 [opt.batch_size, opt.img_height, opt.img_width, opt.seq_length * 3],
                                 name='raw_input')
    tgt_image = input_uint8[:, :, :, :3]
    src_image_stack = input_uint8[:, :, :, 3:]

    model = GeoNetModel(opt, tgt_image, src_image_stack, None)
    fetches = {"pose": model.pred_poses}

    saver = tf.train.Saver([var for var in tf.model_variables()])

    ##### load test frames #####
    seq_dir = os.path.join(opt.dataset_dir, 'sequences', '%.2d' % opt.pose_test_seq)
    img_dir = os.path.join(seq_dir, 'image_2')
    N = len(glob(img_dir + '/*.png'))
    test_frames = ['%.2d %.6d' % (opt.pose_test_seq, n) for n in range(N)]

    ##### load time file #####
    with open(opt.dataset_dir + 'sequences/%.2d/times.txt' % opt.pose_test_seq, 'r') as f:
        times = f.readlines()
    times = np.array([float(s[:-1]) for s in times])

    ##### Go! #####
    max_src_offset = (opt.seq_length - 1) // 2
    with tf.Session() as sess:
        saver.restore(sess, opt.init_ckpt_file)

        for tgt_idx in range(max_src_offset, N - max_src_offset, opt.batch_size):
            if (tgt_idx - max_src_offset) % 100 == 0:
                print('Progress: %d/%d' % (tgt_idx - max_src_offset, N))

            inputs = np.zeros((opt.batch_size, opt.img_height, opt.img_width, 3 * opt.seq_length),
                              dtype=np.uint8)

            for b in range(opt.batch_size):
                idx = tgt_idx + b
                if idx >= N - max_src_offset:
                    break
                image_seq = load_image_sequence(opt.dataset_dir,
                                                test_frames,
                                                idx,
                                                opt.seq_length,
                                                opt.img_height,
                                                opt.img_width)
                inputs[b] = image_seq

            pred = sess.run(fetches, feed_dict={input_uint8: inputs})
            pred_poses = pred['pose']
            # Insert the target pose [0, 0, 0, 0, 0, 0]
            pred_poses = np.insert(pred_poses, max_src_offset, np.zeros((1, 6)), axis=1)

            for b in range(opt.batch_size):
                idx = tgt_idx + b
                if idx >= N - max_src_offset:
                    break
                pred_pose = pred_poses[b]
                curr_times = times[idx - max_src_offset:idx + max_src_offset + 1]
                out_file = opt.output_dir + '%.6d.txt' % (idx - max_src_offset)
                dump_pose_seq_TUM(out_file, pred_pose, curr_times)
def insert_oxide_thickness(path_train_data, labels):
    train_data = np.loadtxt(path_train_data, skiprows=1)
    labels = np.insert(labels, 1, train_data[:, 1])
    return labels
def check_lightcurve_time(light_curve, exposure_time, frame_time):
    """Check to be sure the provided lightcurve is long enough to cover the
    supplied total exposure time. If not, lengthen at the beginning or end
    such that it does. Times will only be added to the beginning if the
    first time entry in the lightcurve is > 0. Lightcurves where the initial
    time entry is < 0 will have all times < 0 chopped. This will allow the
    user to simulate lightcurves where the exposure starts somewhere in the
    middle of the lightcurve.

    Parameters
    ----------
    light_curve : dict
        Dictionary of lightcurve. "fluxes" and "times" keys contain arrays
        of those values

    exposure_time : float
        Total exposure time for the full exposure being simulated (in seconds)

    frame_time : float
        Exposure time of a single frame of the observation

    Returns
    -------
    light_curve : dict
        Potentially modified with added or removed elements
    """
    times = copy.deepcopy(light_curve["times"].value)
    fluxes = copy.deepcopy(light_curve["fluxes"].value)
    time_units = light_curve["times"].unit
    flux_units = light_curve["fluxes"].unit
    adjusted = False

    # Remove elements where time < 0.
    if np.min(times) < 0.:
        positive_times = times >= 0.
        times = times[positive_times]
        fluxes = fluxes[positive_times]
        adjusted = True

    # If the times begin at values significantly > 0,
    # then add entries to bring the start back to time = 0
    if np.min(times) > 0.:
        print(("Lightcurve time values do not start at zero. Prepending an entry with time=0 "
               "and flux = 1."))
        times = np.insert(times, 0, 0.)
        fluxes = np.insert(fluxes, 0, 1.)
        adjusted = True

    # If the ending time is less than the exposure's total
    # observation time, then add entries with flux=1
    if np.max(times) < exposure_time:
        print(("Lightcurve time values extend only to {} seconds. This is not long enough "
               "to cover the entire exposure time of {} seconds. Extending to cover the full "
               "exposure time with flux = 1.".format(np.max(times), exposure_time)))
        times = np.append(times, exposure_time + 5 * frame_time)
        fluxes = np.append(fluxes, 1.)
        adjusted = True

    if adjusted:
        light_curve["times"] = times * time_units
        light_curve["fluxes"] = fluxes * flux_units
    return light_curve
assert n_stations == full_data.shape[0]
print('n_stations: {}, n_days: {}'.format(n_stations, n_days))

neighbour = 5

leap_years = np.zeros_like(years).astype(np.bool)
for i, in_year in enumerate(np.split(valid_days, len(years))):
    leap_years[i] = in_year.sum() == 366

w_months = np.tile(np.repeat(np.arange(12), 31), years[-1] - years[0] + 1)[valid_days]

w_days = np.tile(np.arange(365), years[-1] - years[0] + 1)
for i, leap in enumerate(leap_years):
    if leap:
        w_days = np.insert(w_days, ((i + 1) * 365), 365)
w_days_sin = np.sin(w_days / 367 * np.pi)
w_days_cos = -np.cos(w_days / 367 * np.pi * 2) / 2 + 0.5

if dense:
    EXP = 'future'
    datas_temp_reg, keep_reg, gReg = clean_nodes(full_data, [1, 3], lon, lat, figs=True, rad=False)
    from GHCN_preprocessing import dataset_reg
    training, validation = dataset_reg(datas_temp_reg, lon[keep_reg], lat[keep_reg], alt[keep_reg],
def linear_normal(X, Y):
    X = np.insert(X, 0, 1, 1)
    # return parameters as numpy array
    return np.linalg.pinv(X.transpose() @ X) @ X.transpose() @ Y
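# Tiny usage sketch (synthetic data): inserting a column of ones at position 0
# adds the intercept term, so the returned vector is [bias, slope] for this 1-D fit.
import numpy as np

X = np.array([[0.0], [1.0], [2.0], [3.0]])
Y = np.array([1.0, 3.0, 5.0, 7.0])            # y = 2x + 1
Xb = np.insert(X, 0, 1, 1)                    # prepend bias column
print(np.linalg.pinv(Xb.T @ Xb) @ Xb.T @ Y)   # approximately [1. 2.]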
def CriticalT(T, E):
    Epoly = numpy.poly1d(numpy.polyfit(T, E, T.size // 3))
    dEpoly = numpy.diff(Epoly(T))
    dEpoly = numpy.insert(dEpoly, 0, 0)
    return T[numpy.argmin(dEpoly)]
def generateNlist(d_list, gray_list, lamb):
    # Conversion from gray values to refractive index
    maximum = 67  # np.amax(gray_list) #71
    minimum = 40  # np.amin(gray_list) #40
    n_list = np.zeros((lamb.size, gray_list.size), dtype=np.complex_)
    n1 = np.zeros((lamb.size, gray_list.size), dtype=np.complex_)
    n2 = np.zeros((lamb.size, gray_list.size), dtype=np.complex_)
    nmelanin = np.zeros((lamb.size, gray_list.size), dtype=np.complex_)
    n_list_converted = np.zeros((lamb.size, gray_list.size), dtype=np.complex_)
    #for k in 1/lamb:
    BR1 = 9464.8
    AR1 = 1.515  # 1.5145
    BR2 = 20700  # B Coefficient in Cauchy's equation (real)
    AR2 = 1.648  # Calculates Cauchy's A from given values of B and n at 600nm
    BI2 = 210    # B coefficient in exponential equation (imaginary part) #210
    AI2 = 0.5    # A coefficient in exponential equation (im part) #0.56 #3.0

    for i in range(len(lamb)):
        k = 1 / lamb[i]
        #n1 = AR1 + BR1*(k**2) + 0*1j+ AI1 + BI1*(k**2)
        n1[i, :] = AR1 + BR1 * (k**2)
        nmelanin[i, :] = AR2 + BR2 * (k**2) + (AI2 * np.exp(-1 / (BI2 * k))) * 1j
        qpigm = 0.70  # 0.85
        n2[i, :] = n1[i, :] * (1 - qpigm) + nmelanin[i, :] * qpigm
        conversion = (n2 - n1) / (maximum - minimum)
        gray_list_trans = np.zeros((1, gray_list.size))
        gray_list_trans[:, 0:] = gray_list[0:]
        for j in range(len(gray_list)):
            n_list_converted[i, j] = n2[i, j] - conversion[i, j] * (gray_list_trans[0, j] - minimum)
        wavel = 1 / k
        n1i = n1.imag
        n2i = n2.imag

##    if DEBUG:
##        plt.figure(3)
##        plt.plot(wavel, n1, marker='o', ms = 10, alpha=1, color='b', label='Chitin layer')
##        plt.plot(wavel, n2, marker='o', ms = 10, alpha=1, color='k', label='Melanin layer')
##        plt.xlabel('Wavelength (/nm)')
##        plt.ylabel('Refractive index')
##        plt.title('Dispersion relations, imaginary part. Blue = Chitin layer. Black = Melanin layer')

    # Adds bottom infinite layer at the end of the n list (same as last n calculated)
    n_last = []
    n_last = n_list_converted[:, -1]
    n_lists = np.column_stack((n_list_converted, n_last))

    # Adds air layer at the top of the n list
    air = 1.00029
    n_air = np.full((lamb.size, 1), air)
    n_list = np.insert(n_lists, 0, air, axis=1)
    return n_list
def get_arc_lengths(self, waypoints):
    d = np.diff(waypoints, axis=0)
    consecutive_diff = np.sqrt(np.sum(np.power(d, 2), axis=1))
    dists_cum = np.cumsum(consecutive_diff)
    dists_cum = np.insert(dists_cum, 0, 0.0)
    return dists_cum
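# Standalone sketch (toy waypoints): prepending 0.0 makes the cumulative arc
# length line up with the waypoints themselves (distance 0 at the first point).
import numpy as np

waypoints = np.array([[0.0, 0.0], [3.0, 4.0], [3.0, 8.0]])
d = np.diff(waypoints, axis=0)
dists = np.insert(np.cumsum(np.sqrt((d ** 2).sum(axis=1))), 0, 0.0)
print(dists)   # [0. 5. 9.]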
def main(d): # d is a dictionary containing the auto-encoder design specifications and training phase specifications # RESET DEFAULT GRAPH print('resetting default graph...', flush=True) tf.reset_default_graph() # FINISH CONFIGURATION print('finishing configuration...', flush=True) # specify noise distribution if d['noise_distribution'] == 'truncnorm': noise_distribution = tf.truncated_normal elif d['noise_distribution'] == 'uniform': noise_distribution = tf.random_uniform # specify distribution of initial weights if d['initialization_distribution'] == 'truncnorm': initialization_distribution = tf.truncated_normal # specify activation function if d['activation_function'] == 'tanh': activation_function = {'tf':tf.tanh, 'np':sdae_apply_functions.tanh} elif d['activation_function'] == 'relu': activation_function = {'tf':tf.nn.relu, 'np':sdae_apply_functions.relu} elif d['activation_function'] == 'elu': activation_function = {'tf':tf.nn.elu, 'np':sdae_apply_functions.elu} elif d['activation_function'] == 'sigmoid': activation_function = {'tf':tf.sigmoid, 'np':sdae_apply_functions.sigmoid} # load data partitions = ['train', 'valid', 'test'] dataset = {} for partition in partitions: dataset[partition] = datasetIO.load_datamatrix('{0}/{1}.pickle'.format(d['input_path'], partition)) d['{0}_examples'.format(partition)] = dataset[partition].shape[0] # create output directory if not os.path.exists(d['output_path']): os.makedirs(d['output_path']) # initialize model architecture (number of layers and dimension of each layer) d['current_dimensions'] = d['all_dimensions'][:d['current_hidden_layer']+1] # dimensions of model up to current depth # specify embedding function for current training phase # we want the option of skipping the embedding activation function to apply only to the full model if not d['apply_activation_to_embedding'] and d['current_dimensions'] == d['all_dimensions']: d['current_apply_activation_to_embedding'] = False else: d['current_apply_activation_to_embedding'] = True # initialize assignments of training examples to mini-batches and number of training steps for stochastic gradient descent d['batch_size'] = d['batch_fraction']*d['train_examples'] batch_ids = create_batch_ids(d['train_examples'], d['batch_size']) d['batches'] = np.unique(batch_ids).size d['steps'] = d['current_epochs']*d['batches'] # specify path to weights from previous training run d['previous_variables_path'] = '{0}/variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['previous_hidden_layer'], d['previous_finetuning_run']) d['fix_or_init'] = 'fix' if d['current_finetuning_run'] == 0 else 'init' # fix for pretraining, init for finetuning # specify rows and columns of figure showing data reconstructions d['reconstruction_rows'] = int(np.round(np.sqrt(np.min([100, d['valid_examples']])/2))) d['reconstruction_cols'] = 2*d['reconstruction_rows'] # print some design information print('input path: {0}'.format(d['input_path']), flush=True) print('output path: {0}'.format(d['output_path']), flush=True) print('previous variables path: {0}'.format(d['previous_variables_path']), flush=True) print('previous variables fix or init: {0}'.format(d['fix_or_init']), flush=True) # SAVE CURRENT DESIGN print('saving current design...', flush=True) with open('{0}/design_layer{1!s}_finetuning{2!s}.json'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), mode='wt', encoding='utf-8', errors='surrogateescape') as fw: json.dump(d, fw, indent=2) # DEFINE REPORTING VARIABLES print('defining 
reporting variables...', flush=True) reporting_steps = sdae_design_functions.create_reporting_steps(d['steps'], d['firstcheckpoint'], d['maxstepspercheckpoint']) valid_losses = np.zeros(reporting_steps.size, dtype='float32') train_losses = np.zeros(reporting_steps.size, dtype='float32') valid_noisy_losses = np.zeros(reporting_steps.size, dtype='float32') train_noisy_losses = np.zeros(reporting_steps.size, dtype='float32') print('reporting steps:', reporting_steps, flush=True) # DEFINE COMPUTATIONAL GRAPH # define placeholders for input data, use None to allow feeding different numbers of examples print('defining placeholders...', flush=True) training = tf.placeholder(tf.bool, []) noise_stdv = tf.placeholder(tf.float32, []) noise_prob = tf.placeholder(tf.float32, []) training_and_validation_data_initializer = tf.placeholder(tf.float32, [dataset['train'].shape[0]+dataset['valid'].shape[0], dataset['train'].shape[1]]) selection_mask = tf.placeholder(tf.bool, [dataset['train'].shape[0]+dataset['valid'].shape[0]]) # define variables # W contains the weights, bencode contains the biases for encoding, and bdecode contains the biases for decoding print('defining variables...', flush=True) training_and_validation_data = tf.Variable(training_and_validation_data_initializer, trainable=False, collections=[]) if os.path.exists(d['previous_variables_path']): # update variables (if continuing from a previous training run) print('loading previous variables...', flush=True) global_step, W, bencode, bdecode = update_variables(d['current_dimensions'], initialization_distribution, d['initialization_sigma'], d['previous_variables_path'], d['fix_or_init'], d['include_global_step']) elif (d['current_hidden_layer'] == 1 and d['current_finetuning_run'] == 0) or d['skip_layerwise_training']: # create variables global_step, W, bencode, bdecode = create_variables(d['current_dimensions'], initialization_distribution, d['initialization_sigma']) else: raise ValueError('could not find previous variables') # define model # h contains the activations from input layer to bottleneck layer # hhat contains the activations from bottleneck layer to output layer # xhat is a reference to the output layer (i.e. 
the reconstruction) print('defining model...', flush=True) x = tf.boolean_mask(training_and_validation_data, selection_mask) if d['noise_distribution'] == 'truncnorm': noise = noise_distribution(tf.shape(x), stddev=noise_stdv) else: noise = noise_distribution(tf.shape(x), minval=0, maxval=noise_stdv) noise_mask = tf.to_float(tf.random_uniform(tf.shape(x)) <= noise_prob) xnoisy = apply_noise(x, noise, noise_mask, d['noise_operation']) if d['activation_function'] == 'sigmoid' and d['apply_activation_to_output']: h, hhat, xhat = create_autoencoder(xnoisy, activation_function['tf'], False, d['current_apply_activation_to_embedding'], d['use_batchnorm'], training, W, bencode, bdecode) else: h, hhat, xhat = create_autoencoder(xnoisy, activation_function['tf'], d['apply_activation_to_output'], d['current_apply_activation_to_embedding'], d['use_batchnorm'], training, W, bencode, bdecode) # define loss print('defining loss...', flush=True) if d['activation_function'] == 'sigmoid' and d['apply_activation_to_output']: loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=x, logits=xhat)) else: loss = tf.reduce_mean(tf.squared_difference(x, xhat)) # squared error loss # define optimizer and training function print('defining optimizer and training function...', flush=True) optimizer = tf.train.AdamOptimizer(learning_rate=d['learning_rate'], epsilon=d['epsilon'], beta1=d['beta1'], beta2=d['beta2']) train_ops = optimizer.minimize(loss, global_step=global_step) # define update ops and add to train ops (if using batch norm) if d['use_batchnorm']: update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) train_ops = [train_ops, update_ops] # collect batch norm variables if d['use_batchnorm']: bn_gammas = tf.global_variables(scope='batch_normalization.{0,2}/gamma:0') print(bn_gammas, flush=True) bn_betas = tf.global_variables(scope='batch_normalization.{0,2}/beta:0') bn_moving_means = tf.global_variables(scope='batch_normalization.{0,2}/moving_mean:0') bn_moving_variances = tf.global_variables(scope='batch_normalization.{0,2}/moving_variance:0') # define bottleneck layer preactivation # bottleneck_preactivation = tf.matmul(h[-2], W[-1]) + bencode[-1] # INITIALIZE TENSORFLOW SESSION print('initializing tensorflow session...', flush=True) init = tf.global_variables_initializer() session_config = configure_session(d['processor'], d['gpu_memory_fraction']) with tf.Session(config=session_config) as sess: sess.run(init) # TRAINING print('training...', flush=True) sess.run(training_and_validation_data.initializer, feed_dict={training_and_validation_data_initializer: np.append(dataset['train'].matrix, dataset['valid'].matrix, 0)}) validation_id = -1 batch_and_validation_ids = np.full(dataset['train'].shape[0]+dataset['valid'].shape[0], validation_id, dtype=batch_ids.dtype) is_train = np.append(np.ones(dataset['train'].shape[0], dtype='bool'), np.zeros(dataset['valid'].shape[0], dtype='bool')) is_valid = ~is_train training_step = 0 i = 0 overfitting_score = 0 stopearly = False starttime = time.time() with open('{0}/log_layer{1!s}_finetuning{2!s}.txt'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), mode='wt', buffering=1) as fl: fl.write('\t'.join(['step', 'train_loss', 'valid_loss', 'train_noisy_loss', 'valid_noisy_loss', 'time']) + '\n') for epoch in range(d['current_epochs']): if stopearly: break # randomize assignment of training examples to batches np.random.shuffle(batch_ids) batch_and_validation_ids[is_train] = batch_ids for batch in range(d['batches']): 
training_step += 1 # select mini-batch selected = batch_and_validation_ids == batch # update weights sess.run(train_ops, feed_dict={training:True, selection_mask:selected, noise_prob:d['noise_probability'], noise_stdv:d['noise_sigma']}) # record training and validation errors if training_step == reporting_steps[i]: train_losses[i] = sess.run(loss, feed_dict={training:False, selection_mask:is_train, noise_prob:0, noise_stdv:0}) train_noisy_losses[i] = sess.run(loss, feed_dict={training:False, selection_mask:is_train, noise_prob:d['noise_probability'], noise_stdv:d['noise_sigma']}) valid_losses[i] = sess.run(loss, feed_dict={training:False, selection_mask:is_valid, noise_prob:0, noise_stdv:0}) valid_noisy_losses[i] = sess.run(loss, feed_dict={training:False, selection_mask:is_valid, noise_prob:d['noise_probability'], noise_stdv:d['noise_sigma']}) print('step:{0:1.6g}, train loss:{1:1.3g}, valid loss:{2:1.3g}, train noisy loss:{3:1.3g},valid noisy loss:{4:1.3g}, time:{5:1.6g}'.format(reporting_steps[i], train_losses[i], valid_losses[i], train_noisy_losses[i], valid_noisy_losses[i], time.time() - starttime), flush=True) fl.write('\t'.join(['{0:1.6g}'.format(x) for x in [reporting_steps[i], train_losses[i], valid_losses[i], train_noisy_losses[i], valid_noisy_losses[i], time.time() - starttime]]) + '\n') # save current weights, reconstructions, and projections if training_step >= d['startsavingstep'] or training_step == reporting_steps[-1]: with open('{0}/intermediate_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], training_step), 'wb') as fw: pickle.dump((sess.run(global_step), sess.run(W), sess.run(bencode), sess.run(bdecode)), fw) if d['use_batchnorm']: with open('{0}/intermediate_batchnorm_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], training_step), 'wb') as fw: pickle.dump((sess.run(bn_gammas), sess.run(bn_betas), sess.run(bn_moving_means), sess.run(bn_moving_variances)), fw) # stop early if overfitting if valid_losses[i] >= 1.01*(np.insert(valid_losses[:i], 0, np.inf).min()): overfitting_score += 1 else: overfitting_score = 0 if overfitting_score == d['overfitting_score_max']: stopearly = True print('stopping early!', flush=True) break i += 1 # end tensorflow session print('closing tensorflow session...', flush=True) # ROLL BACK IF OVERFITTING if stopearly: print('rolling back...', flush=True) reporting_steps = reporting_steps[:i+1] train_losses = train_losses[:i+1] valid_losses = valid_losses[:i+1] train_noisy_losses = train_noisy_losses[:i+1] valid_noisy_losses = valid_noisy_losses[:i+1] # selected_step = max([reporting_steps[i-d['overfitting_score_max']], d['startsavingstep']]) else: print('completed all training steps...', flush=True) # selected_step = reporting_steps[-1] selected_step = min([max([reporting_steps[np.argmin(valid_losses)], d['startsavingstep']]), reporting_steps[-1]]) print('selected step:{0}...'.format(selected_step), flush=True) # SAVE RESULTS print('saving results...', flush=True) with open('{0}/optimization_path_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), 'wb') as fw: pickle.dump({'reporting_steps':reporting_steps, 'valid_losses':valid_losses, 'train_losses':train_losses, 'valid_noisy_losses':valid_noisy_losses, 'train_noisy_losses':train_noisy_losses}, fw) if d['current_dimensions'] == d['all_dimensions'] and (not 
d['use_finetuning'] or d['current_finetuning_run'] > 0): shutil.copyfile('{0}/intermediate_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], selected_step), '{0}/variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'])) if d['use_batchnorm']: shutil.copyfile('{0}/intermediate_batchnorm_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], selected_step), '{0}/batchnorm_variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'])) else: shutil.move('{0}/intermediate_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], selected_step), '{0}/variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'])) if d['use_batchnorm']: shutil.move('{0}/intermediate_batchnorm_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], selected_step), '{0}/batchnorm_variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'])) with open('{0}/variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), 'rb') as fr: W, Be, Bd = pickle.load(fr)[1:] # global_step, W, bencode, bdecode if d['use_batchnorm']: with open('{0}/batchnorm_variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), 'rb') as fr: batchnorm_variables = pickle.load(fr) # gammas, betas, moving_means, moving_variances batchnorm_encode_variables, batchnorm_decode_variables = sdae_apply_functions.align_batchnorm_variables(batchnorm_variables, d['current_apply_activation_to_embedding'], d['apply_activation_to_output']) recon = {} embed = {} error = {} embed_preactivation = {} for partition in partitions: if d['use_batchnorm']: recon[partition], embed[partition], error[partition] = sdae_apply_functions.encode_and_decode(dataset[partition], W, Be, Bd, activation_function['np'], d['current_apply_activation_to_embedding'], d['apply_activation_to_output'], return_embedding=True, return_reconstruction_error=True, bn_encode_variables=batchnorm_encode_variables, bn_decode_variables=batchnorm_decode_variables) embed_preactivation[partition] = sdae_apply_functions.encode(dataset[partition], W, Be, activation_function['np'], apply_activation_to_embedding=False, bn_variables=batchnorm_encode_variables) else: recon[partition], embed[partition], error[partition] = sdae_apply_functions.encode_and_decode(dataset[partition], W, Be, Bd, activation_function['np'], d['current_apply_activation_to_embedding'], d['apply_activation_to_output'], return_embedding=True, return_reconstruction_error=True) embed_preactivation[partition] = sdae_apply_functions.encode(dataset[partition], W, Be, activation_function['np'], apply_activation_to_embedding=False) print('{0} reconstruction error: {1:1.3g}'.format(partition, error[partition]), flush=True) if d['current_dimensions'] == d['all_dimensions'] and (not d['use_finetuning'] or d['current_finetuning_run'] > 0): datasetIO.save_datamatrix('{0}/{1}_embedding_layer{2!s}_finetuning{3!s}.pickle'.format(d['output_path'], 
partition, d['current_hidden_layer'], d['current_finetuning_run']), embed[partition]) datasetIO.save_datamatrix('{0}/{1}_embedding_layer{2!s}_finetuning{3!s}.txt.gz'.format(d['output_path'], partition, d['current_hidden_layer'], d['current_finetuning_run']), embed[partition]) if d['current_apply_activation_to_embedding']: datasetIO.save_datamatrix('{0}/{1}_embedding_preactivation_layer{2!s}_finetuning{3!s}.pickle'.format(d['output_path'], partition, d['current_hidden_layer'], d['current_finetuning_run']), embed_preactivation[partition]) datasetIO.save_datamatrix('{0}/{1}_embedding_preactivation_layer{2!s}_finetuning{3!s}.txt.gz'.format(d['output_path'], partition, d['current_hidden_layer'], d['current_finetuning_run']), embed_preactivation[partition]) # PLOT LOSS print('plotting loss...', flush=True) fg, ax = plt.subplots(1, 1, figsize=(3.25,2.25)) ax.set_position([0.55/3.25, 0.45/2.25, 2.6/3.25, 1.7/2.25]) ax.semilogx(reporting_steps, train_losses, ':r', linewidth=1, label='train') ax.semilogx(reporting_steps, valid_losses, '-g', linewidth=1, label='valid') ax.semilogx(reporting_steps, train_noisy_losses, '--b', linewidth=1, label='train,noisy') ax.semilogx(reporting_steps, valid_noisy_losses, '-.k', linewidth=1, label='valid,noisy') ax.legend(loc='best', fontsize=8) ax.set_ylabel('loss', fontsize=8) ax.set_xlabel('steps (selected step:{0!s})'.format(selected_step), fontsize=8) ax.set_xlim(reporting_steps[0]-1, reporting_steps[-1]+1) # ax.set_ylim(0, 1) ax.tick_params(axis='both', which='major', left=True, right=True, bottom=True, top=False, labelleft=True, labelright=False, labelbottom=True, labeltop=False, labelsize=8) fg.savefig('{0}/optimization_path_layer{1!s}_finetuning{2!s}.png'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), transparent=True, pad_inches=0, dpi=600) plt.close() # PLOT RECONSTRUCTIONS print('plotting reconstructions...', flush=True) num_recons = min([d['reconstruction_rows']*d['reconstruction_cols'], dataset['valid'].shape[0]]) x_valid = dataset['valid'].matrix[:num_recons,:] xr_valid = recon['valid'].matrix[:num_recons,:] if x_valid.shape[1] > 1000: x_valid = x_valid[:,:1000] xr_valid = xr_valid[:,:1000] lb = np.append(x_valid, xr_valid, 1).min(1) ub = np.append(x_valid, xr_valid, 1).max(1) if d['apply_activation_to_output']: if d['activation_function'] == 'sigmoid': lb[:] = -0.05 ub[:] = 1.05 elif d['activation_function'] == 'tanh': lb[:] = -1.05 ub[:] = 1.05 fg, axs = plt.subplots(d['reconstruction_rows'], d['reconstruction_cols'], figsize=(6.5,3.25)) for i, ax in enumerate(axs.reshape(-1)): if i < num_recons: ax.plot(x_valid[i,:], xr_valid[i,:], 'ok', markersize=0.5, markeredgewidth=0, alpha=0.1) ax.set_ylim(lb[i], ub[i]) ax.set_xlim(lb[i], ub[i]) ax.tick_params(axis='both', which='major', left=False, right=False, bottom=False, top=False, labelleft=False, labelright=False, labelbottom=False, labeltop=False, pad=4) ax.set_frame_on(False) ax.axvline(lb[i], linewidth=1, color='k') ax.axvline(ub[i], linewidth=1, color='k') ax.axhline(lb[i], linewidth=1, color='k') ax.axhline(ub[i], linewidth=1, color='k') else: fg.delaxes(ax) fg.savefig('{0}/reconstructions_layer{1!s}_finetuning{2!s}.png'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), transparent=True, pad_inches=0, dpi=1200) plt.close() # PLOT 2D EMBEDDING if d['current_dimensions'][-1] == 2 and (not d['use_finetuning'] or d['current_finetuning_run'] > 0): print('plotting 2d embedding...', flush=True) fg, ax = plt.subplots(1, 1, figsize=(6.5,6.5)) 
ax.set_position([0.15/6.5, 0.15/6.5, 6.2/6.5, 6.2/6.5]) ax.plot(embed['train'].matrix[:,0], embed['train'].matrix[:,1], 'ok', markersize=2, markeredgewidth=0, alpha=0.5, zorder=0) ax.plot(embed['valid'].matrix[:,0], embed['valid'].matrix[:,1], 'or', markersize=2, markeredgewidth=0, alpha=1.0, zorder=1) ax.tick_params(axis='both', which='major', bottom=False, top=False, labelbottom=False, labeltop=False, left=False, right=False, labelleft=False, labelright=False, pad=4) ax.set_frame_on(False) fg.savefig('{0}/embedding_layer{1!s}_finetuning{2!s}.png'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), transparent=True, pad_inches=0, dpi=600) plt.close() if d['current_apply_activation_to_embedding']: fg, ax = plt.subplots(1, 1, figsize=(6.5,6.5)) ax.set_position([0.15/6.5, 0.15/6.5, 6.2/6.5, 6.2/6.5]) ax.plot(embed_preactivation['train'].matrix[:,0], embed_preactivation['train'].matrix[:,1], 'ok', markersize=2, markeredgewidth=0, alpha=0.5, zorder=0) ax.plot(embed_preactivation['valid'].matrix[:,0], embed_preactivation['valid'].matrix[:,1], 'or', markersize=2, markeredgewidth=0, alpha=1.0, zorder=1) ax.tick_params(axis='both', which='major', bottom=False, top=False, labelbottom=False, labeltop=False, left=False, right=False, labelleft=False, labelright=False, pad=4) ax.set_frame_on(False) fg.savefig('{0}/embedding_preactivation_layer{1!s}_finetuning{2!s}.png'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), transparent=True, pad_inches=0, dpi=600) plt.close() print('done training phase.', flush=True) return d['current_hidden_layer'], d['current_finetuning_run'], d['current_epochs']
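# A minimal sketch of the early-stopping rule used in the training loop above:
# prepending np.inf with np.insert keeps the running best validation loss well
# defined at the first reporting step. The helper name, the toy loss curve and
# the max_score default are illustrative; the 1.01 tolerance mirrors the check above.
import numpy as np

def should_stop_early(valid_losses, i, overfitting_score, max_score=3, tolerance=1.01):
    # best validation loss seen before step i (inf if there is no history yet)
    best_so_far = np.insert(valid_losses[:i], 0, np.inf).min()
    if valid_losses[i] >= tolerance * best_so_far:
        overfitting_score += 1
    else:
        overfitting_score = 0
    return overfitting_score == max_score, overfitting_score

losses = np.array([1.0, 0.8, 0.7, 0.72, 0.75, 0.78, 0.80], dtype='float32')
score = 0
for i in range(losses.size):
    stop, score = should_stop_early(losses, i, score)
    if stop:
        print('stopping early at reporting step', i)
        break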
def specpolfinalstokes(infilelist,logfile='salt.log',debug=False, \ HW_Cal_override=False,Linear_PolZeropoint_override=False,PAZeropoint_override=False): """Combine the raw stokes and apply the polarimetric calibrations Parameters ---------- infilelist: list List of filenames that include an extracted spectrum logfile: str Name of file for logging """ """ _l: line in calibration file _i: index in file list _j: rawstokes = waveplate position pair index (enumeration within config, including repeats) _J: cycle number idx (0,1,..) for each rawstokes _k: combstokes = waveplate position pair index (enumeration within config, repeats combined) _K: pair = waveplate position pair index (enumeration within obs) _p: pair = waveplate position pair # (eg 0,1,2,3 = 0 4 1 5 2 6 3 7 for LINEAR-HI, sorted in h0 order) _s: normalized linear stokes for zeropoint correction (0,1) = (q,u) _S: unnormalized raw stokes within waveplate position pair: (eg 0,1 = I,Q) _F: unnormalized final stokes (eg 0,1,2 = I,Q,U) """ calhistorylist = ["PolCal Model: 20170429",] patternlist = open(datadir+'wppaterns.txt','r').readlines() patternpairs = dict(); patternstokes = dict(); patterndict = dict() for p in patternlist: if p.split()[0] == '#': continue patterndict[p.split()[0]]=np.array(p.split()[3:]).astype(int).reshape((-1,2)) patternpairs[p.split()[0]]=(len(p.split())-3)/2 patternstokes[p.split()[0]]=int(p.split()[1]) if len(glob.glob('specpol*.log')): logfile=glob.glob('specpol*.log')[0] with logging(logfile, debug) as log: log.message('specpolfinalstokes version: 20171226', with_header=False) # organize data using names. # allrawlist = infileidx,object,config,wvplt,cycle for each infile. obsdict=obslog(infilelist) files = len(infilelist) allrawlist = [] for i in range(files): object,config,wvplt,cycle = os.path.basename(infilelist[i]).rsplit('.',1)[0].rsplit('_',3) if (config[0]!='c')|(wvplt[0]!='h')|(not cycle.isdigit()): log.message('File '+infilelist[i]+' is not a raw stokes file.' 
, with_header=False) continue allrawlist.append([i,object,config,wvplt,cycle]) configlist = sorted(list(set(ele[2] for ele in allrawlist))) # unique configs # input correct HWCal and TelZeropoint calibration files dateobs = obsdict['DATE-OBS'][0].replace('-','') HWCalibrationfile = datedfile(datadir+"RSSpol_HW_Calibration_yyyymmdd_vnn.txt",dateobs) hwav_l,heff_l,hpa_l = np.loadtxt(HWCalibrationfile,dtype=float,unpack=True,usecols=(0,1,2),ndmin=2) TelZeropointfile = datedfile(datadir+"RSSpol_Linear_TelZeropoint_yyyymmdd_vnn.txt",dateobs) twav_l,tq0_l,tu0_l,err_l = np.loadtxt(TelZeropointfile,dtype=float,unpack=True,ndmin=2) # input PAZeropoint file and get correct entry dpadatever,dpa = datedline(datadir+"RSSpol_Linear_PAZeropoint.txt",dateobs).split() dpa = float(dpa) # prepare calibration keyword documentation pacaltype = "Equatorial" if HW_Cal_override: Linear_PolZeropoint_override=True PAZeropoint_override=True pacaltype = "Instrumental" calhistorylist.append("HWCal: Uncalibrated") elif Linear_PolZeropoint_override: PAZeropoint_override=True calhistorylist.extend(["HWCal: "+os.path.basename(HWCalibrationfile),"PolZeropoint: Null"]) elif PAZeropoint_override: calhistorylist.extend(["HWCal: "+os.path.basename(HWCalibrationfile), \ "PolZeropoint: "+os.path.basename(TelZeropointfile), "PAZeropoint: Null"]) else: calhistorylist.extend(["HWCal: "+os.path.basename(HWCalibrationfile), \ "PolZeropoint: "+os.path.basename(TelZeropointfile), \ "PAZeropoint: RSSpol_Linear_PAZeropoint.txt "+str(dpadatever)+" "+str(dpa)]) log.message(' PA type: '+pacaltype, with_header=False) if len(calhistorylist): log.message(' '+'\n '.join(calhistorylist), with_header=False) chifence_d = 2.2*np.array([6.43,4.08,3.31,2.91,2.65,2.49,2.35,2.25]) # *q3 for upper outer fence outlier for each dof # do one config at a time. # rawlist = infileidx,object,config,wvplt,cycle for each infile *in this config*. # rawlist is sorted with cycle varying fastest # rawstokes = len(rawlist). j is idx in rawlist. for conf in configlist: log.message("\nConfiguration: %s" % conf, with_header=False) rawlist = [entry for entry in allrawlist if entry[2]==conf] for col in (4,3,1,2): rawlist = sorted(rawlist,key=operator.itemgetter(col)) rawstokes = len(rawlist) # rawlist is sorted with cycle varying fastest wav0 = pyfits.getheader(infilelist[rawlist[0][0]],'SCI')['CRVAL1'] dwav = pyfits.getheader(infilelist[rawlist[0][0]],'SCI')['CDELT1'] wavs = pyfits.getheader(infilelist[rawlist[0][0]],'SCI')['NAXIS1'] wav_w = wav0 + dwav*np.arange(wavs) # interpolate HW, telZeropoint calibration wavelength dependence for this config okcal_w = np.ones(wavs).astype(bool) if not HW_Cal_override: heff_w = interp1d(hwav_l,heff_l,kind='cubic',bounds_error=False)(wav_w) hpar_w = -interp1d(hwav_l,hpa_l,kind='cubic',bounds_error=False)(wav_w) okcal_w &= ~np.isnan(heff_w) hpar_w[~okcal_w] = 0. if not Linear_PolZeropoint_override: tel0_sw = interp1d(twav_l,np.array([tq0_l,tu0_l]),kind='cubic',bounds_error=False)(wav_w) okcal_w &= ~np.isnan(tel0_sw[0]) tel0_sw /= 100. 
# table is in % # get spectrograph calibration file, spectrograph coordinates grating = pyfits.getheader(infilelist[rawlist[0][0]])['GRATING'] grang = pyfits.getheader(infilelist[rawlist[0][0]])['GR-ANGLE'] artic = pyfits.getheader(infilelist[rawlist[0][0]])['AR-ANGLE'] SpecZeropointfile = datedfile(datadir+ "RSSpol_Linear_SpecZeropoint_"+grating+"_yyyymmdd_vnn.txt",dateobs) if len(SpecZeropointfile): calhistorylist.append(SpecZeropointfile) # get all rawstokes data # comblist = last rawlistidx,object,config,wvplt,cycles,wppat # one entry for each set of cycles that needs to be combined (i.e, one for each wvplt) stokes_jSw = np.zeros((rawstokes,2,wavs)) var_jSw = np.zeros_like(stokes_jSw) covar_jSw = np.zeros_like(stokes_jSw) bpm_jSw = np.zeros_like(stokes_jSw).astype(int) comblist = [] for j in range(rawstokes): i,object,config,wvplt,cycle = rawlist[j] if j==0: cycles = 1 lampid = pyfits.getheader(infilelist[i],0)['LAMPID'].strip().upper() telpa = float(pyfits.getheader(infilelist[i],0)['TELPA']) if lampid != "NONE": pacaltype ="Instrumental" if pacaltype == "Equatorial": eqpar_w = hpar_w + dpa + (telpa % 180) # if object,config,wvplt changes, start a new comblist entry else: if rawlist[j-1][1:4] != rawlist[j][1:4]: cycles = 1 else: cycles += 1 wppat = pyfits.getheader(infilelist[i])['WPPATERN'].upper() stokes_jSw[j] = pyfits.open(infilelist[i])['SCI'].data.reshape((2,-1)) var_jSw[j] = pyfits.open(infilelist[i])['VAR'].data.reshape((2,-1)) covar_jSw[j] = pyfits.open(infilelist[i])['COV'].data.reshape((2,-1)) bpm_jSw[j] = pyfits.open(infilelist[i])['BPM'].data.reshape((2,-1)) # apply telescope zeropoint calibration, q rotated to raw coordinates if not Linear_PolZeropoint_override: trkrho = pyfits.getheader(infilelist[i])['TRKRHO'] dpatelraw_w = -(22.5*float(wvplt[1]) + hpar_w + trkrho + dpa) rawtel0_sw = \ specpolrotate(tel0_sw,0,0,dpatelraw_w,normalized=True)[0] rawtel0_sw[:,okcal_w] *= heff_w[okcal_w] stokes_jSw[j,1,okcal_w] -= stokes_jSw[j,0,okcal_w]*rawtel0_sw[0,okcal_w] if cycles==1: comblist.append((j,object,config,wvplt,1,wppat)) else: comblist[-1] = (j,object,config,wvplt,cycles,wppat) # combine multiple cycles as necessary. Absolute stokes is on a per cycle basis. 
# polarimetric combination on normalized stokes basis # to avoid coupling mean syserr into polarimetric spectral features combstokess = len(comblist) stokes_kSw = np.zeros((combstokess,2,wavs)) var_kSw = np.zeros_like(stokes_kSw) covar_kSw = np.zeros_like(stokes_kSw) cycles_kw = np.zeros((combstokess,wavs)).astype(int) chi2cycle_kw = np.zeros((combstokess,wavs)) badcyclechi_kw = np.zeros((combstokess,wavs),dtype=bool) havecyclechi_k = np.zeros(combstokess,dtype=bool) # obslist = first comblist idx,object,config,wppat,pairs # k = idx in comblist obslist = [] jlistk = [] # list of rawstokes idx for each comblist entry Jlistk = [] # list of cycle number for each comblist entry obsobject = '' obsconfig = '' chi2cycle_j = np.zeros(rawstokes) syserrcycle_j = np.zeros(rawstokes) iscull_jw = np.zeros((rawstokes,wavs),dtype=bool) stokes_kSw = np.zeros((combstokess,2,wavs)) var_kSw = np.zeros_like(stokes_kSw) nstokes_kw = np.zeros((combstokess,wavs)) nvar_kw = np.zeros_like(nstokes_kw) ncovar_kw = np.zeros_like(nstokes_kw) chi2cyclenet_k = np.zeros(combstokess) syserrcyclenet_k = np.zeros(combstokess) for k in range(combstokess): j,object,config,wvplt,cycles,wppat = comblist[k] jlistk.append(range(j-cycles+1,j+1)) Jlistk.append([int(rawlist[jj][4])-1 for jj in range(j-cycles+1,j+1)]) # J = cycle-1, counting from 0 nstokes_Jw = np.zeros((cycles,wavs)) nvar_Jw = np.zeros((cycles,wavs)) ncovar_Jw = np.zeros((cycles,wavs)) bpm_Jw = np.zeros((cycles,wavs)) ok_Jw = np.zeros((cycles,wavs),dtype=bool) for J,j in enumerate(jlistk[k]): bpm_Jw[J] = bpm_jSw[j,0] ok_Jw[J] = (bpm_Jw[J] ==0) nstokes_Jw[J][ok_Jw[J]] = stokes_jSw[j,1][ok_Jw[J]]/stokes_jSw[j,0][ok_Jw[J]] nvar_Jw[J][ok_Jw[J]] = var_jSw[j,1][ok_Jw[J]]/(stokes_jSw[j,0][ok_Jw[J]])**2 ncovar_Jw[J][ok_Jw[J]] = covar_jSw[j,1][ok_Jw[J]]/(stokes_jSw[j,0][ok_Jw[J]])**2 # Culling: for multiple cycles, compare each cycle with every other cycle (dof=1). 
# bad wavelengths flagged for P < .02% (1/2000): chisq > 13.8 (chi2.isf(q=.0002,df=1)) # for cycles>2, vote to cull specific pair/wavelength, otherwise cull wavelength cycles_kw[k] = (1-bpm_Jw).sum(axis=0).astype(int) okchi_w = (cycles_kw[k] > 1) chi2lim = 13.8 havecyclechi_k[k] = okchi_w.any() if cycles > 1: ok_Jw[J] = okchi_w & (bpm_Jw[J] ==0) chi2cycle_JJw = np.zeros((cycles,cycles,wavs)) badcyclechi_JJw = np.zeros((cycles,cycles,wavs)) ok_JJw = ok_Jw[:,None,:] & ok_Jw[None,:,:] nstokes_JJw = nstokes_Jw[:,None] - nstokes_Jw[None,:] nvar_JJw = nvar_Jw[:,None] + nvar_Jw[None,:] chi2cycle_JJw[ok_JJw] = nstokes_JJw[ok_JJw]**2/nvar_JJw[ok_JJw] triuidx = np.triu_indices(cycles,1) # _i enumeration of cycle differences chi2cycle_iw = chi2cycle_JJw[triuidx] badcyclechi_w = (chi2cycle_iw > chi2lim).any(axis=(0)) badcyclechiall_w = (badcyclechi_w & (ok_JJw[triuidx].reshape((-1,wavs)).sum(axis=0)<3)) badcyclechicull_w = (badcyclechi_w & np.logical_not(badcyclechiall_w)) wavcull_W = np.where(badcyclechicull_w)[0] # cycles>2, cull by voting if wavcull_W.shape[0]: for W,w in enumerate(wavcull_W): J_I = np.array(triuidx).T[np.argsort(chi2cycle_iw[:,w])].flatten() _,idx = np.unique(J_I,return_index=True) Jcull = J_I[np.sort(idx)][-1] jcull = jlistk[k][Jcull] iscull_jw[jcull,w] = True # for reporting bpm_jSw[jcull,:,w] = 1 else: for j in jlistk[k]: iscull_jw[j] = badcyclechiall_w # for reporting bpm_jSw[j][:,badcyclechiall_w] = 1 for J,j in enumerate(jlistk[k]): bpm_Jw[J] = bpm_jSw[j,0] if debug: obsname = object+"_"+config ok_Jw = okchi_w[None,:] & (bpm_Jw ==0) np.savetxt(obsname+"_nstokes_Jw_"+str(k)+".txt",np.vstack((wav_w,ok_Jw.astype(int), \ nstokes_Jw,nvar_Jw)).T, fmt="%8.2f "+cycles*"%3i "+cycles*"%10.6f "+cycles*"%10.12f ") np.savetxt(obsname+"_chi2cycle_iw_"+str(k)+".txt",np.vstack((wav_w,okchi_w.astype(int), \ chi2cycle_iw.reshape((-1,wavs)),badcyclechi_w,ok_JJw[triuidx].reshape((-1,wavs)).sum(axis=0))).T, \ fmt="%8.2f %3i "+chi2cycle_iw.shape[0]*"%10.7f "+" %2i %2i") np.savetxt(obsname+"_Jcull_kw_"+str(k)+".txt",np.vstack((wav_w,okchi_w.astype(int), \ iscull_jw[jlistk[k]].astype(int).reshape((-1,wavs)))).T, fmt="%8.2f %3i "+cycles*" %3i") if ((object != obsobject) | (config != obsconfig)): obslist.append([k,object,config,wppat,1]) obsobject = object; obsconfig = config else: obslist[-1][4] +=1 # Now combine cycles, using normalized stokes to minimize systematic errors # first normalize cycle members J at wavelengths where all cycles have data: cycles_kw[k] = (1-bpm_Jw).sum(axis=0).astype(int) ok_w = (cycles_kw[k] > 0) okall_w = (cycles_kw[k] == cycles) normint_J = np.array(stokes_jSw[jlistk[k],0][:,okall_w].sum(axis=1)) normint_J /= np.mean(normint_J) stokes_JSw = stokes_jSw[jlistk[k]]/normint_J[:,None,None] var_JSw = var_jSw[jlistk[k]]/normint_J[:,None,None]**2 covar_JSw = covar_jSw[jlistk[k]]/normint_J[:,None,None]**2 for J in range(cycles): okJ_w = ok_w & (bpm_Jw[J] ==0) # average the intensity stokes_kSw[k,0,okJ_w] += stokes_JSw[J,0,okJ_w]/cycles_kw[k][okJ_w] var_kSw[k,0,okJ_w] += var_JSw[J,0,okJ_w]/cycles_kw[k][okJ_w]**2 covar_kSw[k,0,okJ_w] += covar_JSw[J,0,okJ_w]/cycles_kw[k][okJ_w]**2 # now the normalized stokes nstokes_kw[k][okJ_w] += (stokes_JSw[J,1][okJ_w]/stokes_JSw[J,0][okJ_w])/cycles_kw[k][okJ_w] nvar_kw[k][okJ_w] += (var_JSw[J,1][okJ_w]/stokes_JSw[J,0][okJ_w]**2)/cycles_kw[k][okJ_w]**2 ncovar_kw[k][okJ_w] += (covar_JSw[J,1][okJ_w]/stokes_JSw[J,0][okJ_w]**2)/cycles_kw[k][okJ_w]**2 stokes_kSw[k,1] = nstokes_kw[k]*stokes_kSw[k,0] var_kSw[k,1] = nvar_kw[k]*stokes_kSw[k,0]**2 
covar_kSw[k,1] = ncovar_kw[k]*stokes_kSw[k,0]**2 if debug: obsname = object+"_"+config np.savetxt(obsname+"_stokes_kSw_"+str(k)+".txt",np.vstack((wav_w,ok_w.astype(int), \ stokes_kSw[k])).T, fmt="%8.2f %3i "+2*"%12.3f ") # compute mean chisq for each pair having multiple cycles if cycles > 1: nstokeserr_Jw = np.zeros((cycles,wavs)) nerr_Jw = np.zeros((cycles,wavs)) for J in range(cycles): okJ_w = ok_w & (bpm_Jw[J] ==0) nstokes_Jw[J][okJ_w] = stokes_JSw[J,1][okJ_w]/stokes_JSw[J,0][okJ_w] nvar_Jw[J][okJ_w] = var_JSw[J,1][okJ_w]/(stokes_JSw[J,0][okJ_w])**2 nstokeserr_Jw[J] = (nstokes_Jw[J] - nstokes_kw[k]) nvar_w = nvar_Jw[J] - nvar_kw[k] okall_w &= (nvar_w > 0.) nerr_Jw[J,okall_w] = np.sqrt(nvar_w[okall_w]) nstokessyserr_J = np.average(nstokeserr_Jw[:,okall_w],weights=1./nerr_Jw[:,okall_w],axis=1) nstokeserr_Jw -= nstokessyserr_J[:,None] for J,j in enumerate(jlistk[k]): loc,scale = norm.fit(nstokeserr_Jw[J,okall_w]/nerr_Jw[J,okall_w]) chi2cycle_j[j] = scale**2 syserrcycle_j[j] = nstokessyserr_J[J] chi2cyclenet_k[k] = chi2cycle_j[jlistk[k]].mean() syserrcyclenet_k[k] = np.sqrt((syserrcycle_j[jlistk[k]]**2).sum())/len(jlistk[k]) if debug: obsname = object+"_"+config chisqanalysis(obsname,nstokeserr_Jw,nerr_Jw,okall_w) # for each obs combine raw stokes, apply efficiency and PA calibration as appropriate for pattern, and save obss = len(obslist) for obs in range(obss): k0,object,config,wppat,pairs = obslist[obs] patpairs = patternpairs[wppat] klist = range(k0,k0+pairs) # entries in comblist for this obs obsname = object+"_"+config wplist = [comblist[k][3][1:] for k in klist] patwplist = sorted((patpairs*"%1s%1s " % tuple(patterndict[wppat].flatten())).split()) plist = [patwplist.index(wplist[P]) for P in range(pairs)] k_p = np.zeros(patpairs,dtype=int) k_p[plist] = klist # idx in klist for each pair idx cycles_p = np.zeros_like(k_p) cycles_p[plist] = np.array([comblist[k][4] for k in klist]) # number of cycles in comb cycles_pw = np.zeros((patpairs,wavs),dtype=int) cycles_pw[plist] = cycles_kw[klist] # of ok cycles for each wavelength havecyclechi_p = np.zeros(patpairs,dtype=bool) havecyclechi_p[plist] = havecyclechi_k[klist] havelinhichi_p = np.zeros(patpairs,dtype=bool) # name result to document hw cycles included kplist = list(k_p) if cycles_p.max()==cycles_p.min(): kplist = [klist[0],] for p in range(len(kplist)): obsname += "_" j0 = comblist[k_p[p]][0] - cycles_p[p] + 1 for j in range(j0,j0+cycles_p[p]): obsname+=rawlist[j][4][-1] log.message("\n Observation: %s Date: %s" % (obsname,dateobs), with_header=False) finstokes = patternstokes[wppat] if pairs != patpairs: if (pairs<2): log.message((' Only %1i pair, skipping observation' % pairs), with_header=False) continue elif ((max(plist) < 2) | (min(plist) > 1)): log.message(' Pattern not usable, skipping observation', with_header=False) continue stokes_Fw = np.zeros((finstokes,wavs)) var_Fw = np.zeros_like(stokes_Fw) covar_Fw = np.zeros_like(stokes_Fw) # normalize pairs in obs at wavelengths _W where all pair/cycles have data: okall_w = okcal_w & (cycles_pw[plist] == cycles_p[plist,None]).all(axis=0) normint_K = stokes_kSw[klist,0][:,okall_w].sum(axis=1) normint_K /= np.mean(normint_K) stokes_kSw[klist] /= normint_K[:,None,None] var_kSw[klist] /= normint_K[:,None,None]**2 covar_kSw[klist] /= normint_K[:,None,None]**2 # first, the intensity stokes_Fw[0] = stokes_kSw[klist,0].sum(axis=0)/pairs var_Fw[0] = var_kSw[klist,0].sum(axis=0)/pairs**2 covar_Fw[0] = covar_kSw[klist,0].sum(axis=0)/pairs**2 # now, the polarization stokes if 
wppat.count('LINEAR'): var_Fw = np.vstack((var_Fw,np.zeros(wavs))) # add QU covariance if (wppat=='LINEAR'): # wavelengths with both pairs having good, calibratable data in at least one cycle ok_w = okcal_w & (cycles_pw[plist] > 0).all(axis=0) bpm_Fw = np.repeat((np.logical_not(ok_w))[None,:],finstokes,axis=0) stokes_Fw[1:,ok_w] = stokes_kSw[klist,1][:,ok_w]*(stokes_Fw[0,ok_w]/stokes_kSw[klist,0][:,ok_w]) var_Fw[1:3,ok_w] = var_kSw[klist,1][:,ok_w]*(stokes_Fw[0,ok_w]/stokes_kSw[klist,0][:,ok_w])**2 covar_Fw[1:,ok_w] = covar_kSw[klist,1][:,ok_w]*(stokes_Fw[0,ok_w]/stokes_kSw[klist,0][:,ok_w])**2 if debug: np.savetxt(obsname+"_stokes.txt",np.vstack((wav_w,ok_w.astype(int),stokes_Fw)).T, \ fmt="%8.2f "+"%2i "+3*" %10.6f") np.savetxt(obsname+"_var.txt",np.vstack((wav_w,ok_w.astype(int),var_Fw)).T, \ fmt="%8.2f "+"%2i "+4*"%14.9f ") np.savetxt(obsname+"_covar.txt",np.vstack((wav_w,ok_w.astype(int),covar_Fw)).T, \ fmt="%8.2f "+"%2i "+3*"%14.9f ") elif wppat=='LINEAR-HI': # for Linear-Hi, must go to normalized stokes in order for the pair combination to cancel systematic errors # each pair p at each wavelength w is linear combination of pairs, including primary p and secondary sec_p # linhi chisq is from comparison of primary and secondary # evaluate wavelengths with at least both pairs 0,2 or 1,3 having good, calibratable data in at least one cycle: ok_pw = okcal_w[None,:] & (cycles_pw > 0) ok_w = (ok_pw[0] & ok_pw[2]) | (ok_pw[1] & ok_pw[3]) bpm_Fw = np.repeat((np.logical_not(ok_w))[None,:],finstokes,axis=0) stokespri_pw = np.zeros((patpairs,wavs)) varpri_pw = np.zeros_like(stokespri_pw) covarpri_pw = np.zeros_like(stokespri_pw) stokespri_pw[plist] = nstokes_kw[klist] varpri_pw[plist] = nvar_kw[klist] covarpri_pw[plist] = ncovar_kw[klist] haveraw_pw = (cycles_pw > 0) pricof_ppw = np.identity(patpairs)[:,:,None]*haveraw_pw[None,:,:] qq = 1./np.sqrt(2.) 
seccofb_pp = np.array([[ 0,1, 0,-1],[1, 0,1, 0],[ 0,1, 0,1],[-1, 0,1, 0]])*qq # both secs avail seccof1_pp = np.array([[qq,1,-qq, 0],[1,qq,0, qq],[-qq,1,qq,0],[-1, qq,0,qq]])*qq # only 1st sec seccof2_pp = np.array([[qq,0, qq,-1],[0,qq,1,-qq],[ qq,0,qq,1],[ 0,-qq,1,qq]])*qq # only 2nd sec seclist_p = np.array([[1,3],[0,2],[1,3],[0,2]]) havesecb_pw = haveraw_pw[seclist_p].all(axis=1) onlysec1_pw = (np.logical_not(havesecb_pw) & haveraw_pw[seclist_p][:,0] & havesecb_pw[seclist_p][:,1]) onlysec2_pw = (np.logical_not(havesecb_pw) & haveraw_pw[seclist_p][:,1] & havesecb_pw[seclist_p][:,0]) seccof_ppw = seccofb_pp[:,:,None]*havesecb_pw[:,None,:] + \ seccof1_pp[:,:,None]*onlysec1_pw[:,None,:] + \ seccof2_pp[:,:,None]*onlysec2_pw[:,None,:] stokessec_pw = (seccof_ppw*stokespri_pw[:,None,:]).sum(axis=0) varsec_pw = (seccof_ppw**2*varpri_pw[:,None,:]).sum(axis=0) covarsec_pw = (seccof_ppw**2*covarpri_pw[:,None,:]).sum(axis=0) havesec_pw = (havesecb_pw | onlysec1_pw | onlysec2_pw) prisec_pw = (haveraw_pw & havesec_pw) onlypri_pw = (haveraw_pw & np.logical_not(havesec_pw)) onlysec_pw = (np.logical_not(haveraw_pw) & havesec_pw) cof_ppw = onlypri_pw[:,None,:]*pricof_ppw + onlysec_pw[:,None,:]*seccof_ppw + \ 0.5*prisec_pw[:,None,:]*(pricof_ppw+seccof_ppw) # now do the combination stokes_pw = (cof_ppw*stokespri_pw[None,:,:]).sum(axis=1) var_pw = (cof_ppw**2*varpri_pw[None,:,:]).sum(axis=1) covar_pw = (cof_ppw**2*covarpri_pw[None,:,:]).sum(axis=1) covarprisec_pw = 0.5*varpri_pw*np.logical_or(onlysec1_pw,onlysec2_pw) covarqu_w = (cof_ppw[0]*cof_ppw[2]*varpri_pw).sum(axis=0) # cull wavelengths based on chisq between primary and secondary chi2linhi_pw = np.zeros((patpairs,wavs)) badlinhichi_w = np.zeros(wavs) havelinhichi_p = prisec_pw.any(axis=1) linhichis = havelinhichi_p.sum() chi2linhi_pw[prisec_pw] = ((stokespri_pw[prisec_pw] - stokessec_pw[prisec_pw])**2 / \ (varpri_pw[prisec_pw] + varsec_pw[prisec_pw] - 2.*covarprisec_pw[prisec_pw])) q3_p = np.percentile(chi2linhi_pw[:,okall_w].reshape((4,-1)),75,axis=1) badlinhichi_w[ok_w] = ((chi2linhi_pw[:,ok_w] > (chifence_d[2]*q3_p)[:,None])).any(axis=0) ok_w &= np.logical_not(badlinhichi_w) okall_w &= np.logical_not(badlinhichi_w) chi2linhi_p = np.zeros(patpairs) chi2linhi_p[havelinhichi_p] = (chi2linhi_pw[havelinhichi_p][:,ok_w]).sum(axis=1)/ \ (prisec_pw[havelinhichi_p][:,ok_w]).sum(axis=1) syserrlinhi_pw = np.zeros((patpairs,wavs)) varlinhi_pw = np.zeros((patpairs,wavs)) syserrlinhi_p = np.zeros(patpairs) syserrlinhi_pw[prisec_pw] = (stokespri_pw[prisec_pw] - stokessec_pw[prisec_pw]) varlinhi_pw[prisec_pw] = varpri_pw[prisec_pw] + varsec_pw[prisec_pw] - 2.*covarprisec_pw[prisec_pw] syserrlinhi_p[havelinhichi_p] = np.average(syserrlinhi_pw[havelinhichi_p][:,okall_w], \ weights=1./np.sqrt(varlinhi_pw[havelinhichi_p][:,okall_w]),axis=1) if debug: np.savetxt(obsname+"_have_pw.txt",np.vstack((wav_w,ok_pw.astype(int),haveraw_pw,havesecb_pw, \ onlysec1_pw,onlysec2_pw,havesec_pw,prisec_pw,onlypri_pw,onlysec_pw)).T, \ fmt="%8.2f "+9*"%2i %2i %2i %2i ") np.savetxt(obsname+"_seccof_ppw.txt",np.vstack((wav_w,ok_pw.astype(int),seccof_ppw.reshape((16,-1)))).T, \ fmt="%8.2f "+4*"%2i "+16*" %6.3f") np.savetxt(obsname+"_cof_ppw.txt",np.vstack((wav_w,ok_pw.astype(int),cof_ppw.reshape((16,-1)))).T, \ fmt="%8.2f "+4*"%2i "+16*" %6.3f") np.savetxt(obsname+"_stokes.txt",np.vstack((wav_w,ok_pw.astype(int),stokespri_pw,stokes_pw)).T, \ fmt="%8.2f "+4*"%2i "+8*" %10.6f") np.savetxt(obsname+"_var.txt",np.vstack((wav_w,ok_pw.astype(int),varpri_pw,var_pw)).T, \ fmt="%8.2f "+4*"%2i 
"+8*"%14.9f ") np.savetxt(obsname+"_covar.txt",np.vstack((wav_w,ok_pw.astype(int),covarpri_pw,covar_pw)).T, \ fmt="%8.2f "+4*"%2i "+8*"%14.9f ") np.savetxt(obsname+"_chi2linhi_pw.txt",np.vstack((wav_w,stokes_Fw[0],ok_pw.astype(int), \ chi2linhi_pw)).T, fmt="%8.2f %10.0f "+4*"%2i "+4*"%10.4f ") stokes_Fw[1:] = stokes_pw[[0,2]]*stokes_Fw[0] var_Fw[1:3] = var_pw[[0,2]]*stokes_Fw[0]**2 var_Fw[3] = covarqu_w*stokes_Fw[0]**2 covar_Fw[1:] = covar_pw[[0,2]]*stokes_Fw[0]**2 bpm_Fw = ((bpm_Fw==1) | np.logical_not(ok_w)).astype(int) # document chisq results, combine flagoffs, compute mean chisq for observation, combine with final bpm if (havecyclechi_p.any() | havelinhichi_p.any()): chi2cyclenet = 0. syserrcyclenet = 0. chi2linhinet = 0. syserrlinhinet = 0. if havecyclechi_p.any(): log.message(("\n"+14*" "+"{:^"+str(5*patpairs)+"}{:^"+str(8*patpairs)+"}{:^"+str(6*patpairs)+"}")\ .format("culled","sys %err","mean chisq"), with_header=False) log.message((9*" "+"HW "+patpairs*" %4s"+patpairs*" %7s"+patpairs*" %5s") \ % tuple(3*patwplist),with_header=False) jlist = sum([jlistk[k] for k in klist],[]) Jlist = list(set(sum([Jlistk[k] for k in klist],[]))) Jmax = max(Jlist) ok_pJ = np.zeros((patpairs,Jmax+1),dtype=bool) for p in plist: ok_pJ[p][Jlistk[k_p[p]]] = True syserrcycle_pJ = np.zeros((patpairs,Jmax+1)) syserrcycle_pJ[ok_pJ] = syserrcycle_j[jlist] syserrcyclenet_p = np.zeros(patpairs) syserrcyclenet_p[plist] = syserrcyclenet_k[klist] syserrcyclenet = np.sqrt((syserrcyclenet_p**2).sum()/patpairs) chi2cycle_pJ = np.zeros((patpairs,Jmax+1)) chi2cycle_pJ[ok_pJ] = chi2cycle_j[jlist] chi2cyclenet_p = np.zeros(patpairs) chi2cyclenet_p[plist] = chi2cyclenet_k[klist] chi2cyclenet = chi2cyclenet_p.sum()/patpairs culls_pJ = np.zeros((patpairs,Jmax+1),dtype=int) culls_pJ[ok_pJ] = iscull_jw[jlist].sum(axis=1) if cycles_p.max() > 2: for J in set(Jlist): log.message(((" cycle %2i: "+patpairs*"%4i "+patpairs*"%7.3f "+patpairs*"%5.2f ") % \ ((J+1,)+tuple(culls_pJ[:,J])+tuple(100.*syserrcycle_pJ[:,J])+tuple(chi2cycle_pJ[:,J]))), \ with_header=False) netculls_p = [iscull_jw[jlistk[k_p[p]]].all(axis=0).sum() for p in range(patpairs)] log.message((" net : "+patpairs*"%4i "+patpairs*"%7.3f "+patpairs*"%5.2f ") % \ (tuple(netculls_p)+tuple(100*syserrcyclenet_p)+tuple(chi2cyclenet_p)), with_header=False) if (havelinhichi_p.any()): log.message(("\n"+14*" "+"{:^"+str(5*patpairs)+"}{:^"+str(8*patpairs)+"}{:^"+str(6*patpairs)+"}")\ .format("culled","sys %err","mean chisq"), with_header=False) log.message((9*" "+"HW "+(4*patpairs/2)*" "+" all"+(4*patpairs/2)*" "+patpairs*" %7s"+patpairs*" %5s") \ % tuple(2*patwplist),with_header=False) chicount = int(badlinhichi_w.sum()) chi2linhinet = chi2linhi_p.sum()/(havelinhichi_p.sum()) syserrlinhinet = np.sqrt((syserrlinhi_p**2).sum()/(havelinhichi_p.sum())) log.message((" Linhi: "+(2*patpairs)*" "+"%3i "+(2*patpairs)*" "+patpairs*"%7.3f "+patpairs*"%5.2f ") % \ ((chicount,)+tuple(100.*syserrlinhi_p)+tuple(chi2linhi_p)), with_header=False) chi2qudof = (chi2cyclenet+chi2linhinet)/(int(chi2cyclenet>0)+int(chi2linhinet>0)) syserr = np.sqrt((syserrcyclenet**2+syserrlinhinet**2)/ \ (int(syserrcyclenet>0)+int(syserrlinhinet>0))) log.message(("\n Estimated sys %%error: %5.3f%% Mean Chisq: %6.2f") % \ (100.*syserr,chi2qudof), with_header=False) if not HW_Cal_override: # apply hw efficiency, equatorial PA rotation calibration stokes_Fw[1:,ok_w] /= heff_w[ok_w] var_Fw[1:,ok_w] /= heff_w[ok_w]**2 covar_Fw[1:,ok_w] /= heff_w[ok_w]**2 stokes_Fw,var_Fw,covar_Fw = 
specpolrotate(stokes_Fw,var_Fw,covar_Fw,eqpar_w) # save final stokes fits file for this observation. Strain out nans. infile = infilelist[rawlist[comblist[k][0]][0]] hduout = pyfits.open(infile) hduout['SCI'].data = np.nan_to_num(stokes_Fw.reshape((3,1,-1))) hduout['SCI'].header['CTYPE3'] = 'I,Q,U' hduout['VAR'].data = np.nan_to_num(var_Fw.reshape((4,1,-1))) hduout['VAR'].header['CTYPE3'] = 'I,Q,U,QU' hduout['COV'].data = np.nan_to_num(covar_Fw.reshape((3,1,-1))) hduout['COV'].header['CTYPE3'] = 'I,Q,U,QU' hduout['BPM'].data = bpm_Fw.astype('uint8').reshape((3,1,-1)) hduout['BPM'].header['CTYPE3'] = 'I,Q,U' hduout[0].header['WPPATERN'] = wppat hduout[0].header['PATYPE'] = pacaltype if len(calhistorylist): for line in calhistorylist: hduout[0].header.add_history(line) if (havecyclechi_p.any() | havelinhichi_p.any()): hduout[0].header['SYSERR'] = (100.*syserr,'estimated % systematic error') outfile = obsname+'_stokes.fits' hduout.writeto(outfile,overwrite=True,output_verify='warn') log.message('\n '+outfile+' Stokes I,Q,U', with_header=False) # apply flux calibration, if available fluxcal_w = specpolflux(outfile,logfile=logfile) if fluxcal_w.shape[0]>0: stokes_Fw *= fluxcal_w var_Fw *= fluxcal_w**2 covar_Fw *= fluxcal_w**2 # calculate, print means (stokes averaged in unnorm space) avstokes_f, avvar_f, avwav = spv.avstokes(stokes_Fw,var_Fw[:-1],covar_Fw,wav_w) avstokes_F = np.insert(avstokes_f,0,1.) avvar_F = np.insert(avvar_f,0,1.) spv.printstokes(avstokes_F,avvar_F,avwav,tcenter=np.pi/2.,textfile='tmp.log') log.message(open('tmp.log').read(), with_header=False) os.remove('tmp.log') # elif wppat.count('CIRCULAR'): TBS # elif wppat=='ALL-STOKES': TBS # end of obs loop # end of config loop return
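# A small sketch of the reporting step at the end of the routine above: the
# wavelength-averaged normalized Stokes parameters get the identity intensity
# prepended with np.insert before printing. The (q, u) values and variances
# below are hypothetical and the spv.printstokes helper is not reproduced;
# the degree of polarization and position angle follow the standard formulas.
import numpy as np

avstokes_f = np.array([0.012, -0.004])    # averaged (Q/I, U/I), made up
avvar_f    = np.array([1.5e-6, 1.6e-6])   # their variances, made up

avstokes_F = np.insert(avstokes_f, 0, 1.)  # -> (I, Q/I, U/I) with I = 1
avvar_F    = np.insert(avvar_f, 0, 1.)

p  = np.sqrt(avstokes_F[1]**2 + avstokes_F[2]**2)                  # linear pol. degree
pa = 0.5 * np.degrees(np.arctan2(avstokes_F[2], avstokes_F[1])) % 180.  # position angle
print('P = {:.3%}  PA = {:.1f} deg'.format(p, pa))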
def translate(self, xs, max_length=100): print("Now translating") batch = len(xs) print("batch", batch) with chainer.no_backprop_mode(), chainer.using_config('train', False): wxs = [ np.array([source_word_ids.get(w, UNK) for w in x], dtype=np.int32) for x in xs ] wx_len = [len(wx) for wx in wxs] wx_section = np.cumsum(wx_len[:-1]) valid_wx_section = np.insert(wx_section, 0, 0) cxs = [ np.array( [source_char_ids.get(c, UNK) for c in list("".join(x))], dtype=np.int32) for x in xs ] wexs = sequence_embed(self.embed_xw, wxs) cexs = sequence_embed(self.embed_xc, cxs) wexs_f = wexs wexs_b = [wex[::-1] for wex in wexs] cexs_f = cexs cexs_b = [cex[::-1] for cex in cexs] _, hfw = self.encoder_fw(None, wexs_f) h1, hbw = self.encoder_bw(None, wexs_b) _, hfc = self.encoder_fc(None, cexs_f) h2, hbc = self.encoder_bc(None, cexs_b) hbw = [F.get_item(h, range(len(h))[::-1]) for h in hbw] hbc = [F.get_item(h, range(len(h))[::-1]) for h in hbc] htw = list(map(lambda x, y: F.concat([x, y], axis=1), hfw, hbw)) htc = list(map(lambda x, y: F.concat([x, y], axis=1), hfc, hbc)) ht = list(map(lambda x, y: F.concat([x, y], axis=0), htw, htc)) ys = self.xp.full(batch, EOS, 'i') result = [] h = F.concat([h1, h2], axis=2) for i in range(max_length): eys = self.embed_y(ys) eys = chainer.functions.split_axis(eys, batch, 0) h_list, h_bar_list, c_s_list, z_s_list = self.decoder( h, ht, eys) cys = chainer.functions.concat(h_list, axis=0) wy = self.W(cys) ys = self.xp.argmax(wy.data, axis=1).astype('i') result.append(ys) h = F.transpose_sequence(h_list)[-1] h = F.reshape(h, (self.n_layers, h.shape[0], h.shape[1])) result = cuda.to_cpu(self.xp.stack(result).T) # Remove EOS taggs outs = [] for y in result: inds = np.argwhere(y == EOS) if len(inds) > 0: y = y[:inds[0, 0]] outs.append(y) return outs
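# The section bookkeeping at the top of translate() relies on the cumsum plus
# np.insert(..., 0, 0) idiom to turn per-sentence lengths into split points and
# segment starts. A self-contained sketch; `flat` stands in for the concatenated
# embeddings and the sentence lengths are made up.
import numpy as np

wx_len = [5, 3, 7]                              # lengths of three tokenised sentences
wx_section = np.cumsum(wx_len[:-1])             # split points: [5 8]
valid_wx_section = np.insert(wx_section, 0, 0)  # segment starts: [0 5 8]

flat = np.arange(sum(wx_len))
segments = np.split(flat, wx_section)           # recover the per-sentence pieces
print(valid_wx_section, [len(s) for s in segments])   # [0 5 8] [5, 3, 7]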
def LocalSearch(pi4,x,y): num=random.choice(pi4) pj=pp[num,:] here=np.where(pi4==num) pi5=np.delete(pi4,here) ss2=np.zeros(x-1, dtype=int) d2=np.zeros((x-1,y), dtype=int) f2=np.zeros((x-1,y), dtype=int) d2,ss2=setuptime(pi4,x-1,y) f2=tailtime(pi4,x-1,y) minor=100000 for hero in range(x): cmax=np.zeros((y), dtype=int) d3=np.zeros(y, dtype=int) for j in range(y): if hero==0: if j==0: if (ss2[hero]+pj[j])>kk[j+1,0,r[hero]]: d3[j]=ss2[hero]+pj[j] else: d3[j]=kk[j+1,0,r[hero]] elif j < y-1: if d3[j-1]+pj[j]>kk[j+1,0,r[hero]]: d3[j]=d3[j-1]+pj[j] else: d3[j]=kk[j+1,0,r[hero]] else: d3[j]=d3[j-1]+pj[j] elif hero<x-1: if j==0: if (ss2[hero]+pj[j])>(d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]]): d3[j]=ss2[hero]+pj[j] else: d3[j]=d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]] elif j < y-1: if d3[j-1]+pj[j]>d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]]: d3[j]=d3[j-1]+pj[j] else: d3[j]=d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]] else: d3[j]=d3[j-1]+pj[j] else: ss20=d2[hero-1,0]+kk[0,r[hero-1],r[hero]] if j==0: if (ss20+pj[j])>(d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]]): d3[j]=ss20+pj[j] else: d3[j]=d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]] elif j < y-1: if d3[j-1]+pj[j]>d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]]: d3[j]=d3[j-1]+pj[j] else: d3[j]=d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]] else: d3[j]=d3[j-1]+pj[j] if hero<x-1: cmax[j]=d3[j]+f2[hero,j]+kk[j,r[hero],r[hero+1]] else: cmax[j]=d3[j] print(d3) pi5=np.insert(pi5,hero,num) print(pi5,cmax) print(setuptime1(pi6,x,y),tailtime(pi6,x,y),pi6,x,y) return pi5
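# LocalSearch() above evaluates re-inserting a removed job at every position of
# the remaining sequence. A generic sketch of that insertion move with np.delete
# and np.insert; the setup-time/tail-time makespan bookkeeping is not reproduced,
# so makespan() here is a stand-in objective and the job array is made up.
import numpy as np

def insertion_move_best(pi, makespan, rng=np.random.default_rng()):
    num = rng.choice(pi)                             # job to relocate
    rest = np.delete(pi, np.where(pi == num)[0])     # sequence without it
    best_seq, best_val = None, np.inf
    for pos in range(len(rest) + 1):                 # try every insertion slot
        cand = np.insert(rest, pos, num)
        val = makespan(cand)
        if val < best_val:
            best_seq, best_val = cand, val
    return best_seq, best_val

jobs = np.array([3, 1, 4, 2, 0])
toy_makespan = lambda seq: float(np.sum(seq * np.arange(1, len(seq) + 1)))
print(insertion_move_best(jobs, toy_makespan))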
def insert(self, timestamp, datum):
    # keep data_ts sorted: locate the insertion point once, then place the
    # timestamp and its datum at the same index in both containers
    insert_idx = np.searchsorted(self.data_ts, timestamp)
    self.data_ts = np.insert(self.data_ts, insert_idx, timestamp)
    self.data.insert(insert_idx, datum)
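# Usage sketch of the searchsorted + insert idiom above: a timestamp array kept
# sorted while a parallel Python list of payloads stays aligned with it. The
# timestamps and payloads are made up.
import numpy as np

data_ts, data = np.array([], dtype=float), []
for timestamp, datum in [(2.0, 'b'), (1.0, 'a'), (3.0, 'c')]:
    insert_idx = np.searchsorted(data_ts, timestamp)   # position that keeps order
    data_ts = np.insert(data_ts, insert_idx, timestamp)
    data.insert(insert_idx, datum)
print(data_ts, data)   # [1. 2. 3.] ['a', 'b', 'c']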
def get_ensemble_correction(ifile, star_names, star_array, eclat, eclon): """ Function that takes all input stars for a sector and uses them to find a detrending function using ensemble photometry for a star 'star_names[ifile]', where ifile is the index for the star in the star_array and star_names list. Parameters: ifile (int): index for the relevant star in the star_names list (and consequently also the star_array, eclat and eclon lists). star_names (ndarray): Labels of all the names of the stars in a given sector. star_array (ndarray): An array of class instances holding metadata on each star. (i.e. flux, time, mean flux, std of flux). eclat (ndarray): Ecliptic latitude for all stars. eclon (ndarray): Ecliptic longitude for all stars. Returns: pp (scipy.interpolate.PchipInterpolator): Interpolation function for the ensemble photometry trend for your given star. """ dist = np.zeros([2, len(star_names)]) dist[0] = range(len(star_names)) dist[1] = np.sqrt((eclat - eclat[ifile])**2 + (eclon - eclon[ifile])**2) #artificially increase distance to the star itself, so when we sort by distance it ends up last dist = np.transpose(dist) #dist[ifile][1] = 10*np.pi dist[ifile][1] = 10000.0 #sort by distance sort_dist = np.sort(dist, 0) #set up initial search radius to build ensemble so that 20 stars are included search_radius = sort_dist[19][1] #20 works well for 20s cadence...more for longer? #set up start/end times for stellar time series time_start = np.amin(star_array[ifile].time) time_end = np.max(star_array[ifile].time) #set minimum range parameter...this is log10 photometric range, and stars more variable than this will be #excluded from the ensemble min_range = -2.0 min_range0 = min_range flag = 1 #start loop to build ensemble while True: #num_star is number of stars in ensemble num_star = 0 #full_time,flux,weight are time,flux,weight points in the ensemble full_time = np.array([]) full_flux = np.array([]) full_flag = np.array([]) full_weight = np.array([]) tflux = np.array([]) comp_list = np.array([]) #loop through all other stars to build ensemble #exclude stars outside search radius, flagged, too active (either relatively or absolutely) #excluding stars with negative flux is only required because the synthetic data have some flawed #light curves that are <0. Probably can remove this with real data. # #Put the selection conditions into a boolean array for all stars simultaneously # sel = (dist[:,1] < search_radius) & (np.log10(drange) < min_range) & (drange < 10*drange[ifile]) for test_star in range(len(star_names[:])): if (dist[test_star][1] < search_radius and np.log10(star_array[test_star].drange) < min_range and star_array[test_star].drange < 10 * star_array[ifile].drange): num_star += 1 #calculate relative flux for star to be added to ensemble test0 = star_array[test_star].time test1 = star_array[test_star].flux test1 = test1 / star_array[test_star].fmean #calculate weight for star to be added to the ensemble. weight is whitened stdev relative to mean flux weight = np.ones_like(test1) weight = weight * star_array[test_star].fmean / star_array[ test_star].fstd #add time, flux, weight to ensemble light curve. 
flux is weighted flux full_time = np.append(full_time, test0) full_flux = np.append(full_flux, np.multiply(test1, weight)) full_weight = np.append(full_weight, weight) #tflux is total unweighted flux tflux = np.append(tflux, test1) comp_list = np.append(comp_list, test_star) #set up time array with 0.5-day resolution which spans the time range of the time series #then histogram the data based on that array gx = np.arange(time_start, time_end, 0.5) n = np.histogram(full_time, gx) n = np.asarray(n[0]) n2 = np.histogram(star_array[ifile].time, gx) n2 = np.asarray(n2[0]) #if the least-populated bin has less than 2000 points, increase the size of the ensemble by first #increasing the level of acceptable variability until it exceeds the variability of the star. Once that happens, #increase the search radius and reset acceptable variability back to initial value. If the search radius exceeds #a limiting value (pi/4 at this point), accept that we can't do any better. #if np.min(n[0])<400: #print np.min(n[n2>0]) if np.min(n[n2 > 0]) < 1000: #print min_range min_range = min_range + 0.3 if min_range > np.log10(np.max(star_array[ifile].drange)): #if (search_radius < 0.5): if (search_radius < 100): #search_radius = search_radius+0.1 search_radius = search_radius + 10 else: search_radius = search_radius * 1.1 min_range = min_range0 #if search_radius > np.pi/4: if search_radius > 400: break else: break #clean up ensemble points by removing NaNs full_time = full_time[~np.isnan(full_flux)] full_weight = full_weight[~np.isnan(full_flux)] full_flux = full_flux[~np.isnan(full_flux)] tflux = tflux[~np.isnan(full_flux)] #sort ensemble into time order idx = np.argsort(full_time) full_time = full_time[idx] full_flux = full_flux[idx] full_weight = full_weight[idx] #temporary copies of ensemble components full_time0 = full_time full_flux0 = full_flux full_weight0 = full_weight #set up temporary files temp_time = full_time temp_flux = full_flux temp_weight = full_weight #simplify by discarding ensemble points outside the temporal range of the stellar time series temp_time = full_time[(full_time > time_start) & (full_time < time_end)] temp_flux = full_flux[(full_time > time_start) & (full_time < time_end)] temp_weight = full_weight[(full_time > time_start) & (full_time < time_end)] full_time = temp_time full_flux = temp_flux full_weight = temp_weight #identify locations where there is a break in the time series. If there is at least one break, identify #segments and label ensemble points by segment; bidx2 is the label. 
If there are no breaks, then identify #only one segment and label accordingly break_locs = np.where(np.diff(full_time) > 0.1) if np.size(break_locs) > 0: if (break_locs[0][-1] < np.size(full_time)): break_locs = np.append(break_locs, np.size(full_time) - 1) break_locs = np.insert(break_locs, 0, 0) cts, bin_edges = np.histogram(full_time, full_time[break_locs]) bidx2 = np.digitize(full_time, full_time[break_locs]) num_segs = np.size(break_locs) - 1 else: cts, bin_edges = np.histogram( full_time, np.squeeze(np.append(full_time[0], full_time[-1]))) bidx2 = np.digitize( full_time, np.squeeze(np.append(full_time[0], full_time[-1] + 1))) num_segs = 1 break_locs = np.append(0, np.size(full_time) - 1) #pp will be components of spline fit to ensemble for each segment pp_ensemble = [] #set up influx, inweight,intime as flux/weight/time of ensemble segment-by-segment for iseg in range(num_segs): influx = full_flux[bidx2 - 1 == iseg] inweight = full_weight[bidx2 - 1 == iseg] intime = full_time[bidx2 - 1 == iseg] intime0 = intime influx0 = influx #initialize bin size in days. We will fit the ensemble with splines bin_size = 2.0 for ib in range(7): gx = np.arange(time_start - .5 * bin_size, time_end + bin_size, bin_size) # bidx = np.digitize(full_time,gx) bidx = np.digitize(temp_time, gx) bidx = bidx - 1 # n, bin_edges = np.histogram(full_time,gx) #bin data n, bin_edges = np.histogram(temp_time, gx) #bin data #if there are too few points in the least-populated bin after the first couple of iterations, break out #and stop decreasing the size of the bins ttflux = [] ttweight = [] ttime = [] #bin by bin build temporary arrays for weight, time, flux for ix in range(len(n)): ttweight = np.append(ttweight, np.nanmean(temp_weight[bidx == ix])) ttime = np.append(ttime, np.nanmean(temp_time[bidx == ix])) ttflux = np.append( ttflux, np.nanmedian( np.divide(temp_flux[bidx == ix], temp_weight[bidx == ix]))) ottime = ttime #keep track of originals since we will modify the tt arrays otflux = ttflux #clean up any NaNs ttime = np.asarray(ttime) ttflux = np.asarray(ttflux) w1 = ttime[~np.isnan(ttflux)] w2 = ttflux[~np.isnan(ttflux)] # pp = scipy.interpolate.splrep(w1,w2,k=3) #interpolate a spline across the bins counter = len(ttime) while counter > 0: pp = scipy.interpolate.pchip(w1, w2) diff1 = np.divide(temp_flux, temp_weight) - pp(temp_time) sdiff = 4 * np.nanstd(diff1) counter = len(diff1[np.abs(diff1) > sdiff]) temp_time = temp_time[np.abs(diff1) < sdiff] temp_flux = temp_flux[np.abs(diff1) < sdiff] temp_weight = temp_weight[np.abs(diff1) < sdiff] pp = scipy.interpolate.pchip(w1, w2) break_locs = np.where(np.diff(star_array[ifile].time) > 0.1) #find places where there is a break in time break_locs = np.array(break_locs) if break_locs.size > 0: #set up boundaries to correspond with breaks break_locs = np.array(break_locs) + 1 break_locs.astype(int) if (np.max(break_locs) < len(star_array[ifile].time)): break_locs = np.append(break_locs, len(star_array[ifile].time) - 1) digit_bounds = star_array[ifile].time digit_bounds = np.array(digit_bounds) digit_bounds = digit_bounds[break_locs] if digit_bounds[0] > np.min(full_time): digit_bounds = np.append( np.min(full_time) - 1e-5, digit_bounds) if digit_bounds[-1] < np.max(full_time): digit_bounds = np.append(digit_bounds, np.max(full_time) + 1e-5) if digit_bounds[0] > np.min(star_array[ifile].time): digit_bounds = np.append( np.min(star_array[ifile].time) - 1e-5, digit_bounds) if digit_bounds[-1] < np.max(star_array[ifile].time): digit_bounds = np.append(digit_bounds, 
np.max(star_array[ifile].time) + 1e-5) bincts, edges = np.histogram(star_array[ifile].time, digit_bounds) bidx = np.digitize(star_array[ifile].time, digit_bounds) #binning for star bidx = bidx - 1 bincts2, edges = np.histogram(full_time, full_time[break_locs]) bidx2 = np.digitize(full_time, full_time[break_locs]) #binning for ensemble bidx2 = bidx2 - 1 num_segs = len(break_locs) else: bincts, edges = np.histogram( star_array[ifile].time, [star_array[ifile].time[0], star_array[ifile].time[-1]]) bidx = np.digitize( star_array[ifile].time, [star_array[ifile].time[0], star_array[ifile].time[-1] ]) #binning for star bidx = bidx - 1 bincts2, edges = np.histogram(full_time, [full_time[0], full_time[-1]]) bidx2 = np.digitize( full_time, [full_time[0], full_time[-1]]) #binning for ensemble bidx2 = bidx2 - 1 num_segs = 1 tscale = [] for iseg in range(num_segs): influx = np.array(star_array[ifile].flux) intime = np.array(star_array[ifile].time) influx = influx[bidx == iseg] intime = intime[bidx == iseg] # fun = lambda x: np.sum(np.square(np.divide(influx,np.median(influx))-x*scipy.interpolate.splev(intime,pp))) fun = lambda x: np.sum( np.square( np.divide(influx, np.median(influx)) - x * pp(intime))) tscale = np.append(tscale, sciopt.fminbound( fun, 0.9, 1.5)) #this is a last fix to scaling, not currently used tbidx = deepcopy(bidx) bin_size = bin_size / 2 return pp
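# The trend fit above median-bins the ensemble light curve, interpolates a PCHIP
# through the bins, and iteratively rejects raw points more than 4 sigma from the
# trend. A simplified, self-contained sketch under those assumptions: weights and
# per-segment handling are omitted, and the toy light curve is made up.
import numpy as np
from scipy.interpolate import pchip

def binned_pchip_with_clipping(t, f, bin_size=2.0, nsigma=4.0):
    edges = np.arange(t.min() - 0.5 * bin_size, t.max() + bin_size, bin_size)
    idx = np.digitize(t, edges) - 1
    bt = np.array([t[idx == i].mean() for i in range(len(edges) - 1) if (idx == i).any()])
    bf = np.array([np.median(f[idx == i]) for i in range(len(edges) - 1) if (idx == i).any()])
    pp = pchip(bt, bf)                       # trend through the bin medians
    while True:
        resid = f - pp(t)
        keep = np.abs(resid) < nsigma * np.nanstd(resid)
        if keep.all():
            return pp
        t, f = t[keep], f[keep]              # drop outliers, recompute residual stats

rng = np.random.default_rng(0)
t = np.sort(rng.uniform(0., 20., 400))
f = 1.0 + 0.02 * np.sin(t / 3.) + rng.normal(0., 1e-3, t.size)
f[::97] += 0.1                               # a few outliers to be clipped
print(binned_pchip_with_clipping(t, f)(10.0))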
def load(self): if self.loaded: return with open(self.filepath, 'rb') as f: message = Decoder().process(f.read()) queryer = DataQuerent(NodePathParser()) self._lons = [] self._lats = [] self._wind = [] self._pres = [] for subset in range(52): # lat try: values = queryer.query( message, '@[{}] > {}'.format(subset, self.CODE_LAT)).all_values() except IndexError: raw_lats = np.empty(41) raw_lats[:] = np.nan else: raw_lats = np.array(values[0][3], dtype='float')[:, 0] raw_lats = np.insert(raw_lats, 0, values[0][1]) self._lats.append(raw_lats) # lon try: values = queryer.query( message, '@[{}] > {}'.format(subset, self.CODE_LON)).all_values() except IndexError: raw_lons = np.empty(41) raw_lons[:] = np.nan else: raw_lons = np.array(values[0][3], dtype='float')[:, 0] raw_lons = np.insert(raw_lons, 0, values[0][1]) raw_lons[raw_lons < 0] = raw_lons[raw_lons < 0] + 360 self._lons.append(raw_lons) # wind try: values = queryer.query( message, '@[{}] > {}'.format(subset, self.CODE_WIND)).all_values(flat=True) except IndexError: raw_wind = np.empty(41) raw_wind[:] = np.nan else: raw_wind = np.array(values[0], dtype='float') * 1.94 # to kt self._wind.append(raw_wind) # pres try: values = queryer.query( message, '@[{}] > {}'.format(subset, self.CODE_PRES)).all_values(flat=True) except IndexError: raw_pres = np.empty(41) raw_pres[:] = np.nan else: raw_pres = np.array(values[0], dtype='float') / 100 # to hPa self._pres.append(raw_pres) self.invalid_indices = [] self.invalid_majors = [] self._lats = self.compact_mean(self._lats) self._lons = self.compact_mean(self._lons) self._wind = self.compact_mean(self._wind) self._pres = self.compact_mean(self._pres) invalid_index = min(self.invalid_indices) invalid_major = min(self.invalid_majors) print(invalid_index, invalid_major) self.cut_major(self._lats, invalid_major) self.cut_major(self._lons, invalid_major) self.cut_major(self._wind, invalid_major) self.cut_major(self._pres, invalid_major) self._lats[-1, invalid_index:] = np.nan self._lons[-1, invalid_index:] = np.nan self._wind[-1, invalid_index:] = np.nan self._pres[-1, invalid_index:] = np.nan self._maxwind = np.nanmax(self._wind, axis=1) self._minpres = np.nanmin(self._pres, axis=1) #print(self._maxwind) #print(self._minpres) self.loaded = True
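# The loader above prepends the decoded analysis position to each forecast track
# with np.insert and wraps negative longitudes into the 0..360 range. A minimal
# sketch of those two steps with made-up positions.
import numpy as np

analysis_lat, analysis_lon = 18.5, -150.2
forecast_lats = np.array([18.9, 19.4, 20.1])
forecast_lons = np.array([-151.0, -151.8, -152.5])

lats = np.insert(forecast_lats, 0, analysis_lat)   # index 0 is the initial position
lons = np.insert(forecast_lons, 0, analysis_lon)
lons[lons < 0] += 360                              # wrap to 0..360 as the loader does

print(lats)   # [18.5 18.9 19.4 20.1]
print(lons)   # [209.8 209.  208.2 207.5]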
def fill(pre_el, next_el, position, sample_space, output):
    # insert `sample_space` rows into `output` starting at row `position`;
    # each inserted row is roughly the fraction (j+1)/(sample_space+1) of the
    # summed endpoints pre_el + next_el (integer division)
    for j in range(sample_space):
        sample = (pre_el + next_el) // (sample_space + 1) * (j + 1)
        output = np.insert(output, position + j, sample.reshape(2), axis=0)
    return output
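# Usage sketch for fill(): inserting three intermediate rows between two 2-D
# points of an output array. With pre_el at the origin the inserted rows step
# evenly towards next_el; the point values below are made up.
import numpy as np

pre_el  = np.array([[0, 0]])
next_el = np.array([[8, 4]])
output  = np.vstack([pre_el, next_el])

filled = fill(pre_el, next_el, position=1, sample_space=3, output=output)
print(filled)   # rows: [0 0], [2 1], [4 2], [6 3], [8 4]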
def compute_FI_and_GL(X, y, indices_to_target, target_weights, is_multi_label=True, path_to_keras_model=None): """ compute FL and GL for the given inputs """ ## Now, start localisation !!! ## from sklearn.preprocessing import Normalizer from collections.abc import Iterable norm_scaler = Normalizer(norm="l1") total_cands = {} FIs = None grad_scndcr = None #t0 = time.time() ## slice inputs target_X = X[indices_to_target] target_y = y[indices_to_target] # get loss func loss_func = model_util.get_loss_func(is_multi_label=is_multi_label) model = None for idx_to_tl, vs in target_weights.items(): t1 = time.time() t_w, lname = vs model = load_model(path_to_keras_model, compile=False) if idx_to_tl == 0: # meaning the model doesn't specify the input layer explicitly prev_output = target_X else: prev_output = model.layers[idx_to_tl - 1].output layer_config = model.layers[idx_to_tl].get_config() if model_util.is_FC(lname): from_front = [] if idx_to_tl == 0 or idx_to_tl - 1 == 0: prev_output = target_X else: t_model = Model(inputs=model.input, outputs=model.layers[idx_to_tl - 1].output) prev_output = t_model.predict(target_X) if len(prev_output.shape) == 3: prev_output = prev_output.reshape(prev_output.shape[0], prev_output.shape[-1]) for idx in tqdm(range(t_w.shape[-1])): assert int( prev_output.shape[-1]) == t_w.shape[0], "{} vs {}".format( int(prev_output.shape[-1]), t_w.shape[0]) output = np.multiply(prev_output, t_w[:, idx]) # -> shape = prev_output.shape output = np.abs(output) output = norm_scaler.fit_transform(output) output = np.mean(output, axis=0) from_front.append(output) from_front = np.asarray(from_front) from_front = from_front.T from_behind = compute_gradient_to_output(path_to_keras_model, idx_to_tl, target_X) #print ("shape", from_front.shape, from_behind.shape) FIs = from_front * from_behind ############ FI end ######### # Gradient grad_scndcr = compute_gradient_to_loss(path_to_keras_model, idx_to_tl, target_X, target_y, loss_func=loss_func) # G end elif model_util.is_C2D(lname): is_channel_first = layer_config['data_format'] == 'channels_first' if idx_to_tl == 0 or idx_to_tl - 1 == 0: prev_output_v = target_X else: t_model = Model(inputs=model.input, outputs=model.layers[idx_to_tl - 1].output) prev_output_v = t_model.predict(target_X) tr_prev_output_v = np.moveaxis( prev_output_v, [1, 2, 3], [3, 1, 2]) if is_channel_first else prev_output_v kernel_shape = t_w.shape[:2] strides = layer_config['strides'] padding_type = layer_config['padding'] if padding_type == 'valid': paddings = [0, 0] else: if padding_type == 'same': #P = ((S-1)*W-S+F)/2 true_ws_shape = [t_w.shape[0], t_w.shape[-1]] # Channel_in, Channel_out paddings = [ int(((strides[i] - 1) * true_ws_shape[i] - strides[i] + kernel_shape[i]) / 2) for i in range(2) ] elif not isinstance(padding_type, str) and isinstance( padding_type, Iterable): # explicit paddings given paddings = list(padding_type) if len(paddings) == 1: paddings = [paddings[0], paddings[0]] else: print( "padding type: {} not supported".format(padding_type)) paddings = [0, 0] assert False # add padding if is_channel_first: paddings_per_axis = [[0, 0], [0, 0], [paddings[0], paddings[0]], [paddings[1], paddings[1]]] else: paddings_per_axis = [[0, 0], [paddings[0], paddings[0]], [paddings[1], paddings[1]], [0, 0]] tr_prev_output_v = np.pad(tr_prev_output_v, paddings_per_axis, mode='constant', constant_values=0) # zero-padding if is_channel_first: num_kernels = int(prev_output.shape[1]) # Channel_in else: # channels_last assert layer_config[ 'data_format'] == 
'channels_last', layer_config[ 'data_format'] num_kernels = int(prev_output.shape[-1]) # Channel_in assert num_kernels == t_w.shape[2], "{} vs {}".format( num_kernels, t_w.shape[2]) #print ("t_w***", t_w.shape) # H x W if is_channel_first: # the last two (front two are # of inputs and # of kernels (Channel_in)) input_shape = [int(v) for v in prev_output.shape[2:]] else: input_shape = [int(v) for v in prev_output.shape[1:-1]] # (W1−F+2P)/S+1, W1 = input volumne , F = kernel, P = padding n_mv_0 = int((input_shape[0] - kernel_shape[0] + 2 * paddings[0]) / strides[0] + 1) # H_out n_mv_1 = int((input_shape[1] - kernel_shape[1] + 2 * paddings[1]) / strides[1] + 1) # W_out n_output_channel = t_w.shape[-1] # Channel_out from_front = [] # move axis for easier computation for idx_ol in tqdm(range(n_output_channel)): # t_w.shape[-1] for i in range(n_mv_0): # H for j in range(n_mv_1): # W curr_prev_output_slice = tr_prev_output_v[:, i * strides[0]: i * strides[0] + kernel_shape[ 0], :, :] curr_prev_output_slice = curr_prev_output_slice[:, :, j * strides[ 1]:j * strides[1] + kernel_shape[1], :] output = curr_prev_output_slice * t_w[:, :, :, idx_ol] sum_output = np.sum(np.abs(output)) output = output / sum_output sum_output = np.nan_to_num(output, posinf=0.) output = np.mean(output, axis=0) from_front.append(output) from_front = np.asarray(from_front) #from_front.shape: [Channel_out * n_mv_0 * n_mv_1, F1, F2, Channel_in] if is_channel_first: from_front = from_front.reshape( (n_output_channel, n_mv_0, n_mv_1, kernel_shape[0], kernel_shape[1], int(prev_output.shape[1]))) else: # channels_last from_front = from_front.reshape( (n_mv_0, n_mv_1, n_output_channel, kernel_shape[0], kernel_shape[1], int(prev_output.shape[-1]))) # [F1,F2,Channel_in, Channel_out, n_mv_0, n_mv_1] # or [F1,F2,Channel_in, n_mv_0, n_mv_1,Channel_out] from_front = np.moveaxis(from_front, [0, 1, 2], [3, 4, 5]) # [Channel_out, H_out(n_mv_0), W_out(n_mv_1)] from_behind = compute_gradient_to_output(path_to_keras_model, idx_to_tl, target_X, by_batch=True) #t1 = time.time() # [F1,F2,Channel_in, Channel_out, n_mv_0, n_mv_1] (channels_firs) # or [F1,F2,Channel_in,n_mv_0, n_mv_1,Channel_out] (channels_last) FIs = from_front * from_behind #t2 = time.time() #print ('Time for multiplying front and behind results: {}'.format(t2 - t1)) #FIs = np.mean(np.mean(FIs, axis = -1), axis = -1) # [F1, F2, Channel_in, Channel_out] if is_channel_first: FIs = np.sum(np.sum(FIs, axis=-1), axis=-1) # [F1, F2, Channel_in, Channel_out] else: FIs = np.sum(np.sum(FIs, axis=-2), axis=-2) # [F1, F2, Channel_in, Channel_out] #t3 = time.time() #print ('Time for computing mean for FIs: {}'.format(t3 - t2)) ## Gradient # will be [F1, F2, Channel_in, Channel_out] grad_scndcr = compute_gradient_to_loss(path_to_keras_model, idx_to_tl, target_X, target_y, by_batch=True, loss_func=loss_func) elif model_util.is_LSTM(lname): # from scipy.special import expit as sigmoid num_weights = 2 assert len(t_w) == num_weights, t_w # t_w_kernel: # (input_feature_size, 4 * num_units). 
t_w_recurr_kernel: (num_units, 4 * num_units) t_w_kernel, t_w_recurr_kernel = t_w # get the previous output, which will be the input of the lstm if model_util.is_Input(type(model.layers[idx_to_tl - 1]).__name__): prev_output = target_X else: # shape = (batch_size, time_steps, input_feature_size) t_model = Model(inputs=model.input, outputs=model.layers[idx_to_tl - 1].output) prev_output = t_model.predict(target_X) assert len(prev_output.shape) == 3, prev_output.shape num_features = prev_output.shape[ -1] # the dimension of features that will be processed by the model num_units = t_w_recurr_kernel.shape[0] assert t_w_kernel.shape[ 0] == num_features, "{} (kernel) vs {} (input)".format( t_w_kernel.shape[0], num_features) # hidden state and cell state sequences computation # generate a temporary model that only contains the target lstm layer # but with the modification to return sequences of hidden and cell states temp_lstm_layer_inst = lstm_layer.LSTM_Layer( model.layers[idx_to_tl]) hstates_sequence, cell_states_sequence = temp_lstm_layer_inst.gen_lstm_layer_from_another( prev_output) init_hstates, init_cell_states = lstm_layer.LSTM_Layer.get_initial_state( model.layers[idx_to_tl]) if init_hstates is None: init_hstates = np.zeros((len(target_X), num_units)) if init_cell_states is None: # shape = (batch_size, num_units) init_cell_states = np.zeros((len(target_X), num_units)) # shape = (batch_size, time_steps + 1, num_units) hstates_sequence = np.insert(hstates_sequence, 0, init_hstates, axis=1) # shape = (batch_size, time_steps + 1, num_units) cell_states_sequence = np.insert(cell_states_sequence, 0, init_cell_states, axis=1) bias = model.layers[idx_to_tl].get_weights()[ -1] # shape = (4 * num_units,) indices_to_each_gates = np.array_split(np.arange(num_units * 4), 4) ## prepare all the intermediate outputs and the variables that will be used later idx_to_input_gate = 0 idx_to_forget_gate = 1 idx_to_cand_gate = 2 idx_to_output_gate = 3 # for kenerl, weight shape = (input_feature_size, num_units) # and for recurrent, (num_units, num_units), bias (num_units) # and the shape of all the intermedidate outpu is "(batch_size, time_step, num_units)" # input t_w_kernel_I = t_w_kernel[:, indices_to_each_gates[idx_to_input_gate]] t_w_recurr_kernel_I = t_w_recurr_kernel[:, indices_to_each_gates[ idx_to_input_gate]] bias_I = bias[indices_to_each_gates[idx_to_input_gate]] I = sigmoid( np.dot(prev_output, t_w_kernel_I) + np.dot(hstates_sequence[:, :-1, :], t_w_recurr_kernel_I) + bias_I) # forget t_w_kernel_F = t_w_kernel[:, indices_to_each_gates[idx_to_forget_gate]] t_w_recurr_kernel_F = t_w_recurr_kernel[:, indices_to_each_gates[ idx_to_forget_gate]] bias_F = bias[indices_to_each_gates[idx_to_forget_gate]] F = sigmoid( np.dot(prev_output, t_w_kernel_F) + np.dot(hstates_sequence[:, :-1, :], t_w_recurr_kernel_F) + bias_F) # cand t_w_kernel_C = t_w_kernel[:, indices_to_each_gates[idx_to_cand_gate]] t_w_recurr_kernel_C = t_w_recurr_kernel[:, indices_to_each_gates[ idx_to_cand_gate]] bias_C = bias[indices_to_each_gates[idx_to_cand_gate]] C = np.tanh( np.dot(prev_output, t_w_kernel_C) + np.dot(hstates_sequence[:, :-1, :], t_w_recurr_kernel_C) + bias_C) # output t_w_kernel_O = t_w_kernel[:, indices_to_each_gates[idx_to_output_gate]] t_w_recurr_kernel_O = t_w_recurr_kernel[:, indices_to_each_gates[ idx_to_output_gate]] bias_O = bias[indices_to_each_gates[idx_to_output_gate]] # shape = (batch_size, time_steps, num_units) O = sigmoid( np.dot(prev_output, t_w_kernel_O) + np.dot(hstates_sequence[:, :-1, :], 
t_w_recurr_kernel_O) + bias_O) # set arguments to compute forward impact for the neural weights from these four gates t_w_kernels = { 'input': t_w_kernel_I, 'forget': t_w_kernel_F, 'cand': t_w_kernel_C, 'output': t_w_kernel_O } t_w_recurr_kernels = { 'input': t_w_recurr_kernel_I, 'forget': t_w_recurr_kernel_F, 'cand': t_w_recurr_kernel_C, 'output': t_w_recurr_kernel_O } consts = {} consts['input'] = get_constants('input', F, I, C, O, cell_states_sequence) consts['forget'] = get_constants('forget', F, I, C, O, cell_states_sequence) consts['cand'] = get_constants('cand', F, I, C, O, cell_states_sequence) consts['output'] = get_constants('output', F, I, C, O, cell_states_sequence) # from_front's shape = (num_units, (num_features + num_units) * 4) # gate_orders = ['input', 'forget', 'cand', 'output'] from_front, gate_orders = lstm_local_front_FI_for_target_all( prev_output, hstates_sequence[:, :-1, :], num_units, t_w_kernels, t_w_recurr_kernels, consts) from_front = from_front.T # ((num_features + num_units) * 4, num_units) N_k_rk_w = int(from_front.shape[0] / 4) assert N_k_rk_w == num_features + num_units, "{} vs {}".format( N_k_rk_w, num_features + num_units) ## from behind from_behind = compute_gradient_to_output( path_to_keras_model, idx_to_tl, target_X, by_batch=True) # shape = (num_units,) #t1 = time.time() # shape = (N_k_rk_w, num_units) FIs_combined = from_front * from_behind #print ("Shape", from_behind.shape, FIs_combined.shape) #t2 = time.time() #print ('Time for multiplying front and behind results: {}'.format(t2 - t1)) # reshaping FIs_kernel = np.zeros( t_w_kernel.shape ) # t_w_kernel's shape (num_features, num_units * 4) FIs_recurr_kernel = np.zeros( t_w_recurr_kernel.shape ) # t_w_recurr_kernel's shape (num_units, num_units * 4) # from (4 * N_k_rk_w, num_units) to 4 * (N_k_rk_w, num_units) for i, FI_p_gate in enumerate( np.array_split(FIs_combined, 4, axis=0)): # FI_p_gate's shape = (N_k_rk_w, num_units) # -> will divided into (num_features, num_units) & (num_units, num_units) # local indices that will split FI_p_gate (shape = (N_k_rk_w, num_units)) # since we append the weights in order of a kernel weight and a recurrent kernel weight indices_to_features = np.arange(num_features) indices_to_units = np.arange(num_units) + num_features #FIs_kernel[indices_to_features + (i * N_k_rk_w)] # = FI_p_gate[indices_to_features] # shape = (num_features, num_units) #FIs_recurr_kernel[indices_to_units + (i * N_k_rk_w)] # = FI_p_gate[indices_to_units] # shape = (num_units, num_units) FIs_kernel[:, i * num_units:(i + 1) * num_units] = FI_p_gate[ indices_to_features] # shape = (num_features, num_units) FIs_recurr_kernel[:, i * num_units:( i + 1) * num_units] = FI_p_gate[ indices_to_units] # shape = (num_units, num_units) #t3 =time.time() FIs = [FIs_kernel, FIs_recurr_kernel ] # [(num_features, num_units*4), (num_units, num_units*4)] #print ('Time for formatting: {}'.format(t3 - t2)) ## Gradient grad_scndcr = compute_gradient_to_loss(path_to_keras_model, idx_to_tl, target_X, target_y, by_batch=True, loss_func=loss_func) else: print("Currenlty not supported: {}. (shoulde be filtered before)". 
format(lname)) import sys sys.exit() #t2 = time.time() #print ("Time for computing cost for the {} layer: {}".format(idx_to_tl, t2 - t1)) if not model_util.is_LSTM(target_weights[idx_to_tl] [1]): # only one weight variable to process pairs = np.asarray([grad_scndcr.flatten(), FIs.flatten()]).T total_cands[idx_to_tl] = {'shape': FIs.shape, 'costs': pairs} else: # currently, all of them go into here total_cands[idx_to_tl] = {'shape': [], 'costs': []} pairs = [] for _FIs, _grad_scndcr in zip(FIs, grad_scndcr): pairs = np.asarray([_grad_scndcr.flatten(), _FIs.flatten()]).T total_cands[idx_to_tl]['shape'].append(_FIs.shape) total_cands[idx_to_tl]['costs'].append(pairs) #t3 = time.time() #print ("Time for computing total costs: {}".format(t3 - t0)) return total_cands
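A self-contained sketch of the dense-layer forward-impact term computed above, with random arrays standing in for the previous layer's activations and for compute_gradient_to_output; only the shapes and the abs / L1-normalise / batch-mean steps mirror the code above.

import numpy as np
from sklearn.preprocessing import Normalizer

rng = np.random.default_rng(0)
prev_output = rng.normal(size=(8, 4))      # activations feeding the layer (batch, n_in)
t_w = rng.normal(size=(4, 3))              # kernel of the layer under analysis (n_in, n_out)
norm_scaler = Normalizer(norm="l1")

from_front = []
for idx in range(t_w.shape[-1]):           # one column per output neuron
    contrib = np.abs(prev_output * t_w[:, idx])   # |activation * weight|
    contrib = norm_scaler.fit_transform(contrib)  # L1-normalise per sample
    from_front.append(contrib.mean(axis=0))       # average over the batch
from_front = np.asarray(from_front).T      # shape (n_in, n_out)

grad_to_output = rng.normal(size=(3,))     # placeholder for the gradient of the layer output
FIs = from_front * grad_to_output          # forward impact, same shape as t_w
print(FIs.shape)                           # (4, 3)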
Alpha = 0.0001  # learning rate
Epsilon = 0.000001
nFeatures = 0  # assumed initial value; the loop below increments it once per comma
fileReader = open(sys.argv[1], 'r')
lines = fileReader.readlines()  # fileReader is now at EOF
for c in lines[0]:  # count the commas in the first line, so any CSV dimensionality works
    if c == ',':
        nFeatures = nFeatures + 1
Theta = np.array([np.ones(nFeatures + 1)])
print("# features", nFeatures)
y = np.array([[i.split(',')[nFeatures][:-1]] for i in lines], dtype=float)
X = np.array([k.split(',')[0:nFeatures] for k in lines], dtype=float)
X = np.insert(X, 0, 1, axis=1)  # insert a column of 1's so we can use the bias term
print("theta shape:", Theta.shape)
print("y shape:", y.shape)
print("X shape:", X.shape)
''' for scaling features, if needed
f = 1
for f in range(nFeatures):
    # scale the data by some policy; changing it may improve your model's performance
    maxFeature = np.max(X[:, f], 0)
    if maxFeature < 1.0:
        continue
    else:
        X[:, f] = X[:, f] / maxFeature  # scale (i.e. normalize) the data (because of the distance between mean values of the features)
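A hedged sketch of the batch gradient-descent loop this setup presumably feeds; the update rule and the Epsilon stopping test are assumptions, not taken from the original script.

import numpy as np

def gradient_descent(X, y, Theta, alpha, epsilon, max_iters=100000):
    # Assumed training loop: X is (m, n+1) with a bias column, Theta is (1, n+1).
    m = X.shape[0]
    for _ in range(max_iters):
        error = X @ Theta.T - y            # (m, 1) residuals
        gradient = (X.T @ error).T / m     # same shape as Theta
        new_Theta = Theta - alpha * gradient
        if np.max(np.abs(new_Theta - Theta)) < epsilon:
            return new_Theta               # assumed convergence criterion
        Theta = new_Theta
    return Theta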
    #print probabilities
    #print ' '
    max_ = np.argmax(probabilities)
    #print max_
    if max_ == 0:
        predicitions.append(10)
    else:
        predicitions.append(max_)
    return predicitions

def evaluate(y, predicitions):
    correct = [1 if (a == b) else 0 for a, b in zip(y, predicitions)]
    return (1.0 * sum(correct)) / len(y) * 100.0

data = loadmat('/mnt/407242D87242D1F8/study/anaconda/OneVsAll_logistic_Regression/data/ex4data1.mat')
X = data['X']
y = data['y']
X = np.insert(X, 0, values=np.ones(5000), axis=1)
zero = [1 if (item == 10) else 0 for item in data['y']]
X = np.array(X).reshape([5000, 401])
y = np.array(y).reshape([5000, 1])
beta = 1
no_label = 10
theta_all = oneVSall(X, y, beta, no_label)
predicitions = predict(X, theta_all)
print 'Training evaluation', evaluate(y, predicitions)
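A sketch of the prediction step assumed above, where row 0 of theta_all is taken to correspond to label 10 (the digit zero in the ex4data1 convention); theta_all's layout is an assumption.

import numpy as np

def predict_sketch(X, theta_all):
    # theta_all assumed shape (10, 401): one parameter row per class.
    probabilities = 1.0 / (1.0 + np.exp(-X @ theta_all.T))  # sigmoid scores, (m, 10)
    best = np.argmax(probabilities, axis=1)
    return np.where(best == 0, 10, best)  # map index 0 back to label 10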
def read_rof(filename):
    """
    Reads ROF binary file (generated by Rigol power supply) and returns its data
    in numpy.ndarray format and file info (header) in dict format

    :param filename: ROF binary file name
    :return: data, head
    :rtype: numpy.ndarray, dict
    """
    with open(filename, "rb") as fid:
        data = list()

        # read header info
        head = dict()
        head["filetype"] = fid.read(3).decode(ENCODING)
        fid.read(1)  # unused last byte of file type value
        head["model"] = model_dict[fid.read(1)]
        fid.read(1)  # unused byte
        head["data_info_len"] = struct.unpack('1h', fid.read(2))[0]
        head["data_len"] = struct.unpack('1i', fid.read(4))[0]
        head["head_crc"] = fid.read(2)
        head["data_crc"] = fid.read(2)
        head["period"] = struct.unpack('1i', fid.read(4))[0]
        head["points"] = struct.unpack('1i', fid.read(4))[0]
        head["oldest_data_subscript"] = fid.read(4)

        # number of values == points * number_of_channels * 2
        data_values = head["points"] * ch_num[head["model"]] * 2  # 2 columns (voltage, current)
        data_bytes = data_values * DATA_BYTES
        raw_data = fid.read(data_bytes)
        data = np.ndarray(shape=(head["points"], ch_num[head["model"]] * 2),
                          dtype=DATA_FORMAT, buffer=raw_data)

        # convert to float
        data = data.astype(np.float32)

        # convert to volts and amperes
        data = data * VOLTS_AMPERES_COEFF

        # get time column
        x_data = np.array([val * head["period"] for val in range(head["points"])],
                          dtype=data.dtype, order="F")

        # add time column
        data = np.insert(data, 0, x_data, axis=1)

        # print("================================")
        # print("\n".join(str(line) for line in data))
        #
        # for idx, line in enumerate(data):
        #     if idx == 30:
        #         break
        #     print(CSV_SEPARATOR.join(str(val) for val in line))

    return data, head
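A minimal illustration of the time-column step: np.insert with axis=1 prepends a column, so the (points, channels * 2) array gains a leading time axis. The data and period below are fabricated.

import numpy as np

data = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])  # fake voltage/current pairs
period = 0.5                                           # fake sampling period
x_data = np.arange(data.shape[0], dtype=data.dtype) * period
data = np.insert(data, 0, x_data, axis=1)
print(data)  # first column is now 0.0, 0.5, 1.0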
def saveHist(histAll, histObs, histRec, bin_edges, xtitle, fname, filters=['u_', 'g_', 'r_', 'i_', 'z_', 'y_', 'all']): c1 = '#5687A6' #Dali Blue (Andrew's AAS Poster) c2 = '#A62B1F' #Dai Red c3 = '#BF8A26' #Dali Beige fig, ax1 = plt.subplots( figsize=(8, 6), sharex=True) #can change to include cdf with ax1, ax2 histAll = np.insert(histAll, 0, 0) histObs = np.insert(histObs, 0, 0) for f in filters: histRec[f] = np.insert(histRec[f], 0, 0) #PDF ax1.step(bin_edges, histAll / np.sum(histAll), color=c1) ax1.step(bin_edges, histObs / np.sum(histObs), color=c2) for f in filters: lw = 1 if (f == 'all'): lw = 0.5 ax1.step(bin_edges, histRec[f] / np.sum(histRec[f]), color=c3, linewidth=lw) ax1.set_ylabel('PDF') ax1.set_yscale('log') ax1.set_title('Open Clusters - Baseline (crowding)', fontsize=16) ax1.set_xlabel(xtitle) #CDF #cdfAll = [] #cdfObs = [] #cdfRec = dict() #for f in filters: # cdfRec[f] = [] # for i in range(len(histAll)): # cdfAll.append(np.sum(histAll[:i])/np.sum(histAll)) # for i in range(len(histObs)): # cdfObs.append(np.sum(histObs[:i])/np.sum(histObs)) # for f in filters: # for i in range(len(histRec[f])): # cdfRec[f].append(np.sum(histRec[f][:i])/np.sum(histRec[f])) #ax2.step(bin_edges, cdfAll, color=c1) #ax2.step(bin_edges, cdfObs, color=c2) #for f in filters: # lw = 1 # if (f == 'all'): # lw = 0.5 # ax2.step(bin_edges, cdfRec[f], color=c3, linewidth=lw) #ax2.set_ylabel('CDF') #ax2.set_xlabel(xtitle) fig.subplots_adjust(hspace=0) fig.savefig(os.path.join('.', 'plots', fname + '.pdf'), format='pdf', bbox_inches='tight') #write to a text file with open(os.path.join('.', 'eblsst_files', fname + '.csv'), 'w') as fl: outline = 'binEdges,histAll,histObs' for f in filters: outline += ',' + f + 'histRec' outline += '\n' fl.write(outline) for i in range(len(bin_edges)): outline = str(bin_edges[i]) + ',' + str(histAll[i]) + ',' + str( histObs[i]) for f in filters: outline += ',' + str(histRec[f][i]) outline += '\n' fl.write(outline)
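Why the histograms above get a leading zero, in isolation: np.histogram returns one fewer count than bin edges, and ax1.step is called with the full bin_edges array, so a 0 is prepended to align the lengths. The data here are synthetic.

import numpy as np

values = np.random.default_rng(1).normal(size=1000)
hist, bin_edges = np.histogram(values, bins=20)   # hist has len(bin_edges) - 1 entries
hist = np.insert(hist, 0, 0)                      # prepend a zero so lengths match
assert len(hist) == len(bin_edges)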
def predict(self, X):
    X = np.insert(X, 0, 1, axis=1)
    return self.sigmoid(np.dot(X, self.w))
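A minimal, hypothetical context for the method above; LogisticRegressionSketch and its weights are made up, and only the bias-column insertion mirrors the original.

import numpy as np

class LogisticRegressionSketch:
    def __init__(self, w):
        self.w = w  # assumed shape (n_features + 1,), bias weight first

    def sigmoid(self, z):
        return 1.0 / (1.0 + np.exp(-z))

    def predict(self, X):
        X = np.insert(X, 0, 1, axis=1)  # prepend the bias column
        return self.sigmoid(np.dot(X, self.w))

model = LogisticRegressionSketch(w=np.array([0.1, 0.5, -0.3]))
print(model.predict(np.array([[1.0, 2.0], [3.0, 4.0]])))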
def fusion_images(multispectral, panchromatic, save_image=False, savepath=None, timeCondition=True): end = 0 start = 0 #Verifica que ambas imagenes cumplan con las condiciones if multispectral.shape[2] == 3: print('The Multispectral image has '+str(multispectral.shape[2])+' channels and size of '+str(multispectral.shape[0])+'x'+str(multispectral.shape[1])) else: sys.exit('The first image is not multispectral') if len(panchromatic.shape) == 2: print(' The Panchromatic image has a size of '+str(panchromatic.shape[0])+'x'+str(panchromatic.shape[1])) else: sys.exit('The second image is not panchromatic') size_rgb = multispectral.shape # Definición del tamaño del bloque BLOCK_SIZE = 32 # Convierte a float32 y separa las bandas RGB de la multispectral m_host = multispectral.astype(np.float32) r_host = m_host[:,:,0].astype(np.float32) g_host = m_host[:,:,1].astype(np.float32) b_host = m_host[:,:,2].astype(np.float32) size_rgb = multispectral.shape # Convierte la pancromatica a float32 panchromatic_host = panchromatic.astype(np.float32) # Inicial el time_calculated de ejecucion start=time.time() # Se pasan los array en el host al device r_gpu = gpuarray.to_gpu(r_host) g_gpu = gpuarray.to_gpu(g_host) b_gpu = gpuarray.to_gpu(b_host) p_gpu = gpuarray.to_gpu(panchromatic_host) # Se calcula la media de cada una de las bandas y se forma un arreglo con estos valores, todo esto en GPU mean_r_gpu = misc.mean(r_gpu) mean_g_gpu = misc.mean(g_gpu) mean_b_gpu = misc.mean(b_gpu) # Se obtiene el numero de bandas n_bands = size_rgb[2] # Se aparta memoria en GPU r_gpu_subs = gpuarray.zeros_like(r_gpu,np.float32) g_gpu_subs = gpuarray.zeros_like(g_gpu,np.float32) b_gpu_subs = gpuarray.zeros_like(b_gpu,np.float32) # Se realiza la resta de su respectiva media a cada uno de los pixeles de cada banda, substract( r_gpu, mean_r_gpu.get(), r_gpu_subs) substract( g_gpu, mean_g_gpu.get(), g_gpu_subs) substract( b_gpu, mean_b_gpu.get(), b_gpu_subs) # Se divide cada una de las bandas después de ser restada su media, en un conjunto de submatrices cuadradas del tamaño del bloque r_subs_split = split(r_gpu_subs.get(),BLOCK_SIZE,BLOCK_SIZE) g_subs_split = split(g_gpu_subs.get(),BLOCK_SIZE,BLOCK_SIZE) b_subs_split = split(b_gpu_subs.get(),BLOCK_SIZE,BLOCK_SIZE) #Se obtiene la matrix de varianza y covarianza mat_var_cov = varianza_cov(r_subs_split,g_subs_split,b_subs_split) # Coeficiente para diaganalizar ortogonalmente coefficient = 1.0/((size_rgb[0]*size_rgb[1])-1) # Matriz diagonalizada ortogonalmente ortogonal_matrix = mat_var_cov*coefficient # Se calcula la traza de las sucesivas potencias de la matriz ortogonal inicial polynomial_trace = successive_powers(ortogonal_matrix) # Se calculan los coeficientes del polinomio caracteristico characteristic_polynomial = polynomial_coefficients(polynomial_trace, ortogonal_matrix) # Se obtienen las raices del polinomio caracteristico characteristic_polynomial_roots = np.roots(np.insert(characteristic_polynomial,0,1)) # Los vectores propios aparecen en la diagonal de la matriz eigenvalues_mat eigenvalues_mat = np.diag(characteristic_polynomial_roots) # Vectores propios para cada valor propio eigenvectors_mat = -1*ortogonal_matrix[1:n_bands,0] # Se calcular los vectores propios normalizados # Cada vector propio es una columna de la matriz mat_ortogonal_base mat_ortogonal_base, q_matrix = eigenvectors_norm(eigenvalues_mat, ortogonal_matrix, eigenvectors_mat) q_matrix_list = q_matrix.tolist() q_matrix_cpu = np.array(q_matrix_list).astype(np.float32) w1 = q_matrix_cpu[0,:] w2 = 
(-1)*q_matrix_cpu[1,:] w3 = q_matrix_cpu[2,:] eigenvectors = np.array((w1,w2,w3)) # Se calcula la inversa de los vectores propios inv_eigenvectors = la.inv(eigenvectors) inv_list = inv_eigenvectors.tolist() inv_eigenvector_cpu = np.array(inv_list).astype(np.float32) # Se realiza la división de las bandas en submatrices del tamaño del bloque r_subs_split_cp = split(r_host,BLOCK_SIZE,BLOCK_SIZE) g_subs_split_cp = split(g_host,BLOCK_SIZE,BLOCK_SIZE) b_subs_split_cp = split(b_host,BLOCK_SIZE,BLOCK_SIZE) # Se calculan los componentes principales con las bandas originales y los vectores propios pc_1,pc_2,pc_3 = componentes_principales_original(r_subs_split_cp,g_subs_split_cp,b_subs_split_cp,q_matrix_cpu,r_host.shape[0], BLOCK_SIZE) # Se realiza la división en submatrices de la pancromática, el componente principal 2 y 3, del tamaño del bloque, p_subs_split_nb = split(panchromatic_host,BLOCK_SIZE,BLOCK_SIZE) pc_2_subs_split_nb = split(pc_2,BLOCK_SIZE,BLOCK_SIZE) pc_3_subs_split_nb = split(pc_3,BLOCK_SIZE,BLOCK_SIZE) # Se calculan los componentes con la pancromatica, componentes principales originales 2 y 3, y la inversa de los vectores propios nb1,nb2,nb3 = componentes_principales_panchromartic(p_subs_split_nb,pc_2_subs_split_nb,pc_3_subs_split_nb,inv_eigenvector_cpu,r_host.shape[0], BLOCK_SIZE) nb11 = nb1.astype(np.float32) nb22 = nb2.astype(np.float32) nb33 = nb3.astype(np.float32) nb11_gpu = gpuarray.to_gpu(nb11) nb22_gpu = gpuarray.to_gpu(nb22) nb33_gpu = gpuarray.to_gpu(nb33) # Se separa espacio en memoria para las matrices resultado de realizar el ajuste nb111_gpu = gpuarray.empty_like(nb11_gpu) nb222_gpu = gpuarray.empty_like(nb22_gpu) nb333_gpu = gpuarray.empty_like(nb33_gpu) # Se realiza un ajuste cuando los valores de cada pixel es menor a 0, en GPU negative_adjustment(nb11_gpu,nb111_gpu) negative_adjustment(nb22_gpu,nb222_gpu) negative_adjustment(nb33_gpu,nb333_gpu) nb111_cpu = nb111_gpu.get().astype(np.uint8) nb222_cpu = nb222_gpu.get().astype(np.uint8) nb333_cpu = nb333_gpu.get().astype(np.uint8) end = time.time() fusioned_image=np.stack((nb111_cpu,nb222_cpu,nb333_cpu),axis=2); if(save_image): # Guarda la imagen resultando de acuerdo al tercer parametro establecido en la linea de ejecución del script if(savepath != None): t = skimage.io.imsave(savepath+'/pcagpu_image.tif',fusioned_image, plugin='tifffile') else: t = skimage.io.imsave('pcagpu_image.tif',fusioned_image, plugin='tifffile') #time_calculated de ejecución para la transformada de Brovey en GPU time_calculated = (end-start) if(timeCondition): return {"image": fusioned_image, "time" : time_calculated} else: return fusioned_image
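A CPU-only numpy sketch of the PCA-substitution idea the GPU pipeline above implements: project the bands onto their principal components, replace the first component with the panchromatic band, and project back. The inputs are synthetic, and the eigen-decomposition here uses numpy directly rather than the characteristic-polynomial route above.

import numpy as np

rng = np.random.default_rng(0)
rgb = rng.random((64, 64, 3)).astype(np.float32)   # stand-in multispectral image
pan = rng.random((64, 64)).astype(np.float32)      # stand-in panchromatic image

flat = rgb.reshape(-1, 3)
mean = flat.mean(axis=0)
cov = np.cov((flat - mean).T)                      # 3x3 variance-covariance matrix
eigvals, eigvecs = np.linalg.eigh(cov)
order = np.argsort(eigvals)[::-1]                  # principal components, largest first
eigvecs = eigvecs[:, order]

pcs = (flat - mean) @ eigvecs                      # forward transform
pcs[:, 0] = pan.reshape(-1)                        # substitute PC1 with the panchromatic band
fused = (pcs @ eigvecs.T + mean).reshape(rgb.shape)
fused = np.clip(fused, 0, None)                    # mirror the negative-value adjustment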
def isCheck(self,showDiagonal=False): #Returns "white" if white is in check, "black" if black is in check #Otherwise returns Empty list #First check if white king is in check checks = [] diagonalCheckGivers = [3,5]#Bishop,queen verticalCheckGivers = [4,5]#Rook, queen try: whiteKing = zip(*np.where(self.board == 6))[0] except: self.showBoard() whiteKing = zip(*np.where(self.board == 6))[0] ###print whiteKing kingColumn = self.board[:,whiteKing[1]] kingColumn = kingColumn[kingColumn != 0] kingRow = self.board[whiteKing[0]] kingRow = kingRow[kingRow != 0] kingRightDiagonal = np.array([]) row = whiteKing[0] column = whiteKing[1] while row >= 0 and column >= 0: row -= 1 column -= 1 row += 1 column += 1 while row < 8 and column < 8: if self.board[row][column] != 0: kingRightDiagonal = np.append(kingRightDiagonal, self.board[row][column]) row += 1 column += 1 kingLeftDiagonal = np.array([]) row = whiteKing[0] column = whiteKing[1] while row >= 0 and column < 8: row -= 1 column += 1 row += 1 column -= 1 while row < 8 and column >= 0: if self.board[row][column] != 0: kingLeftDiagonal = np.append(kingLeftDiagonal, self.board[row][column]) row += 1 column -= 1 ###print kingColumn ###print kingRow ###print kingRightDiagonal ###print kingLeftDiagonal for row in [kingColumn,kingRow]: row = np.insert(row,0,0) row = np.append(row,0) kingIndex = np.where(row == 6)[0][0] if showDiagonal: print row in_front = row[kingIndex + 1] if -1*in_front in verticalCheckGivers: ##print("check from front") checks.append("white") in_back = row[kingIndex - 1] if -1*in_back in verticalCheckGivers: checks.append("white") for row in [kingLeftDiagonal,kingRightDiagonal]: row = np.insert(row,0,0) row = np.append(row,0) if showDiagonal: print row kingIndex = np.where(row == 6)[0][0] in_front = row[kingIndex + 1] if -1*in_front in diagonalCheckGivers: checks.append("white") in_back = row[kingIndex - 1] if -1*in_back in diagonalCheckGivers: checks.append("white") if whiteKing[0] < 7: if 0 < whiteKing[1] < 7: if self.board[whiteKing[0] + 1][whiteKing[1] + 1] == -1 or self.board[whiteKing[0] + 1][whiteKing[1] - 1] == -1: checks.append("white") elif whiteKing[1] == 0: if self.board[whiteKing[0] + 1][whiteKing[1] + 1] == -1: checks.append("white") else: if self.board[whiteKing[0] + 1][whiteKing[1] - 1] == -1: checks.append("white") #Knights for l in (-2,-1,1,2): for m in (-2,-1,1,2): if abs(l) != abs(m) and 0 <= whiteKing[0] + l <= 7 and 0 <= whiteKing[1] + m <= 7: if self.board[whiteKing[0] + l][whiteKing[1] + m] == -2: checks.append("white") #Kings for l in (-1,0,1): for m in (-1,0,1): if 0 <= whiteKing[0] + l <= 7 and 0 <= whiteKing[1] + m <= 7: if self.board[whiteKing[0] + l][whiteKing[1] + m] == -6: checks.append("white") try: blackKing = zip(*np.where(self.board == -6))[0] except: self.showBoard() blackKing = zip(*np.where(self.board == -6))[0] ###print blackKing kingColumn = self.board[:,blackKing[1]] kingColumn = kingColumn[kingColumn != 0] kingRow = self.board[blackKing[0]] kingRow = kingRow[kingRow != 0] kingRightDiagonal = np.array([]) row = blackKing[0] column = blackKing[1] while row >= 0 and column >= 0: row -= 1 column -= 1 row += 1 column += 1 while row < 8 and column < 8: if self.board[row][column] != 0: kingRightDiagonal = np.append(kingRightDiagonal, self.board[row][column]) row += 1 column += 1 kingLeftDiagonal = np.array([]) row = blackKing[0] column = blackKing[1] while row >= 0 and column < 8: row -= 1 column += 1 row += 1 column -= 1 while row < 8 and column >= 0: if self.board[row][column] != 0: 
kingLeftDiagonal = np.append(kingLeftDiagonal, self.board[row][column]) row += 1 column -= 1 ###print kingColumn ###print kingRow ###print kingRightDiagonal ###print kingLeftDiagonal for row in [kingColumn,kingRow]: row = np.insert(row,0,0) row = np.append(row,0) kingIndex = np.where(row == -6)[0][0] if showDiagonal: print row in_front = row[kingIndex + 1] if in_front in verticalCheckGivers: checks.append("black") in_back = row[kingIndex - 1] if in_back in verticalCheckGivers: checks.append("black") for row in [kingLeftDiagonal,kingRightDiagonal]: row = np.insert(row,0,0) row = np.append(row,0) kingIndex = np.where(row == -6)[0][0] if showDiagonal: print row in_front = row[kingIndex + 1] if in_front in diagonalCheckGivers: checks.append("black") in_back = row[kingIndex - 1] if in_back in diagonalCheckGivers: checks.append("black") if blackKing[0] > 0: if 0 < blackKing[1] < 7: if self.board[blackKing[0] - 1][blackKing[1] + 1] == 1 or self.board[blackKing[0] - 1][blackKing[1] - 1] == 1: checks.append("black") elif blackKing[1] == 0: if self.board[blackKing[0] - 1][blackKing[1] + 1] == 1: checks.append("black") else: if self.board[blackKing[0] - 1][blackKing[1] - 1] == 1: checks.append("black") #Knights for l in (-2,-1,1,2): for m in (-2,-1,1,2): if abs(l) != abs(m) and 0 <= blackKing[0] + l <= 7 and 0 <= blackKing[1] + m <= 7: if self.board[blackKing[0] + l][blackKing[1] + m] == 2: checks.append("black") #Kings for l in (-1,0,1): for m in (-1,0,1): if 0 <= blackKing[0] + l <= 7 and 0 <= blackKing[1] + m <= 7: if self.board[blackKing[0] + l][blackKing[1] + m] == 6: checks.append("black") ##print checks return checks
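The sentinel-padding trick used above, shown in isolation: a zero on both ends of the squeezed row means row[kingIndex + 1] and row[kingIndex - 1] never index out of bounds, even for a king on the board edge. The piece values below are made up.

import numpy as np

row = np.array([6, -5])          # white king with a black queen next to it
row = np.insert(row, 0, 0)       # sentinel in front
row = np.append(row, 0)          # sentinel behind
king_index = np.where(row == 6)[0][0]
print(row[king_index - 1], row[king_index + 1])   # 0 and -5, no IndexError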
def generate_diag_and_features(dataset, path_dataset=""): path_dataset = "./data/" + dataset + "/" if not len( path_dataset) else path_dataset filepath = path_dataset + dataset + ".conf" dataset_type, filt_parameters, thresh, perslay_parameters, optim_parameters = load_config( filepath=filepath) if "REDDIT" in dataset: print( "Unfortunately, REDDIT data are not available yet for memory issues.\n" ) print("Moreover, the link we used to download the data,") print("http://www.mit.edu/~pinary/kdd/datasets.tar.gz") print("is down at the commit time (May 23rd).") print( "We will update this repository when we figure out a workaround.") return # if "REDDIT" in dataset: # _prepreprocess_reddit(dataset) if os.path.isfile(path_dataset + dataset + ".hdf5"): os.remove(path_dataset + dataset + ".hdf5") diag_file = h5py.File(path_dataset + dataset + ".hdf5") list_filtrations = filt_parameters["names"] [ diag_file.create_group(str(filtration)) for filtration in filt_parameters["names"] ] list_hks_times = np.unique( [filtration.split("_")[1] for filtration in list_filtrations]) if dataset_type == "graph": # preprocessing pad_size = 1 for graph_name in os.listdir(path_dataset + "mat/"): A = np.array(loadmat(path_dataset + "mat/" + graph_name)["A"], dtype=np.float32) pad_size = np.max((A.shape[0], pad_size)) features = pd.DataFrame( index=range(len(os.listdir(path_dataset + "mat/"))), columns=["label"] + ["eval" + str(i) for i in range(pad_size)] + [ name + "-percent" + str(i) for name, i in itertools.product( [f for f in list_hks_times if "hks" in f], 10 * np.arange(11)) ]) for idx, graph_name in enumerate((os.listdir(path_dataset + "mat/"))): name = graph_name.split("_") gid = int(name[name.index("gid") + 1]) - 1 A = np.array(loadmat(path_dataset + "mat/" + graph_name)["A"], dtype=np.float32) num_vertices = A.shape[0] label = int(name[name.index("lb") + 1]) L = csgraph.laplacian(A, normed=True) egvals, egvectors = eigh(L) basesimplex = get_base_simplex(A) eigenvectors = np.zeros([num_vertices, pad_size]) eigenvals = np.zeros(pad_size) eigenvals[:min(pad_size, num_vertices)] = np.flipud( egvals)[:min(pad_size, num_vertices)] eigenvectors[:, :min(pad_size, num_vertices)] = np.fliplr( egvectors)[:, :min(pad_size, num_vertices)] graph_features = [] graph_features.append(eigenvals) for fhks in list_hks_times: hks_time = float(fhks.split("-")[0]) # persistence filtration_val = hks_signature(egvectors, egvals, time=hks_time) dgmOrd0, dgmExt0, dgmRel1, dgmExt1 = apply_graph_extended_persistence( A, filtration_val, basesimplex) diag_file["Ord0_" + str(hks_time) + "-hks"].create_dataset( name=str(gid), data=dgmOrd0) diag_file["Ext0_" + str(hks_time) + "-hks"].create_dataset( name=str(gid), data=dgmExt0) diag_file["Rel1_" + str(hks_time) + "-hks"].create_dataset( name=str(gid), data=dgmRel1) diag_file["Ext1_" + str(hks_time) + "-hks"].create_dataset( name=str(gid), data=dgmExt1) # features graph_features.append( np.percentile( hks_signature(eigenvectors, eigenvals, time=hks_time), 10 * np.arange(11))) features.loc[gid] = np.insert(np.concatenate(graph_features), 0, label) features['label'] = features['label'].astype(int) elif dataset_type == "orbit": def _gen_orbit(num_pts_per_orbit, param): X = np.zeros([num_pts_per_orbit, 2]) xcur, ycur = np.random.rand(), np.random.rand() for idx in range(num_pts_per_orbit): xcur = (xcur + param * ycur * (1. - ycur)) % 1 ycur = (ycur + param * xcur * (1. 
- xcur)) % 1 X[idx, :] = [xcur, ycur] return X labs = [] count = 0 num_diag_per_param = 1000 if "5K" in dataset else 20000 for lab, r in enumerate([2.5, 3.5, 4.0, 4.1, 4.3]): print("Generating", num_diag_per_param, "orbits and diagrams for r = ", r, "...") for dg in range(num_diag_per_param): X = _gen_orbit(num_pts_per_orbit=1000, param=r) alpha_complex = gd.AlphaComplex(points=X) simplex_tree = alpha_complex.create_simplex_tree( max_alpha_square=1e50) simplex_tree.persistence() diag_file["Alpha0"].create_dataset( name=str(count), data=np.array( simplex_tree.persistence_intervals_in_dimension(0))) diag_file["Alpha1"].create_dataset( name=str(count), data=np.array( simplex_tree.persistence_intervals_in_dimension(1))) orbit_label = {"label": lab, "pcid": count} labs.append(orbit_label) count += 1 labels = pd.DataFrame(labs) labels.set_index("pcid") features = labels[["label"]] features.to_csv(path_dataset + dataset + ".csv") return diag_file.close()
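How each feature row above is assembled, in isolation: the per-graph feature blocks are concatenated and the integer label is prepended with np.insert; the shapes below are toy values, not the real eigenvalue and HKS-percentile dimensions.

import numpy as np

label = 1
graph_features = [np.zeros(4), np.arange(11, dtype=float)]   # e.g. eigenvalues, percentiles
row = np.insert(np.concatenate(graph_features), 0, label)
print(row.shape)   # (16,): 1 label followed by 15 feature values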