def __init__(self, depth=9):
    self.depth = depth

    train = (3, 32, 32)

    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * depth)]
    strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

    # Now construct the network
    layers = [Conv(**self.conv_params(3, 16))]
    layers.append(self.module_s1(nfms[0], True))

    for nfm, stride in zip(nfms[1:], strides):
        res_module = self.module_s1(nfm) if stride == 1 else self.module_s2(nfm)
        layers.append(res_module)

    layers.append(BatchNorm())
    layers.append(Activation(Rectlin()))
    layers.append(Pooling('all', op='avg'))
    layers.append(Affine(10, init=Kaiming(local=False),
                         batch_norm=True, activation=Softmax()))
    self.layers = layers

    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    model.initialize(train, cost=cost)
    self.model = model
def get_burst_photons(d, ich=0, ph_sel=Ph_sel('all')):
    """Return a list of arrays of photon timestamps in each burst.

    Arguments:
        d (Data): Data() object
        ich (int): channel index
        ph_sel (Ph_sel): photon selection. Allows selecting timestamps
            from a specific photon selection, e.g. ph_sel=Ph_sel(Dex='Dem').
            See :mod:`fretbursts.ph_sel` for details.

    Returns:
        A list of arrays of photon timestamps (one array per burst).
    """
    bursts = d.mburst[ich]
    i_start, i_end = bursts.istart, bursts.istop

    ph_times = d.get_ph_times(ich)
    burst_slices = [slice(i1, i2 + 1) for i1, i2 in zip(i_start, i_end)]
    burst_photons = [ph_times[slice_i] for slice_i in burst_slices]

    if ph_sel != Ph_sel('all'):
        ph_times_mask = d.get_ph_mask(ich, ph_sel=ph_sel)
        photon_masks = [ph_times_mask[slice_i] for slice_i in burst_slices]
        burst_photons = [ph[mask] for ph, mask in zip(burst_photons, photon_masks)]

    return burst_photons
def expr_as_list():
    iris = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))

    # multiple rows and columns
    res = 2 - iris
    res = h2o.as_list(res, use_pandas=False)
    res = list(zip(*res))
    assert abs(float(res[0][4]) - -2.6) < 1e-10 and abs(float(res[1][5]) - -1.6) < 1e-10 and \
        abs(float(res[2][11]) - 0.5) < 1e-10, "incorrect values"

    # single column
    res = 2 - iris
    res = h2o.as_list(res[0], use_pandas=False)
    res = list(zip(*res))
    assert abs(float(res[0][4]) - -2.6) < 1e-10 and abs(float(res[0][18]) - -3.1) < 1e-10 and \
        abs(float(res[0][25]) - -2.8) < 1e-10, "incorrect values"

    # local data
    frm = h2o.as_list(h2o.H2OFrame([[1, 2, 3]]), use_pandas=False)
    assert float(frm[1][2]) == 3, "incorrect values"

    frm = h2o.as_list(h2o.H2OFrame([[1, 2, 3], [4, 5, 6]]), use_pandas=False)
    assert float(frm[2][1]) == 5, "incorrect values"
def represent_table(table, indent=0, cell_wrap=str):
    """
    Render a table.

    cell_wrap is a callable used to wrap each cell value.
    """
    if not table:
        return ''

    # calculate the width of each column
    table = [[str(cell).replace('|', r'\|') for cell in row] for row in table]
    lengths = [
        max(
            get_terminal_width(cell) for cell in column
        )
        for column in zip(*table)  # transpose
    ]

    return '\n'.join(
        ' ' * indent
        + '| %s |' % ' | '.join(cell_wrap(ljust(cell, length))
                                for cell, length in zip(row, lengths))
        for row in table
    )
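# A minimal, self-contained sketch of the column-width logic above, using
# plain len()/str.ljust in place of the module's get_terminal_width/ljust
# helpers (an assumption about how those helpers behave, not their real code).
table = [["name", "value"], ["alpha", "1"], ["beta", "22"]]
table = [[str(cell).replace('|', r'\|') for cell in row] for row in table]
lengths = [max(len(cell) for cell in column) for column in zip(*table)]
print('\n'.join('| %s |' % ' | '.join(cell.ljust(length)
                                      for cell, length in zip(row, lengths))
                for row in table))
# | name  | value |
# | alpha | 1     |
# | beta  | 22    |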
def bootstrap(self):
    """ Return a new Alignment that is a bootstrap replicate of self """
    new_sites = sorted(sample_wr(self.get_sites()))
    seqs = list(zip(self.get_names(), (''.join(seq) for seq in zip(*new_sites))))
    return self.__class__(seqs)
def test_concat_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size inputs and outputs
    nins = [128, 1024]
    nouts = [64, 2048]
    batch_size = 16
    NervanaObject.be.bsz = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for nout in nouts]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size)))) for nin in nins]
    merge = MergeMultistream(layers, merge="stack")
    assert(len(inputs) == len(layers))
    merge.configure(inputs)
    merge.allocate()
    merge.set_deltas(None)
    out = merge.fprop(inputs).get()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.get() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)])

    assert np.allclose(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size))) for nout in nouts]
    err_concat = np.concatenate(err_lst)
    merge.bprop(be.array(err_concat))
    dW_exp_lst = [np.dot(err, inp.get().T) for (err, inp) in zip(err_lst, inputs)]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert np.allclose(layer.dW.get(), dW_exp)
    return
def test_call(self):
    new_toy_snap = self.toy_modifier(self.toy_snapshot)
    assert_array_almost_equal(new_toy_snap.coordinates,
                              self.toy_snapshot.coordinates)
    new_vel = new_toy_snap.velocities
    old_vel = self.toy_snapshot.velocities
    same_vel = [np.allclose(new_vel[i], old_vel[i])
                for i in range(len(new_vel))]
    assert_equal(Counter(same_vel), Counter({True: 2, False: 1}))
    for new_v, old_v in zip(new_vel, old_vel):
        assert_almost_equal(sum([v**2 for v in new_v]),
                            sum([v**2 for v in old_v]))

    new_omm_snap = self.openmm_modifier(self.openmm_snap)
    n_atoms = len(self.openmm_snap.coordinates)
    assert_array_almost_equal(new_omm_snap.coordinates,
                              self.openmm_snap.coordinates)
    new_vel = new_omm_snap.velocities
    old_vel = self.openmm_snap.velocities
    same_vel = [np.allclose(new_vel[i], old_vel[i])
                for i in range(len(new_vel))]
    assert_equal(Counter(same_vel), Counter({True: n_atoms-1, False: 1}))

    u_vel_sq = (old_div(u.nanometers, u.picoseconds))**2
    for new_v, old_v in zip(new_vel, old_vel):
        assert_almost_equal(
            sum([(v**2).value_in_unit(u_vel_sq) for v in new_v]),
            sum([(v**2).value_in_unit(u_vel_sq) for v in old_v])
        )
def permuted_copy(self, partition=None):
    """ Return a copy of the collection with all alignment columns permuted """
    def take(n, iterable):
        return [next(iterable) for _ in range(n)]

    if partition is None:
        partition = Partition([1] * len(self))

    index_tuples = partition.get_membership()

    alignments = []
    for ix in index_tuples:
        concat = Concatenation(self, ix)
        sites = concat.alignment.get_sites()
        random.shuffle(sites)
        d = dict(zip(concat.alignment.get_names(), [iter(x) for x in zip(*sites)]))
        new_seqs = [[(k, ''.join(take(l, d[k]))) for k in d] for l in concat.lengths]

        for seqs, datatype, name in zip(new_seqs, concat.datatypes, concat.names):
            alignment = Alignment(seqs, datatype)
            alignment.name = name
            alignments.append(alignment)

    return self.__class__(records=sorted(alignments,
                                         key=lambda x: SORT_KEY(x.name)))
def find_nearest_mcat(band, skypos, radius, maglimit=30.):
    """
    Given a sky position and a search radius, find the nearest MCAT source
    and return its position and magnitude in the specified band.

    :param band: The band to use, either 'FUV' or 'NUV'.
    :type band: str

    :param skypos: Two element array of RA and Dec in decimal degrees.
    :type skypos: array

    :param radius: Search radius in decimal degrees.
    :type radius: float

    :param maglimit: The NUV faint limit to return MCAT sources for.
    :type maglimit: float
    """
    data = get_mags(band, skypos[0], skypos[1], radius, maglimit)

    if not data:
        return None

    separation = [angularSeparation(skypos[0], skypos[1], a[0], a[1])
                  for a in zip(data['ra'], data['dec'])]
    minsep = np.where(separation == min(separation))

    return {'mag': data[band]['mag'][minsep][0],
            'skypos': np.array(list(zip(data['ra'], data['dec'])))[minsep][0].tolist(),
            'distance': min(separation)}
def _fill_gaps(frame_iter1, frame_iter2):
    """Fill missing rows in the corrected images with data from nearby times.

    Parameters
    ----------
    frame_iter1 : iterator of list of array
        The corrected frames (one list entry per channel).
    frame_iter2 : iterator of list of array
        The corrected frames (one list entry per channel).

    Yields
    ------
    list of array
        The corrected and filled frames.
    """
    first_obs = next(frame_iter1)
    for frame in frame_iter1:
        for frame_chan, fobs_chan in zip(frame, first_obs):
            fobs_chan[np.isnan(fobs_chan)] = frame_chan[np.isnan(fobs_chan)]
        if all(np.all(np.isfinite(chan)) for chan in first_obs):
            break

    most_recent = [x * np.nan for x in first_obs]
    for frame in frame_iter2:
        for fr_chan, mr_chan in zip(frame, most_recent):
            mr_chan[np.isfinite(fr_chan)] = fr_chan[np.isfinite(fr_chan)]
        yield [np.nan_to_num(mr_ch) + np.isnan(mr_ch) * fo_ch
               for mr_ch, fo_ch in zip(most_recent, first_obs)]
def _select_best_compound(cls, xs):
    """
    Selects the "best" combination of units based on the number of units
    in the compound, then the ones with the smallest number of SI units,
    then the ones with the lowest indices in the basis list
    """
    # Convert xs to numpy arrays
    xs = [numpy.asarray(list(x), dtype='int') for x in xs]
    # Find the number of units in each of the compounds
    lengths = [sum(abs(x)) for x in xs]
    min_length = min(lengths)
    min_length_xs = [x for x, l in zip(xs, lengths) if l == min_length]
    # If there are multiple compounds of equal length pick the compound
    # with the smallest number of base units
    if len(min_length_xs) == 1:
        min_x = min_length_xs[0]
    else:
        si_length_sums = [abs(x).dot(cls.si_lengths) for x in min_length_xs]
        min_si_length_sum = min(si_length_sums)
        min_si_length_sums = [x for x, l in zip(min_length_xs, si_length_sums)
                              if l == min_si_length_sum]
        if len(min_si_length_sums) == 1:
            min_x = min_si_length_sums[0]
        else:
            index_sums = [nonzero(x)[0].sum() for x in min_si_length_sums]
            min_x = min_si_length_sums[argmin(index_sums)]
    return min_x
def print_label_on_image(frame, top_labels):
    labels = [(label_index[index], "{0:.2f}".format(prob))
              for (index, prob) in top_labels]

    font = cv2.FONT_HERSHEY_COMPLEX_SMALL
    rect_color = (0, 0, 0)
    text_color = (255, 255, 255)
    font_scale = 0.45
    thickness = 1
    start_pt = (10, 10)
    extra_space = (4, 10)
    label_offset = 0
    label_num = 0

    for label, prob in labels:
        if label_num > 0:
            font_scale = .3
        rect_pt = (start_pt[0], start_pt[1] + label_offset)
        text_size = cv2.getTextSize(label, font, font_scale, thickness)[0]
        prob_size = cv2.getTextSize(prob, font, font_scale, thickness)[0]
        prob_offset = (prob_size[0] + extra_space[0], 0)
        text_top = tuple(map(sum, list(zip(rect_pt, extra_space))))
        rect_ops_pt = tuple(map(sum, list(zip(text_top, text_size, extra_space, prob_offset))))
        text_bot = (text_top[0], rect_ops_pt[1] - extra_space[1])
        prob_bot = (text_top[0] + text_size[0] + extra_space[0], text_bot[1])

        cv2.rectangle(frame, rect_pt, rect_ops_pt, rect_color, thickness=cv2.cv.CV_FILLED)
        cv2.putText(frame, label, text_bot, font, font_scale, text_color, thickness)
        cv2.putText(frame, prob, prob_bot, font, font_scale, text_color, thickness)

        label_offset += rect_ops_pt[1] - rect_pt[1]
        label_num += 1

    return frame
def testCartesianFromSpherical(self):
    nsamples = 10
    theta = self.rng.random_sample(nsamples) * np.pi - 0.5 * np.pi
    phi = self.rng.random_sample(nsamples) * 2.0 * np.pi

    points = []
    for ix in range(nsamples):
        vv = [np.cos(theta[ix]) * np.cos(phi[ix]),
              np.cos(theta[ix]) * np.sin(phi[ix]),
              np.sin(theta[ix])]

        points.append(vv)

    points = np.array(points)
    lon, lat = utils.sphericalFromCartesian(points)
    outPoints = utils.cartesianFromSpherical(lon, lat)

    for pp, oo in zip(points, outPoints):
        np.testing.assert_array_almost_equal(pp, oo, decimal=6)

    # test passing in arguments as floats
    for ix, (ll, bb) in enumerate(zip(lon, lat)):
        xyz = utils.cartesianFromSpherical(ll, bb)
        self.assertIsInstance(xyz[0], float)
        self.assertIsInstance(xyz[1], float)
        self.assertIsInstance(xyz[2], float)
        self.assertAlmostEqual(xyz[0], outPoints[ix][0], 12)
        self.assertAlmostEqual(xyz[1], outPoints[ix][1], 12)
        self.assertAlmostEqual(xyz[2], outPoints[ix][2], 12)

    # test _xyz_from_ra_dec <-> testCartesianFromSpherical
    np.testing.assert_array_equal(utils.cartesianFromSpherical(lon, lat),
                                  utils._xyz_from_ra_dec(lon, lat).transpose())
def assert_not_equal_array_array(list_a, list_b):
    exist_diff = False
    for (alpha, beta) in zip(list_a, list_b):
        for (elem_a, elem_b) in zip(alpha, beta):
            if elem_a != elem_b:
                exist_diff = True
    return exist_diff
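# Hedged usage sketch for assert_not_equal_array_array above: despite the
# name it returns a bool, True as soon as any paired element differs.
print(assert_not_equal_array_array([[1, 2], [3, 4]], [[1, 2], [3, 5]]))  # True
print(assert_not_equal_array_array([[1, 2]], [[1, 2]]))                  # False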
def _weighting(self, interpPoints, values):
    """
    interpPoints is a numpy array where interpolation is desired
    values are the model values.
    """
    result = np.zeros((interpPoints.size, np.size(values[0])), dtype=float)

    inRange = np.where((interpPoints['airmass'] <= np.max(self.dimDict['airmass'])) &
                       (interpPoints['airmass'] >= np.min(self.dimDict['airmass'])))
    usePoints = interpPoints[inRange]
    # Find the neighboring healpixels
    hpids, hweights = get_neighbours(self.nside, np.pi/2. - usePoints['altEclip'],
                                     usePoints['azEclipRelSun'])

    badhp = np.in1d(hpids.ravel(), self.dimDict['hpid'], invert=True).reshape(hpids.shape)
    hweights[badhp] = 0.

    norm = np.sum(hweights, axis=0)
    good = np.where(norm != 0.)[0]
    hweights[:, good] = hweights[:, good]/norm[good]

    amRightIndex, amLeftIndex, amRightW, amLeftW = self.indxAndWeights(usePoints['airmass'],
                                                                       self.dimDict['airmass'])

    nhpid = self.dimDict['hpid'].size
    # loop through the hweights and the airmass weights
    for hpid, hweight in zip(hpids, hweights):
        for amIndex, amW in zip([amRightIndex, amLeftIndex], [amRightW, amLeftW]):
            weight = hweight*amW
            result[inRange] += weight[:, np.newaxis]*values[amIndex*nhpid+hpid]

    return result
def attack(train, x):
    kwargs = {}

    # randomly select parameters and their corresponding values
    kwargs['k'] = random.randint(1, 20)
    if random.randint(0, 1):
        kwargs['model_id'] = "my_model"
    if random.randint(0, 1):
        kwargs['max_iterations'] = random.randint(1, 1000)
    if random.randint(0, 1):
        kwargs['standardize'] = [True, False][random.randint(0, 1)]
    if random.randint(0, 1):
        method = random.randint(0, 3)
        if method == 3:
            s = [[random.uniform(train[c].mean()[0]-100, train[c].mean()[0]+100)
                  for p in range(kwargs['k'])] for c in x]
            print("s: {0}".format(s))
            start = h2o.H2OFrame(list(zip(*s)))
            kwargs['user_points'] = start
        else:
            kwargs['init'] = ["Furthest", "Random", "PlusPlus"][method]
    if random.randint(0, 1):
        kwargs['seed'] = random.randint(1, 10000)

    # display the parameters and their corresponding values
    print("-----------------------")
    print("x: {0}".format(x))
    for k, v in zip(list(kwargs.keys()), list(kwargs.values())):
        if k == 'user_points':
            print(k + ": ")
            start.show()
        else:
            print(k + ": {0}".format(v))

    H2OKMeansEstimator(**kwargs).train(x=x, training_frame=train)
    print("-----------------------")
def plot(data, axes=None, alpha=.5, clabel=True, cbar=False, aspect='equal', **kw):
    """Given output from post2d.data, plot the scalar as discrete or smooth plot.

    For raw discrete data, plot filled circles with radii of particles, colored by the scalar value.

    For smooth discrete data, plot image with optional contours and contour labels.

    For vector data (raw or smooth), plot quiver (vector field), with arrows colored by the magnitude.

    :param axes: matplotlib.axes\ instance where the figure will be plotted; if None, will be created from scratch.
    :param data: value returned by :yref:`yade.post2d.data`
    :param bool clabel: show contour labels (smooth mode only), or annotate cells with numbers inside (with perArea==2)
    :param bool cbar: show colorbar (equivalent to calling pylab.colorbar(mappable) on the returned mappable)

    :return: tuple of ``(axes,mappable)``; mappable can be used in further calls to pylab.colorbar.
    """
    import pylab, math
    if not axes: axes = pylab.gca()
    if data['type'] == 'rawScalar':
        from matplotlib.patches import Circle
        import matplotlib.collections, numpy
        patches = []
        for x, y, d, r in zip(data['x'], data['y'], data['val'], data['radii']):
            patches.append(Circle(xy=(x, y), radius=r))
        coll = matplotlib.collections.PatchCollection(patches, linewidths=0., **kw)
        coll.set_array(numpy.array(data['val']))
        bb = coll.get_datalim(coll.get_transform())
        axes.add_collection(coll)
        axes.set_xlim(bb.xmin, bb.xmax); axes.set_ylim(bb.ymin, bb.ymax)
        if cbar: axes.get_figure().colorbar(coll)
        axes.grid(True); axes.set_aspect(aspect)
        return axes, coll
    elif data['type'] == 'smoothScalar':
        loHi = data['bbox']
        if data['perArea'] in (0, 1):
            img = axes.imshow(data['val'], extent=(loHi[0][0], loHi[1][0], loHi[0][1], loHi[1][1]),
                              origin='lower', aspect=aspect, **kw)
            ct = axes.contour(data['x'], data['y'], data['val'], colors='k', origin='lower', extend='both')
            if clabel: axes.clabel(ct, inline=1, fontsize=10)
        else:
            img = axes.imshow(data['val'], extent=(loHi[0][0], loHi[1][0], loHi[0][1], loHi[1][1]),
                              origin='lower', aspect=aspect, interpolation='nearest', **kw)
            xStep = (data['x'][1]-data['x'][0]) if len(data['x']) > 1 else 0
            for y, valLine in zip(data['y'], data['val']):
                for x, val in zip(data['x'], valLine):
                    axes.text(x-.4*xStep, y, ('-' if math.isnan(val) else '%5g' % val), size=4)
        axes.update_datalim(loHi)
        axes.set_xlim(loHi[0][0], loHi[1][0]); axes.set_ylim(loHi[0][1], loHi[1][1])
        if cbar: axes.get_figure().colorbar(img)
        axes.grid(True if data['perArea'] in (0, 1) else False); axes.set_aspect(aspect)
        return axes, img
    elif data['type'] in ('rawVector', 'smoothVector'):
        import numpy
        loHi = data['bbox']
        valX, valY = numpy.array(data['valX']), numpy.array(data['valY'])  # rawVector data are plain python lists
        scalars = numpy.sqrt(valX**2+valY**2)  # numpy.sqrt computes element-wise sqrt
        quiv = axes.quiver(data['x'], data['y'], data['valX'], data['valY'], scalars, **kw)
        # axes.update_datalim(loHi)
        axes.set_xlim(loHi[0][0], loHi[1][0]); axes.set_ylim(loHi[0][1], loHi[1][1])
        if cbar: axes.get_figure().colorbar(quiv)
        axes.grid(True); axes.set_aspect(aspect)
        return axes, quiv
def __iter__(self):
    shape = self.shape
    granularity = self.granularity
    offset = self.offset

    def out(group):
        """Calculate a single iteration output"""
        return np.array(list(it.chain.from_iterable(
            (base + s for s in it.product(
                *[range(o, o + x) for x, o in
                  zip(shape[(granularity[0] + 1):],
                      offset[(granularity[0] + 1):])]))
            for base in group)))

    if granularity[0] > 0 or granularity[1] == 1:
        def cycle():
            """Iterator that produces one period of the output."""
            base_iter = it.product(*[list(range(o, x + o)) for x, o in
                                     zip(shape[1:(granularity[0] + 1)],
                                         offset[1:(granularity[0] + 1)])])
            for group in zip(*[base_iter] * granularity[1]):
                yield out(group)

        for positions in it.cycle(cycle()):
            yield positions
    else:
        base_iter = it.product(*[list(range(o, x + o)) for x, o in
                                 zip(shape[:(granularity[0] + 1)],
                                     offset[:(granularity[0] + 1)])])
        for group in zip(*[base_iter] * granularity[1]):
            yield out([b[1:] for b in group])
def col_names_check():
    iris_wheader = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))
    assert iris_wheader.col_names == ["sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"], \
        "Expected {0} for column names but got {1}".format(
            ["sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"], iris_wheader.col_names)

    iris = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris.csv"))
    assert iris.col_names == ["C1", "C2", "C3", "C4", "C5"], \
        "Expected {0} for column names but got {1}".format(["C1", "C2", "C3", "C4", "C5"], iris.col_names)

    df = h2o.H2OFrame.from_python(list(zip(*np.random.randn(100, 4).tolist())),
                                  column_names=list("ABCD"),
                                  column_types=["enum"] * 4)
    df.head()
    assert df.col_names == list("ABCD"), \
        "Expected {} for column names but got {}".format(list("ABCD"), df.col_names)
    assert list(df.types.values()) == ["enum"] * 4, \
        "Expected {} for column types but got {}".format(["enum"] * 4, df.types)

    df = h2o.H2OFrame(list(zip(*np.random.randn(100, 4).tolist())))
    df.head()
    assert df.col_names == ["C1", "C2", "C3", "C4"], \
        "Expected {} for column names but got {}".format(["C1", "C2", "C3", "C4"], df.col_names)
    assert list(df.types.values()) == ["real"] * 4, \
        "Expected {} for column types but got {}".format(["real"] * 4, df.types)

    df = h2o.H2OFrame({'B': ['a', 'a', 'b', 'NA', 'NA']})
    df.head()
    assert df.col_names == ["B"], "Expected {} for column names but got {}".format(["B"], df.col_names)

    df = h2o.H2OFrame.from_python({'B': ['a', 'a', 'b', 'NA', 'NA']}, column_names=["X"])
    df.head()
    assert df.col_names == ["X"], "Expected {} for column names but got {}".format(["X"], df.col_names)
def create_network(stage_depth):
    if stage_depth in (18, 18):
        stages = (2, 2, 2, 2)
    elif stage_depth in (34, 50):
        stages = (3, 4, 6, 3)
    elif stage_depth in (68, 101):
        stages = (3, 4, 23, 3)
    elif stage_depth in (102, 152):
        stages = (3, 8, 36, 3)
    else:
        raise ValueError('Invalid stage_depth value {}'.format(stage_depth))

    bottleneck = False
    if stage_depth in (50, 101, 152):
        bottleneck = True

    layers = [Conv(**conv_params(7, 64, strides=2)),
              Pooling(3, strides=2)]

    # Structure of the deep residual part of the network:
    # stage_depth modules of 2 convolutional layers each at feature map depths
    # of 64, 128, 256, 512
    nfms = list(itt.chain.from_iterable(
        [itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)]))
    strides = [-1] + [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

    for nfm, stride in zip(nfms, strides):
        layers.append(module_factory(nfm, bottleneck, stride))

    layers.append(Pooling('all', op='avg'))
    layers.append(Conv(**conv_params(1, 1000, relu=False)))
    layers.append(Activation(Softmax()))
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
def bprop_mergebroadcast(self, ngLayer, layers, error_views, error,
                         deltas, out_shape, alpha, beta, alphas, betas):
    C, D, H, W, N = ngLayer.in_shape5D
    i = 0
    for l, e in zip(layers, error_views):
        ngLayer.tensors_temp[(i * 4):(i * 4 + 4)] = e.primitive[0:4]
        i += 1
    channel = c_longlong(ngLayer.channels.ctypes.data)
    tensors = c_longlong(ngLayer.tensors_temp.ctypes.data)
    prim = c_longlong(ngLayer.primitive.ctypes.data)
    self.mklEngine.Concat_b(tensors, ngLayer.layer_num, error.get_prim(), prim,
                            channel, ngLayer.initOK_b, N, H, W)
    ngLayer.initOK_b = 1

    i = 0
    for l, e in list(zip(layers, error_views)):
        e.primitive[0:4] = ngLayer.tensors_temp[(i * 4):(i * 4 + 4)]
        e.shape5D = l.layers[-1].outputs.shape5D
        err = l.bprop(e)
        ngLayer.tensors_temp[(i * 4):(i * 4 + 4)] = err.primitive[0:4]
        i += 1

    if deltas is None:
        return

    size = c_longlong(np.prod(ngLayer.in_shape5D))
    prim = c_longlong(ngLayer.sum_prim.ctypes.data)
    tensors = c_longlong(ngLayer.tensors_temp.ctypes.data)
    self.mklEngine.MklSumTensor(ngLayer.layer_num, tensors, size, deltas.get_prim(), prim)

    deltas.shape5D = ngLayer.in_shape5D
def MCStoPFI(xysky, za):
    arg = [mt.atan2(j, i) + mt.pi for i, j in zip(*xysky)]

    print("Scaling", file=sys.stderr)
    scale = ScalingFactor(xysky)

    # deviation
    # base
    #print >> sys.stderr , "Offset 1"
    #offx1,offy1=OffsetBase(xysky)

    # z-dependent
    print("Offset 2", file=sys.stderr)
    offx2, offy2 = DeviationZenithAngle(xysky, za)

    xyf3c = []
    #print zip(scale,arg,offx1,offy1,offx2,offy2)
    #for s,t,ox1,oy1,ox2,oy2 in zip(scale,arg,offx1,offy1,offx2,offy2):
    for s, t, ox2, oy2 in zip(scale, arg, offx2, offy2):
        x = s*mt.cos(t) + ox2
        y = s*mt.sin(t) + oy2
        #x=s*mt.cos(t)+ox1+ox2
        #y=s*mt.sin(t)+oy1+oy2
        #print x,y,x+y
        #xyf3c.append([x,y])
        #xyf3c.append([x,y,s,t,ox1,oy1,ox2,oy2])
        xyf3c.append([x, y, s, t, ox2, oy2])

    #print xyf3c

    return xyf3c
def iris_h2o_vs_sciKmeans():
    # Connect to a pre-existing cluster
    # connect to localhost:54321

    iris_h2o = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv"))
    iris_sci = np.genfromtxt(pyunit_utils.locate("smalldata/iris/iris.csv"), delimiter=',')
    iris_sci = iris_sci[:, 0:4]

    s = [[4.9, 3.0, 1.4, 0.2],
         [5.6, 2.5, 3.9, 1.1],
         [6.5, 3.0, 5.2, 2.0]]
    start = h2o.H2OFrame(s)

    h2o_km = h2o.kmeans(x=iris_h2o[0:4], k=3, user_points=start, standardize=False)

    sci_km = KMeans(n_clusters=3, init=np.asarray(s), n_init=1)
    sci_km.fit(iris_sci)

    # Log.info("Cluster centers from H2O:")
    print("Cluster centers from H2O:")
    h2o_centers = h2o_km.centers()
    print(h2o_centers)

    # Log.info("Cluster centers from scikit:")
    print("Cluster centers from scikit:")
    sci_centers = sci_km.cluster_centers_.tolist()
    print(sci_centers)

    for hcenter, scenter in zip(h2o_centers, sci_centers):
        for hpoint, spoint in zip(hcenter, scenter):
            assert abs(hpoint - spoint) < 1e-10, "expected centers to be the same"
def shuffling_large():
    print("Reading in Arcene training data for binomial modeling.")
    train_data = h2o.upload_file(path=pyunit_utils.locate("smalldata/arcene/shuffle_test_version/arcene.csv"))
    train_data_shuffled = h2o.upload_file(path=pyunit_utils.locate("smalldata/arcene/shuffle_test_version/arcene_shuffled.csv"))

    print("Create model on original Arcene dataset.")
    h2o_model = H2OGeneralizedLinearEstimator(family="binomial", lambda_search=True, alpha=0.5)
    h2o_model.train(x=list(range(1000)), y=1000, training_frame=train_data)

    print("Create second model on original Arcene dataset.")
    h2o_model_2 = H2OGeneralizedLinearEstimator(family="binomial", lambda_search=True, alpha=0.5)
    h2o_model_2.train(x=list(range(1000)), y=1000, training_frame=train_data)

    print("Create model on shuffled Arcene dataset.")
    h2o_model_s = H2OGeneralizedLinearEstimator(family="binomial", lambda_search=True, alpha=0.5)
    h2o_model_s.train(x=list(range(1000)), y=1000, training_frame=train_data_shuffled)

    print("Assert that number of predictors remaining and their respective coefficients are equal.")

    for x, y in zip(h2o_model._model_json['output']['coefficients_table'].cell_values,
                    h2o_model_2._model_json['output']['coefficients_table'].cell_values):
        assert (type(x[1]) == type(y[1])) and (type(x[2]) == type(y[2])), "coefficients should be the same type"
        if isinstance(x[1], float):
            assert abs(x[1] - y[1]) < 5e-10, "coefficients should be equal"
        if isinstance(x[2], float):
            assert abs(x[2] - y[2]) < 5e-10, "coefficients should be equal"

    for x, y in zip(h2o_model._model_json['output']['coefficients_table'].cell_values,
                    h2o_model_s._model_json['output']['coefficients_table'].cell_values):
        assert (type(x[1]) == type(y[1])) and (type(x[2]) == type(y[2])), "coefficients should be the same type"
        if isinstance(x[1], float):
            assert abs(x[1] - y[1]) < 5e-10, "coefficients should be equal"
        if isinstance(x[2], float):
            assert abs(x[2] - y[2]) < 5e-10, "coefficients should be equal"
def test_storage(self):
    import os
    fname = data_filename("interface_set_storage_test.nc")
    if os.path.isfile(fname):
        os.remove(fname)
    template_traj = make_1d_traj([0.0])
    template = template_traj[0]
    storage_w = paths.Storage(fname, "w")
    storage_w.save(template_traj)
    storage_w.save(self.increasing_set)
    storage_w.sync_all()

    storage_r = paths.AnalysisStorage(fname)
    reloaded = storage_r.interfacesets[0]

    assert_items_equal(reloaded.lambdas, self.increasing_set.lambdas)
    assert_equal(reloaded.period_min, self.increasing_set.period_min)
    assert_equal(reloaded.period_max, self.increasing_set.period_max)
    for (truth, beauty) in zip(self.increasing_set, reloaded):
        assert_equal(truth, beauty)

    for (v, l) in zip(reloaded.volumes, reloaded.lambdas):
        assert_equal(reloaded.get_lambda(v), l)

    storage_r.close()
    storage_w.close()

    if os.path.isfile(fname):
        os.remove(fname)
def local_maxima(array2d, user_peak, index=False, count=4, floor=0, bug=False):
    from operator import itemgetter, attrgetter

    if user_peak == 0:
        where = ((array2d >= np.roll(array2d, 1, 0)) &
                 (array2d >= np.roll(array2d, -1, 0)) &
                 (array2d >= np.roll(array2d, 0, 1)) &
                 (array2d >= np.roll(array2d, 0, -1)) &
                 (array2d >= old_div(array2d.max(), 5.0)) &
                 (array2d > floor*np.ones(array2d.shape)) &
                 (array2d >= array2d.mean()))
    else:
        # some simpler filter if user indicated some modes
        where = array2d > floor  # ignore the lesser local maxima, throw out anything with a ZERO

    if bug == True:
        print(array2d, array2d[where.nonzero()], where.nonzero()[0])

    peaks = list(zip(where.nonzero()[0], where.nonzero()[1], array2d[where.nonzero()]))
    peaks = sorted(peaks, key=itemgetter(2), reverse=True)

    if len(peaks) > count and user_peak == 0:
        peaks = peaks[0:count]

    keys = ['y_i', 'z_i', 'amp']
    peaks = [dict(list(zip(keys, peaks[x]))) for x in range(len(peaks))]

    return peaks
def inception_bare(ref_module, kvals, name="i"):
    (p1, p2, p3) = kvals

    branch1 = [Conv(fshape(1, p1[0]), **common)] if p1[0] else []
    branch2 = [Conv(fshape(1, p2[0]), **common), Conv(fshape(3, p2[1]), **commonp1)]
    branch3 = [Pooling(op=p3[0], **pool3s1p1)] + (
        [Conv(fshape(1, p3[1]), **common)] if p3[1] else [])

    branch1 = Sequential(branch1)
    branch2 = Sequential(branch2)
    branch3 = Sequential(branch3)

    (branch1_ref, branch2_ref, branch3_ref) = ref_module[0].layers

    if p1[0]:
        for ll, lr in zip(branch1.layers, branch1_ref.layers):
            if ll.has_params:
                ll.set_params({'params': {'W': lr.W.get(),
                                          'weight_bias': lr.weight_bias.get()}})

    for ll, lr in zip(branch2.layers, branch2_ref.layers):
        if ll.has_params:
            ll.set_params({'params': {'W': lr.W.get(),
                                      'weight_bias': lr.weight_bias.get()}})

    if p3[1]:
        for ll, lr in zip(branch3.layers, branch3_ref.layers):
            if ll.has_params:
                ll.set_params({'params': {'W': lr.W.get(),
                                          'weight_bias': lr.weight_bias.get()}})

    return (branch1.layers, branch2.layers, branch3.layers)
def _filter_on_value(self, sounding_ids, data_values, filter_comparison=None, mapping_func=lambda x: x):
    if filter_comparison is None:
        print("Filter value not supplied returning all ids", file=sys.stderr)
        return sounding_ids

    if not isinstance(filter_comparison, types.FunctionType):
        def regexp_compare(val):
            return re.search(filter_comparison, str(val)) is not None
        comparison_func = regexp_compare
    else:
        comparison_func = filter_comparison

    ret_ids = []
    for obj_snd_ids, obj_values in zip(sounding_ids, data_values):
        obj_filtered_ids = []
        mapped_values = list(map(mapping_func, obj_values))
        for curr_id, curr_value in zip(obj_snd_ids, mapped_values):
            if hasattr(curr_value, "strip"):
                curr_value = curr_value.strip()

            if comparison_func(curr_value):
                obj_filtered_ids.append(curr_id)

        obj_filtered_ids.sort()
        ret_ids.append(tuple(obj_filtered_ids))

    return tuple(ret_ids)
def shape_divide(arr, scale, reduction='mean'):
    '''Scale down an array (shape N x M x ...) by the specified scale
    in each dimension (n x m x ...).
    Each dimension in arr must be divisible by its scale
    (throws an error otherwise).
    This reduces each sub-array (n x m x ...) to a single element,
    according to the reduction parameter, which is one of:
    * mean (default): mean of each sub-array
    * median: median of each sub-array
    * first: the [0,0,0, ...] element of the sub-array
    * all: all the possible (N x M x ...) sub-arrays;
           returns an array of shape (n, m, ..., N, M, ...)
    This is a downsampling operation, similar to scipy.misc.imresize
    and scipy.ndimage.interpolate'''
    arr = np.asanyarray(arr)
    reduction_options = ['mean', 'median', 'first', 'all']
    assert reduction in reduction_options, \
        'reduction must be one of: ' + ' '.join(reduction_options)
    scale = coerce_to_target_length(scale, arr.ndim)
    assert all([sh % sc == 0 for sh, sc in zip(arr.shape, scale)]), \
        'all dimensions must be divisible by their respective scale!'
    new_shape = flatten([sh // sc, sc] for sh, sc in zip(arr.shape, scale))
    # group pixels into smaller sub-arrays that can then be
    # modified by standard operations
    subarrays = _transpose_interleaved(arr.reshape(new_shape))
    flat_subarrays = subarrays.reshape([np.prod(scale)] + new_shape[::2])
    return (np.mean(flat_subarrays, axis=0) if reduction == 'mean' else
            np.median(flat_subarrays, axis=0) if reduction == 'median' else
            flat_subarrays[0] if reduction == 'first' else
            subarrays if reduction == 'all' else
            None)
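# Illustrative, self-contained sketch of the block-mean downsampling the
# 'mean' reduction above is expected to produce (plain numpy only, none of
# the surrounding module's helpers).
import numpy as np
a = np.arange(16).reshape(4, 4)
block_means = a.reshape(2, 2, 2, 2).mean(axis=(1, 3))
# block_means == [[ 2.5,  4.5],
#                 [10.5, 12.5]]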
def loadSedsFromList(self, sedNameList, magNormList,
                     internalAvList=None, galacticAvList=None, redshiftList=None):
    """
    Load the Seds specified by sedNameList, applying the specified normalization,
    extinction, and redshift.

    @param [in] sedNameList is a list of file names containing Seds

    @param [in] magNormList is the list of magnitude normalizations

    @param [in] internalAvList is an optional list of A(V) due to internal dust

    @param [in] galacticAvList is an optional list of A(V) due to Milky Way dust

    @param [in] redshiftList is an optional list of redshifts for the input Sed

    Seds are read in and stored to this object's internal list of Seds.

    Note: if you constructed this SedList object without internalAvList, you cannot
    load Seds with internalAvList now.  Likewise for galacticAvList and redshiftList.
    """

    if not self._initialized:
        if internalAvList is not None:
            self._internal_av_list = copy.deepcopy(list(internalAvList))
        else:
            self._internal_av_list = None

        if galacticAvList is not None:
            self._galactic_av_list = copy.deepcopy(list(galacticAvList))
        else:
            self._galactic_av_list = None

        if redshiftList is not None:
            self._redshift_list = copy.deepcopy(list(redshiftList))
        else:
            self._redshift_list = None

    else:
        if self._internal_av_list is None and internalAvList is not None:
            raise RuntimeError("This SedList does not contain internalAvList")
        elif self._internal_av_list is not None:
            if internalAvList is None:
                self._internal_av_list += [None] * len(sedNameList)
            else:
                self._internal_av_list += list(internalAvList)

        if self._galactic_av_list is None and galacticAvList is not None:
            raise RuntimeError("This SedList does not contain galacticAvList")
        elif self._galactic_av_list is not None:
            if galacticAvList is None:
                self._galactic_av_list += [None] * len(sedNameList)
            else:
                self._galactic_av_list += list(galacticAvList)

        if self._redshift_list is None and redshiftList is not None:
            raise RuntimeError("This SedList does not contain redshiftList")
        elif self._redshift_list is not None:
            if redshiftList is None:
                self._redshift_list += [None] * len(sedNameList)
            else:
                self._redshift_list += list(redshiftList)

    temp_sed_list = []
    for sedName, magNorm in zip(sedNameList, magNormList):
        sed = Sed()

        if sedName != "None":
            if self._spec_map is not None:
                sed.readSED_flambda(os.path.join(self._file_dir, self._spec_map[sedName]))
            else:
                sed.readSED_flambda(os.path.join(self._file_dir, sedName))

            if self._normalizing_bandpass is not None:
                fNorm = sed.calcFluxNorm(magNorm, self._normalizing_bandpass)
            else:
                fNorm = getImsimFluxNorm(sed, magNorm)

            sed.multiplyFluxNorm(fNorm)

        temp_sed_list.append(sed)

    if internalAvList is not None:
        self._av_int_wavelen, \
        self._a_int, \
        self._b_int = self.applyAv(temp_sed_list, internalAvList,
                                   self._av_int_wavelen, self._a_int, self._b_int)

    if redshiftList is not None:
        self.applyRedshift(temp_sed_list, redshiftList)

    if self._wavelen_match is not None:
        for sedObj in temp_sed_list:
            if sedObj.wavelen is not None:
                sedObj.resampleSED(wavelen_match=self._wavelen_match)

    if galacticAvList is not None:
        self._av_gal_wavelen, \
        self._a_gal, \
        self._b_gal = self.applyAv(temp_sed_list, galacticAvList,
                                   self._av_gal_wavelen, self._a_gal, self._b_gal)

    self._sed_list += temp_sed_list

    self._initialized = True
def fwd_exit_cols(self):
    return [(1 - p)**(np.arange(r)[::-1]) for r, p in zip(self.rs, self.ps)]
def bwd_enter_rows(self):
    return [stats.binom.pmf(np.arange(r)[::-1], r - 1, p)
            for r, p in zip(self.rs, self.ps)]
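# Illustrative check of the row construction above for a single (r, p) pair:
# with r=3, p=0.4 this evaluates binom.pmf([2, 1, 0], n=2, p=0.4).
from scipy import stats
import numpy as np
row = stats.binom.pmf(np.arange(3)[::-1], 3 - 1, 0.4)
# row == [0.16, 0.48, 0.36]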
def __iter__(self):
    for frame, displacement in zip(self._base, self.displacements):
        yield self._align(frame, displacement)
def __iter__(self):
    for frames in zip(*self._sequences):
        yield np.concatenate(frames, axis=3)
# %%
# def cart2pol(x, y):
#     rho = np.sqrt(x**2 + y**2)
#     phi = np.arctan2(y, x)
#     return(rho, phi)
#
# def pol2cart(rho, phi):
#     x = rho * np.cos(phi)
#     y = rho * np.sin(phi)
#     return(x, y)

#%% find center of mass
movie_shifts_x = np.zeros((T, ) + dim_r)
movie_shifts_y = np.zeros((T, ) + dim_r)

for r, idx_mat in zip(res_p, idfl):
    img_temp = np.zeros(np.prod(dim_r))
    img_temp[idx_mat] = 1
    img_temp = np.reshape(img_temp, dim_r, order='F')
    # pl.imshow(img_temp)
    x1, x2 = np.round(scipy.ndimage.center_of_mass(img_temp)).astype(int)
    print((x1, x2))
    movie_shifts_x[:, x1, x2] = np.array(r[0][-1])[:, 0]
    movie_shifts_y[:, x1, x2] = np.array(r[0][-1])[:, 1]

#%%
pl.close()
mn = np.mean(m, 0)
pl.imshow(mn)
for imm_x, imm_y in zip(movie_shifts_x, movie_shifts_y):
traces, masks, triggers_out, amplitudes, ISI = load_data_from_stored_results(
    base_folder, thresh_CR=0.1, threshold_responsiveness=0.1, is_blob=True,
    time_CR_on=-.1, time_US_on=.05, thresh_MOV_iqr=1000,
    time_CS_on_MOV=-.25, time_US_on_MOV=0)

wheel_mat = traces['wheel_traces']
ftraces = traces['fluo_traces']
time_mat = traces['time_fluo']
time_e_mat = traces['time_eye']
time_w_mat = traces['time_wheel']
eye_mat = traces['eye_traces']
amplitudes_eyelid = amplitudes['amplitudes_eyelid']
amplitudes_fluo = amplitudes['amplitudes_fluo']

#%%
counter = 0
with np.load(glob(os.path.join(base_folder, '*-template_total.npz'))[0]) as ld:
    templs = ld['template_each']

for mn1, A in zip(templs, masks['A_each']):
    pl.subplot(2, 3, counter + 1)
    # mn=np.median(templs,0)
    mn = mn1
    d1, d2 = np.shape(mn)
    # selem = disk(50)
    # mn=(mn1 - np.min(mn1))/(np.max(mn1)-np.min(mn1))
    # mn = rank.equalize(mn, selem=selem)
    # mn = exposure.equalize_hist(mn,nbins=1024)
    # os.path.split(fl)[-1]
    # pl.imshow(mn,cmap='gray')
    # pl.imshow(mn,cmap='gray',vmax=np.percentile(mn,99))
    # pl.imshow(mn,cmap='gray',vmax=np.percentile(mn,98))
    pl.imshow(A.mean(1).reshape((d1, d2), order='F'), alpha=1, cmap='hot')
def _conn_to_dict(cls, conn):
    if isinstance(conn, dict):
        return conn
    if isinstance(conn, (tuple, list)):
        return dict(zip(cls._conn_params, conn))
    return {"port": conn}
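# Hedged usage sketch of the tuple branch above; the parameter ordering here
# is only an assumption, since cls._conn_params is defined elsewhere.
conn_params = ('host', 'port')
print(dict(zip(conn_params, ('localhost', 6379))))
# {'host': 'localhost', 'port': 6379}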
def __str__(self):
    """Return ``str(self)``."""
    return ' x '.join('[{}, {}]'.format(xmin, xmax)
                      for xmin, xmax in zip(self.min_pt, self.max_pt))
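# Small sketch of the string this builds, e.g. for min_pt=(0, -1), max_pt=(1, 2):
print(' x '.join('[{}, {}]'.format(lo, hi) for lo, hi in zip((0, -1), (1, 2))))
# [0, 1] x [-1, 2]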
def test_complete_with_registration_cancellation(self, runner): course_id = u"cmsc40300" course_name = u"Foobarmentals of Foobar II" admin_id = u"admin" instructor_ids = [u"instructor"] grader_ids = [u"grader"] student_ids = [u"student1", u"student2", u"student3", u"student4"] all_users = instructor_ids + grader_ids + student_ids admin, instructors, graders, students = self.create_clients( runner, admin_id, instructor_ids, grader_ids, student_ids, course_id, verbose=True) self.create_users(admin, all_users) self.create_course(admin, course_id, course_name) course = Course.get_by_course_id(course_id) self.assertIsNotNone(course) self.assertEqual(course.name, course_name) result = admin.run( "admin course set-attribute %s default_extensions 2" % (course_id)) self.assertEqual(result.exit_code, 0) result = admin.run( "admin course set-attribute %s extension_policy per-student" % (course_id)) self.assertEqual(result.exit_code, 0) self.add_users_to_course(admin, course_id, instructors, graders, students) deadline = get_datetime_now_utc() - timedelta(minutes=5) deadline = deadline.isoformat(sep=" ") result = instructors[0].run( "instructor assignment add", ["pa1", "Programming Assignment 1", deadline]) self.assertEqual(result.exit_code, 0) result = instructors[0].run("instructor assignment set-attribute", ["pa1", "max_students", "2"]) self.assertEqual(result.exit_code, 0) teams = [u"student1-student2", u"student3-student4"] students_team = [(students[0], students[1]), (students[2], students[3])] self.register_team(students_team[0], teams[0], "pa1", course_id) self.register_team(students_team[1], teams[1], "pa1", course_id) _, _, team_commits = self.create_team_repos(admin, course_id, teams[0:2], students_team[0:2]) # Team 0 cancels their registration, which they can do because they haven't submitted yet. result = students_team[0][0].run( "student assignment cancel-registration", ["pa1", "--yes"]) self.assertEqual(result.exit_code, CHISUBMIT_SUCCESS) # Team 0 tries to cancel their registration again, which doesn't work. There's nothing to cancel. result = students_team[0][0].run( "student assignment cancel-registration", ["pa1", "--yes"]) self.assertEqual(result.exit_code, CHISUBMIT_FAIL) # Team 0 registers again result = students_team[0][0].run( "student assignment register", ["pa1", "--partner", students_team[0][1].user_id]) self.assertEqual(result.exit_code, CHISUBMIT_SUCCESS) # Team 1 submits. result = students_team[1][0].run("student assignment submit", ["pa1", "--yes"]) self.assertEqual(result.exit_code, CHISUBMIT_SUCCESS) # Team 1 tries to cancel their registration, which doesn't work. They have a submission. result = students_team[1][0].run( "student assignment cancel-registration", ["pa1", "--yes"]) self.assertEqual(result.exit_code, CHISUBMIT_FAIL) # Team 1 cancels their submission result = students_team[1][0].run("student assignment cancel-submit", ["pa1", "--yes"]) self.assertEqual(result.exit_code, CHISUBMIT_SUCCESS) # Team 1 can now cancel their registration. result = students_team[1][0].run( "student assignment cancel-registration", ["pa1", "--yes"]) self.assertEqual(result.exit_code, CHISUBMIT_SUCCESS) for team, student_team in zip(teams, students_team): result = student_team[0].run("student team show", [team]) self.assertEqual(result.exit_code, CHISUBMIT_SUCCESS)
def evaluate_model_binary(model, name, data=None, valid_d=None, valid_l=None,
                          train_d=None, train_l=None, n_proc=2, betaloss=False,
                          fudgeysoft=False):
    if not model.prediction_node.shape['f'] == 2:
        logger.warning(
            "Evaluate_model_binary is intended only for binary "
            "classification, this model has more or less outputs than 2")

    report_str = "T_nll,\tT_acc,\tT_ROCA,\tV_nll,\tV_acc,\tV_ROCA,\td_acc,\t" \
                 "d_ROCA,\tri0,\tr01,\tri2,\tri3,\trim\n"

    # Training Data ###########################################################
    if train_d is None:
        train_d = data.train_d
    if train_l is None:
        train_l = data.train_l

    train_preds = []
    train_gt = []
    for i, (d, l) in enumerate(zip(train_d[:4], train_l[:4])):
        if os.path.exists("2-" + name + "_train_%i_pred.h5" % i):
            pred = utils.h5load("2-" + name + "_train_%i_pred.h5" % i)
        else:
            pred = model.predict_dense(d, pad_raw=False)  # (f,z,x,y)
            utils.h5save(pred, "2-" + name + "_train_%i_pred.h5" % i)

        if betaloss:
            pred = pred[0]  # only mode
        else:
            pred = pred[1]  # only pred for class '1'

        l = l[0]  # throw away channel
        l, pred = image.center_cubes(l, pred, crop=True)
        train_preds.append(pred)
        train_gt.append(l)

    train_gt = [gt > 0.5 for gt in train_gt]  # binarise possibly probabilistic GT
    train_acc, train_area, train_thresh = evaluate(train_gt, train_preds,
                                                   "1-" + name + "_train")
    gt_flat = np.concatenate(list(map(np.ravel, train_gt)))
    preds_flat = np.concatenate(list(map(np.ravel, train_preds)))
    if fudgeysoft:
        train_nll = binary_nll(rescale_fudge(preds_flat), gt_flat)
    else:
        train_nll = binary_nll(preds_flat, gt_flat)

    print("Train nll %.3f" % train_nll)

    report_str += "%.3f,\t%.3f,\t%.3f,\t" % (train_nll, train_acc, train_area)
    error_hist(gt_flat, preds_flat, "1-" + name + "_train", thresh=train_thresh)

    # Validation data #########################################################
    if data and len(data.valid_l) == 0:
        raise RuntimeError("No validation data!")

    if valid_d is None:
        valid_d = data.valid_d
    if valid_l is None:
        valid_l = data.valid_l

    valid_preds = []
    valid_gt = []
    for i, (d, l) in enumerate(zip(valid_d, valid_l)):
        if os.path.exists("2-" + name + "_valid_%i_pred.h5" % i):
            pred = utils.h5load("2-" + name + "_valid_%i_pred.h5" % i)
        else:
            pred = model.predict_dense(d, pad_raw=False)  # (f,z,x,y)
            utils.h5save(pred, "2-" + name + "_valid_%i_pred.h5" % i)

        if betaloss:
            pred = pred[0]  # only mode
        else:
            pred = pred[1]  # only pred for class '1'

        l = l[0]  # throw away channel
        l, pred = image.center_cubes(l, pred, crop=True)
        valid_preds.append(pred)
        valid_gt.append(l)

    valid_gt = [gt > 0.5 for gt in valid_gt]  # binarise possibly probabilistic GT
    valid_acc, valid_area, valid_thresh = evaluate(valid_gt, valid_preds,
                                                   "1-" + name + "_valid")
    gt_flat = np.concatenate(list(map(np.ravel, valid_gt)))
    preds_flat = np.concatenate(list(map(np.ravel, valid_preds)))
    if fudgeysoft:
        valid_nll = binary_nll(rescale_fudge(preds_flat), gt_flat)
    else:
        valid_nll = binary_nll(preds_flat, gt_flat)

    print("Valid nll %.3f" % valid_nll)

    report_str += "%.3f,\t%.3f,\t%.3f,\t%.3f,\t%.3f,\t" % (
        valid_nll, valid_acc, valid_area, train_acc - valid_acc,
        train_area - valid_area)
    error_hist(gt_flat, preds_flat, "1-" + name + "_valid", thresh=valid_thresh)

    ris = []
    best_ris = []
    for i, (l, p) in enumerate(zip(valid_gt, valid_preds)):
        if betaloss or fudgeysoft:
            p = rescale_fudge(p)

        p_int = (p * 255).astype(np.uint8)
        ri, best_ri, seg = image.optimise_segmentation(
            l, p_int, "2-" + name + "_valid_%i" % i, n_proc=n_proc)
        best_ris.append(best_ri)
        ris.append(ri)

    ris.append(np.mean(ris))
    for ri in ris:
        report_str += "%.4f,\t" % (ri, )

    with open("0-%s-REPORT.txt" % (name, ), 'w') as f:
        f.write(report_str)
noisyCs.append(noisyC)
num_frames.append(np.where(~np.isnan(noisyC.sum(0)))[0][-1] + 1)
count += 1
pl.subplot(1, 3, count)
crd = cm.utils.visualization.plot_contours(A, Cn, thr=0.9, vmax=.75)
# pl.xlim([200,400]);pl.ylim([200,400])
# pl.subplot(2,3,2*count)
# pl.imshow(A.sum(0).reshape(dims,))

#%%
pl.figure()
count = 0
idx_neuro = 10
neuron_groups = [[180], [183, 277], [183, 277, 709]]
for A, b, C, f, Cn, ftc, noisyC, nfr, ngrp in zip(As, bs, Cs, fs, Cns,
                                                  files_to_compare, noisyCs,
                                                  num_frames, neuron_groups):
    count += 1
    a = A.tocsc()[:, np.array(ngrp) - 1]
    pl.subplot(3, 3, count)
    # pl.imshow(Cn,vmax = 0.7)
    crd = cm.utils.visualization.plot_contours(a, Cn, thr=0.9, vmax=.7, colors='r')
    pl.ylabel('Correlation Image')
    pl.xlim([200, 400])
    pl.ylim([200, 400])
    # pl.colorbar()
def load(self): year = int(re.search(r'\d{4}', self.election_id).group()) xlsfile = xlrd.open_workbook(self._xls_file_path) if 'primary' in self._xls_file_path: primary = True if year == 2004: party = None # get party from individual sheets else: party = self._party_from_filepath(self._xls_file_path) else: primary = False party = None results = [] sheets = self._get_sheets(xlsfile) for sheet in sheets: if year == 2004: if primary: party = sheet.name.split()[1] candidates = self._build_candidates_2004(sheet, party) elif self.source == "20021126__wy__special__general__natrona__state_house__36__precinct.xls": candidates = self._build_candidates_2002_special(sheet) elif year < 2004: if primary: if year == 2000: party = self.source.split('__')[2].title() else: party = sheet.name.split()[1] if year == 2002: candidates = self._build_candidates_2002(sheet, party) elif year == 2000: candidates = self._build_candidates_2000( sheet, party, primary) else: candidates = self._build_candidates(sheet, party) for i in range(sheet.nrows): row = [r for r in sheet.row_values(i) if not r == ''] # remove empty cells # Skip non-target offices if self._skip_row(row): continue else: precinct = str(row[0]) if self.source == '20021126__wy__special__general__natrona__state_house__36__precinct.xls': votes = [v for v in row[1:] if not v == ''] elif len(candidates) == 1: votes = [v for v in row[1:] if not v == ''] elif year == 2000 and primary is False: precinct = row[0] votes = [ v for v in row[2:len(candidates)] if not v == precinct ] elif year < 2006: votes = [ v for v in row[2:len(candidates)] if not v == '' ] else: votes = [ v for v in row[1:len(candidates)] if not v == '' ] grouped_results = list(zip(candidates, votes)) for (candidate, office, candidate_party), votes in grouped_results: if not votes == '-': results.append( self._prep_precinct_result( precinct, self.mapping['name'], candidate, office, candidate_party, votes)) try: RawResult.objects.insert(results) except: raise
def genFields(names, types):
    return list(zip(names, types))
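# Hedged usage sketch: genFields pairs parallel name/type lists into a
# numpy-style dtype specification, the same pattern the snippet below builds inline.
print(genFields(['mjd', 'ra', 'dec'], [float, float, float]))
# [('mjd', <class 'float'>), ('ra', <class 'float'>), ('dec', <class 'float'>)]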
npix = hp.nside2npix(nside)
magMap = np.zeros((npix, sunAlts.size), dtype=float)
rmsMap = np.zeros((npix, sunAlts.size), dtype=float)

filterNames = ['R', 'G', 'B']
#filterNames = ['R']
#sunAlts = [sunAlts[5]]

for filterName in filterNames:
    dataPath = getPackageDir('SIMS_SKYBRIGHTNESS_DATA')
    dbAddress = 'sqlite:///' + os.path.join(dataPath, 'photometry', 'skydata.sqlite')
    names = ['mjd', 'ra', 'dec', 'alt', 'starMag', 'sky', 'filter']
    types = [float, float, float, float, float, float, '|S1']
    dtypes = list(zip(names, types))
    engine = sqla.create_engine(dbAddress)
    connection = engine.raw_connection()
    cursor = connection.cursor()

    for i, ack in enumerate(sunAlts):
        q = 'select dates.mjd, stars.ra, stars.dec, obs.alt, obs.starMag, obs.sky, obs.filter from obs,stars,dates where obs.starID = stars.ID and obs.dateID = dates.ID and obs.filter = "%s" and obs.dateID in (select ID from dates where sunAlt >= %f and sunAlt <= %f)' % (filterName, sunAlts[i]-altBin, sunAlts[i]+altBin)
        print('Executing:')
        print(q)
        print('%i of %i' % (i, np.size(sunAlts)))
        cursor.execute(q)
        data = cursor.fetchall()
def evaluate(gt, preds, save_name, thresh=None, n_proc=None): """ Evaluate prediction w.r.t. GT Saves plot to file :param save_name: :param gt: :param preds: from 0.0 to 1.0 :param thresh: if thresh is given (e.g. from tuning on validation set) some performance measures are shown at this threshold :return: perf, roc-area, threshs """ n = 64 threshs = np.linspace(0, 1, n) perf = np.zeros((7, threshs.size)) print("Scanning for best probmap THRESHOLD") if n_proc: if n_proc > 2: mp = Pool(6) ret = mp.imap(eval_thresh, zip(threshs, repeat(gt), repeat(preds))) else: ret = list(map(eval_thresh, zip(threshs, repeat(gt), repeat(preds)))) for i, r in enumerate(ret): perf[:, i] = r # Find thresh according to maximal accuracy thresh = find_nearest(threshs, thresh) if thresh else threshs[perf[5, :].argmax()] area = roc_area(perf[0, :], perf[1, :]) area2 = roc_area(perf[2, :], perf[3, :]) plt.figure(figsize=(12, 9)) plt.subplot(221) plt.plot(threshs, perf[6, :].T) plt.ylim(0, 1) f1_max = perf[6, np.where(threshs == thresh)] plt.vlines(thresh, 0, 1, color='gray') plt.title("F1=%.2f at %.4f" % (f1_max, thresh)) plt.xlabel("Classifier Threshold") plt.subplot(222) plt.plot(threshs, perf[5, :].T) plt.ylim(0, 1) acc_max = perf[5, np.where(threshs == thresh)] plt.vlines(thresh, 0, 1, color='gray') plt.title("Accuracy max=%.2f at %.4f" % (acc_max, thresh)) plt.xlabel("Classifier Threshold") plt.subplot(223) plt.plot(perf[3, :].T, perf[2, :].T) plt.ylim(0, 1) plt.xlabel("Recall") plt.ylabel("Precision") plt.title("Precision-Recall AUC=%.4f" % (area2, )) plt.subplot(224) plt.plot(perf[1, :].T, perf[0, :].T) plt.xlabel("False Positive Rate") plt.ylabel("True Positive Rate") title = "ROC AUC=%.4f" % (area, ) plt.title(title) with FileLock('plotting'): plt.savefig(save_name + ".performance.png", bbox_inches='tight') return acc_max, area, thresh
def add_input(self, accumulator, element):
    return [
        c.add_input(a, element)
        for c, a in zip(self._combiners, accumulator)
    ]
def create_cnn(config_file, n_ch, param_file=None, mfp=False, axis_order='theano', constant_weights=False, imposed_input_size=None): raise RuntimeError("Dont use this, rebuild the graph and import the " "weights using load_params_into_model") config = Config(config_file, None, None, use_existing_dir=True, override_MFP_to_active=mfp, imposed_input_size=imposed_input_size) if config.mode!='img-img': raise NotImplementedError() if axis_order=='theano': ps = config.patch_size ndim = len(ps) input_size = [None, ] * (2 + ndim) input_size[0] = config.batch_size if ndim==3: tags = 'b,z,f,y,x' input_size[1] = config.patch_size[0] input_size[2] = n_ch input_size[3] = config.patch_size[1] input_size[4] = config.patch_size[2] elif ndim==2: tags = 'b,f,x,y' input_size[1] = n_ch input_size[2] = config.patch_size[0] input_size[3] = config.patch_size[1] if param_file is None: param_file = config.paramfile params = pickleload(param_file) pool = params[-1] f_shapes = params[0] params = params[1:-1] # come in order W0, b0, W1, b1,... neuromancer.node_basic.model_manager.newmodel('legacy') inp = neuromancer.Input(input_size, tags) conv = list( zip(config.nof_filters, # doesn't have to be a list, does it? config.filters, config.pool, config.activation_func, config.pooling_mode, params[::2], params[1::2])) for i, (n, f, p, act, p_m, W, b) in enumerate(conv): W = [W, 'const'] if constant_weights else W b = [b, 'const'] if constant_weights else b inp = neuromancer.Conv(inp, n, f, p, mfp=mfp, activation_func=act, w=W, b=b) # last Layer W = [params[-2], 'const'] if constant_weights else params[-2] b = [params[-1], 'const'] if constant_weights else params[-1] out = neuromancer.Conv(inp, config.n_lab, (1,) * ndim, (1,) * ndim, activation_func='lin', w=W, b=b) if mfp: out = neuromancer.FragmentsToDense(out) if config.target in ['affinity', 'malis']: probs = neuromancer.Softmax(out, n_class=2, n_indep=3, name='class_probabilities') else: probs = neuromancer.Softmax(out, n_class=config.n_lab, name='class_probabilities') elif axis_order=='dnn': raise NotImplementedError() model = neuromancer.model_manager.getmodel('legacy') model.designate_nodes(input_node=inp, prediction_node=probs) return model
def compact(self, accumulator):
    return [c.compact(a) for c, a in zip(self._combiners, accumulator)]
def merge_accumulators(self, accumulators):
    sums, counts = zip(*accumulators)
    return sum(sums), sum(counts)
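# Illustrative sketch of the zip(*...) transpose used above: merging two
# (sum, count) accumulators, e.g. produced by two workers of a mean combiner.
accumulators = [(3.0, 2), (7.0, 5)]
sums, counts = zip(*accumulators)
print(sum(sums), sum(counts))
# 10.0 7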
def test_lzip(self):
    lst = [builtins.range(10), builtins.range(10), builtins.range(10)]
    # trailing commas intentionally wrap each value in a 1-element tuple,
    # since check_results iterates over collections of results
    results = lzip(*lst),
    expecteds = list(builtins.zip(*lst)),
    lengths = 10,
    self.check_results(results, expecteds, lengths)
def extract_output(self, accumulator):
    return tuple([
        c.extract_output(a)
        for c, a in zip(self._combiners, accumulator)
    ])
def execute(self, context=None): metastore = HiveMetastoreHook(metastore_conn_id=self.metastore_conn_id) table = metastore.get_table(table_name=self.table) field_types = {col.name: col.type for col in table.sd.cols} exprs = {('', 'count'): 'COUNT(*)'} for col, col_type in list(field_types.items()): d = {} if self.assignment_func: d = self.assignment_func(col, col_type) if d is None: d = self.get_default_exprs(col, col_type) else: d = self.get_default_exprs(col, col_type) exprs.update(d) exprs.update(self.extra_exprs) exprs = OrderedDict(exprs) exprs_str = ",\n ".join( [v + " AS " + k[0] + '__' + k[1] for k, v in exprs.items()]) where_clause = [ "{0} = '{1}'".format(k, v) for k, v in self.partition.items() ] where_clause = " AND\n ".join(where_clause) sql = """ SELECT {exprs_str} FROM {self.table} WHERE {where_clause}; """.format(**locals()) hook = PrestoHook(presto_conn_id=self.presto_conn_id) self.log.info('Executing SQL check: %s', sql) row = hook.get_first(hql=sql) self.log.info("Record: %s", row) if not row: raise AirflowException("The query returned None") part_json = json.dumps(self.partition, sort_keys=True) self.log.info("Deleting rows from previous runs if they exist") mysql = MySqlHook(self.mysql_conn_id) sql = """ SELECT 1 FROM hive_stats WHERE table_name='{self.table}' AND partition_repr='{part_json}' AND dttm='{self.dttm}' LIMIT 1; """.format(**locals()) if mysql.get_records(sql): sql = """ DELETE FROM hive_stats WHERE table_name='{self.table}' AND partition_repr='{part_json}' AND dttm='{self.dttm}'; """.format(**locals()) mysql.run(sql) self.log.info("Pivoting and loading cells into the Airflow db") rows = [(self.ds, self.dttm, self.table, part_json) + (r[0][0], r[0][1], r[1]) for r in zip(exprs, row)] mysql.insert_rows(table='hive_stats', rows=rows, target_fields=[ 'ds', 'dttm', 'table_name', 'partition_repr', 'col', 'metric', 'value', ])
def merge_accumulators(self, accumulators):
    return [
        c.merge_accumulators(a)
        for c, a in zip(self._combiners, zip(*accumulators))
    ]
def test_csv_table(): # Maybe not truly a unit test, but here because it doesn't do # network IO to synapse data = [["1", "1", "John Coltrane", 1926, 8.65, False], ["2", "1", "Miles Davis", 1926, 9.87, False], ["3", "1", "Bill Evans", 1929, 7.65, False], ["4", "1", "Paul Chambers", 1935, 5.14, False], ["5", "1", "Jimmy Cobb", 1929, 5.78, True], ["6", "1", "Scott LaFaro", 1936, 4.21, False], ["7", "1", "Sonny Rollins", 1930, 8.99, True], ["8", "1", "Kenny Burrel", 1931, 4.37, True]] filename = None cols = [Column(id='1', name='Name', columnType='STRING'), Column(id='2', name='Born', columnType='INTEGER'), Column(id='3', name='Hipness', columnType='DOUBLE'), Column(id='4', name='Living', columnType='BOOLEAN')] schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001") # TODO: use StringIO.StringIO(data) rather than writing files try: # create CSV file with tempfile.NamedTemporaryFile(delete=False) as temp: filename = temp.name with io.open(filename, mode='w', encoding="utf-8", newline='') as temp: writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep)) headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols] writer.writerow(headers) for row in data: writer.writerow(row) table = Table(schema1, filename) assert isinstance(table, CsvFileTable) # need to set column headers to read a CSV file table.setColumnHeaders( [SelectColumn(name="ROW_ID", columnType="STRING"), SelectColumn(name="ROW_VERSION", columnType="STRING")] + [SelectColumn.from_column(col) for col in cols]) # test iterator for table_row, expected_row in zip(table, data): assert table_row == expected_row # test asRowSet rowset = table.asRowSet() for rowset_row, expected_row in zip(rowset.rows, data): assert rowset_row['values'] == expected_row[2:] assert rowset_row['rowId'] == expected_row[0] assert rowset_row['versionNumber'] == expected_row[1] df = table.asDataFrame() assert list(df['Name']) == [row[2] for row in data] assert list(df['Born']) == [row[3] for row in data] assert list(df['Living']) == [row[5] for row in data] assert list(df.index) == ['%s_%s' % tuple(row[0:2]) for row in data] assert df.shape == (8, 4) except Exception: if filename: try: if os.path.isdir(filename): shutil.rmtree(filename) else: os.remove(filename) except Exception as ex: print(ex) raise
def calculate_feature_matrix(features, entityset=None, cutoff_time=None, instance_ids=None, entities=None, relationships=None, cutoff_time_in_index=False, training_window=None, approximate=None, save_progress=None, verbose=False, chunk_size=None, n_jobs=1, dask_kwargs=None): """Calculates a matrix for a given set of instance ids and calculation times. Args: features (list[:class:`.FeatureBase`]): Feature definitions to be calculated. entityset (EntitySet): An already initialized entityset. Required if `entities` and `relationships` not provided cutoff_time (pd.DataFrame or Datetime): Specifies at which time to calculate the features for each instance. The resulting feature matrix will use data up to and including the cutoff_time. Can either be a DataFrame with 'instance_id' and 'time' columns, DataFrame with the name of the index variable in the target entity and a time column, or a single value to calculate for all instances. If the dataframe has more than two columns, any additional columns will be added to the resulting feature matrix. instance_ids (list): List of instances to calculate features on. Only used if cutoff_time is a single datetime. entities (dict[str -> tuple(pd.DataFrame, str, str)]): dictionary of entities. Entries take the format {entity id: (dataframe, id column, (time_column))}. relationships (list[(str, str, str, str)]): list of relationships between entities. List items are a tuple with the format (parent entity id, parent variable, child entity id, child variable). cutoff_time_in_index (bool): If True, return a DataFrame with a MultiIndex where the second index is the cutoff time (first is instance id). DataFrame will be sorted by (time, instance_id). training_window (Timedelta or str, optional): Window defining how much time before the cutoff time data can be used when calculating features. If ``None``, all data before cutoff time is used. Defaults to ``None``. approximate (Timedelta or str): Frequency to group instances with similar cutoff times by for features with costly calculations. For example, if bucket is 24 hours, all instances with cutoff times on the same day will use the same calculation for expensive features. verbose (bool, optional): Print progress info. The time granularity is per chunk. chunk_size (int or float or None or "cutoff time"): Number of rows of output feature matrix to calculate at time. If passed an integer greater than 0, will try to use that many rows per chunk. If passed a float value between 0 and 1 sets the chunk size to that percentage of all instances. If passed the string "cutoff time", rows are split per cutoff time. n_jobs (int, optional): number of parallel processes to use when calculating feature matrix dask_kwargs (dict, optional): Dictionary of keyword arguments to be passed when creating the dask client and scheduler. Even if n_jobs is not set, using `dask_kwargs` will enable multiprocessing. Main parameters: cluster (str or dask.distributed.LocalCluster): cluster or address of cluster to send tasks to. If unspecified, a cluster will be created. diagnostics port (int): port number to use for web dashboard. If left unspecified, web interface will not be enabled. Valid keyword arguments for LocalCluster will also be accepted. save_progress (str, optional): path to save intermediate computational results. 
""" assert (isinstance(features, list) and features != [] and all([isinstance(feature, FeatureBase) for feature in features])), \ "features must be a non-empty list of features" # handle loading entityset from featuretools.entityset.entityset import EntitySet if not isinstance(entityset, EntitySet): if entities is not None and relationships is not None: entityset = EntitySet("entityset", entities, relationships) target_entity = entityset[features[0].entity.id] pass_columns = [] if not isinstance(cutoff_time, pd.DataFrame): if isinstance(cutoff_time, list): raise TypeError("cutoff_time must be a single value or DataFrame") if cutoff_time is None: if entityset.time_type == NumericTimeIndex: cutoff_time = np.inf else: cutoff_time = datetime.now() if instance_ids is None: index_var = target_entity.index df = target_entity._handle_time(target_entity.df, time_last=cutoff_time, training_window=training_window) instance_ids = df[index_var].tolist() cutoff_time = [cutoff_time] * len(instance_ids) map_args = [(id, time) for id, time in zip(instance_ids, cutoff_time)] cutoff_time = pd.DataFrame(map_args, columns=['instance_id', 'time']) cutoff_time = cutoff_time.reset_index(drop=True) # handle how columns are names in cutoff_time # maybe add _check_time_dtype helper function if "instance_id" not in cutoff_time.columns: if target_entity.index not in cutoff_time.columns: raise AttributeError( 'Name of the index variable in the target entity' ' or "instance_id" must be present in cutoff_time') # rename to instance_id cutoff_time.rename(columns={target_entity.index: "instance_id"}, inplace=True) if "time" not in cutoff_time.columns: # take the first column that isn't instance_id and assume it is time not_instance_id = [ c for c in cutoff_time.columns if c != "instance_id" ] cutoff_time.rename(columns={not_instance_id[0]: "time"}, inplace=True) # Check that cutoff_time time type matches entityset time type if entityset.time_type == NumericTimeIndex: if cutoff_time['time'].dtype.name not in PandasTypes._pandas_numerics: raise TypeError("cutoff_time times must be numeric: try casting " "via pd.to_numeric(cutoff_time['time'])") elif entityset.time_type == DatetimeTimeIndex: if cutoff_time['time'].dtype.name not in PandasTypes._pandas_datetimes: raise TypeError( "cutoff_time times must be datetime type: try casting via pd.to_datetime(cutoff_time['time'])" ) assert (cutoff_time[['instance_id', 'time']].duplicated().sum() == 0), \ "Duplicated rows in cutoff time dataframe." 
pass_columns = [column_name for column_name in cutoff_time.columns[2:]] if _check_time_type(cutoff_time['time'].iloc[0]) is None: raise ValueError("cutoff_time time values must be datetime or numeric") feature_set = FeatureSet(features) # make sure dtype of instance_id in cutoff time # is same as column it references target_entity = features[0].entity dtype = entityset[target_entity.id].df[target_entity.index].dtype cutoff_time["instance_id"] = cutoff_time["instance_id"].astype(dtype) # Get features to approximate if approximate is not None: _, all_approx_feature_set = gather_approximate_features(feature_set) else: all_approx_feature_set = None # Check if there are any non-approximated aggregation features no_unapproximated_aggs = True for feature in features: if isinstance(feature, AggregationFeature): # do not need to check if feature is in to_approximate since # only base features of direct features can be in to_approximate no_unapproximated_aggs = False break deps = feature.get_dependencies(deep=True, ignored=all_approx_feature_set) for dependency in deps: if isinstance(dependency, AggregationFeature): no_unapproximated_aggs = False break cutoff_df_time_var = 'time' target_time = '_original_time' num_per_chunk = calc_num_per_chunk(chunk_size, cutoff_time.shape) if approximate is not None: # If there are approximated aggs, bin times binned_cutoff_time = bin_cutoff_times(cutoff_time.copy(), approximate) # Think about collisions: what if original time is a feature binned_cutoff_time[target_time] = cutoff_time[cutoff_df_time_var] cutoff_time_to_pass = binned_cutoff_time else: cutoff_time_to_pass = cutoff_time if num_per_chunk == "cutoff time": iterator = cutoff_time_to_pass.groupby(cutoff_df_time_var) else: iterator = get_next_chunk(cutoff_time=cutoff_time_to_pass, time_variable=cutoff_df_time_var, num_per_chunk=num_per_chunk) chunks = [] if num_per_chunk == "cutoff time": for _, group in iterator: chunks.append(group) else: for chunk in iterator: chunks.append(chunk) if n_jobs != 1 or dask_kwargs is not None: feature_matrix = parallel_calculate_chunks( chunks=chunks, feature_set=feature_set, approximate=approximate, training_window=training_window, verbose=verbose, save_progress=save_progress, entityset=entityset, n_jobs=n_jobs, no_unapproximated_aggs=no_unapproximated_aggs, cutoff_df_time_var=cutoff_df_time_var, target_time=target_time, pass_columns=pass_columns, dask_kwargs=dask_kwargs or {}) else: feature_matrix = linear_calculate_chunks( chunks=chunks, feature_set=feature_set, approximate=approximate, training_window=training_window, verbose=verbose, save_progress=save_progress, entityset=entityset, no_unapproximated_aggs=no_unapproximated_aggs, cutoff_df_time_var=cutoff_df_time_var, target_time=target_time, pass_columns=pass_columns) feature_matrix = pd.concat(feature_matrix) feature_matrix.sort_index(level='time', kind='mergesort', inplace=True) if not cutoff_time_in_index: feature_matrix.reset_index(level='time', drop=True, inplace=True) if save_progress and os.path.exists(os.path.join(save_progress, 'temp')): shutil.rmtree(os.path.join(save_progress, 'temp')) return feature_matrix
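A hedged usage sketch of the API documented above, assuming featuretools' bundled mock-customer demo entityset is available in this version; the cutoff_time DataFrame uses the 'instance_id' and 'time' columns described in the docstring, and the specific ids and timestamps are purely illustrative.

import pandas as pd
import featuretools as ft

# Demo entityset shipped with featuretools (assumed available here).
es = ft.demo.load_mock_customer(return_entityset=True)

# Feature definitions only; nothing is computed yet.
features = ft.dfs(entityset=es, target_entity="customers", features_only=True)

# One cutoff time per instance: data up to and including 'time' is used.
cutoff_time = pd.DataFrame({
    "instance_id": [1, 2, 3],
    "time": pd.to_datetime(["2014-01-01 04:00",
                            "2014-01-01 05:00",
                            "2014-01-01 06:00"]),
})

fm = ft.calculate_feature_matrix(features=features,
                                 entityset=es,
                                 cutoff_time=cutoff_time,
                                 cutoff_time_in_index=True,
                                 verbose=True)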
    test_fps = [np_fps_act[i] for i in test_list[:num_test_actives]]
    test_fps += [np_fps_dcy[i] for i in test_list[num_test_actives:]]
    test_mols = [[actives[i][0], 1] for i in test_list[:num_test_actives]]
    test_mols += [[decoys[i][0], 0] for i in test_list[num_test_actives:]]

    # rank based on probability
    single_score = ml.predict_proba(test_fps)
    # store: [probability, internal ID, active/inactive]
    single_score = [[s[1], m[0], m[1]] for s, m in zip(single_score, test_mols)]
    single_score.sort(reverse=True)
    scores["lr_" + fp_build].append(single_score)

    # write scores to file
    if do_append:
        outfile = gzip.open(
            outpath + "/list_" + dataset + "_" + str(target) + ".pkl.gz",
            "ab+",
        )  # binary format
    else:
        outfile = gzip.open(
            outpath + "/list_" + dataset + "_" + str(target) + ".pkl.gz",
            "wb+",
        )  # binary format
def runSlices(opsimName, metadata, simdata, fields, bins, args, opsDb, verbose=False): # Set up the movie slicer. movieslicer = setupMovieSlicer(simdata, bins) # Set up formatting for output suffix. sliceformat = '%s0%dd' % ('%', int(np.log10(len(movieslicer))) + 1) # Get the telescope latitude info. lat_tele = Site(name='LSST').latitude_rad # Run through the movie slicer slicePoints and generate plots at each point. for i, ms in enumerate(movieslicer): t = time.time() slicenumber = sliceformat % (i) if verbose: print(slicenumber) # Set up metrics. if args.movieStepsize != 0: tstep = args.movieStepsize else: tstep = ms['slicePoint']['binRight'] - bins[i] if tstep > 1: tstep = 40. / 24. / 60. / 60. # Add simple view of time to plot label. times_from_start = ms['slicePoint']['binRight'] - (int(bins[0]) + 0.16 - 0.5) # Opsim years are 365 days (not 365.25) years = int(times_from_start / 365) days = times_from_start - years * 365 plotlabel = 'Year %d Day %.4f' % (years, days) # Set up metrics. metricList, plotDictList = setupMetrics( opsimName, metadata, plotlabel=plotlabel, t0=ms['slicePoint']['binRight'], tStep=tstep, years=years, verbose=verbose) # Identify the subset of simdata in the movieslicer 'data slice' simdatasubset = simdata[ms['idxs']] # Set up opsim slicer on subset of simdata provided by movieslicer opslicer = slicers.OpsimFieldSlicer() # Set up metricBundles to combine metrics, plotdicts and slicer. bundles = [] sqlconstraint = '' for metric, plotDict in zip(metricList, plotDictList): bundles.append( metricBundles.MetricBundle(metric, opslicer, constraint=sqlconstraint, metadata=metadata, runName=opsimName, plotDict=plotDict)) # Remove (default) stackers from bundles, because we've already run them above on the original data. for mb in bundles: mb.stackerList = [] bundledict = metricBundles.makeBundlesDictFromList(bundles) # Set up metricBundleGroup to handle metrics calculation + plotting bg = metricBundles.MetricBundleGroup(bundledict, opsDb, outDir=args.outDir, resultsDb=None, saveEarly=False) # 'Hack' bundleGroup to just go ahead and run the metrics, without querying the database. simData = simdatasubset bg.fieldData = fields bg.setCurrent(sqlconstraint) bg.runCurrent(constraint=sqlconstraint, simData=simData) # Plot data each metric, for this slice of the movie, adding slicenumber as a suffix for output plots. # Plotting here, rather than automatically via sliceMetric method because we're going to rotate the sky, # and add extra legend info and figure text (for FilterColors metric). ph = plots.PlotHandler(outDir=args.outDir, figformat='png', dpi=72, thumbnail=False, savefig=False) obsnow = np.where(simdatasubset['observationStartMJD'] == simdatasubset['observationStartMJD'].max())[0] raCen = np.radians( np.mean(simdatasubset[obsnow]['observationStartLST'])) # Calculate horizon location. horizonlon, horizonlat = addHorizon(lat_telescope=lat_tele) # Create the plot for each metric and save it (after some additional manipulation). for mb in bundles: ph.setMetricBundles([mb]) fignum = ph.plot(plotFunc=plots.BaseSkyMap(), plotDicts={'raCen': raCen}) fig = plt.figure(fignum) ax = plt.gca() # Add horizon and zenith. plt.plot(horizonlon, horizonlat, 'k.', alpha=0.3, markersize=1.8) plt.plot(0, lat_tele, 'k+') # For the FilterColors metric, add some extra items. if mb.metric.name == 'FilterColors': # Add the time stamp info (plotlabel) with a fancybox. 
plt.figtext(0.75, 0.9, '%s' % (plotlabel), bbox=dict(boxstyle='Round, pad=0.7', fc='w', ec='k', alpha=0.5)) # Add a legend for the filters. filterstacker = stackers.FilterColorStacker() for i, f in enumerate(['u', 'g', 'r', 'i', 'z', 'y']): plt.figtext(0.92, 0.55 - i * 0.035, f, color=filterstacker.filter_rgb_map[f]) # Add a moon. moonRA = np.radians(np.mean(simdatasubset[obsnow]['moonRA'])) lon = -(moonRA - raCen - np.pi) % (np.pi * 2) - np.pi moonDec = np.radians(np.mean(simdatasubset[obsnow]['moonDec'])) # Note that moonphase is 0-100 (translate to 0-1). 0=new. moonPhase = np.mean(simdatasubset[obsnow]['moonPhase']) / 100. alpha = np.max([moonPhase, 0.15]) circle = Circle((lon, moonDec), radius=0.05, color='k', alpha=alpha) ax.add_patch(circle) # Add some explanatory text. ecliptic = Line2D([], [], color='r', label="Ecliptic plane") galaxy = Line2D([], [], color='b', label="Galactic plane") horizon = Line2D([], [], color='k', alpha=0.3, label="20 deg elevation limit") moon = Line2D([], [], color='k', linestyle='', marker='o', markersize=8, alpha=alpha, label="\nMoon (Dark=Full)\n (Light=New)") zenith = Line2D([], [], color='k', linestyle='', marker='+', markersize=5, label="Zenith") plt.legend( handles=[horizon, zenith, galaxy, ecliptic, moon], loc=[0.1, -0.35], ncol=3, frameon=False, title= 'Aitoff plot showing HA/Dec of simulated survey pointings', numpoints=1, fontsize='small') # Save figure. plt.savefig(os.path.join( args.outDir, mb.metric.name + '_' + slicenumber + '_SkyMap.png'), format='png', dpi=72) plt.close('all') dt, t = dtime(t) if verbose: print('Ran and plotted slice %s of movieslicer in %f s' % (slicenumber, dt))
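Two small pieces of the bookkeeping above, shown standalone: the zero-padded suffix format derived from the number of movie slices, and the zip over metrics and plot dictionaries that builds one bundle per pair. Plain tuples stand in for MetricBundle objects here; the metric names and plot dicts are illustrative.

import numpy as np

# Suffix width grows with the number of movie slices, e.g. 480 slices -> '%03d'.
n_slices = 480
sliceformat = '%s0%dd' % ('%', int(np.log10(n_slices)) + 1)
assert sliceformat % 7 == '007'

# Pair each metric with its plot dictionary; in runSlices each pair becomes a MetricBundle.
metricList = ['Nvisits', 'FilterColors']                 # illustrative stand-ins
plotDictList = [{'colorMax': 10}, {'metricIsColor': True}]
bundles = [(metric, plotDict) for metric, plotDict in zip(metricList, plotDictList)]
assert len(bundles) == len(metricList)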
def run(self, sensorRefList, calibType):
    """Process a calibration frame.

    @param sensorRefList: list of sensor-level butler data references
    @return pipe_base Struct containing these fields:
    - masterFrameList: amp exposures of master calibration products
    """
    referenceAmps = sensorRefList[0].subItems(level="channel")
    masterExpList = []
    dataIdList = []
    expmeta = None
    for amp in referenceAmps:
        if amp.dataId['snap'] == 1:
            continue
        self.log.info("Amp: Processing %s", amp.dataId)
        print("dataid %s" % (amp.dataId))
        butler = amp.butlerSubset.butler
        ampMIList = []
        for sRef in sensorRefList:
            self.log.info("Sensor: Processing %s", sRef.dataId)
            ampSnapMIList = []
            # copy the reference amp's dataId so visit/snap can be overridden per sensorRef
            dataId = eval(amp.dataId.__repr__())
            dataId['visit'] = sRef.dataId['visit']
            for snap in (0, 1):
                dataId['snap'] = snap
                ampExposure = sRef.butlerSubset.butler.get('raw', dataId)
                if expmeta is None:
                    expmeta = ampExposure.getMetadata()
                    expfilter = ampExposure.getFilter()
                    expcalib = ampExposure.getCalib()
                ampDetector = ampExposure.getDetector()

                ampExposure = self.convertIntToFloat(ampExposure)
                ampExpDataView = ampExposure.Factory(ampExposure, ampDetector.getDiskDataSec())

                self.saturationDetection(ampExposure, ampDetector)

                self.overscanCorrection(ampExposure, ampDetector)

                if calibType in ('flat', 'dark'):
                    self.biasCorrection(ampExpDataView, amp)

                if False:  # dark correction is currently disabled
                    self.darkCorrection(ampExpDataView, amp)

                self.updateVariance(ampExpDataView, ampDetector)
                ampSnapMIList.append(ampExpDataView.getMaskedImage())
            ampMIList.append(self.combineMIList(ampSnapMIList))
        masterFrame = self.combineMIList(ampMIList)

        # Fix saturation too???
        self.fixDefectsAndSat(masterFrame, ampDetector)
        exp = afwImage.ExposureF(masterFrame)
        self.copyMetadata(exp, expmeta, calibType)
        exp.setDetector(ampDetector)
        exp.setWcs(None)
        exp.setCalib(expcalib)
        if calibType == 'flat':
            exp.setFilter(expfilter)
        if self.config.doWrite and calibType != 'flat':
            print("writing file %s" % dataId)
            butler.put(exp, calibType, dataId=amp.dataId)
        masterExpList.append(exp)
        dataIdList.append(amp.dataId)
    if self.config.doWrite and calibType == 'flat':
        self.normChipAmps(masterExpList)
        for exp, dataId in zip(masterExpList, dataIdList):
            print("writing flat file %s" % dataId)
            butler.put(exp, calibType, dataId)
    return pipeBase.Struct(masterFrameList=masterExpList,)
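The final write loop is another instance of pairwise iteration: the master exposures and the data ids are appended in the same order inside the amp loop, so zip keeps each product aligned with the identifier it is written under. A toy sketch, with dictionaries standing in for exposures and a print standing in for butler.put:

masterExpList = [{"calib": "flat", "amp": amp} for amp in ("C00", "C01")]   # stand-in exposures
dataIdList = [{"channel": amp} for amp in ("C00", "C01")]                   # matching data ids

for exp, dataId in zip(masterExpList, dataIdList):
    # In the task above this call is butler.put(exp, calibType, dataId).
    print("writing flat file %s -> %s" % (dataId, exp["amp"]))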
def train_rcnn(network, dataset, image_set, root_path, dataset_path, frequent, kvstore, work_load_list, no_flip, no_shuffle, resume, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, train_shared, lr, lr_step, proposal): # set up logger logging.basicConfig() logger = logging.getLogger() logger.setLevel(logging.INFO) # set up config config.TRAIN.BATCH_IMAGES = 2 config.TRAIN.BATCH_ROIS = 128 if proposal == 'ss': config.TRAIN.BG_THRESH_LO = 0.1 # reproduce Fast R-CNN # load symbol sym = eval('get_' + network + '_rcnn')(num_classes=config.NUM_CLASSES) # setup multi-gpu batch_size = len(ctx) input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config pprint.pprint(config) # load dataset and prepare imdb for training image_sets = [iset for iset in image_set.split('+')] roidbs = [load_proposal_roidb(dataset, image_set, root_path, dataset_path, proposal=proposal, append_gt=True, flip=not no_flip) for image_set in image_sets] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb) means, stds = add_bbox_regression_targets(roidb) # load training data train_data = ROIIter(roidb, batch_size=input_batch_size, shuffle=not no_shuffle, ctx=ctx, work_load_list=work_load_list, aspect_grouping=config.TRAIN.ASPECT_GROUPING) # infer max shape max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] # infer shape data_shape_dict = dict(train_data.provide_data + train_data.provide_label) arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict) arg_shape_dict = dict(list(zip(sym.list_arguments(), arg_shape))) out_shape_dict = dict(list(zip(sym.list_outputs(), out_shape))) aux_shape_dict = dict(list(zip(sym.list_auxiliary_states(), aux_shape))) print('output shape') pprint.pprint(out_shape_dict) # load and initialize params if resume: arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = load_param(pretrained, epoch, convert=True) arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight']) arg_params['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias']) arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight']) arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias']) # check parameter shapes for k in sym.list_arguments(): if k in data_shape_dict: continue assert k in arg_params, k + ' not initialized' assert arg_params[k].shape == arg_shape_dict[k], \ 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) for k in sym.list_auxiliary_states(): assert k in aux_params, k + ' not initialized' assert aux_params[k].shape == aux_shape_dict[k], \ 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) # prepare training # create solver data_names = [k[0] for k in train_data.provide_data] label_names = [k[0] for k in train_data.provide_label] if train_shared: fixed_param_prefix = config.FIXED_PARAMS_SHARED else: fixed_param_prefix = config.FIXED_PARAMS mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, work_load_list=work_load_list, max_data_shapes=max_data_shape, fixed_param_prefix=fixed_param_prefix) # decide training params # metric eval_metric = metric.RCNNAccMetric() cls_metric = metric.RCNNLogLossMetric() bbox_metric = metric.RCNNL1LossMetric() eval_metrics = 
mx.metric.CompositeEvalMetric() for child_metric in [eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) # callback batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=frequent) epoch_end_callback = callback.do_checkpoint(prefix, means, stds) # decide learning rate base_lr = lr lr_factor = 0.1 lr_epoch = [int(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor) # optimizer optimizer_params = {'momentum': 0.9, 'wd': 0.0005, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'rescale_grad': (old_div(1.0, batch_size)), 'clip_gradient': 5} # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=kvstore, optimizer='sgd', optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
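The learning-rate bookkeeping above is easy to misread, so here is the same arithmetic in isolation with illustrative numbers (not taken from any particular config): steps that fall at or before begin_epoch shrink the starting rate, and the remaining steps are converted from epochs to iterations before being handed to the scheduler. Pure Python; no MXNet is needed to follow it.

# Illustrative values only.
base_lr, lr_factor = 0.001, 0.1
lr_step = '7,10'
begin_epoch = 8
num_images, batch_size = 20000, 2

lr_epoch = [int(epoch) for epoch in lr_step.split(',')]                              # [7, 10]
lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]   # [2]
# One step (epoch 7) has already passed, so the starting lr is scaled down once.
lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))                   # 0.0001
lr_iters = [int(epoch * num_images / batch_size) for epoch in lr_epoch_diff]         # [20000]

assert abs(lr - 0.0001) < 1e-12 and lr_iters == [20000]
# These values would feed mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor).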