def round_values(self):
    """
    PURPOSE: To round the input parameters for lookup in the Claret table.
    """
    self.rteff = np.round(self.teff / 250) * 250
    self.rlogg = np.round(self.logg / 0.5) * 0.5
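# A minimal, hypothetical sketch of the grid-snapping idea used above, outside the class:
# np.round(x / step) * step snaps a value to the nearest multiple of `step`, which is how
# the Teff/logg lookup keys for the Claret table are built. The numbers are illustrative.
import numpy as np

teff, logg = 5712.0, 4.37           # example stellar parameters (assumed values)
rteff = np.round(teff / 250) * 250  # -> 5750.0, nearest 250 K grid point
rlogg = np.round(logg / 0.5) * 0.5  # -> 4.5, nearest 0.5 dex grid point
print(rteff, rlogg)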
def label_nodes_with_class(nodes_xyt, class_maps, pix):
    """
    Returns:
      class_maps_one_hot: one-hot class map for each class.
      node_class_label_one_hot: one-hot class label per node,
        nodes_xyt.shape[0] x n_classes.
    """
    # Assign each pixel to a node.
    selem = skimage.morphology.disk(pix)
    class_maps_ = class_maps * 1.
    for i in range(class_maps.shape[2]):
        class_maps_[:, :, i] = skimage.morphology.dilation(class_maps[:, :, i] * 1, selem)
    class_maps__ = np.argmax(class_maps_, axis=2)
    class_maps__[np.max(class_maps_, axis=2) == 0] = -1

    # For each node pick out the label from this class map.
    x = np.round(nodes_xyt[:, [0]]).astype(np.int32)
    y = np.round(nodes_xyt[:, [1]]).astype(np.int32)
    ind = np.ravel_multi_index((y, x), class_maps__.shape)
    node_class_label = class_maps__.ravel()[ind][:, 0]

    # Convert to one-hot versions (np.bool is deprecated; the builtin bool is equivalent).
    class_maps_one_hot = np.zeros(class_maps.shape, dtype=bool)
    node_class_label_one_hot = np.zeros((node_class_label.shape[0], class_maps.shape[2]), dtype=bool)
    for i in range(class_maps.shape[2]):
        class_maps_one_hot[:, :, i] = class_maps__ == i
        node_class_label_one_hot[:, i] = node_class_label == i
    return class_maps_one_hot, node_class_label_one_hot
def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None): """ Render the text """ if __debug__: verbose.report('RendererAgg.draw_text', 'debug-annoying') if ismath: return self.draw_mathtext(gc, x, y, s, prop, angle) flags = get_hinting_flag() font = self._get_agg_font(prop) if font is None: return None if len(s) == 1 and ord(s) > 127: font.load_char(ord(s), flags=flags) else: # We pass '0' for angle here, since it will be rotated (in raster # space) in the following call to draw_text_image). font.set_text(s, 0, flags=flags) font.draw_glyphs_to_bitmap(antialiased=rcParams['text.antialiased']) d = font.get_descent() / 64.0 # The descent needs to be adjusted for the angle xo, yo = font.get_bitmap_offset() xo /= 64.0 yo /= 64.0 xd = -d * np.sin(np.deg2rad(angle)) yd = d * np.cos(np.deg2rad(angle)) #print x, y, int(x), int(y), s self._renderer.draw_text_image( font, np.round(x - xd + xo), np.round(y + yd + yo) + 1, angle, gc)
def test_D_infinity_flat_closed_upper(): mg = RasterModelGrid((5, 4), xy_spacing=(1, 1)) z = mg.add_zeros("node", "topographic__elevation") z[mg.core_nodes] -= 1 mg.set_closed_boundaries_at_grid_edges( bottom_is_closed=True, left_is_closed=True, right_is_closed=True, top_is_closed=True, ) fd = FlowDirectorDINF(mg) fd.run_one_step() node_ids = np.arange(mg.number_of_nodes) true_recievers = -1 * np.ones(fd.receivers.shape) true_recievers[:, 0] = node_ids true_proportions = np.zeros(fd.proportions.shape) true_proportions[:, 0] = 1 assert_array_equal(fd.receivers, true_recievers) assert_array_equal( np.round(fd.proportions, decimals=6), np.round(true_proportions, decimals=6) )
def bandpass_filter(files, lowpass_freq, highpass_freq, fs):
    """Bandpass filter the input files

    Parameters
    ----------
    files: list of 4d nifti files
    lowpass_freq: cutoff frequency for the low pass filter (in Hz)
    highpass_freq: cutoff frequency for the high pass filter (in Hz)
    fs: sampling rate (in Hz)
    """
    out_files = []
    for filename in filename_to_list(files):
        path, name, ext = split_filename(filename)
        out_file = os.path.join(os.getcwd(), name + '_bp' + ext)
        img = nb.load(filename)
        timepoints = img.shape[-1]
        F = np.zeros((timepoints))
        lowidx = int(timepoints / 2) + 1
        if lowpass_freq > 0:
            # cast to int so the value can be used as a slice index
            lowidx = int(np.round(float(lowpass_freq) / fs * timepoints))
        highidx = 0
        if highpass_freq > 0:
            highidx = int(np.round(float(highpass_freq) / fs * timepoints))
        F[highidx:lowidx] = 1
        F = ((F + F[::-1]) > 0).astype(int)
        data = img.get_fdata()  # get_data() is deprecated/removed in recent nibabel
        if np.all(F == 1):
            filtered_data = data
        else:
            filtered_data = np.real(np.fft.ifftn(np.fft.fftn(data) * F))
        img_out = nb.Nifti1Image(filtered_data, img.affine, img.header)
        img_out.to_filename(out_file)
        out_files.append(out_file)
    return list_to_filename(out_files)
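# Hedged sketch of the frequency-mask logic above applied to a 1-D signal (no nibabel
# needed). The indices come from np.round(cutoff / fs * timepoints); mirroring F with
# F[::-1] keeps the mask symmetric so the inverse FFT of a real signal stays real.
# All numbers here are illustrative assumptions, not values from the source.
import numpy as np

fs = 0.5                      # assumed sampling rate in Hz (e.g. TR of 2 s)
timepoints = 200
t = np.arange(timepoints) / fs
signal = np.sin(2 * np.pi * 0.01 * t) + np.sin(2 * np.pi * 0.2 * t)

lowpass_freq, highpass_freq = 0.1, 0.005
F = np.zeros(timepoints)
lowidx = int(np.round(lowpass_freq / fs * timepoints))
highidx = int(np.round(highpass_freq / fs * timepoints))
F[highidx:lowidx] = 1
F = ((F + F[::-1]) > 0).astype(int)

filtered = np.real(np.fft.ifft(np.fft.fft(signal) * F))
print(filtered.shape)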
def ss_calculate(self, f_ref, n_ref, f_pll_target, f_ss_mod_target, ss_p2p_frac_deviation_target):
    """
    calculate all the spread-spectrum engine variables, such as:
    - FCW (for the NCO which generates the step clock)
    - number of steps in the triangle-wave
    - center-spread/down-spread
    - step-size
    """
    f_pfd = f_ref / n_ref
    if f_pfd > self.f_ss_engine_max:
        n_ss_prescaler = int(f_pfd / self.f_ss_engine_max) + 1
    else:
        n_ss_prescaler = 1
    f_to_nco = f_pfd / n_ss_prescaler

    n_triwave_divide_max = f_to_nco / f_ss_mod_target * (self.fcw_max / 2**self.bits_ss_nco)
    # np.int is deprecated; the builtin int is equivalent here
    b_triangle_wave_ideal = int(np.floor(np.log2(n_triwave_divide_max)))
    # n_overall = f_to_nco / f_ss_mod_target
    # b_triangle_wave_ideal = int(np.floor(np.log2(n_overall)))
    temp1 = min(b_triangle_wave_ideal, self.bits_triwave[1])
    b_triangle_wave = max(temp1, self.bits_triwave[0])

    f_nco_out_target = 2**b_triangle_wave * f_ss_mod_target
    fcw = np.round((f_nco_out_target / f_to_nco) * 2**self.bits_ss_nco).astype(int)
    f_ss_mod_actual = f_to_nco * fcw / 2**self.bits_ss_nco / 2**b_triangle_wave

    n_fbk_nominal = f_pll_target / f_pfd
    n_step = np.round(n_fbk_nominal * ss_p2p_frac_deviation_target / 2**(b_triangle_wave - 1)
                      * 2**self.bits_fractional).astype(int)
    n_step = min(n_step, 2**self.bits_ss_step - 1)

    return b_triangle_wave, fcw, f_ss_mod_actual, n_step, n_ss_prescaler
def ReadBPLASMA(file_name, BNORM, Ns):
    # Read the BPLASMA output file from MARS-F
    # Return BM1, BM2, BM3
    BPLASMA = num.loadtxt(open(file_name))

    # Cast the header entries to int so they can be used as array indices / shapes
    Nm1 = int(BPLASMA[0, 0])
    n = int(num.round(BPLASMA[0, 2]))
    Mm = num.round(BPLASMA[1:Nm1 + 1, 0])
    Mm.resize([len(Mm), 1])

    BM1 = BPLASMA[Nm1 + 1:, 0] + BPLASMA[Nm1 + 1:, 1] * 1j
    BM2 = BPLASMA[Nm1 + 1:, 2] + BPLASMA[Nm1 + 1:, 3] * 1j
    BM3 = BPLASMA[Nm1 + 1:, 4] + BPLASMA[Nm1 + 1:, 5] * 1j

    BM1 = num.reshape(BM1, [Ns, Nm1], order='F')
    BM2 = num.reshape(BM2, [Ns, Nm1], order='F')
    BM3 = num.reshape(BM3, [Ns, Nm1], order='F')

    BM1 = BM1[0:Ns, :] * BNORM
    BM2 = BM2[0:Ns, :] * BNORM
    BM3 = BM3[0:Ns, :] * BNORM

    # NEED TO KNOW WHY THIS SECTION IS INCLUDED - to do with half grid???!!
    # BM2[1:,:] = BM2[0:-1,:]  Needed to comment out to compare with RZPlot3
    # BM3[1:,:] = BM3[0:-1,:]
    return BM1, BM2, BM3, Mm
def _paje_base(age, br_pf, isol, biact, smic55, _P, _option={'age': ENFS, 'smic55': ENFS}):
    '''
    Prestation d'accueil du jeune enfant (PAJE) - base allowance
    '''
    # TODO: PAJE amounts can be cumulated if and only if there are multiple births
    # TODO: in theory, year n-2 income should be compared with the year n-2 bmaf to
    # determine eligibility against cf_seuil. Current-year income would need to be
    # deflated to take this into account.
    P = _P.fam
    bmaf = P.af.bmaf
    bmaf2 = P.af.bmaf_n_2
    base = round(P.paje.base.taux * bmaf, 2)
    base2 = round(P.paje.base.taux * bmaf2, 2)
    # The base allowance is paid until the last day of the calendar month preceding
    # the one in which the child turns 3.
    nbenf = nb_enf(age, smic55, 0, P.paje.base.age - 1)
    plaf_tx = (nbenf > 0) + P.paje.base.plaf_tx1 * min_(nbenf, 2) + P.paje.base.plaf_tx2 * max_(nbenf - 2, 0)
    majo = isol | biact
    plaf = P.paje.base.plaf * plaf_tx + (plaf_tx > 0) * P.paje.base.plaf_maj * majo
    plaf2 = plaf + 12 * base2
    # TODO: check the differential nature of the PAJE and its plaf2 ceiling
    paje_base = (nbenf > 0) * ((br_pf < plaf) * base + (br_pf >= plaf) * max_(plaf2 - br_pf, 0) / 12)
    # not cumulable with the CF, see Paje_CumulCf
    return 12 * paje_base  # annualised
def _apje(br_pf, age, smic55, isol, biact, _P, _option={'age': ENFS, 'smic55': ENFS}):
    '''
    Allocation pour jeune enfant (APJE) - young child allowance
    '''
    # TODO: short APJE, see the ERF 2006 documentation
    P = _P.fam
    nbenf = nb_enf(age, smic55, 0, P.apje.age - 1)
    bmaf = P.af.bmaf
    bmaf_n_2 = P.af.bmaf_n_2
    base = round(P.apje.taux * bmaf, 2)
    base2 = round(P.apje.taux * bmaf_n_2, 2)

    plaf_tx = (nbenf > 0) + P.apje.plaf_tx1 * min_(nbenf, 2) + P.apje.plaf_tx2 * max_(nbenf - 2, 0)
    majo = isol | biact
    plaf = P.apje.plaf * plaf_tx + P.apje.plaf_maj * majo
    plaf2 = plaf + 12 * base2

    apje = (nbenf >= 1) * ((br_pf <= plaf) * base + (br_pf > plaf) * max_(plaf2 - br_pf, 0) / 12.0)

    # To qualify for this allowance, all children in the household must have been born,
    # adopted, or taken in with a view to adoption before 1 January 2004, and at least
    # one of them must be under 3. The allowance is paid from the 5th month of pregnancy
    # until the month preceding the child's 3rd birthday.

    # The APJE cannot be combined with the APE or the CF:
    # - the parental education allowance (APE), except for pregnant women; in that case
    #   the APJE is paid from the 5th month of pregnancy until the child's birth.
    # - the CF.
    return 12 * apje  # annualised
def kldiv_cs_model(prediction, fm):
    """
    Computes Chao-Shen corrected KL-divergence between prediction
    and fdm made from fixations in fm.

    Parameters :
        prediction : np.ndarray
            a fixation density map
        fm : FixMat object
    """
    # compute histogram of fixations needed for ChaoShen corrected kl-div
    # image category must exist (>-1) and image_size must be non-empty
    assert(len(fm.image_size) == 2 and (fm.image_size[0] > 0) and (fm.image_size[1] > 0))
    assert(-1 not in fm.category)
    # check whether fixmat contains fixations
    if len(fm.x) == 0:
        return np.nan
    (scale_factor, _) = calc_resize_factor(prediction, fm.image_size)
    # this specifies left edges of the histogram bins, i.e. fixations between
    # ]0 binedge[0]] are included. --> fixations are ceiled
    e_y = np.arange(0, np.round(scale_factor * fm.image_size[0] + 1))
    e_x = np.arange(0, np.round(scale_factor * fm.image_size[1] + 1))
    # column_stack builds the (N, 2) sample array; the original np.array(zip(...))
    # only produced this shape under Python 2
    samples = np.column_stack((scale_factor * fm.y, scale_factor * fm.x))
    (fdm, _) = np.histogramdd(samples, (e_y, e_x))

    # compute ChaoShen corrected kl-div
    q = np.array(prediction, copy=True)
    q[q == 0] = np.finfo(q.dtype).eps
    q /= np.sum(q)
    (H, pa, la) = chao_shen(fdm)
    q = q[fdm > 0]
    cross_entropy = -np.sum((pa * np.log2(q)) / la)
    return (cross_entropy - H)
def _af_majo(age, smic55, af_nbenf, _P, _option={'age': ENFS, 'smic55': ENFS}):
    '''
    Allocations familiales (family allowances) - age-based supplement
    'fam'
    '''
    # TODO: effective date of the new supplement
    # (children born after "1997-04-30")
    bmaf = _P.fam.af.bmaf
    P_af = _P.fam.af
    P = _P.fam.af.maj_age
    af_maj1 = round(bmaf * P.taux1, 2)
    af_maj2 = round(bmaf * P.taux2, 2)

    ageaine = age_aine(age, smic55, P_af.age1, P_af.age2)

    def age_sf_aine(age, ag1, ag2, ageaine):
        dum = (ag1 <= ageaine) & (ageaine <= ag2)
        return nb_enf(age, smic55, ag1, ag2) - dum * 1

    nbenf_maj1 = ((af_nbenf == 2) * age_sf_aine(age, P.age1, P.age2 - 1, ageaine)
                  + nb_enf(age, smic55, P.age1, P.age2 - 1) * (af_nbenf >= 3))
    nbenf_maj2 = ((af_nbenf == 2) * age_sf_aine(age, P.age2, P_af.age2, ageaine)
                  + nb_enf(age, smic55, P.age2, P_af.age2) * (af_nbenf >= 3))

    af_majo = nbenf_maj1 * af_maj1 + nbenf_maj2 * af_maj2

    return 12 * af_majo  # annualised
def get_facet_values(facet, ra, dec, root="facet", default=0): """ Extract the value from a fits facet file """ import numpy as np from astropy.io import fits from astropy.wcs import WCS # TODO: Check astropy version # TODO: Check facet is a fits file with fits.open(facet) as f: shape = f[0].data.shape w = WCS(f[0].header) freq = w.wcs.crval[2] stokes = w.wcs.crval[3] xe, ye, _1, _2 = w.all_world2pix(ra, dec, freq, stokes, 1) x, y = np.round(xe).astype(int), np.round(ye).astype(int) # Dummy value for points out of the fits area x[(x < 0) | (x >= shape[-1])] = -1 y[(y < 0) | (y >= shape[-2])] = -1 data = f[0].data[0,0,:,:] values = data[y, x] # Assign the default value to NaNs and points out of the fits area values[(x == -1) | (y == -1)] = default values[np.isnan(values)] = default #TODO: Flexible format for other data types ? return np.array(["{}_{:.0f}".format(root, val) for val in values])
def test_maskandscale(): t = np.linspace(20, 30, 15) t[3] = 100 tm = np.ma.masked_greater(t, 99) fname = pjoin(TEST_DATA_PATH, 'example_2.nc') with netcdf_file(fname, maskandscale=True) as f: Temp = f.variables['Temperature'] assert_equal(Temp.missing_value, 9999) assert_equal(Temp.add_offset, 20) assert_equal(Temp.scale_factor, np.float32(0.01)) found = Temp[:].compressed() del Temp # Remove ref to mmap, so file can be closed. expected = np.round(tm.compressed(), 2) assert_allclose(found, expected) with in_tempdir(): newfname = 'ms.nc' f = netcdf_file(newfname, 'w', maskandscale=True) f.createDimension('Temperature', len(tm)) temp = f.createVariable('Temperature', 'i', ('Temperature',)) temp.missing_value = 9999 temp.scale_factor = 0.01 temp.add_offset = 20 temp[:] = tm f.close() with netcdf_file(newfname, maskandscale=True) as f: Temp = f.variables['Temperature'] assert_equal(Temp.missing_value, 9999) assert_equal(Temp.add_offset, 20) assert_equal(Temp.scale_factor, np.float32(0.01)) expected = np.round(tm.compressed(), 2) found = Temp[:].compressed() del Temp assert_allclose(found, expected)
def fitting(d0, d1):
    idx_list = []
    pos_list = []
    for tp in ['beta', 'sw']:
        e = 0
        for net, sl in zip(['Net_0', 'Net_1'], [slice(2, 4), slice(0, 4)]):
            z = d0[tp][net]['mean_rates'][:, sl]
            target = d1[tp][net]['mean_rates'][sl]
            target = numpy.array([target] * z.shape[0])

            # compare by value; "e is 0" relies on CPython int interning
            if isinstance(e, int) and e == 0:
                e = z - target
            else:
                e = numpy.concatenate((z - target, e), axis=1)

        e **= 2
        e = numpy.sqrt(numpy.mean(e, axis=1))
        idx = numpy.argsort(e)

        # idx_list.append(idx)
        # l=[]
        # for i, _id in enumerate(idx_list[-2]):
        #     j=list(idx_list[-1]).index(_id)
        #     l.append([i,j])
        # l=numpy.array(l)
        # pos_list.append(l)
        # e=numpy.mean(l,axis=1)

        idx = numpy.argsort(e)
        # pp(list(l[idx,:]))
        # print idx
        # print e[idx]

        print(tp)
        for _id in idx[:100]:
            print(d0[tp]['Net_0']['ylabels'][_id],
                  d1[tp]['Net_0']['mean_rates'][:],
                  numpy.round(d0[tp]['Net_0']['mean_rates'][_id, :], 1),
                  e[_id])
            print(d0[tp]['Net_1']['ylabels'][_id],
                  d1[tp]['Net_1']['mean_rates'][:],
                  numpy.round(d0[tp]['Net_1']['mean_rates'][_id, :], 1))
def on_draw1(self):
    """
    Switch to 3D mode and clear the buffer. Load the identity matrix,
    rotate everything around the x axis (look up/down), then around the
    y axis (left/right). Then the function that determines the player's
    height above sea level is called, and after that the world is
    translated where it needs to go. The order of the matrices MATTERS.
    A LOT. Then the crate material is selected and the crates are drawn,
    the room-wall material is selected and the room is drawn. Finally the
    renderer is switched back to 2D and the FPS counter is drawn.
    """
    self.setup3d()
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
    glLoadIdentity()
    glRotatef(self.player.xrot, 1, 0, 0)
    glRotatef(self.player.yrot, 0, 1, 0)
    glLightfv(GL_LIGHT1, GL_POSITION, vec(0, 1, -1))
    glTranslatef(-self.player.xpos, -self.player.ypos - self.player.height, self.player.zpos)
    # glMaterialfv(GL_FRONT_AND_BACK, GL_EMISSION, vec(1,0.3,0,0))
    # glMaterialfv(GL_FRONT_AND_BACK, GL_EMISSION, vec(0,0,0,0))
    glPointSize(20)
    glColor3f(0.5, 0, 0.2)
    glMaterialfv(GL_FRONT_AND_BACK, GL_DIFFUSE, vec(0.75, 0.52, 0.15, 0))
    self.batch.draw()
    glColor3f(0, 0, 0)
    glEnable(GL_TEXTURE_2D)
    glBindTexture(self.boxtexture.target, self.boxtexture.id)
    self.batch_box.draw()
    glDisable(GL_TEXTURE_2D)
    glColor3f(0.5, 0, 0.2)
    model.draw()
    # glTranslatef(0,-20,0)
    # robot[self.robot_fr].draw()
    # glTranslatef(-5,0,0)
    # robot[self.robot_fr].draw()
    # glTranslatef(10,0,0)
    # robot[self.robot_fr].draw()
    # glTranslatef(-15,0,0)
    # robot[self.robot_fr].draw()
    # glTranslatef(20,0,0)
    # robot[self.robot_fr].draw()
    # glMaterialfv(GL_FRONT_AND_BACK, GL_EMISSION, vec(0,0.3,0,0))
    self.setup2d()
    # coordinate labels, rounded to three decimal places and kept on one line so they fit on screen
    pltxt = ("X: " + str(np.round(self.player.xpos, 3))
             + " Y: " + str(np.round(self.player.ypos, 3))
             + " Z: " + str(np.round(self.player.zpos, 3)))
    self.coords.text = pltxt
    self.coords.y = self.height - 30
    plrottxt = ("Xrot: " + str(np.round(self.player.xrot, 3))
                + " Yrot: " + str(np.round(self.player.yrot, 3))
                + " Zrot: " + str(np.round(self.player.zrot, 3)))
    self.rot.text = plrottxt
    self.rot.y = self.height - 50
    self.pljfw.text = self.player.jumping
    self.pljfw.y = self.height - 70
    self.times.text = str(np.round(self.time, 3))
    self.times.y = self.height - 90
    self.times.draw()
    self.pljfw.draw()
    self.coords.draw()
    self.rot.draw()
    self.fps.draw()
    self.picspr.draw()
def performFit(self):
    """
    Fit the distribution with a triple Gaussian function and validate via a
    Kolmogorov-Smirnov test.
    """
    self.fill = False
    self.x0 = [len(self.Data)/2., 0.0, numpy.std(self.Data),
               len(self.Data)/3., 0.0, numpy.std(self.Data)*2,
               len(self.Data)/4., 0.0, numpy.std(self.Data)*0.5]
    self.popt, pcov = scipy.optimize.curve_fit(self.triple, self.hists.bin_centers, self.hist,
                                               p0=self.x0, sigma=None, absolute_sigma=False)
    print("in", self.x0)
    print("out", self.popt)
    sigmaw = self.getSigmaW()
    X = numpy.linspace(self.hists.bin_centers[0], self.hists.bin_centers[-1], 1000)
    fithist = numpy.array([self.triple(x, *self.popt) for x in X])
    # Kolmogorov-Smirnov test
    ks = scipy.stats.ks_2samp(self.hist, fithist)
    props = dict(boxstyle='round', edgecolor='gray', facecolor='white', linewidth=0.1, alpha=0.5)
    self.axis.text(0.6, 0.5, r'$KS-test: p=' + str(numpy.round(ks[1], 3)) + '$', fontsize=20, bbox=props,
                   verticalalignment='top', horizontalalignment='left', transform=self.axis.transAxes)
    self.axis.text(0.6, 0.6, r'$<\Delta t> =' + str(numpy.round(sigmaw, 3)) + 'ps$', fontsize=20, bbox=props,
                   verticalalignment='top', horizontalalignment='left', transform=self.axis.transAxes)
    self.set_plot_options(plot_kwargs={'marker': ' ', 'linestyle': '-'})
    p = self._plot_datapoints(self.axis, X, fithist, xerr=None, yerr=None)
    self.plots.append(p)
    return self
def get_histogram(series, bins, bins_decimals=0, bins_is_percent=False, block_count=100):
    """Creates a text-based histogram.

    Args:
      series: pandas.Series of numeric values.
      bins: List of boundaries between bins in ascending order.
      bins_decimals: Number of decimals to use for bins in format string.
      bins_is_percent: Whether to print a '%' character for bins.
      block_count: Total number of block characters in histogram.
    """
    histogram = ''
    buckets = series.groupby(pd.cut(series, bins)).count()
    scaled_bins = 100 * bins if bins_is_percent else bins

    # Find the max string length for an individual bin value so that right
    # alignment works properly.
    max_bin_value_len = len(str(int(np.round(max(abs(scaled_bins)))))) + (
        (bins_decimals + 1) if bins_decimals > 0 else 0) + (
            1 if min(scaled_bins) < 0 else 0)

    format_str = ' '.join(['{:' + str(max_bin_value_len) + '.' + str(
        bins_decimals) + ('f}%' if bins_is_percent else 'f}')] * 2) + (
            ' {:<' + str(len(str(buckets.max()))) + '} {}\n')

    for i in range(buckets.size):
        # Due to rounding, the exact number of blocks may vary. The repeat count
        # must be cast to int: multiplying a list by a float raises a TypeError.
        histogram += format_str.format(
            scaled_bins[i], scaled_bins[i + 1], buckets.iloc[i],
            ''.join(['*'] * int(np.round(
                block_count * buckets.iloc[i] / series.size))))
    return histogram
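# Hypothetical usage sketch for get_histogram above, assuming a numeric pandas Series;
# the bin edges, sample size, and decimals are illustrative values, not from the source.
import numpy as np
import pandas as pd

series = pd.Series(np.random.default_rng(0).normal(0.0, 1.0, 1000))
bins = np.array([-3.0, -1.0, 0.0, 1.0, 3.0])
print(get_histogram(series, bins, bins_decimals=1))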
def test_hemisphere_subdivide(): def flip(vertices): x, y, z = vertices.T f = (z < 0) | ((z == 0) & (y < 0)) | ((z == 0) & (y == 0) & (x < 0)) return 1 - 2*f[:, None] decimals = 6 # Test HemiSphere.subdivide # Create a hemisphere by dividing a hemi-icosahedron hemi1 = HemiSphere.from_sphere(unit_icosahedron).subdivide(4) vertices1 = np.round(hemi1.vertices, decimals) vertices1 *= flip(vertices1) order = np.lexsort(vertices1.T) vertices1 = vertices1[order] # Create a hemisphere from a subdivided sphere sphere = unit_icosahedron.subdivide(4) hemi2 = HemiSphere.from_sphere(sphere) vertices2 = np.round(hemi2.vertices, decimals) vertices2 *= flip(vertices2) order = np.lexsort(vertices2.T) vertices2 = vertices2[order] # The two hemispheres should have the same vertices up to their order nt.assert_array_equal(vertices1, vertices2) # Create a hemisphere from vertices hemi3 = HemiSphere(xyz=hemi1.vertices) nt.assert_array_equal(hemi1.faces, hemi3.faces) nt.assert_array_equal(hemi1.edges, hemi3.edges)
def on_press(event):
    try:
        self.lasttime = self.curtime
    except AttributeError:
        self.lasttime = 0
    self.curtime = time.time()
    if np.round(self.curtime, 2) != np.round(self.lasttime, 2):
        tdiff = self.curtime - self.lasttime
        print("time diff", tdiff)
        print("you pressed", event.button, event.xdata, event.ydata)
        if tdiff < 0.25:
            tdiff = 0
            x1, x2 = self.axes.get_xlim()
            y1, y2 = self.axes.get_ylim()
            print(x1, x2, y1, y2)
            # ~ self.axes.set_xlim(x1/2,x2/2)
            # ~ self.axes.set_ylim(y1/2,y2/2)
            # self.axes.set_xlim(event.xdata-(x2-x1)/2, event.xdata+(x2-x1)/2)
            if event.button == 1:
                # zoom in
                self.axes.set_xlim(event.xdata - (x2 - x1) / 4, event.xdata + (x2 - x1) / 4)
                self.axes.set_ylim(event.ydata - (y2 - y1) / 4, event.ydata + (y2 - y1) / 4)
            if event.button == 3:
                # zoom out
                self.axes.set_xlim(event.xdata - (x2 - x1) * 1, event.xdata + (x2 - x1) * 1)
                self.axes.set_ylim(event.ydata - (y2 - y1) * 1, event.ydata + (y2 - y1) * 1)
            # self.axes.set_xlim(np.mean(event.xdata,x1), np.mean(event.xdata,x2))
            # self.axes.set_ylim(np.mean(event.ydata,y1), np.mean(event.ydata,y2))
            self.canvas.draw()
def Mie_ab(m,x): # http://pymiescatt.readthedocs.io/en/latest/forward.html#Mie_ab mx = m*x nmax = np.round(2+x+4*(x**(1/3))) nmx = np.round(max(nmax,np.abs(mx))+16) n = np.arange(1,nmax+1) nu = n + 0.5 sx = np.sqrt(0.5*np.pi*x) px = sx*jv(nu,x) p1x = np.append(np.sin(x), px[0:int(nmax)-1]) chx = -sx*yv(nu,x) ch1x = np.append(np.cos(x), chx[0:int(nmax)-1]) gsx = px-(0+1j)*chx gs1x = p1x-(0+1j)*ch1x # B&H Equation 4.89 Dn = np.zeros(int(nmx),dtype=complex) for i in range(int(nmx)-1,1,-1): Dn[i-1] = (i/mx)-(1/(Dn[i]+i/mx)) D = Dn[1:int(nmax)+1] # Dn(mx), drop terms beyond nMax da = D/m+n/x db = m*D+n/x an = (da*px-p1x)/(da*gsx-gs1x) bn = (db*px-p1x)/(db*gsx-gs1x) return an, bn
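# Hedged usage sketch for Mie_ab above. It assumes `import numpy as np` and
# `from scipy.special import jv, yv` are in scope, as the function body requires;
# the refractive index and size parameter below are illustrative values only.
import numpy as np
from scipy.special import jv, yv

m = 1.5 + 0.01j   # illustrative complex refractive index
x = 2.0           # illustrative size parameter
an, bn = Mie_ab(m, x)
print(len(an), np.round(an[:3], 4))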
def imageCoCenter(self, inst, algo): x1, y1, tmp = getCenterAndR_ef(self.image) if algo.debugLevel >= 3: print('imageCoCenter: (x1,y1)=(%8.2f,%8.2f)\n' % (x1, y1)) stampCenterx1 = inst.sensorSamples / 2. + 0.5 stampCentery1 = inst.sensorSamples / 2. + 0.5 radialShift = 3.5 * algo.upReso * \ (inst.offset / 1e-3) * (10e-6 / inst.pixelSize) radialShift = radialShift * self.fldr / 1.75 if (self.fldr > 1.75): radialShift = 0 if self.fldr != 0: I1c = self.fieldX / self.fldr I1s = self.fieldY / self.fldr else: I1c = 0 I1s = 0 stampCenterx1 = stampCenterx1 + radialShift * I1c stampCentery1 = stampCentery1 + radialShift * I1s self.image = np.roll(self.image, int( np.round(stampCentery1 - y1)), axis=0) self.image = np.roll(self.image, int( np.round(stampCenterx1 - x1)), axis=1)
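# Minimal sketch of the recentring step above: np.roll shifts the stamp by the rounded
# offset between the desired centre and the measured centroid. The image and the
# centre/centroid values here are illustrative, not from the instrument configuration.
import numpy as np

image = np.zeros((8, 8))
image[2, 3] = 1.0                      # bright pixel at (row=2, col=3)
target_row, target_col = 4.0, 4.0      # desired stamp centre
row_c, col_c = 2.0, 3.0                # measured centroid
image = np.roll(image, int(np.round(target_row - row_c)), axis=0)
image = np.roll(image, int(np.round(target_col - col_c)), axis=1)
print(np.argwhere(image == 1.0))       # -> [[4 4]]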
def _scaling_fun(arr, scale): """ scales and rounds -- does it all in place. """ arr *= scale np.round(arr, out=arr) return arr
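# Usage sketch: because _scaling_fun works in place (np.round(arr, out=arr)), the
# caller's array is modified and also returned. The example values are illustrative.
import numpy as np

a = np.array([1.24, 2.56, 3.91])
b = _scaling_fun(a, 10.0)
print(a)          # [12. 26. 39.] -- rounded in place
print(b is a)     # True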
def query_form(filename="merged_table.ipac"): table = Table.read(os.path.join(app.config['DATABASE_FOLDER'], filename), format='ascii.ipac') tolerance=1.1 min_values=[np.round(min(table['SurfaceDensity'])/tolerance,4),np.round(min(table['VelocityDispersion'])/tolerance,4),np.round(min(table['Radius'])/tolerance,4)] max_values=[np.round(max(table['SurfaceDensity'])*tolerance,1),np.round(max(table['VelocityDispersion'])*tolerance,1),np.round(max(table['Radius'])*tolerance,1)] usetable = table[use_column_names] best_matches = {difflib.get_close_matches(vcn, usetable.colnames, n=1, cutoff=0.4)[0]: vcn for vcn in use_column_names if any(difflib.get_close_matches(vcn, usetable.colnames, n=1, cutoff=0.4)) } best_column_names = [best_matches[colname] if colname in best_matches else 'Ignore' for colname in usetable.colnames] return render_template("query_form.html", table=table, usetable=usetable, use_units=use_units, filename=filename, use_column_names=use_column_names, best_column_names=best_column_names, min_values=min_values, max_values=max_values )
def _to_dense(self): """ Convert the sparse [onset, duration, amplitude] representation typical of event files to a dense matrix where each row represents a fixed unit of time. """ end = int((self.events['onset'] + self.events['duration']).max()) targ_hz, orig_hz = self.target_hz, self.orig_hz len_ts = end * targ_hz conditions = self.events['condition'].unique().tolist() n_conditions = len(conditions) ts = np.zeros((len_ts, n_conditions)) _events = self.events.copy().reset_index() _events[['onset', 'duration']] = \ _events[['onset', 'duration']] * targ_hz / orig_hz cond_index = [conditions.index(c) for c in _events['condition']] ev_end = np.round(_events['onset'] + _events['duration']).astype(int) onsets = np.round(_events['onset']).astype(int) for i, row in _events.iterrows(): ts[onsets[i]:ev_end[i], cond_index[i]] = row['amplitude'] self.data = pd.DataFrame(ts, columns=conditions) onsets = np.arange(len(ts)) / self.target_hz self.data.insert(0, 'onset', onsets)
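# Standalone sketch of the sparse->dense conversion in _to_dense above, using a
# hypothetical events table (onset/duration in seconds at orig_hz=1, resampled to
# target_hz samples per second). Column names mirror the method; values are assumed.
import numpy as np
import pandas as pd

events = pd.DataFrame({'onset': [0.0, 4.0], 'duration': [2.0, 3.0],
                       'condition': ['A', 'B'], 'amplitude': [1.0, 2.0]})
target_hz, orig_hz = 10, 1
end = int((events['onset'] + events['duration']).max())
conditions = events['condition'].unique().tolist()
ts = np.zeros((end * target_hz, len(conditions)))

scaled = events[['onset', 'duration']] * target_hz / orig_hz
onsets = np.round(scaled['onset']).astype(int)
ends = np.round(scaled['onset'] + scaled['duration']).astype(int)
for i, row in events.iterrows():
    ts[onsets[i]:ends[i], conditions.index(row['condition'])] = row['amplitude']

dense = pd.DataFrame(ts, columns=conditions)
dense.insert(0, 'onset', np.arange(len(ts)) / target_hz)
print(dense.shape)   # (70, 3)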
def whistler_test(SdB, freq, tw, fw): # Define the time step tStep = tw[1] - tw[0] # Initialize arrays highSum = numpy.zeros((len(tw),1)) # Set the minimum length of a whistler whistlerLength = 0.05 / tStep # How large of a window to use for the pre and post whistler energy window = int(numpy.round(1/tStep)) # Estimated length of a whistler, +-innerWindow not used in the mean pre/post energy innerWindow = int(numpy.round(0.1/tStep)) # Threshold is how much larger a whistler needs to be than the nearby noise threshold = 4. # Size of the smoothing filter n = 20 # Select power in frequency range freqRange = numpy.arange(find_closest(fw,freq[0]*1000),find_closest(fw,freq[1]*1000)) band = numpy.sum(SdB[freqRange,:],axis=0) # Smooth the data weights = numpy.repeat(1.0, n) / n band = numpy.convolve(band, weights)[n-1:-(n-1)] # Run through the band except for the first and last window points for center in range(window, len(highSum) - window): bandWindow = band[center - window : center + window] # Normalize the data to the lowest point in the bandWindow bandWindow = bandWindow - numpy.min(bandWindow) # Get the pre and post test point energy prePower = threshold * numpy.mean(bandWindow[:(window - innerWindow)]) postPower = threshold * numpy.mean(bandWindow[(window + innerWindow):]) # Compare the current point to the nearby noise level if bandWindow[window] > prePower and bandWindow[window] > postPower: highSum[center] = highSum[center - 1] + 1 # Check for any sustained signal longer then a whistler length whistlerTest = highSum > whistlerLength # Remove successive triggers (leading edge trigger) for i in range(len(whistlerTest)-1): n = len(whistlerTest) if whistlerTest[n-i-1]: whistlerTest[n-i] = False # Record the trigger times triggerTime = tw[whistlerTest[:,0]] return (triggerTime, freqRange)
def _as_timedelta64_scalar(time, unit=None): unit_args = [unit] if unit else [] flt_unit = unit if unit else 's' # turn 'H:M:S.ms', 'M:S.ms', 'S.ms' into floating point seconds if isinstance(time, string_types):# and ':' in time: time = [float(t) for t in time.lstrip('T').split(':')][::-1] if len(time) > 1 and unit is not None: raise ValueError("When giving time as a string, units are automatic") if len(time) > 3: raise ValueError("Timedelta as string only goes up to hours") t_flt = 0.0 for factor, t in zip([1, 60, 60 * 60], time): t_flt += factor * t time = t_flt flt_unit = 's' # turn floating point time into integer with the correct unit if is_datetime_like(time): time = as_datetime64(time) - as_datetime64(np.timedelta64(0, 's')) elif isinstance(time, (np.timedelta64, timedelta)): time = np.timedelta64(time).astype(_format_unit(unit, base=DELTA_BASE)) elif isinstance(time, (int, float, np.integer, np.floating)): orig_time, orig_flt_unit = time, flt_unit unit_idx = TIME_UNITS.index(flt_unit) while not np.isclose(time, int(np.round(time)), rtol=1e-4, atol=1e-18): if unit_idx <= 0: raise ValueError("Floating point time {0} [{1}] is too precise " "for any time unit?".format(orig_time, orig_flt_unit)) unit_idx -= 1 time *= TIME_SCALE[unit_idx] flt_unit = TIME_UNITS[unit_idx] time = np.timedelta64(int(np.round(time)), flt_unit) unit, unit_args = flt_unit, [flt_unit] return np.timedelta64(time, *unit_args)
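# Hedged sketch of the 'H:M:S.ms' parsing step above: the reversed split pairs each
# field with a factor of 1, 60, or 3600 seconds before the timedelta64 is built.
# The input string is an illustrative example.
import numpy as np

time_str = "1:02:03.5"
fields = [float(t) for t in time_str.lstrip('T').split(':')][::-1]
seconds = sum(factor * t for factor, t in zip([1, 60, 60 * 60], fields))
print(seconds)                                               # 3723.5
print(np.timedelta64(int(np.round(seconds * 1000)), 'ms'))   # 3723500 milliseconds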
def function(self, simulation, period): period = period.start.offset('first-of', 'month').period('month') age_holder = simulation.compute('age', period) smic55_holder = simulation.compute('smic55', period) af_nbenf = simulation.calculate('af_nbenf', period) P = simulation.legislation_at(period.start).fam.af age = self.split_by_roles(age_holder, roles = ENFS) smic55 = self.split_by_roles(smic55_holder, roles = ENFS) # TODO: Date d'entrée en vigueur de la nouvelle majoration # enfants nés après le "1997-04-30" bmaf = P.bmaf P_maj = P.maj_age af_maj1 = round(bmaf * P_maj.taux1, 2) af_maj2 = round(bmaf * P_maj.taux2, 2) ageaine = age_aine(age, smic55, P.age1, P.age2) def age_sf_aine(age, ag1, ag2, ageaine): dum = (ag1 <= ageaine) & (ageaine <= ag2) return nb_enf(age, smic55, ag1, ag2) - dum * 1 nbenf_maj1 = ( (af_nbenf == 2) * age_sf_aine(age, P_maj.age1, P_maj.age2 - 1, ageaine) + nb_enf(age, smic55, P_maj.age1, P_maj.age2 - 1) * (af_nbenf >= 3) ) nbenf_maj2 = ( (af_nbenf == 2) * age_sf_aine(age, P_maj.age2, P.age2, ageaine) + nb_enf(age, smic55, P_maj.age2, P.age2) * (af_nbenf >= 3) ) return period, nbenf_maj1 * af_maj1 + nbenf_maj2 * af_maj2
def coord_list_mapping_pbc(subset, superset, atol=1e-8): """ Gives the index mapping from a subset to a superset. Subset and superset cannot contain duplicate rows Args: subset, superset: List of frac_coords Returns: list of indices such that superset[indices] = subset """ c1 = np.array(subset) c2 = np.array(superset) diff = c1[:, None, :] - c2[None, :, :] diff -= np.round(diff) inds = np.where(np.all(np.abs(diff) < atol, axis=2))[1] # verify result (its easier to check validity of the result than # the validity of inputs) test = c2[inds] - c1 test -= np.round(test) if not np.allclose(test, 0): if not is_coord_subset_pbc(subset, superset): raise ValueError("subset is not a subset of superset") if not test.shape == c1.shape: raise ValueError("Something wrong with the inputs, likely duplicates " "in superset") return inds
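# Small usage sketch for coord_list_mapping_pbc: fractional coordinates that differ only
# by a lattice translation map onto each other because diff -= np.round(diff) wraps each
# component back toward zero. The coordinates below are illustrative.
import numpy as np

superset = [[0.00, 0.00, 0.00], [0.25, 0.50, 0.75]]
subset = [[0.25, -0.50, 1.75]]   # same site as superset[1], shifted by a lattice vector
print(coord_list_mapping_pbc(subset, superset))   # -> [1]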
def zigzag(a=.2, b=.6, c=.2, s=.2, p=4, N=100, z0=None, z1=None):
    """
    z = zigzag(...)

    Inputs: (optional)
      a - float - length before zigzag
      b - float - length of zigzag
      c - float - length after zigzag
      s - float - size of zigzags
      p - int - number of zigzags
      N - int - number of samples
      z0,z1 - complex - endpoints for zigzag; neither or both must be given

    Outputs:
      z - complex N vector - x + 1.j*y pairs along zigzag
          x[0] = y[0] = 0; x[N-1] = a+b+c; y[N-1] = 0
    """
    x = np.linspace(0., a + b + c, N)
    y = 0. * x
    # slice indices must be ints; np.round returns floats
    mb = int(np.round(N * a / (a + b + c)))
    Mb = int(np.round(N * (a + b) / (a + b + c)))
    y[mb:Mb] = s * (np.mod(np.linspace(0., p - .01, Mb - mb), 1.) - 0.5)
    z = x + 1.j * y
    if z0 is not None and z1 is not None:
        z_ = z
        z = z0 + (z1 - z0) * z
        # 1/0
    return z
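# Usage sketch for zigzag(); with the defaults the docstring's endpoint conditions
# x[0] = y[0] = 0 and x[N-1] = a+b+c hold, and passing z0/z1 rescales the curve onto
# a given segment. The segment endpoints below are illustrative.
import numpy as np

z = zigzag(N=200)
print(np.round(z[0], 6), np.round(z[-1].real, 6))    # 0j and 1.0 (a+b+c with defaults)
z_mapped = zigzag(z0=0.0, z1=1.0 + 1.0j)             # rescaled onto a given segment
print(np.round(z_mapped[-1], 6))                     # (1+1j)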
def test_distance(self): """ """ self.assertRaises(AssertionError, distance, '', 0, 0, 0) self.assertRaises(AssertionError, distance, 0, '', 0, 0) self.assertRaises(AssertionError, distance, 0, 0, '', 0) self.assertRaises(AssertionError, distance, 0, 0, 0, '') self.assertRaises(AssertionError, distance, 0, np.array([0]), 0, 0) self.assertRaises(AssertionError, distance, 0, 0, 0, np.array([0])) self.assertRaises( AssertionError, distance, np.array([0, 1]), np.array([0]), 0, 0) self.assertRaises( AssertionError, distance, 0, 0, np.array([0, 1]), np.array([0])) self.assertEqual( round(distance(-1.8494, 53.1472, 0.1406, 52.2044), 4), 170.2563) self.assertEqual( round(distance(-86.67, 36.12, -118.40, 33.94, er=6372.8), 12), 2887.259950607111 ) r = 111.194926645 a = np.array([ [[[r*(5-1),r*(6-1)],[r*(7-1),r*(8-1)]], [[r*(5-2),r*(6-2)],[r*(7-2),r*(8-2)]]], [[[r*(5-3),r*(6-3)],[r*(7-3),r*(8-3)]], [[r*(5-4),r*(6-4)],[r*(7-4),r*(8-4)]]], ]) x = distance( np.zeros((2, 2)), np.array([[1, 2], [3, 4]]), np.zeros((2, 2)), np.array([[5, 6], [7, 8]]), ) self.assertTrue(np.array_equal(np.round(x, 7), np.round(a, 7)))
cm = confusion_matrix( y_true=Ytest_integer, y_pred=Ypredict_integer, sample_weight=None) # TODO:change to test set for final model pd.DataFrame(cm, columns=categories, index=categories).to_csv(path_to_dir + 'cm.csv') index_min = df_clas_rep['f1_score'].idxmin() index_max = df_clas_rep['f1_score'].idxmax() # write log file # ================================================================================================================ with open(path_to_dir + 'log.txt', 'a+') as f: f.write('\n\n') model.summary(print_fn=lambda x: f.write(x + '\n')) f.write('\n\n') for i, name in enumerate(model.metrics_names): f.write(name + ': ' + str(np.round(accuracy[i], 6)) + '\n') f.write('\n\n') f.write('Classification Report: \n' + df_clas_rep_latex) f.write('Lowest f1: ' + str(df_clas_rep['class'][index_min]) + ' ' + str(df_clas_rep['f1_score'][index_min]) + '\n') f.write('Highest f1: ' + str(df_clas_rep['class'][index_max]) + ' ' + str(df_clas_rep['f1_score'][index_max]) + '\n') f.write('\n Complete configuration: \n\n') pprint(model.get_config(), stream=f) # Save output_layers only for test set # ================================================================================================================ conv_1 = get_output(model, 'conv_1', layer_2d_or_1d='2d', Xtest=Xtest_encoded) pool_1 = get_output(model, 'pool_1', layer_2d_or_1d='2d', Xtest=Xtest_encoded) conv_2 = get_output(model, 'conv_2', layer_2d_or_1d='2d', Xtest=Xtest_encoded) pool_2 = get_output(model, 'pool_2', layer_2d_or_1d='2d', Xtest=Xtest_encoded)
vectorizer = TfidfVectorizer(min_df=10) vectorizer = vectorizer.fit(X) X_train_tfidf = vectorizer.transform(X_train) X_test_tfidf = vectorizer.transform(X_test) # Dimensionality Reduction lda = LinearDiscriminantAnalysis(n_components=10) lda = lda.fit(X_train_tfidf.toarray(), y_train) X_train_lda = lda.transform(X_train_tfidf.toarray()) X_test_lda = lda.transform(X_test_tfidf.toarray()) # Machine Learning clf = LogisticRegression(max_iter=10000).fit(X_train_lda, y_train) # Results X_train_pred = [np.round(i) for i in clf.predict(X_train_lda)] X_test_pred = [np.round(i) for i in clf.predict(X_test_lda)] print(classification_report(y_train, X_train_pred)) print(classification_report(y_test, X_test_pred)) #%% Alternate Result Printing results = pd.DataFrame(zip(y_test, X_test_pred)) results[2] = results[1] - results[0] results = pd.DataFrame(dict(Counter(results[2])).items()).sort_values(1) results[0] = results[0].apply(np.abs) results = results.groupby(0).sum() sum = results.sum().item() results["diff"] = results[1] / sum results = results.reset_index() results["diff"]
def sweep_table_generator(GATE_NAME, V_L, V_H, VSTEP, output_directory): if (GATE_NAME == "INV"): outfile = open(output_directory, "w") outfile.write("* sweep file \n\n") #### generate DC sweep data table #### outfile.write(".data sweep_DC\n") outfile.write("+ code sweep_Vin sweep_Vout \n") r = np.linspace(V_L, V_H, int((V_H - V_L) / VSTEP) + 1) r = [str(np.round(k, 2)) for k in r] code = 1 for Vin in r: for Vout in r: outfile.write(str(code) + " " + Vin + " " + Vout + "\n") code += 1 outfile.write(".enddata\n\n") #### CM table ######################## outfile.write(".data sweep_CM\n") outfile.write("+ code sweep_Vout \n") r = np.linspace(V_L, V_H, int((V_H - V_L) / VSTEP) + 1) r = [str(np.round(k, 2)) for k in r] code = 1 for Vout in r: outfile.write(str(code) + " " + Vout + "\n") code += 1 outfile.write(".enddata\n\n") #### CO table ######################## outfile.write(".data sweep_CO\n") outfile.write("+ code sweep_Vin \n") r = np.linspace(V_L, V_H, int((V_H - V_L) / VSTEP) + 1) r = [str(np.round(k, 2)) for k in r] code = 1 for Vin in r: outfile.write(str(code) + " " + Vin + "\n") code += 1 outfile.write(".enddata\n\n") outfile.close() elif (GATE_NAME == "NAND2") or (GATE_NAME == "NOR2"): outfile = open(output_directory, "w") outfile.write("* sweep file \n\n") #### DC table ######################## outfile.write(".data sweep_DC\n") outfile.write("+ code sweep_VA sweep_VB sweep_Vn1 sweep_Vout \n") r = np.linspace(V_L, V_H, int((V_H - V_L) / VSTEP) + 1) r = [str(np.round(k, 2)) for k in r] code = 1 for VA in r: for VB in r: for Vn1 in r: for Vout in r: outfile.write(str(code) + " " + VA + " " + VB + " " + Vn1 + " " + Vout + "\n") code += 1 outfile.write(".enddata\n\n") #### CM_A table ######################## outfile.write(".data sweep_CM_A\n") outfile.write("+ code sweep_VB sweep_Vn1 sweep_Vout \n") r = np.linspace(V_L, V_H, int((V_H - V_L) / VSTEP) + 1) r = [str(np.round(k, 2)) for k in r] code = 1 for VB in r: for Vn1 in r: for Vout in r: outfile.write(str(code) + " " + VB + " " + Vn1 + " " + Vout + "\n") code += 1 outfile.write(".enddata\n\n") #### CM_B table ######################## outfile.write(".data sweep_CM_B\n") outfile.write("+ code sweep_VA sweep_Vn1 sweep_Vout \n") r = np.linspace(V_L, V_H, int((V_H - V_L) / VSTEP) + 1) r = [str(np.round(k, 2)) for k in r] code = 1 for VA in r: for Vn1 in r: for Vout in r: outfile.write(str(code) + " " + VA + " " + Vn1 + " " + Vout + "\n") code += 1 outfile.write(".enddata\n\n") #### CO table ######################## outfile.write(".data sweep_CO\n") outfile.write("+ code sweep_VA sweep_VB sweep_Vn1 \n") r = np.linspace(V_L, V_H, int((V_H - V_L) / VSTEP) + 1) r = [str(np.round(k, 2)) for k in r] code = 1 for VA in r: for VB in r: for Vn1 in r: outfile.write(str(code) + " " + VA + " " + VB + " " + Vn1 + "\n") code += 1 outfile.write(".enddata\n\n") #### CINT table ######################## outfile.write(".data sweep_CINT\n") outfile.write("+ code sweep_VA sweep_VB sweep_Vout \n") r = np.linspace(V_L, V_H, int((V_H - V_L) / VSTEP) + 1) r = [str(np.round(k, 2)) for k in r] code = 1 for VA in r: for VB in r: for Vout in r: outfile.write(str(code) + " " + VA + " " + VB + " " + Vout + "\n") code += 1 outfile.write(".enddata\n\n") outfile.close() else: print "Cannot generate sweep table, Invalid or not yet implemented gate name."
def ida(self, IMpath, dursPath, res_time=10.): """ Postprocess IDA results :param IMpath: str Path to IM file :param dursPath: str Path to the file containing durations of each record :param res_time: float Free vibrations time added to the end of the record :return: """ ''' Current structure of the single pickle file (a very large file I must say ) Record -> Runs -> EDP type (0=PFA, 1=Displacement, 2=PSD) -> -> array of shape [n x Record length] where n is nst for PSD, and nst + 1 for PFA and Displacement ''' """ The postprocessed file will have the following structure (looks a bit uncomfy though): 1. IDA; 2. summary_results (2 keys) 1.1 ground motions (n_rec keys) -> IM, ISDR, PFA, RISDR (4 keys) -> each being a list of size of number of runs 2.1 ground motions (n_rec keys) -> IM levels (n_runs keys) -> maxFA, maxISDR (2 keys) -> -> number of storeys (nst for maxISDR) /floors (nst+1 for maxFA) keys -> a single value """ # Read the IDA outputs with open(self.path, 'rb') as file: data = pickle.load(file) # Read the IDA IM levels IM = np.genfromtxt(IMpath, delimiter=',') # Read the durations of the records durs = list(pd.read_csv(dursPath, header=None)[0]) # Number of records nrecs = len(data) # Number of runs per each record nruns = len(data[list(data.keys())[0]]) # Initialize some variables im = np.zeros([nrecs, nruns + 1]) idx = np.zeros([nrecs, nruns], dtype='i') mpfa_us = np.full([nrecs, nruns], np.nan) mpsd_us = np.full([nrecs, nruns], np.nan) mrpsd_us = np.full([nrecs, nruns], np.nan) mtdisp_us = np.full([nrecs, nruns + 1], np.nan) mtrx = np.full([nrecs, nruns + 1], np.nan) mpfa = np.zeros([nrecs, nruns + 1]) mpsd = np.zeros([nrecs, nruns + 1]) mtdisp = np.zeros([nrecs, nruns + 1]) # Initialize target dictionary with its first stage res = {'IDA': {}, 'summary_results': {}} resKeys = list(res.keys()) # Loop for each record for rec in range(1, nrecs + 1): print("gm_%s" % rec) # Second stage of the dictionary res[resKeys[0]][rec] = { 'IM': [], 'ISDR': [], 'PFA': [], 'RISDR': [] } res[resKeys[1]][rec] = {} # Add IM values into the results file res[resKeys[0]][rec]["IM"] = IM[rec - 1] # Sort the IM values im[rec - 1, 1:] = np.sort(IM[rec - 1]) idx[rec - 1, :] = np.argsort(IM[rec - 1]) # Third stage of the dictionary for i in im[rec - 1, 1:]: i = str(np.round(i, 2)) res[resKeys[1]][rec][i] = { 'maxFA': {}, 'maxISDR': {}, 'maxRISDR': {} } # Loop over each run for run in range(1, nruns + 1): # Select analysis results of rec and run selection = data[rec - 1][run] # Get PFAs in g pfa = np.amax(abs(selection[0][:, 1:]), axis=1) # IML in g iml = str(np.round(IM[rec - 1][run - 1], 2)) for st in range(len(pfa)): res[resKeys[1]][rec][iml]["maxFA"][st] = pfa[st] mpfa_us[rec - 1, run - 1] = max(pfa) # Get PSDs in % psd = np.amax(abs(selection[2]), axis=1) for st in range(len(psd)): res[resKeys[1]][rec][iml]["maxISDR"][st + 1] = psd[st] mpsd_us[rec - 1, run - 1] = max(psd) # Getting the residual PSDs # Analysis time step dt = (durs[rec - 1] + res_time) / selection[0].shape[1] idxres = int((durs[rec - 1] - res_time) / dt) resDrifts = selection[2][:, idxres:] for st in range(len(psd)): res[resKeys[1]][rec][iml]["maxRISDR"][st + 1] = sum( resDrifts[st]) / len(resDrifts[st]) # Record the peak value of residual drift at each run for each record mrpsd_us[rec - 1, run - 1] = max( np.sum(resDrifts, axis=1) / resDrifts.shape[1]) # Get the top displacement in m top_disp = np.amax(abs(selection[1]), axis=1) mtdisp_us[rec - 1, run - 1] = top_disp[-1] # Sort the results res["IDA"][rec]["PFA"] = mpfa_us[run - 1, :] 
res["IDA"][rec]["ISDR"] = mpsd_us[run - 1, :] res["IDA"][rec]["RISDR"] = mrpsd_us[run - 1, :] # Repopulate nans with max of data # res["IDA"][rec]["RISDR"] = [max(res['IDA'][rec]['RISDR']) if math.isnan(x) else x for # x in res['IDA'][rec]['RISDR']] mpfa[rec - 1, 1:] = mpfa_us[rec - 1, :][idx[rec - 1]] mpsd[rec - 1, 1:] = mpsd_us[rec - 1, :][idx[rec - 1]] mtdisp[rec - 1, 1:] = mtdisp_us[rec - 1, :][idx[rec - 1]] # Fit the splines to the data mtdisp_range = np.linspace(0.01, 1, 200) # Quantile ranges to visualize for the IDAs qtile_range = np.array([0.16, 0.5, 0.84]) im_spl = np.zeros([nrecs, len(mtdisp_range)]) im_spl[:] = np.nan # Get the fitted IDA curves for each record for rec in range(nrecs): interpolator = interp1d(mtdisp[rec], im[rec]) for i in range(len(mtdisp_range)): if mtdisp_range[i] <= max(mtdisp[rec]): im_spl[rec][i] = interpolator(mtdisp_range[i]) if im_spl[rec][i] < im_spl[rec][i - 1]: im_spl[rec][i] = im_spl[rec][i - 1] else: im_spl[rec][i] = im_spl[rec][i - 1] # Get the IDA quantiles im_qtile = np.zeros([len(qtile_range), len(mtdisp_range)]) for q in range(len(qtile_range)): for i in range(len(mtdisp_range)): im_qtile[q][i] = np.quantile(im_spl[:, i], qtile_range[q]) # Creating a dictionary for the spline fits cache = { "im_spl": im_spl, "disp": mtdisp, "im": im, "im_qtile": im_qtile, "mtdisp": mtdisp_range } # Exporting if self.export: self.export_results(self.path.parents[0] / "ida_processed", res, "pickle") self.export_results(self.path.parents[0] / "ida_cache", cache, "pickle") print( "[SUCCESS] Postprocesssing complete. Results have been exported!" ) else: print("[SUCCESS] Postprocesssing complete.") return res, cache
def mean_diff_plot(m1, m2, sd_limit=1.96, ax=None, scatter_kwds=None, mean_line_kwds=None, limit_lines_kwds=None): """ Tukey's Mean Difference Plot. Tukey's Mean Difference Plot (also known as a Bland-Altman plot) is a graphical method to analyze the differences between two methods of measurement. The mean of the measures is plotted against their difference. For more information see https://en.wikipedia.org/wiki/Bland-Altman_plot Parameters ---------- m1, m2: pandas Series or array-like sd_limit : float, default 1.96 The limit of agreements expressed in terms of the standard deviation of the differences. If `md` is the mean of the differences, and `sd` is the standard deviation of those differences, then the limits of agreement that will be plotted will be md - sd_limit * sd, md + sd_limit * sd The default of 1.96 will produce 95% confidence intervals for the means of the differences. If sd_limit = 0, no limits will be plotted, and the ylimit of the plot defaults to 3 standard deviatons on either side of the mean. ax: matplotlib AxesSubplot instance, optional If `ax` is None, then a figure is created. If an axis instance is given, the mean difference plot is drawn on the axis. scatter_kwargs: keywords Options to to style the scatter plot. Accepts any keywords for the matplotlib Axes.scatter plotting method mean_line_kwds: keywords Options to to style the scatter plot. Accepts any keywords for the matplotlib Axes.axhline plotting method limit_lines_kwds: keywords Options to to style the scatter plot. Accepts any keywords for the matplotlib Axes.axhline plotting method Returns ------- fig : matplotlib Figure If `ax` is None, the created figure. Otherwise the figure to which `ax` is connected. References ---------- Bland JM, Altman DG (1986). "Statistical methods for assessing agreement between two methods of clinical measurement" Example -------- Load relevant libraries. >>> import statsmodels.api as sm >>> import numpy as np >>> import matplotlib.pyplot as plt Making a mean difference plot. >>> # Seed the random number generator. >>> # This ensures that the results below are reproducible. >>> np.random.seed(9999) >>> m1 = np.random.random(20) >>> m2 = np.random.random(20) >>> f, ax = plt.subplots(1, figsize = (8,5)) >>> sm.graphics.mean_diff_plot(m1, m2, ax = ax) >>> plt.show() .. plot:: plots/graphics-mean_diff_plot.py """ fig, ax = utils.create_mpl_ax(ax) if len(m1) != len(m2): raise ValueError('m1 does not have the same length as m2.') if sd_limit < 0: raise ValueError('sd_limit ({}) is less than 0.'.format(sd_limit)) means = np.mean([m1, m2], axis=0) diffs = m1 - m2 mean_diff = np.mean(diffs) std_diff = np.std(diffs, axis=0) scatter_kwds = scatter_kwds or {} if 's' not in scatter_kwds: scatter_kwds['s'] = 20 mean_line_kwds = mean_line_kwds or {} limit_lines_kwds = limit_lines_kwds or {} for kwds in [mean_line_kwds, limit_lines_kwds]: if 'color' not in kwds: kwds['color'] = 'gray' if 'linewidth' not in kwds: kwds['linewidth'] = 1 if 'linestyle' not in mean_line_kwds: kwds['linestyle'] = '--' if 'linestyle' not in limit_lines_kwds: kwds['linestyle'] = ':' ax.scatter(means, diffs, **scatter_kwds) # Plot the means against the diffs. ax.axhline(mean_diff, **mean_line_kwds) # draw mean line. # Annotate mean line with mean difference. 
ax.annotate('mean diff:\n{}'.format(np.round(mean_diff, 2)), xy=(0.99, 0.5), horizontalalignment='right', verticalalignment='center', fontsize=14, xycoords='axes fraction') if sd_limit > 0: half_ylim = (1.5 * sd_limit) * std_diff ax.set_ylim(mean_diff - half_ylim, mean_diff + half_ylim) limit_of_agreement = sd_limit * std_diff lower = mean_diff - limit_of_agreement upper = mean_diff + limit_of_agreement for j, lim in enumerate([lower, upper]): ax.axhline(lim, **limit_lines_kwds) ax.annotate('-SD{}: {}'.format(sd_limit, np.round(lower, 2)), xy=(0.99, 0.07), horizontalalignment='right', verticalalignment='bottom', fontsize=14, xycoords='axes fraction') ax.annotate('+SD{}: {}'.format(sd_limit, np.round(upper, 2)), xy=(0.99, 0.92), horizontalalignment='right', fontsize=14, xycoords='axes fraction') elif sd_limit == 0: half_ylim = 3 * std_diff ax.set_ylim(mean_diff - half_ylim, mean_diff + half_ylim) ax.set_ylabel('Difference', fontsize=15) ax.set_xlabel('Means', fontsize=15) ax.tick_params(labelsize=13) fig.tight_layout() return fig
batch_size=BATCH_SIZE, epochs=EPOCHS) # In[15]: score = model.evaluate(x_train, np.array(y_train), verbose=0) print('Train loss:', score[0]) print('Train accuracy:', score[1]) score = model.evaluate(x_test, np.array(y_test), verbose=0) print('Test loss:', score[0]) print('Test accuracy:', score[1]) # In[16]: pred = np.round(model.predict(x_test).reshape(-1)) # Calculate equity.. contracts = 2000.0 commission = 4 / 100000 #df_trade = pd.DataFrame(train_x[train_len:,-1], columns=['return']) df_trade = pd.DataFrame(np.array(data['return'][valid_len + n_features - 1:]), columns=['return']) df_trade['label'] = y_test df_trade['pred'] = pred df_trade['won'] = df_trade['label'] == df_trade['pred'] df_trade['return'] = df_trade['return'].shift(-1) #df_trade['return'] = df_trade['return'].shift(-1) * return_range
if __name__ == '__main__': import sys gt_imdb = [] path = sys.argv[1] #"lala" for item in os.listdir(path): gt_imdb.append(os.path.join(path,item)) print(gt_imdb) test_data = TestLoader(gt_imdb) all_boxes,landmarks = mtcnn_detector.detect_face(test_data) count = 0 for imagepath in gt_imdb: print(imagepath) image = cv2.imread(imagepath) for bbox in all_boxes[count]: cv2.putText(image,str(np.round(bbox[4],2)),(int(bbox[0]),int(bbox[1])),cv2.FONT_HERSHEY_TRIPLEX,1,color=(255,0,255)) cv2.rectangle(image, (int(bbox[0]),int(bbox[1])),(int(bbox[2]),int(bbox[3])),(0,0,255)) for landmark in landmarks[count]: for i in range(len(landmark)//2): cv2.circle(image, (int(landmark[2*i]),int(int(landmark[2*i+1]))), 3, (0,0,255)) count = count + 1 #cv2.imwrite("result_landmark/%d.png" %(count),image) #cv2.imshow("lala",image) #cv2.waitKey(0) cv2.imwrite(str(count) + '.jpg', image) ''' for data in test_data: print type(data)
def create_train_dataloaders_map_preview_adv(loader_type, args_data): """ Create the specific data loaders used for training """ # Sanity check if not loader_type == LoaderMode.TRAIN: print("Required loader-type {} not implemented.".format(loader_type)) raise UserWarning("Requested loaders only implemented for TRAINING.") # Max. #workers per train sample (to limit interference if few samples) max_ratio_workers = 0.02 args_data.min_samp_prob = 1.0 # here this is only used for labeled data do_use_gpu = args_data.cuda # Normalize ratios sum_ratios = float(args_data.ratio_corresponding_data_in_batch \ + args_data.ratio_synth_data_in_batch \ + args_data.ratio_unlabeled_data_in_batch) ratio_corr = args_data.ratio_corresponding_data_in_batch / sum_ratios ratio_synt = args_data.ratio_synth_data_in_batch / sum_ratios print("Creating loader for SYNTHETIC PRE-TRAINING data") loader_pretrain = create_loader_synth_data(loader_type, args_data, do_use_gpu, args_data.num_loader_workers, batch_size=args_data.batch_size, seed=args_data.seed) print("Creating loader for corresponding REAL<->SYNTHETIC data") num_corr = int(np.round(args_data.batch_size * ratio_corr)) # Ensure reasonable number of workers num_workers_corr = min( int(np.round(args_data.num_loader_workers * ratio_corr)), int(np.round(args_data.num_labeled_samples * max_ratio_workers))) # num_workers_corr = max(1, num_workers_corr) loader_corr, ids_train_permuted = create_loader_corresponding_real_synth_data( loader_type, args_data, do_use_gpu, num_workers_corr, batch_size=num_corr) print("Creating loader for separate SYNTHETIC data") num_synth = int(np.round(args_data.batch_size * ratio_synt)) num_workers_synth = int(np.round(args_data.num_loader_workers * ratio_synt)) # num_workers_synth = max(1, num_workers_synth) loader_synth = create_loader_synth_data(loader_type, args_data, do_use_gpu, num_workers_synth, batch_size=num_synth, seed=args_data.seed - 2) print( "Creating loader for view prediction, unlabeled REAL and SYNTHETIC data" ) num_ul_prev = int( np.round((args_data.batch_size - num_corr - num_synth) / 2.0)) num_ul_prev = max(1, num_ul_prev) num_workers_ul_prev = int(np.round((args_data.num_loader_workers - num_workers_corr \ - num_workers_synth) / 2.0)) # num_workers_ul_prev = max(1, num_workers_ul_prev) cam_ids_corr = np.intersect1d(args_data.cam_ids_for_pose_train_real, args_data.cam_ids_for_pose_train_synth) if len(cam_ids_corr) > 1: raise UserWarning("Only one corresponding cam assumed during training \ (corr. view point between real and synth. data, same view, \ similar distribution for adv. 
training and view prediction)") # No rotation jitter, no cubesize jitter used_device = args_data.used_device args_data.used_device = None # there were issues with copying the device; ignoring it - it's not needed here args_data_temp = copy.deepcopy(args_data) args_data_temp.do_jitter_rotation = [False, False, False] args_data_temp.do_jitter_cubesize = [False, False, False] args_data_temp.sigma_com[args_data.output_cam_ids_train - 1] = 0.0 min_samp_prob_labeled = 0.3 needed_cam_ids = np.append(args_data_temp.output_cam_ids_train, 1) # input cam ID is always 1 for now id_range = [args_data.id_start_train, args_data.id_end_train + 1] loader_preview = create_loader_preview( loader_type, args_data_temp, do_use_gpu, num_workers_ul_prev, min_samp_prob_labeled, num_ul_prev, needed_cam_ids, needed_cam_ids, args_data.seed - 3, id_range, ids_train_permuted=ids_train_permuted) args_data.used_device = used_device # restoring device print( "Creating loaders for NON-corresponding, unlabeled REAL and SYNTHETIC data" ) num_ul_wc = args_data.batch_size - num_corr - num_synth - num_ul_prev num_ul_wc = max(1, num_ul_wc) num_workers_ul1 = int(np.round((args_data.num_loader_workers - num_workers_corr \ - num_workers_synth - num_workers_ul_prev) / 2.0)) # num_workers_ul1 = max(1, num_workers_ul1) num_workers_ul2 = args_data.num_loader_workers - num_workers_corr \ - num_workers_synth - num_workers_ul_prev - num_workers_ul1 # num_workers_ul2 = max(1, num_workers_ul2) num_labeled_samples_ul = 0 min_samp_prob_ul = 0.0 print(" REAL") loader_real_weakcorr_ul = create_independent_data_loader( loader_type, args_data, do_use_gpu, num_workers_ul1, num_labeled_samples_ul, min_samp_prob=min_samp_prob_ul, batch_size=num_ul_wc, cam_ids_real=cam_ids_corr, cam_ids_synth=[], seed=args_data.seed - 4) print(" SYNTH") loader_synth_weakcorr_ul = create_independent_data_loader( loader_type, args_data, do_use_gpu, num_workers_ul2, num_labeled_samples_ul, min_samp_prob=min_samp_prob_ul, batch_size=num_ul_wc, cam_ids_real=[], cam_ids_synth=cam_ids_corr, seed=args_data.seed - 5) return TrainLoaders(train_loader=[], loader_pretrain=loader_pretrain, loader_corr=loader_corr, loader_real=[], loader_synth=loader_synth, loader_real_weakcorr_ul=loader_real_weakcorr_ul, loader_synth_weakcorr_ul=loader_synth_weakcorr_ul, loader_preview=loader_preview)
avgX = 0 avgY = 0 avgR = 0 count_Circ = 0 imgList = np.array([threshImg1, threshImg2]) #Runs through all the added Images for i in imgList: circles = cv2.HoughCircles(i,cv2.HOUGH_GRADIENT,1.2,5, param1=210,param2=135,minRadius=1,maxRadius = 200) if circles is not None: count_Circ+=1 #Magic shit, no touchy circles = np.round(circles[0, :]).astype("int") #Gets the largest value biggest_Circle = lgst(circles) x=circles[biggest_Circle][0] y=circles[biggest_Circle][1] r=circles[biggest_Circle][2] avgR+=r avgX+=x avgY+=y #cv2.circle(image, (math.floor(avgX), math.floor(avgY)), math.floor(avgR), (255,255,0), 5) if(not count_Circ==0): avgR/=count_Circ
print(param_search.best_params_) print("Best CV Score:") print(-param_search.best_score_) best_params = param_search.best_params_ # %% Training the model with full data and optimized hyperparameters knn_model = KNeighborsRegressor(**best_params) knn_model.fit(X, y) pred = knn_model.predict(X_valid) pred_lon = pred[:, 0] pred_lat = pred[:, 1] pred_floor = np.round(pred[:, 2], decimals=0) pred_building = np.round(pred[:, 3], decimals=0) distance = distance75(y_valid, pred) score = np.mean(distance) lon_score = np.mean(np.absolute(pred_lon - y_valid.lon)) lat_score = np.mean(np.absolute(pred_lat - y_valid.lat)) right_floor = np.round(np.mean(pred_floor == y_valid.floor) * 100, 2) right_building = np.round(np.mean(pred_building == y_valid.building) * 100, 2) predictions = pd.DataFrame({ "LATITUDE": pred_lat, "LONGITUDE": pred_lon, "FLOOR": pred_floor, "distance": distance, })
def to_pixel_coords(x): return np.round(x / x[-1])
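# Usage sketch for to_pixel_coords: dividing by the last (homogeneous) coordinate and
# rounding gives pixel coordinates; the input vector is an illustrative example.
import numpy as np

p = np.array([640.4, 360.9, 2.0])      # homogeneous image point
print(to_pixel_coords(p))              # [320. 180.   1.]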
def evaluate(self, results, metric='bbox', logger=None, jsonfile_prefix=None, classwise=False, proposal_nums=(100, 300, 1000), iou_thrs=None, metric_items=None): """Evaluation in COCO protocol. Args: results (list[list | tuple]): Testing results of the dataset. metric (str | list[str]): Metrics to be evaluated. Options are 'bbox', 'segm', 'proposal', 'proposal_fast'. logger (logging.Logger | str | None): Logger used for printing related information during evaluation. Default: None. jsonfile_prefix (str | None): The prefix of json files. It includes the file path and the prefix of filename, e.g., "a/b/prefix". If not specified, a temp file will be created. Default: None. classwise (bool): Whether to evaluating the AP for each class. proposal_nums (Sequence[int]): Proposal number used for evaluating recalls, such as recall@100, recall@1000. Default: (100, 300, 1000). iou_thrs (Sequence[float], optional): IoU threshold used for evaluating recalls/mAPs. If set to a list, the average of all IoUs will also be computed. If not specified, [0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95] will be used. Default: None. metric_items (list[str] | str, optional): Metric items that will be returned. If not specified, ``['AR@100', 'AR@300', 'AR@1000', 'AR_s@1000', 'AR_m@1000', 'AR_l@1000' ]`` will be used when ``metric=='proposal'``, ``['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l']`` will be used when ``metric=='bbox' or metric=='segm'``. Returns: dict[str, float]: COCO style evaluation metric. """ metrics = metric if isinstance(metric, list) else [metric] #metrics = ['bbox'] ## #metric = 'bbox' allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast'] for metric in metrics: if metric not in allowed_metrics: raise KeyError(f'metric {metric} is not supported') if iou_thrs is None: iou_thrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) if metric_items is not None: if not isinstance(metric_items, list): metric_items = [metric_items] result_files, tmp_dir = self.format_results(results, jsonfile_prefix) eval_results = OrderedDict() cocoGt = self.coco for metric in metrics: msg = f'Evaluating {metric}...' 
if logger is None: msg = '\n' + msg print_log(msg, logger=logger) if metric == 'proposal_fast': ar = self.fast_eval_recall(results, proposal_nums, iou_thrs, logger='silent') log_msg = [] for i, num in enumerate(proposal_nums): eval_results[f'AR@{num}'] = ar[i] log_msg.append(f'\nAR@{num}\t{ar[i]:.4f}') log_msg = ''.join(log_msg) print_log(log_msg, logger=logger) continue if metric not in result_files: raise KeyError(f'{metric} is not in results') try: cocoDt = cocoGt.loadRes(result_files[metric]) except IndexError: print_log('The testing results of the whole dataset is empty.', logger=logger, level=logging.ERROR) break iou_type = 'bbox' if metric == 'proposal' else metric cocoEval = COCOeval(cocoGt, cocoDt, iou_type) cocoEval.params.catIds = self.cat_ids cocoEval.params.imgIds = self.img_ids cocoEval.params.maxDets = list(proposal_nums) cocoEval.params.iouThrs = iou_thrs # mapping of cocoEval.stats coco_metric_names = { 'mAP': 0, 'mAP_50': 1, 'mAP_75': 2, 'mAP_s': 3, 'mAP_m': 4, 'mAP_l': 5, 'AR@100': 6, 'AR@300': 7, 'AR@1000': 8, 'AR_s@1000': 9, 'AR_m@1000': 10, 'AR_l@1000': 11 } if metric_items is not None: for metric_item in metric_items: if metric_item not in coco_metric_names: raise KeyError( f'metric item {metric_item} is not supported') if metric == 'proposal': cocoEval.params.useCats = 0 cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize() if metric_items is None: metric_items = [ 'AR@100', 'AR@300', 'AR@1000', 'AR_s@1000', 'AR_m@1000', 'AR_l@1000' ] for item in metric_items: val = float( f'{cocoEval.stats[coco_metric_names[item]]:.3f}') eval_results[item] = val else: cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize() if classwise: # Compute per-category AP # Compute per-category AP # from https://github.com/facebookresearch/detectron2/ precisions = cocoEval.eval['precision'] # precision: (iou, recall, cls, area range, max dets) assert len(self.cat_ids) == precisions.shape[2] results_per_category = [] for idx, catId in enumerate(self.cat_ids): # area range index 0: all area ranges # max dets index -1: typically 100 per image nm = self.coco.loadCats(catId)[0] precision = precisions[:, :, idx, 0, -1] precision = precision[precision > -1] if precision.size: ap = np.mean(precision) else: ap = float('nan') results_per_category.append( (f'{nm["name"]}', f'{float(ap):0.3f}')) num_columns = min(6, len(results_per_category) * 2) results_flatten = list( itertools.chain(*results_per_category)) headers = ['category', 'AP'] * (num_columns // 2) results_2d = itertools.zip_longest(*[ results_flatten[i::num_columns] for i in range(num_columns) ]) table_data = [headers] table_data += [result for result in results_2d] table = AsciiTable(table_data) print_log('\n' + table.table, logger=logger) if metric_items is None: metric_items = [ 'mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l' ] for metric_item in metric_items: key = f'{metric}_{metric_item}' val = float( f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}' ) eval_results[key] = val ap = cocoEval.stats[:6] eval_results[f'{metric}_mAP_copypaste'] = ( f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} ' f'{ap[4]:.3f} {ap[5]:.3f}') if tmp_dir is not None: tmp_dir.cleanup() return eval_results
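A hedged usage sketch of the evaluate API documented above; it assumes an mmdetection-style dataset object named dataset and a results list produced by the test loop, and the keyword arguments simply mirror the docstring.

# Sketch only: `dataset` and `results` come from the surrounding test pipeline.
eval_results = dataset.evaluate(
    results,
    metric=['bbox', 'segm'],          # COCO box and mask AP
    classwise=True,                   # also print a per-category AP table
    iou_thrs=[0.5, 0.75],             # average over just these IoU thresholds
    metric_items=['mAP', 'mAP_50'],   # subset of the returned items
)
print(eval_results['bbox_mAP'], eval_results['segm_mAP'])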
# #### Top Restaurant in each city (first 5 examples)
df.groupby(['Country', 'City', 'Restaurant Name'])['Aggregate rating'].mean().reset_index().sort_values(
    ['Country', 'City', 'Aggregate rating'], ascending=False).reset_index(drop=True).groupby(
        ['Country', 'City']).head(1).reset_index(drop=True).head()

# #### Top 10 restaurants with the most branches on Zomato
df.groupby('Restaurant Name')['Restaurant Name'].count().sort_values(ascending=False).head(10)

expanded_df = df.groupby(['Currency', 'Price range'])['Average Cost for two'].mean().reset_index()
expanded_df['Average Cost for two'] = expanded_df['Average Cost for two'].apply(lambda x: np.round(x, 0))
expanded_df.pivot(index='Currency', columns='Price range', values='Average Cost for two').fillna(0)

# The table shows the average cost for two at each price range in each currency. There is a
# positive correlation between the average cost for two and the price range. The Dollar and the
# Pound stand out as the strongest currencies, and the Indonesian Rupiah as the weakest.

# ## Top 10 most popular restaurants (maximum number of votes)
plt.figure(figsize=(20, 10))
df.groupby(['Country', 'City', 'Restaurant Name'])['Votes'].count().reset_index().sort_values(
    ['Country', 'City', 'Votes'], ascending=False).groupby(['Country', 'City']).head(1).sort_values(
        'Votes', ascending=False).reset_index(drop=True).head(10).plot('Restaurant Name', 'Votes',
def luminance(im):
    if len(im.shape) == 2:
        return im
    else:
        # see http://www.mathworks.com/help/toolbox/images/ref/rgb2gray.html
        return np.uint8(np.round(0.2989 * im[:, :, 0] + 0.587 * im[:, :, 1] + 0.114 * im[:, :, 2]))
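A quick check of the luminance conversion above on a synthetic RGB image; the weights are the ITU-R BT.601 coefficients cited in the MATLAB link, and the example reuses the luminance function defined above.

import numpy as np

rgb = np.zeros((2, 2, 3), dtype=np.uint8)
rgb[..., 0] = 255                     # pure red image
print(luminance(rgb))                 # ~76 everywhere (0.2989 * 255, rounded)
print(luminance(rgb[:, :, 0]))        # already 2-D: returned unchanged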
from sklearn.decomposition import PCA

pca = PCA(n_components=len(good_data.columns)).fit(good_data)
pca_samples = pca.transform(log_samples)

explained_var = pca.explained_variance_ratio_
explained_var2 = sum(explained_var[i] for i in range(2))
explained_var4 = sum(explained_var[i] for i in range(4))
print('Total variance from first 2 components:', explained_var2)
print('Total variance from first 4 components:', explained_var4)
pca_results = vs.pca_results(good_data, pca)

pca = PCA(n_components=2).fit(good_data)
reduced_data = pca.transform(good_data)
pca_samples = pca.transform(log_samples)
reduced_data = pd.DataFrame(reduced_data, columns=['Dimension 1', 'Dimension 2'])
display(pd.DataFrame(np.round(pca_samples, 4), columns=['Dimension 1', 'Dimension 2']))
vs.biplot(good_data, reduced_data, pca)

# GMM has been removed from scikit-learn; GaussianMixture is the current equivalent.
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score

clusterer = GaussianMixture(n_components=2).fit(reduced_data)
preds = clusterer.predict(reduced_data)
centers = clusterer.means_
sample_preds = clusterer.predict(pca_samples)
score = silhouette_score(reduced_data, preds)
print(score)
vs.cluster_results(reduced_data, preds, centers, pca_samples)
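For readers without the Udacity vs helper module, a minimal self-contained sketch of the same explained-variance bookkeeping and clustering on synthetic data; the feature names and sample sizes are made up.

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score

rng = np.random.RandomState(0)
X = pd.DataFrame(rng.lognormal(size=(200, 6)),
                 columns=[f'feat_{i}' for i in range(6)])

pca = PCA(n_components=X.shape[1]).fit(X)
print(np.round(np.cumsum(pca.explained_variance_ratio_), 4))   # cumulative explained variance

reduced = PCA(n_components=2).fit_transform(X)
labels = GaussianMixture(n_components=2, random_state=0).fit_predict(reduced)
print(round(silhouette_score(reduced, labels), 3))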
def history(self, period="1mo", interval="1d", start=None, end=None, prepost=False, actions=True, auto_adjust=True, back_adjust=False, proxy=None, rounding=False, tz=None, **kwargs): """ :Parameters: period : str Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max Either Use period parameter or use start and end interval : str Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo Intraday data cannot extend last 60 days start: str Download start date string (YYYY-MM-DD) or _datetime. Default is 1900-01-01 end: str Download end date string (YYYY-MM-DD) or _datetime. Default is now prepost : bool Include Pre and Post market data in results? Default is False auto_adjust: bool Adjust all OHLC automatically? Default is True back_adjust: bool Back-adjusted data to mimic true historical prices proxy: str Optional. Proxy server URL scheme. Default is None rounding: bool Round values to 2 decimal places? Optional. Default is False = precision suggested by Yahoo! tz: str Optional timezone locale for dates. (default data is returned as non-localized dates) **kwargs: dict debug: bool Optional. If passed as False, will suppress error message printing to console. """ if start or period is None or period.lower() == "max": if start is None: start = -2208988800 elif isinstance(start, _datetime.datetime): start = int(_time.mktime(start.timetuple())) else: start = int( _time.mktime(_time.strptime(str(start), '%Y-%m-%d'))) if end is None: end = int(_time.time()) elif isinstance(end, _datetime.datetime): end = int(_time.mktime(end.timetuple())) else: end = int(_time.mktime(_time.strptime(str(end), '%Y-%m-%d'))) params = {"period1": start, "period2": end} else: period = period.lower() params = {"range": period} params["interval"] = interval.lower() params["includePrePost"] = prepost params["events"] = "div,splits" # 1) fix weired bug with Yahoo! - returning 60m for 30m bars if params["interval"] == "30m": params["interval"] = "15m" # setup proxy in requests format if proxy is not None: if isinstance(proxy, dict) and "https" in proxy: proxy = proxy["https"] proxy = {"https": proxy} # Getting data from json url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker) data = _requests.get(url=url, params=params, proxies=proxy) if "Will be right back" in data.text: raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n" "Our engineers are working quickly to resolve " "the issue. 
Thank you for your patience.") data = data.json() # Work with errors debug_mode = True if "debug" in kwargs and isinstance(kwargs["debug"], bool): debug_mode = kwargs["debug"] err_msg = "No data found for this date range, symbol may be delisted" if "chart" in data and data["chart"]["error"]: err_msg = data["chart"]["error"]["description"] shared._DFS[self.ticker] = utils.empty_df() shared._ERRORS[self.ticker] = err_msg if "many" not in kwargs and debug_mode: print('- %s: %s' % (self.ticker, err_msg)) return shared._DFS[self.ticker] elif "chart" not in data or data["chart"]["result"] is None or \ not data["chart"]["result"]: shared._DFS[self.ticker] = utils.empty_df() shared._ERRORS[self.ticker] = err_msg if "many" not in kwargs and debug_mode: print('- %s: %s' % (self.ticker, err_msg)) return shared._DFS[self.ticker] # parse quotes try: quotes = utils.parse_quotes(data["chart"]["result"][0], tz) except Exception: shared._DFS[self.ticker] = utils.empty_df() shared._ERRORS[self.ticker] = err_msg if "many" not in kwargs and debug_mode: print('- %s: %s' % (self.ticker, err_msg)) return shared._DFS[self.ticker] # 2) fix weired bug with Yahoo! - returning 60m for 30m bars if interval.lower() == "30m": quotes2 = quotes.resample('30T') quotes = _pd.DataFrame(index=quotes2.last().index, data={ 'Open': quotes2['Open'].first(), 'High': quotes2['High'].max(), 'Low': quotes2['Low'].min(), 'Close': quotes2['Close'].last(), 'Adj Close': quotes2['Adj Close'].last(), 'Volume': quotes2['Volume'].sum() }) try: quotes['Dividends'] = quotes2['Dividends'].max() except Exception: pass try: quotes['Stock Splits'] = quotes2['Dividends'].max() except Exception: pass if auto_adjust: quotes = utils.auto_adjust(quotes) elif back_adjust: quotes = utils.back_adjust(quotes) if rounding: quotes = _np.round(quotes, data["chart"]["result"][0]["meta"]["priceHint"]) quotes['Volume'] = quotes['Volume'].fillna(0).astype(_np.int64) quotes.dropna(inplace=True) # actions dividends, splits = utils.parse_actions(data["chart"]["result"][0], tz) # combine df = _pd.concat([quotes, dividends, splits], axis=1, sort=True) df["Dividends"].fillna(0, inplace=True) df["Stock Splits"].fillna(0, inplace=True) # index eod/intraday df.index = df.index.tz_localize("UTC").tz_convert( data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]) if params["interval"][-1] == "m": df.index.name = "Datetime" else: df.index = _pd.to_datetime(df.index.date) if tz is not None: df.index = df.index.tz_localize(tz) df.index.name = "Date" self._history = df.copy() if not actions: df.drop(columns=["Dividends", "Stock Splits"], inplace=True) return df
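A hedged usage example of the history method documented above, assuming the surrounding class is the usual yfinance.Ticker; the ticker symbol is arbitrary.

import yfinance as yf   # assumption: this method lives on yfinance.Ticker

msft = yf.Ticker("MSFT")
df = msft.history(period="3mo", interval="1d", auto_adjust=True, rounding=True)
print(df[["Open", "High", "Low", "Close", "Volume"]].tail())

# Intraday request: per the docstring, intervals under 1d only cover roughly the last 60 days.
intraday = msft.history(period="5d", interval="30m", prepost=False)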
def __init__(self, inFile, textColumn, labelColumn, testSize, randomizeTestSet=False, preComputedEmbedding=False, MAX_NUM_WORDS=100000): vocabFile = "/nas/home/xiangcil/bio_corpus/abstracts/bioVocab50.txt" df = pd.read_csv(inFile, sep='\t', header=0, index_col=0, engine='python') # Remove records with missing data. df = df[pd.notnull(df[textColumn])] n_rec = df.shape[0] if randomizeTestSet: test_ids = sorted(random.sample(range(n_rec), int(testSize))) self.randomized = True else: test_ids = range(int(testSize)) train_ids = [] for i in range(n_rec): if i not in test_ids: train_ids.append(i) df_train = df.iloc[train_ids, :] df_test = df.iloc[test_ids, :] labels = df[labelColumn].unique().tolist() y_train_base = [labels.index(i) for i in df_train[labelColumn]] y_test_base = [labels.index(i) for i in df_test[labelColumn]] # analyze word distribution df_train['doc_len'] = df_train[textColumn].apply( lambda words: len(words.split(" "))) self.mean_seq_len = np.round(df_train['doc_len'].mean()).astype(int) self.max_seq_len = np.round(df_train['doc_len'].mean() + df_train['doc_len'].std()).astype(int) np.random.seed(0) if preComputedEmbedding: if textColumn == "evd_frg": embedding_file = "/nas/home/xiangcil/figure_classification/evidence_fragment.hdf5" elif textColumn == "text": embedding_file = "/nas/home/xiangcil/figure_classification/text.hdf5" else: assert False, "Wrong text column!" self.x_train = np.zeros( (len(train_ids), self.max_seq_len, 1024 * 3)) self.x_test = np.zeros((len(test_ids), self.max_seq_len, 1024 * 3)) self.train_ids = train_ids self.test_ids = test_ids with h5py.File(embedding_file, 'r') as fin: for index, i in tqdm(enumerate(train_ids)): #matrix = np.max(fin[str(i)][()],axis=0) # Max pooling matrix = np.concatenate( (fin[str(i)][()][0, :, :], fin[str(i)][()][1, :, :], fin[str(i)][()][2, :, :]), axis=1) sentence_length = np.min( [matrix.shape[0], self.max_seq_len]) self.x_train[ index, : sentence_length, :] = matrix[:sentence_length, :] for index, i in tqdm(enumerate(test_ids)): #matrix = np.max(fin[str(i)][()],axis=0) matrix = np.concatenate( (fin[str(i)][()][0, :, :], fin[str(i)][()][1, :, :], fin[str(i)][()][2, :, :]), axis=1) sentence_length = np.min( [matrix.shape[0], self.max_seq_len]) self.x_test[ index, : sentence_length, :] = matrix[:sentence_length, :] self.n_classes = len(labels) self.labels = labels self.y_train = keras.utils.to_categorical( y_train_base, num_classes=self.n_classes) self.y_test = keras.utils.to_categorical( y_test_base, num_classes=self.n_classes) self.train_size = self.y_train.shape[0] self.test_size = self.y_test.shape[0] else: self.MAX_NB_WORDS = 600000 raw_docs_train = df_train[textColumn].tolist() raw_docs_test = df_test[textColumn].tolist() self.raw_docs_train = np.array(raw_docs_train, dtype=str)[:, np.newaxis] self.raw_docs_test = np.array(raw_docs_test, dtype=str)[:, np.newaxis] print("pre-processing train data...") vocab = [] with open(vocabFile, "r") as vfile: for line in vfile: vocab.append(line.strip()) self.vocab = set(vocab) processed_docs_train = [] for doc in raw_docs_train: filtered = [] tokens = doc.split() for word in tokens: word = self._clean_url(word) word = self._clean_num(word) if word not in self.vocab: word = "<UNK>" filtered.append(word) processed_docs_train.append(" ".join(filtered)) processed_docs_test = [] for doc in raw_docs_test: filtered = [] tokens = doc.split() for word in tokens: word = self._clean_url(word) word = self._clean_num(word) if word not in self.vocab: word = "<UNK>" filtered.append(word) 
processed_docs_test.append(" ".join(filtered)) print("tokenizing input data...") tokenizer = Tokenizer(num_words=self.MAX_NB_WORDS, lower=True, char_level=False) tokenizer.fit_on_texts(processed_docs_train + processed_docs_test) #leaky word_seq_train = tokenizer.texts_to_sequences(processed_docs_train) word_seq_test = tokenizer.texts_to_sequences(processed_docs_test) self.word_index = tokenizer.word_index print("dictionary size: ", len(self.word_index)) #pad sequences self.x_train = sequence.pad_sequences(word_seq_train, maxlen=self.max_seq_len) self.x_test = sequence.pad_sequences(word_seq_test, maxlen=self.max_seq_len) self.n_classes = len(labels) self.labels = labels self.y_train = keras.utils.to_categorical( y_train_base, num_classes=self.n_classes) self.y_test = keras.utils.to_categorical( y_test_base, num_classes=self.n_classes)
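A minimal sketch of the tokenize-and-pad step used above, on toy documents; it assumes the TensorFlow Keras preprocessing utilities, and the toy strings and maxlen are illustrative.

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence

docs_train = ["the cell image shows <UNK> staining", "western blot of <UNK>"]
docs_test = ["the blot shows <UNK>"]

tokenizer = Tokenizer(num_words=50, lower=True, char_level=False)
tokenizer.fit_on_texts(docs_train + docs_test)   # fitting on test text leaks vocabulary,
                                                 # as the "leaky" comment above notes
x_train = sequence.pad_sequences(tokenizer.texts_to_sequences(docs_train), maxlen=8)
x_test = sequence.pad_sequences(tokenizer.texts_to_sequences(docs_test), maxlen=8)
print(x_train.shape, x_test.shape)               # (2, 8) (1, 8)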
def figure_2(): if not os.path.exists("Data/figures2.csv"): np.random.seed(8) res_to_plot_list = [] res0_to_plot_list = [] res_to_plot_q_list = [] res0_to_plot_q_list = [] Kalpha_list = [] Kalpha_list_q = [] olscoeffs_list = [] for year in ['80', '90', '00']: data = pd.read_stata('Data/census' + year + '.dta') # print( data) data_q = data data_q['one'] = 1. n = data.shape[0] B = 500 b = np.round(5 * n ** (2 / 5.)) R = np.matrix([0, 1, 0, 0, 0]).T R_q = data_q[["one", "educ", "exper", "exper2", "black"]].multiply( data['perwt'], axis='index' ).mean(axis=0) alpha = 0.05 taus = np.arange(1, 10) / 10. ntaus = len(taus) formula = 'logwk~educ+exper+exper2+black' V_list = [] coeffs_list = [] for i in tqdm(range(ntaus)): qr = smf.quantreg(formula, data) qrfit = qr.fit(q=taus[i]) coeffs = np.array(qrfit.params) res = np.array(qrfit.resid) sigmatau = sigma(data, n, taus[i], res) jacobtau = jacobian(data, n, taus[i], res, alpha) solved_jacobian = np.linalg.inv(jacobtau) V = np.dot(solved_jacobian, np.dot(sigmatau, solved_jacobian)) V_list += [V] coeffs_list += [coeffs] with Pool(4) as pool: K = pool.starmap(subsamplek, zip([formula] * ntaus, V_list, taus, coeffs_list, [data_q] * ntaus, [n] * ntaus, [b] * ntaus, [B] * ntaus, [R] * ntaus) ) K_q = pool.starmap(subsamplek, zip([formula] * ntaus, V_list, taus, coeffs_list, [data_q] * ntaus, [n] * ntaus, [b] * ntaus, [B] * ntaus, [R_q] * ntaus) ) K = np.array(np.matrix(K).T) Kmax = list(map(max, K)) Kalpha = np.percentile(Kmax, (1 - alpha) * 100) K_q = np.array(np.matrix(K_q).T) Kmax_q = list(map(max, K_q)) Kalpha_q = np.percentile(Kmax_q, (1 - alpha) * 100) ols = smf.ols(formula, data) olsfit = ols.fit() olscoeffs = np.dot(R.T, olsfit.params) olscoeffs_q = np.dot(R_q.T, olsfit.params) variables = np.array(olsfit.params.index) p = len(variables) taus = np.arange(2, 19) / 20. 
res_to_plot_list.append( table_rq_res(formula, taus=taus, data=data, alpha=alpha, R=R, n=n, sigma=sigma, jacobian=jacobian) ) res0_to_plot_list.append( table_rq_res(formula, taus=taus, data=data, alpha=alpha, R=R, n=n, sigma=sigma0, jacobian=jacobian) ) res_to_plot_q_list.append( table_rq_res(formula, taus=taus, data=data, alpha=alpha, R=R_q, n=n, sigma=sigma, jacobian=jacobian) ) res0_to_plot_q_list.append( table_rq_res(formula, taus=taus, data=data, alpha=alpha, R=R_q, n=n, sigma=sigma0, jacobian=jacobian) ) Kalpha_list.append(Kalpha) Kalpha_list_q.append(Kalpha_q) olscoeffs_list.append(olscoeffs) b80 = 100 * np.array(res_to_plot_list[0][0].iloc[:, 0]) ub80_p = b80 + 100 * Kalpha_list[0] * np.array(res_to_plot_list[0][1].iloc[:, 0]) ub80_m = b80 - 100 * Kalpha_list[0] * np.array(res_to_plot_list[0][1].iloc[:, 0]) b90 = 100 * np.array(res_to_plot_list[1][0].iloc[:, 0]) ub90_p = b90 + 100 * Kalpha_list[1] * np.array(res_to_plot_list[1][1].iloc[:, 0]) ub90_m = b90 - 100 * Kalpha_list[1] * np.array(res_to_plot_list[1][1].iloc[:, 0]) b00 = 100 * np.array(res_to_plot_list[2][0].iloc[:, 0]) ub00_p = b00 + 100 * Kalpha_list[2] * np.array(res_to_plot_list[2][1].iloc[:, 0]) ub00_m = b00 - 100 * Kalpha_list[2] * np.array(res_to_plot_list[2][1].iloc[:, 0]) b80_bis = np.array(res_to_plot_q_list[0][0].iloc[:, 0]) b80_bis += - np.float(res_to_plot_q_list[0][0].iloc[8, 0]) ub80_p_bis = b80_bis + Kalpha_list_q[0] * np.array(res_to_plot_q_list[0][1].iloc[:, 0]) ub80_m_bis = b80_bis - Kalpha_list_q[0] * np.array(res_to_plot_q_list[0][1].iloc[:, 0]) b90_bis = np.array(res_to_plot_q_list[1][0].iloc[:, 0]) b90_bis += - np.float(res_to_plot_q_list[1][0].iloc[8, 0]) ub90_p_bis = b90_bis + Kalpha_list_q[1] * np.array(res_to_plot_q_list[1][1].iloc[:, 0]) ub90_m_bis = b90_bis - Kalpha_list_q[1] * np.array(res_to_plot_q_list[1][1].iloc[:, 0]) b00_bis = np.array(res_to_plot_q_list[2][0].iloc[:, 0]) b00_bis += - np.float(res_to_plot_q_list[2][0].iloc[8, 0]) ub00_p_bis = b00_bis + Kalpha_list_q[2] * np.array(res_to_plot_q_list[2][1].iloc[:, 0]) ub00_m_bis = b00_bis - Kalpha_list_q[2] * np.array(res_to_plot_q_list[2][1].iloc[:, 0]) csv_df = pd.DataFrame() csv_df["taus"] = taus csv_df["b80"] = b80 csv_df["b90"] = b90 csv_df["b00"] = b00 csv_df["b80_bis"] = b80_bis csv_df["b90_bis"] = b90_bis csv_df["b00_bis"] = b00_bis csv_df["ub80_p"] = ub80_p csv_df["ub90_p"] = ub90_p csv_df["ub00_p"] = ub00_p csv_df["ub80_p_bis"] = ub80_p_bis csv_df["ub90_p_bis"] = ub90_p_bis csv_df["ub00_p_bis"] = ub00_p_bis csv_df["ub80_m"] = ub80_m csv_df["ub90_m"] = ub90_m csv_df["ub00_m"] = ub00_m csv_df["ub80_m_bis"] = ub80_m_bis csv_df["ub90_m_bis"] = ub90_m_bis csv_df["ub00_m_bis"] = ub00_m_bis csv_df.to_csv("Data/figures2.csv") else: csv_df = pd.read_csv("Data/figures2.csv") taus = csv_df["taus"] b80 = csv_df["b80"] b90 = csv_df["b90"] b00 = csv_df["b00"] b80_bis = csv_df["b80_bis"] b90_bis = csv_df["b90_bis"] b00_bis = csv_df["b00_bis"] ub80_p = csv_df["ub80_p"] ub90_p = csv_df["ub90_p"] ub00_p = csv_df["ub00_p"] ub80_p_bis = csv_df["ub80_p_bis"] ub90_p_bis = csv_df["ub90_p_bis"] ub00_p_bis = csv_df["ub00_p_bis"] ub80_m = csv_df["ub80_m"] ub90_m = csv_df["ub90_m"] ub00_m = csv_df["ub00_m"] ub80_m_bis = csv_df["ub80_m_bis"] ub90_m_bis = csv_df["ub90_m_bis"] ub00_m_bis = csv_df["ub00_m_bis"] fig, (ax1) = plt.subplots() ax1.fill_between(taus, ub80_m, ub80_p, facecolor='silver', interpolate=True, alpha=.5) ax1.fill_between(taus, ub90_m, ub90_p, facecolor='black', interpolate=True, alpha=.5) ax1.fill_between(taus, ub00_m, ub00_p, 
facecolor='brown', interpolate=True, alpha=.5) plot80 = ax1.plot(taus, b80, '--', label='1980', color='black') plot90 = ax1.plot(taus, b90, '--', label='1990', color='black') plot00 = ax1.plot(taus, b00, '--', label='2000', color='black') plot80_bg = ax1.fill(np.NaN, np.NaN, 'silver', alpha=0.5) plot90_bg = ax1.fill(np.NaN, np.NaN, 'black', alpha=0.5) plot00_bg = ax1.fill(np.NaN, np.NaN, 'brown', alpha=0.5) ax1.legend([(plot80_bg[0], plot80[0]), (plot90_bg[0], plot90[0]), (plot00_bg[0], plot00[0])], ['1980', '1990', '2000']) ax1.set_xlabel('Quantile Index') ax1.set_ylabel('Schooling Coefficients (%)') ax1.set_title('Schooling Coefficients') plt.show() # Second graphe fig, (ax1) = plt.subplots() plot80_bis = ax1.plot(taus, b80_bis, '--', label='1980', color='black') plot90_bis = ax1.plot(taus, b90_bis, '--', label='1990', color='black') plot00_bis = ax1.plot(taus, b00_bis, '--', label='2000', color='black') ax1.plot(taus, [0] * len(b80_bis), color='black', lw=.5) ax1.fill_between(taus, ub80_m_bis, ub80_p_bis, facecolor='silver', interpolate=True, alpha=.8) ax1.fill_between(taus, ub90_m_bis, ub90_p_bis, facecolor='black', interpolate=True, alpha=.8) ax1.fill_between(taus, ub00_m_bis, ub00_p_bis, facecolor='brown', interpolate=True, alpha=.8) plot80_bg_bis = ax1.fill(np.NaN, np.NaN, 'silver', alpha=0.8) plot90_bg_bis = ax1.fill(np.NaN, np.NaN, 'black', alpha=0.8) plot00_bg_bis = ax1.fill(np.NaN, np.NaN, 'brown', alpha=0.8) ax1.legend([(plot80_bg_bis[0], plot80_bis[0]), (plot90_bg_bis[0], plot90_bis[0]), (plot00_bg_bis[0], plot00_bis[0])], ['1980', '1990', '2000']) ax1.set_xlabel('Quantile Index') ax1.set_ylabel('Schooling Coefficients (%)') ax1.set_title('CONDITIONAL QUANTILES (at covariate means)') plt.show()
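The core estimation step inside figure_2 is statsmodels' quantile regression; below is a small self-contained sketch of that call on synthetic data with hypothetical column names.

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

rng = np.random.RandomState(0)
df = pd.DataFrame({"educ": rng.randint(8, 21, size=500)})
df["logwk"] = 1.5 + 0.08 * df["educ"] + rng.standard_normal(500) * (0.2 + 0.02 * df["educ"])

for tau in (0.1, 0.5, 0.9):
    fit = smf.quantreg("logwk ~ educ", df).fit(q=tau)
    print(tau, np.round(fit.params["educ"], 3))   # schooling coefficient by quantile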
def generate_synthetic_input_batch(
    m_den,
    ln_emb,
    n,
    num_indices_per_lookup,
    num_indices_per_lookup_fixed,
    trace_file,
    enable_padding=False,
):
    # dense feature
    Xt = ra.rand(n, m_den).astype(np.float32)

    # sparse features (sparse indices)
    lS_emb_lengths = []
    lS_emb_indices = []
    # for each embedding generate a list of n lookups,
    # where each lookup is composed of multiple sparse indices
    for i, size in enumerate(ln_emb):
        lS_batch_lengths = []
        lS_batch_indices = []
        for _ in range(n):
            # number of sparse indices to be used per embedding
            if num_indices_per_lookup_fixed:
                sparse_group_size = np.int32(num_indices_per_lookup)
            else:
                # random in [1, num_indices_per_lookup]
                r = ra.random(1)
                sparse_group_size = np.int32(
                    max(1, np.round(r * min(size, num_indices_per_lookup))[0])
                )
            # sparse indices to be used per embedding
            file_path = trace_file
            line_accesses, list_sd, cumm_sd = read_dist_from_file(
                file_path.replace("j", str(i))
            )
            # approach 1: rand
            # r = trace_generate_rand(
            #     line_accesses, list_sd, cumm_sd, sparse_group_size, enable_padding
            # )
            # approach 2: lru
            r = trace_generate_lru(
                line_accesses, list_sd, cumm_sd, sparse_group_size, enable_padding
            )
            # WARNING: if the distribution in the file is not consistent with the
            # embedding table dimensions, the mod below guards against out-of-range
            # accesses.
            sparse_group = np.unique(r).astype(np.int32)
            minsg = np.min(sparse_group)
            maxsg = np.max(sparse_group)
            if (minsg < 0) or (size <= maxsg):
                print(
                    "WARNING: distribution is inconsistent with embedding "
                    + "table size (using mod to recover and continue)"
                )
                sparse_group = np.mod(sparse_group, size).astype(np.int32)
            # reset sparse_group_size in case duplicate indices were removed
            sparse_group_size = np.int32(sparse_group.size)
            # store lengths and indices
            lS_batch_lengths += [sparse_group_size]
            lS_batch_indices += sparse_group.tolist()
        lS_emb_lengths.append(lS_batch_lengths)
        lS_emb_indices.append(lS_batch_indices)

    return (Xt, lS_emb_lengths, lS_emb_indices)
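A small illustration of the (lengths, indices) layout the generator above returns, assuming the DLRM-style convention of one lengths list and one flat indices list per embedding table; the numbers are made up.

import numpy as np

# Hypothetical output for one embedding table and a batch of n = 3 lookups:
lS_batch_lengths = [2, 1, 3]                       # indices consumed per sample
lS_batch_indices = [4, 9, 1, 0, 7, 12]             # flat, concatenated per sample

# Recover the per-sample index groups via a prefix sum over the lengths.
offsets = np.concatenate(([0], np.cumsum(lS_batch_lengths)))
groups = [lS_batch_indices[offsets[k]:offsets[k + 1]] for k in range(len(lS_batch_lengths))]
print(groups)                                      # [[4, 9], [1], [0, 7, 12]]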
            else:
                # plot_x['0,0,%d' % pol][-1] /= float(len(opts.ant.split(',')))
                plot_x['0,0,%d' % uv['pol']].append(d * awgt)
                tavg = t
        else:
            if bl not in plot_x:   # dict.has_key() is Python 2 only
                plot_x[bl] = []
            plot_x[bl].append(d)
del(uv)

# [d / float(len(opts.ant.split(','))) for d in plot_x['0,0,%d' % pol]]
bls = plot_x.keys()
if opts.lst_avg:
    hr = n.pi / 12.
    # create LST bins; 42.95 s is the resolution of one LST bin
    lst_bins = n.round(n.arange(0, 2 * n.pi, 2 * n.pi * (42.95 / a.const.sidereal_day)), 8)
    bin_indices = n.digitize(plot_t['lst'], lst_bins)
    for b in bls:
        summed = n.zeros((len(lst_bins), d.size), dtype=d.dtype)
        lst_wgts = n.zeros((len(lst_bins), d.size), dtype=float)
        n_flagged = n.zeros((len(lst_bins), d.size), dtype=float)
        total_summed = n.zeros((len(lst_bins), 1))
        frac_flagged = n.zeros((len(lst_bins), d.size), dtype=float)
        sum_count = 0
        for i, datum in zip(bin_indices, plot_x[b]):
            if i >= len(lst_bins):
                i = len(lst_bins) - 1
            summed[i] += datum[0].data
            total_summed[i] += 1.
            n_flagged[i] += datum[0].mask
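The LST-averaging loop above hinges on numpy's digitize; here is a compact sketch of that binning step with made-up LST values (the 42.95 s bin width matches the comment above, and the sidereal-day constant is an approximation).

import numpy as np

sidereal_day = 86164.0905                        # seconds, approximate value
bin_width = 2 * np.pi * (42.95 / sidereal_day)   # radians per LST bin, as in the comment
lst_bins = np.round(np.arange(0, 2 * np.pi, bin_width), 8)

lst_samples = np.array([0.0001, 0.0062, 0.0063, 1.5708])   # hypothetical LSTs in radians
bin_indices = np.digitize(lst_samples, lst_bins)
print(len(lst_bins), bin_indices)                # each sample mapped to its LST bin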
# -*- coding:utf-8 -*-
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

plt.rcParams['font.sans-serif'] = ['SimHei']   # render CJK characters correctly
plt.rcParams['axes.unicode_minus'] = False     # render the minus sign correctly
warnings.filterwarnings("ignore")

# Load the data
data = pd.read_csv(r'D:\DeepLearning ER\00.csv')

# Overall data summary
data.info()

# Relationship between admission age group and sex
grouped_values = data.groupby("AGE").sum().reset_index()
g = sns.countplot('AGE', hue='SEX', data=data)
plt.title(u"Admission age and sex")
plt.xlabel(u"Age group")
plt.ylabel(u"Number of patients")
for index, row in grouped_values.iterrows():
    # annotate each bar with its count
    g.text(row.name, row.AGE, np.round(row.SEX, 2), color="black", ha="center")
plt.show()