def clean_tokens(tokens):
    """Normalise a list of tokens and drop the unwanted ones.

    Every token goes through the following steps, in this order:

    1. it is replaced by its entry in ``contractions`` when one exists,
       otherwise it is kept unchanged;
    2. it is converted with ``num2words(int(...))`` when ``is_number``
       accepts it;
    3. it is dropped when it contains any entry of ``invalidtokens``;
    4. it is dropped when it is a lone apostrophe;
    5. it is dropped when ``profanity.is_profanity`` flags it.

    Returns a new list holding the surviving tokens in their input order.
    """
    cleaned = []
    for token in tokens:
        # Contraction lookup falls back to the token itself.
        candidate = contractions.get(token, token)

        # Replace numbers by words.
        if is_number(candidate):
            candidate = num2words(int(candidate))

        # Remove invalid tokens.
        if any(bad in candidate for bad in invalidtokens):
            continue
        if candidate == "'":
            continue

        # Remove profanity.
        if profanity.is_profanity(candidate):
            continue

        cleaned.append(candidate)
    return cleaned
def random_sample(self, pixels, n=1e5):
    """ Draw random points inside one or more healpix pixels.

    Each point is assigned to one of the input pixels (chosen at random
    when several pixels are given) and placed at a random offset around
    that pixel's centre in the healpix projection plane.

    Parameters
    ----------
    pixels : int, ndarray
        pixel number or list of pixel numbers
    n : int
        Total number of randoms to draw

    Returns
    -------
    lon, lat : positions of randoms (degr)
    """
    # make sure input is iterable
    if misc.is_number(pixels):
        pixels = [int(pixels)]
    count = int(n)

    # decide which input pixel every random point belongs to
    n_pixels = len(pixels)
    if n_pixels == 1:
        owner = np.zeros(count, dtype=int)
    else:
        owner = np.random.choice(n_pixels, count)

    # pixel centres, first as angles and then in the healpix projection
    theta_c, phi_c = healpy.pix2ang(self.nside, pixels, nest=self.nest)
    x_c, y_c = self._phitheta2xy(phi_c, theta_c)

    # size of a healpix cell in the projection
    cell_size = np.pi / 2. / self.nside

    # random offsets inside a square of that size, moved onto the centres
    offsets = np.random.uniform(-0.5, 0.5, (2, count)) * cell_size
    x = offsets[0] + x_c[owner]
    y = offsets[1] + y_c[owner]

    # back to angles, then to longitude / latitude in degrees
    phi_out, theta_out = self._xy2phitheta(x, y)
    lon = self.rad2deg * phi_out
    lat = 90 - self.rad2deg * theta_out
    return lon, lat
def select_cells(self, coarse_cell, coarse_nside, coarse_order=None):
    """ Return the cells of this grid that fall inside larger cell(s).

    The coarse cells are flagged in a map at resolution `coarse_nside`,
    that map is regraded to this grid's nside and ordering, and the
    indices of the flagged fine cells are returned.

    Notes
    -----
    Require coarse_nside < HealpixProjector.nside.
    A coarse_nside of 0 selects every cell of this grid.

    Parameters
    ----------
    coarse_cell : int
        cell number or list defining patch of sky
    coarse_nside : int
        nside of pixelization
    coarse_order : str
        pixelization order ('ring' or 'nest'); defaults to this grid's order

    Returns
    -------
    list : cell indices in pixel map

    Raises
    ------
    ValueError : if coarse_nside is not lower than this grid's nside
    """
    if coarse_nside == 0:
        return np.arange(self.npix, dtype=int)

    if coarse_nside >= self.nside:
        raise ValueError("coarse_nside (%s) must be lower than HealpixProjector.nside (%s)"%(coarse_nside, self.nside))

    if coarse_order is None:
        coarse_order = self.order

    # make sure input is iterable
    cells = [int(coarse_cell)] if misc.is_number(coarse_cell) else coarse_cell

    # flag the requested cells on the coarse grid
    coarse_grid = HealpixProjector(nside=coarse_nside, order=coarse_order)
    coarse_flags = np.zeros(coarse_grid.npix, dtype='d')
    for index in cells:
        coarse_flags[index] = 1

    # bring the flags to this grid's resolution and ordering
    fine_flags = healpy.ud_grade(
        coarse_flags,
        order_in=coarse_order,
        order_out=self.order,
        nside_out=self.nside,
    )
    return np.nonzero(fine_flags > 0)[0]
# Load the IDs of general posts recorded by previous runs.
with open('srl_general.txt', 'r') as g:
    num_general = g.read().split(',')

# Fetch the board page. The context manager closes the connection even
# when reading fails, which the manual open/close sequence did not.
with urllib.request.urlopen(url) as response:
    data = response.read()
text = data.decode('utf-8')

count_new = 0
srl_arr_general = []
text_splitted = text.split('document_srl=')

# Append newly seen IDs; the file is only opened once the page has been
# fetched and decoded, and is closed even if the loop raises.
with open('srl_general.txt', 'a') as g:
    # The first fragment precedes any 'document_srl=' marker, so skip it.
    for fragment in text_splitted[1:]:
        srl = fragment.split('">')[0].split('#comment')[0]
        if not is_number(srl):
            continue
        # Skip notice posts, and IDs already collected from this page
        # (the latter prevents duplication).
        if srl in num_notices or srl in srl_arr_general:
            continue
        srl_arr_general.append(srl)
        if srl not in num_general:
            count_new += 1
            g.write(',' + srl)
            print('New post found : ' + srl)

if count_new != 0:
    print('Started generating feed...')
    # make FeedGenerator
    fg = FeedGenerator()
    fg.id('asdf')
def sample(self, density=None, n=None, cell=None, nside=None, order=None,
           min_sample=100, max_loops=10):
    """ Draw longitude and latitude pairs uniformly inside the mask.

    By default the points are drawn from the full sphere.  If a healpix
    cell number (or list of numbers) is given then randoms will be drawn
    from within those cells only.  In this mode both the healpix nside
    parameter and ordering scheme should be given as arguments.

    After drawing randoms the ones that fall outside the polygon mask
    are discarded.

    Notes
    -----
    Either density or n must be given as argument.  If both are given,
    density will be used.

    In density mode a single batch is drawn and filtered, so the number
    of returned points depends on how much of the cells the mask covers.
    In count mode batches are drawn until n points have been accepted,
    and exactly n points are returned.

    Parameters
    ----------
    density : float
        number density of samples (number per square degree)
    n : int
        number of samples to draw (only used if density is not given)
    cell : int or list
        optional healpix cell number or list of cell numbers
    nside : int
        healpix nside parameter of `cell`
    order : str
        healpix ordering scheme of `cell`, passed on to
        ``self.grid.select_cells``
    min_sample : int
        smallest number of points drawn in one batch
    max_loops : int
        number of additional batches allowed after the first one
        (count mode only)

    Returns
    -------
    lon, lat : random coordinates

    Raises
    ------
    ValueError : if neither density or n are given, or if they are not
        finite non-negative numbers
    TypeError : if n cannot be cast to integer type
    RuntimeError : if n points were not accepted within the allowed
        number of batches
    """
    if density is None and n is None:
        raise ValueError(
            "sample has missing required argument. Please pass density or n"
        )

    if density is not None:
        try:
            float(density)
        except ValueError:
            raise ValueError("Sample density must be a number, not '%s'" % str(density))
        if density < 0 or not np.isfinite(density):
            raise ValueError("Sample density must be positive, not '%s'" % str(density))

    if n is not None:
        try:
            float(n)
        except ValueError:
            raise ValueError("Sample n must be a number, not '%s'" % str(n))
        if n < 0 or not np.isfinite(n):
            raise ValueError("Sample n must be positive, not %s" % str(n))

    if self.params['pixel_mask'] is None:
        self._build_pixel_mask()

    if cell is None:
        cell = self.params['survey_cells']  # full sky
    else:
        # sample only selected patches defined by a healpix cell,
        # keeping the cells that overlap the pixel mask
        cell = self.grid.select_cells(cell, nside, order)
        sel = self.params['pixel_mask'][cell] > 0
        cell = cell[sel]

    # Test for a scalar before calling len(): a bare cell number has no
    # length, and an empty selection must short-circuit in both branches.
    if misc.is_number(cell):
        n_cells = 1
        cell = int(cell)
    else:
        n_cells = len(cell)
        if n_cells == 0:
            # if there are no cells return empty arrays
            return np.array([]), np.array([])

    # density was validated above, so being given is enough to select
    # density mode
    density_mode = density is not None
    if density_mode:
        n = int(SPHERE_AREA * 1. / self.grid.npix * n_cells * density)

    try:
        n = int(n)
    except ValueError:
        raise ValueError("Sample count must be a number, not '%s'" % type(n))

    if n < 0:
        raise ValueError("Sample count must be greater than 0, not %s" % str(n))

    if n == 0:
        return np.array([]), np.array([])

    lon_out = []
    lat_out = []
    count = 0
    loop = 0
    while count < n:
        # never draw fewer than min_sample points in one batch
        remaining = max(min_sample, n - count)
        lon, lat = self.grid.random_sample(cell, remaining)

        # discard the points that fall outside the polygon mask
        sel = self.contains(lon, lat)
        lon, lat = lon[sel], lat[sel]

        count += len(lon)
        lon_out.append(lon)
        lat_out.append(lat)

        if density_mode:
            # a single batch already has the requested density
            break

        # Count the finished batch so the guard below can fire; without
        # this a mask that rejects every point would loop forever.
        loop += 1
        if count < n and loop > max_loops:
            raise RuntimeError("sample hit max loops! %i" % max_loops)

    lon_out = np.concatenate(lon_out)
    lat_out = np.concatenate(lat_out)

    if not density_mode:
        # the last batch may overshoot; trim to the requested count
        lon_out = lon_out[:n]
        lat_out = lat_out[:n]

    return lon_out, lat_out
# Load the IDs of notice posts recorded by previous runs.
with open('srl_notices.txt', 'r') as f:
    num = f.read().split(',')

# Fetch the board page. The context manager closes the connection even
# when reading fails, which the manual open/close sequence did not.
with urllib.request.urlopen(url) as response:
    data = response.read()
text = data.decode('utf-8')

count_new = 0
srl_arr = []
text_splitted = text.split('document_srl=')

# Append newly seen IDs; the file is only opened once the page has been
# fetched and decoded, and is closed even if the loop raises.
with open('srl_notices.txt', 'a') as f:
    # The first fragment precedes any 'document_srl=' marker, so skip it.
    for fragment in text_splitted[1:]:
        srl = fragment.split('">')[0].split('#comment')[0]
        # Skip non-numeric matches, and IDs already collected from this
        # page (the latter prevents duplication).
        if not is_number(srl) or srl in srl_arr:
            continue
        srl_arr.append(srl)
        if srl not in num:
            count_new += 1
            f.write(',' + srl)
            print('New post found : ' + srl)

if count_new != 0:
    print('Started generating feed...')
    # make FeedGenerator
    fg = FeedGenerator()
    fg.id('asdf')
    fg.title('SNU Physics Board RSS feed - notices')