def fetch_sources():
    """Download rule files from every configured RuleSource, parse them,
    and bulk-import the resulting rules, then re-render the rule files.

    Gzipped tarball sources are streamed into an in-memory buffer, the
    contained ``*.rules`` members are extracted to a temp directory,
    parsed with ``from_buffer`` and removed.  Plain-text sources are
    parsed directly from the response body.
    """
    from io import BytesIO

    app.logger.info('Fetching sources from {} sources.'.format(
        RuleSource.query.count()))
    rules = []
    for src in RuleSource.query:
        # Download rules from every source.
        app.logger.info('Downloading from "{}".'.format(src.uri))
        # If a gzip file, perform a streamed download and buffer it
        # before untarring.
        stream = src.uri.endswith('gz')
        resp = requests.get(src.uri, stream=stream)
        if resp.status_code != 200:
            # Failed downloads used to be silently ignored; log them so
            # a dead source is visible.
            app.logger.warning('Download failed for "{}" (HTTP {}).'.format(
                src.uri, resp.status_code))
            continue
        if stream:
            tmpdir = '/tmp/{}-{}/'.format(src.name,
                                          datetime.utcnow().isoformat())
            os.mkdir(tmpdir)
            # iter_content() yields bytes, so a binary buffer is required
            # (StringIO breaks on Python 3).  An explicit chunk_size
            # avoids requests' byte-at-a-time default.
            ziprules = BytesIO()
            for chunk in resp.iter_content(chunk_size=8192):
                ziprules.write(chunk)
            ziprules.seek(0)
            try:
                zrules = tarfile.open(fileobj=ziprules, mode='r:gz')
            except tarfile.TarError as terr:
                app.logger.warning('Error in rule file: {}\n{}'.format(
                    src.uri, str(terr)))
            else:
                ruleslist = []
                for member in zrules.getmembers():
                    if member.name.endswith('.rules') and member.isfile():
                        # Keep track of extracted filenames.
                        ruleslist.append(member.name)
                        zrules.extract(member, path=tmpdir)
                # All rule files found are now extracted into tmpdir.
                for rname in ruleslist:
                    rulepath = os.path.join(tmpdir, rname)
                    try:
                        with open(rulepath, 'rb') as rfile:
                            rules.extend(from_buffer(rfile.read()))
                        os.remove(rulepath)
                    except Exception as e:
                        app.logger.exception(
                            "Unhandled exception: {}. Continuing".format(e))
                        continue
                # A subdirectory /rules/ is created when extracting,
                # remove that first, then the whole tmpdir.
                os.rmdir(os.path.join(tmpdir, 'rules'))
                os.rmdir(tmpdir)
        else:
            # rules will contain all parsed rules.
            rules.extend(from_buffer(resp.text))
    app.logger.info('Bulk importing {} rules.'.format(len(rules)))
    Rule.bulk_import(rules)
    render_rules()
def fetch_sources():
    """Download rule files from every configured RuleSource, parse them,
    and bulk-import the resulting rules, then re-render the rule files.

    Gzipped tarball sources are streamed into an in-memory buffer, the
    contained ``*.rules`` members are extracted to a temp directory,
    parsed with ``from_buffer`` and removed.  Plain-text sources are
    parsed directly from the response body.
    """
    from io import BytesIO

    app.logger.info('Fetching sources from {} sources.'.format(
        RuleSource.query.count()))
    rules = []
    for src in RuleSource.query:
        # Download rules from every source.
        app.logger.info('Downloading from "{}".'.format(src.uri))
        # If a gzip file, perform a streamed download and buffer it
        # before untarring.
        stream = src.uri.endswith('gz')
        resp = requests.get(src.uri, stream=stream)
        if resp.status_code != 200:
            # Failed downloads used to be silently ignored; log them so
            # a dead source is visible.
            app.logger.warning('Download failed for "{}" (HTTP {}).'.format(
                src.uri, resp.status_code))
            continue
        if stream:
            tmpdir = '/tmp/{}-{}/'.format(src.name,
                                          datetime.utcnow().isoformat())
            os.mkdir(tmpdir)
            # iter_content() yields bytes, so a binary buffer is required
            # (StringIO breaks on Python 3).  An explicit chunk_size
            # avoids requests' byte-at-a-time default.
            ziprules = BytesIO()
            for chunk in resp.iter_content(chunk_size=8192):
                ziprules.write(chunk)
            ziprules.seek(0)
            try:
                zrules = tarfile.open(fileobj=ziprules, mode='r:gz')
            except tarfile.TarError as terr:
                app.logger.warning(
                    'Error in rule file: {}\n{}'.format(src.uri, str(terr)))
            else:
                ruleslist = []
                for member in zrules.getmembers():
                    if member.name.endswith('.rules') and member.isfile():
                        # Keep track of extracted filenames.
                        ruleslist.append(member.name)
                        zrules.extract(member, path=tmpdir)
                # All rule files found are now extracted into tmpdir.
                for rname in ruleslist:
                    rulepath = os.path.join(tmpdir, rname)
                    try:
                        with open(rulepath, 'rb') as rfile:
                            rules.extend(from_buffer(rfile.read()))
                        os.remove(rulepath)
                    except Exception as e:
                        app.logger.exception(
                            "Unhandled exception: {}. Continuing".format(e))
                        continue
                # A subdirectory /rules/ is created when extracting,
                # remove that first, then the whole tmpdir.
                os.rmdir(os.path.join(tmpdir, 'rules'))
                os.rmdir(tmpdir)
        else:
            # rules will contain all parsed rules.
            rules.extend(from_buffer(resp.text))
    app.logger.info('Bulk importing {} rules.'.format(len(rules)))
    Rule.bulk_import(rules)
    render_rules()
def in_memory_csv(request):
    """Return an in-memory CSV of random EWKT points, rewound to the
    start; the buffer is closed on fixture teardown."""
    buf = InMemIO()

    def _close_buf():
        buf.close()
    request.addfinalizer(_close_buf)

    row_template = u'SRID=4326;POINT({lon} {lat}),{name},{age}\n'
    sample_names = ['fulano', 'mengano', 'zutano', 'perengano']
    for _ in range(IN_MEMORY_CSV_NROWS):
        # Keyword arguments evaluate left-to-right, preserving the
        # original sequence of random draws.
        line = row_template.format(
            lon=random.uniform(-170.0, 170.0),
            lat=random.uniform(-80.0, 80.0),
            name=random.choice(sample_names),
            age=random.randint(18, 99))
        buf.write(bytearray(line, 'utf-8'))
    buf.seek(0)
    return buf
def in_memory_csv(request):
    """Build an in-memory CSV of random EWKT point rows and return the
    buffer positioned at offset 0; closed automatically at teardown."""
    file_obj = InMemIO()
    # Bound method works directly as the finalizer callback.
    request.addfinalizer(file_obj.close)

    for _row_idx in range(IN_MEMORY_CSV_NROWS):
        # Arguments evaluate left-to-right, keeping the original order
        # of random draws.
        record = u'SRID=4326;POINT({lon} {lat}),{name},{age}\n'.format(
            lon=random.uniform(-170.0, 170.0),
            lat=random.uniform(-80.0, 80.0),
            name=random.choice(['fulano', 'mengano', 'zutano', 'perengano']),
            age=random.randint(18, 99))
        file_obj.write(bytearray(record, 'utf-8'))

    file_obj.seek(0)
    return file_obj
def render_html(corpus, element, value=TEXT, css=None,
                encapsulate_body=False):
    """Highlight interesting corpus elements and return them as HTML

    Parameters
    ----------
    element: str
        The name of the element as defined in names.py.
    value: str or func
        If string, assume elements are dictionaries and this is a valid
        key to extract the value. If function, then gives the element as
        the argument and expects the function to return valid string. If
        none, just use element text in filtering.
    css: list of (str, str), optional
        Each tuple defines a regular expression and a string containing
        CSS style code that will be applied to elements whose value is
        matched by the regex. The regexes will be tested in the order
        given and the CSS of the first matching regex will be given.
        Defaults to highlighting everything in yellow.
    encapsulate_body: boolean
        If True, adds HTML5 header and body to HTML.
    """
    # None sentinel instead of a mutable default argument; the effective
    # default is unchanged.
    if css is None:
        css = [('.*', 'background-color:yellow')]
    stream = StringIO()
    if encapsulate_body:
        stream.write(HEADER.format(element))
    # Pre-compile the patterns once; they are reused for every root.
    css = [(re.compile(regex), style) for regex, style in css]
    for root in corpus.root_elements:
        stream.write('<div>\n')
        elems = root.elements(element)
        spans = [e.span for e in elems]
        if callable(value):
            values = [value(e) for e in elems]
        else:
            values = [e[value] for e in elems]
        styles = collect_styles(values, css)
        # collect_styles must produce one style per span.
        assert len(spans) == len(styles)
        stream.write(insert_spans(root.text, spans, styles))
        stream.write('</div>\n')
    if encapsulate_body:
        stream.write(FOOTER)
    return stream.getvalue()
def to_precomputed(self):
    """Serialize this skeleton into a binary blob: a two-uint32 header
    (vertex count, edge count), then float32 vertex positions, uint32
    edges, and each extra per-vertex attribute in declaration order."""
    verts = self.vertices.astype(np.float32)
    links = self.edges.astype(np.uint32)

    buf = BytesIO()
    # Write number of positions and edges as first two uint32s.
    buf.write(struct.pack('<II', verts.size // 3, links.size // 2))
    buf.write(verts.tobytes('C'))
    buf.write(links.tobytes('C'))

    def emit(values, dtype, text):
        # Absent attributes are simply skipped.
        if values is None:
            return
        values = values.astype(dtype)
        # Every attribute must supply exactly one value per vertex.
        if values.shape[0] != verts.shape[0]:
            raise SkeletonEncodeError("Number of {} {} ({}) must match the number of vertices ({}).".format(
                dtype, text, values.shape[0], verts.shape[0]
            ))
        buf.write(values.tobytes('C'))

    for spec in self.extra_attributes:
        emit(getattr(self, spec['id']), np.dtype(spec['data_type']), spec['id'])

    return buf.getvalue()
def encode(self):
    """Serialize this skeleton into a binary blob: a two-uint32 header
    (vertex count, edge count), then float32 vertex positions, uint32
    edges, optional float32 radii, and optional uint8 SWC vertex types."""
    verts = self.vertices.astype(np.float32)
    links = self.edges.astype(np.uint32)

    buf = BytesIO()
    # Write number of positions and edges as first two uint32s.
    buf.write(struct.pack('<II', verts.size // 3, links.size // 2))
    buf.write(verts.tobytes('C'))
    buf.write(links.tobytes('C'))

    def emit(values, dtype, text):
        # Absent attributes are simply skipped.
        if values is None:
            return
        values = values.astype(dtype)
        # Every attribute must supply exactly one value per vertex.
        if values.shape[0] != verts.shape[0]:
            raise SkeletonEncodeError("Number of {} {} ({}) must match the number of vertices ({}).".format(
                dtype, text, values.shape[0], verts.shape[0]
            ))
        buf.write(values.tobytes('C'))

    emit(self.radii, np.float32, 'Radii')
    emit(self.vertex_types, np.uint8, 'SWC Vertex Types')

    return buf.getvalue()