Example #1
0
def fetch_sources():
    """Download rule files from every configured RuleSource and bulk-import them.

    Plain-text sources are parsed straight from the response body. Sources
    ending in ``gz`` are streamed, buffered in memory, unpacked as a gzipped
    tarball, and every contained ``*.rules`` member is parsed from a
    temporary directory which is removed afterwards.
    """
    # Local imports so this fix is self-contained; all stdlib.
    import shutil
    import tempfile
    from io import BytesIO

    app.logger.info('Fetching sources from {} sources.'.format(
        RuleSource.query.count()))
    rules = []
    for src in RuleSource.query:
        # Download rules from every source.
        app.logger.info('Downloading from "{}".'.format(src.uri))
        # Gzip archives are streamed and unpacked instead of parsed directly.
        stream = src.uri.endswith('gz')
        resp = requests.get(src.uri, stream=stream)
        if resp.status_code != 200:
            # Was a silent `pass`; log so failed sources are visible.
            app.logger.warning('Skipping "{}": HTTP {}.'.format(
                src.uri, resp.status_code))
            continue
        if not stream:
            # rules will contain all parsed rules.
            rules.extend(from_buffer(resp.text))
            continue
        # Unpredictable temp dir instead of a guessable /tmp path.
        tmpdir = tempfile.mkdtemp(prefix='{}-'.format(src.name))
        # Tar content is binary: buffer it in BytesIO (StringIO would
        # fail on bytes chunks) and use a sane chunk size (the
        # iter_content() default is 1 byte).
        ziprules = BytesIO()
        for chunk in resp.iter_content(chunk_size=8192):
            ziprules.write(chunk)
        ziprules.seek(0)
        try:
            zrules = tarfile.open(fileobj=ziprules, mode='r:gz')
        except tarfile.TarError as terr:
            app.logger.warning('Error in rule file: {}\n{}'.format(
                src.uri, str(terr)))
        else:
            # Ensure the tarfile is closed once we are done with it.
            with zrules:
                ruleslist = []
                for member in zrules.getmembers():
                    if member.name.endswith('.rules') and member.isfile():
                        # Keep track of extracted filenames.
                        ruleslist.append(member.name)
                        zrules.extract(member, path=tmpdir)
                # All rule files found are now extracted into tmpdir.
                for rname in ruleslist:
                    try:
                        rulepath = os.path.join(tmpdir, rname)
                        with open(rulepath, 'rb') as rfile:
                            rules.extend(from_buffer(rfile.read()))
                    except Exception as e:
                        app.logger.exception(
                            "Unhandled exception: {}. Continuing".format(
                                e))
                        continue
            # Remove the whole extraction tree regardless of the
            # archive's internal layout (the old rmdir pair assumed a
            # single 'rules/' subdirectory and an otherwise-empty dir).
            shutil.rmtree(tmpdir, ignore_errors=True)
    app.logger.info('Bulk importing {} rules.'.format(len(rules)))
    Rule.bulk_import(rules)
    render_rules()
Example #2
0
def fetch_sources():
    """Fetch every RuleSource, parse its rules, and bulk-import the result.

    Text sources are parsed directly; ``*gz`` sources are streamed into
    memory, opened as gzipped tarballs, and each ``*.rules`` member is
    extracted to a temporary directory, parsed, then cleaned up.
    """
    # Stdlib-only local imports keep this change self-contained.
    import shutil
    import tempfile
    from io import BytesIO

    app.logger.info('Fetching sources from {} sources.'.format(
        RuleSource.query.count()))
    rules = []
    for src in RuleSource.query:
        # Download rules from every source.
        app.logger.info('Downloading from "{}".'.format(src.uri))
        # Gzipped sources get a streamed download and in-memory unpack.
        stream = src.uri.endswith('gz')
        resp = requests.get(src.uri, stream=stream)
        if resp.status_code != 200:
            # Previously a silent `pass`; surface the failure instead.
            app.logger.warning(
                'Skipping "{}": HTTP {}.'.format(src.uri, resp.status_code))
            continue
        if not stream:
            # rules will contain all parsed rules.
            rules.extend(from_buffer(resp.text))
            continue
        # mkdtemp gives a non-guessable path (the old scheme was
        # predictable and raced on collisions).
        tmpdir = tempfile.mkdtemp(prefix='{}-'.format(src.name))
        # The archive is binary, so a BytesIO buffer is required
        # (StringIO rejects bytes); chunk_size avoids the 1-byte
        # iter_content() default.
        ziprules = BytesIO()
        for chunk in resp.iter_content(chunk_size=8192):
            ziprules.write(chunk)
        ziprules.seek(0)
        try:
            zrules = tarfile.open(fileobj=ziprules, mode='r:gz')
        except tarfile.TarError as terr:
            app.logger.warning(
                'Error in rule file: {}\n{}'.format(src.uri, str(terr)))
        else:
            # Close the tarfile deterministically.
            with zrules:
                ruleslist = []
                for member in zrules.getmembers():
                    if member.name.endswith('.rules') and member.isfile():
                        # Keep track of extracted filenames.
                        ruleslist.append(member.name)
                        zrules.extract(member, path=tmpdir)
                # All rule files found are now extracted into tmpdir.
                for rname in ruleslist:
                    try:
                        rulepath = os.path.join(tmpdir, rname)
                        with open(rulepath, 'rb') as rfile:
                            rules.extend(from_buffer(rfile.read()))
                    except Exception as e:
                        app.logger.exception("Unhandled exception: {}. Continuing".format(e))
                        continue
            # rmtree handles any archive layout; the old paired rmdir
            # calls raised whenever the layout differed from 'rules/'.
            shutil.rmtree(tmpdir, ignore_errors=True)
    app.logger.info('Bulk importing {} rules.'.format(len(rules)))
    Rule.bulk_import(rules)
    render_rules()
Example #3
0
def in_memory_csv(request):
    """Return an in-memory CSV of random EWKT points, closed on teardown.

    Each row is ``SRID=4326;POINT(lon lat),name,age`` encoded as UTF-8
    bytes; the buffer is rewound before being returned.
    """
    file_obj = InMemIO()
    # Close the buffer when the requesting test finishes.
    request.addfinalizer(file_obj.close)

    template = u'SRID=4326;POINT({lon} {lat}),{name},{age}\n'
    for _ in range(IN_MEMORY_CSV_NROWS):
        line = template.format(
            lon=random.uniform(-170.0, 170.0),
            lat=random.uniform(-80.0, 80.0),
            name=random.choice(['fulano', 'mengano', 'zutano', 'perengano']),
            age=random.randint(18, 99))
        file_obj.write(bytearray(line, 'utf-8'))
    file_obj.seek(0)
    return file_obj
def in_memory_csv(request):
    """Build an in-memory CSV fixture of random geographic rows.

    Rows look like ``SRID=4326;POINT(lon lat),name,age`` and are written
    as UTF-8 bytes. The stream is rewound to the start and closed via a
    finalizer on the requesting test.
    """
    buf = InMemIO()

    def _close():
        buf.close()

    request.addfinalizer(_close)

    def _row():
        # One random CSV line: an EWKT point, a name, and an age.
        return u'SRID=4326;POINT({lon} {lat}),{name},{age}\n'.format(
            lon=random.uniform(-170.0, 170.0),
            lat=random.uniform(-80.0, 80.0),
            name=random.choice(['fulano', 'mengano', 'zutano', 'perengano']),
            age=random.randint(18, 99))

    for _ in range(IN_MEMORY_CSV_NROWS):
        buf.write(bytearray(_row(), 'utf-8'))
    buf.seek(0)
    return buf
Example #5
0
def render_html(corpus, element, value=TEXT, css=None, encapsulate_body=False):
    """Highlight interesting corpus elements and return them as HTML.

    Parameters
    ----------
    corpus:
        Object exposing ``root_elements``; each root provides ``text`` and
        an ``elements(name)`` lookup.
    element: str
        The name of the element as defined in names.py.
    value: str or func
        If string, assume elements are dictionaries and this is a valid key to extract the value.
        If function, then gives the element as the argument and expects the function to return valid string.
        If none, just use element text in filtering.
    css: list of (str, str)
        Each tuple defines a regular expression and a string containing CSS style code
        that will be applied to elements whose value is matched by the regex.
        The regexes will be tested in the order given and the CSS of the first
        matching regex will be given.
        Defaults to highlighting everything with a yellow background.
    encapsulate_body: boolean
        If True, adds HTML5 header and body to HTML.

    Returns
    -------
    str
        The rendered HTML.
    """
    # The default was previously a mutable list in the signature; use the
    # None-sentinel idiom and build the equivalent default here.
    if css is None:
        css = [('.*', 'background-color:yellow')]

    stream = StringIO()
    if encapsulate_body:
        stream.write(HEADER.format(element))

    # Pre-compile the regexes once instead of matching raw strings.
    compiled_css = [(re.compile(regex), style) for regex, style in css]

    for root in corpus.root_elements:
        stream.write('<div>\n')
        elems = root.elements(element)
        spans = [e.span for e in elems]
        # Extract a display value per element, either via callable or key.
        if callable(value):
            values = [value(e) for e in elems]
        else:
            values = [e[value] for e in elems]
        styles = collect_styles(values, compiled_css)
        assert len(spans) == len(styles)
        stream.write(insert_spans(root.text, spans, styles))
        stream.write('</div>\n')

    if encapsulate_body:
        stream.write(FOOTER)

    return stream.getvalue()
  def to_precomputed(self):
    """Serialize the skeleton to the binary 'precomputed' layout.

    Layout: two little-endian uint32s (vertex count, edge count),
    followed by the float32 vertex array, the uint32 edge array, and
    then each extra per-vertex attribute in declaration order.

    Raises SkeletonEncodeError when an attribute's length does not
    match the number of vertices.
    """
    links = self.edges.astype(np.uint32)
    verts = self.vertices.astype(np.float32)

    buf = BytesIO()

    # Header: number of vertices, then number of edges.
    buf.write(struct.pack('<II', verts.size // 3, links.size // 2))
    buf.write(verts.tobytes('C'))
    buf.write(links.tobytes('C'))

    def _append(values, dtype, label):
      # Absent (None) attributes are simply skipped.
      if values is None:
        return

      values = values.astype(dtype)

      # Per-vertex attributes must align with the vertex array.
      if values.shape[0] != verts.shape[0]:
        raise SkeletonEncodeError("Number of {} {} ({}) must match the number of vertices ({}).".format(
          dtype, label, values.shape[0], verts.shape[0]
        ))

      buf.write(values.tobytes('C'))

    for spec in self.extra_attributes:
      _append(getattr(self, spec['id']), np.dtype(spec['data_type']), spec['id'])

    return buf.getvalue()
Example #7
0
  def encode(self):
    """Encode the skeleton as bytes: header, vertices, edges, radii, types.

    The header is two little-endian uint32s (vertex count, edge count),
    followed by the float32 vertices, the uint32 edges, then the optional
    radii (float32) and SWC vertex types (uint8) when present.

    Raises SkeletonEncodeError if an attribute's length differs from the
    number of vertices.
    """
    links = self.edges.astype(np.uint32)
    verts = self.vertices.astype(np.float32)

    out = BytesIO()

    # Counts first: vertices then edges, both little-endian uint32.
    out.write(struct.pack('<II', verts.size // 3, links.size // 2))
    out.write(verts.tobytes('C'))
    out.write(links.tobytes('C'))

    def _emit(arr, dtype, label):
      # Optional attributes may be None; nothing is written then.
      if arr is None:
        return

      arr = arr.astype(dtype)

      # Each attribute carries one value per vertex.
      if arr.shape[0] != verts.shape[0]:
        raise SkeletonEncodeError("Number of {} {} ({}) must match the number of vertices ({}).".format(
          dtype, label, arr.shape[0], verts.shape[0]
        ))

      out.write(arr.tobytes('C'))

    _emit(self.radii, np.float32, 'Radii')
    _emit(self.vertex_types, np.uint8, 'SWC Vertex Types')

    return out.getvalue()